From 9262f8c555444ec8d04b17b0198087b67f02e178 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20FERNANDEZ=20NU=C3=91EZ?=
 <nicolas.fernandez@ird.fr>
Date: Wed, 8 Jan 2025 17:15:25 +0100
Subject: [PATCH] Feat: ver. 2025.01 (Prepare for Snakedeploy)

---
 Run_GeVarLi.sh                                |  140 +-
 configuration/config.yaml                     |   50 +-
 version.txt                                   |    2 +-
 workflow/Snakefile                            | 1458 ++++++++++++++++-
 .../bamclipper_v.1.0.0.yaml                   |    0
 .../bcftools_v.1.20.yaml                      |    0
 .../bedtools_v.2.31.1.yaml                    |    0
 .../bowtie2_v.2.5.4.yaml                      |    0
 .../{environments => envs}/bwa_v.0.7.18.yaml  |    0
 .../cutadapt_v.4.9.yaml                       |    0
 .../fastq-screen_v.0.15.3.yaml                |    0
 .../fastqc_v.0.12.1.yaml                      |    0
 .../{environments => envs}/gawk_v.5.3.0.yaml  |    0
 .../{environments => envs}/ivar_v.1.4.3.yaml  |    0
 .../minimap2_v.2.28.yaml                      |    0
 .../multiqc_v.1.23.yaml                       |    0
 .../nextclade_v.3.9.1.yaml                    |    0
 .../pangolin_v.4.3.yaml                       |    0
 .../samtools_v.1.20.yaml                      |    0
 .../sickle-trim_v.1.33.yaml                   |    0
 .../{environments => envs}/tsv2vcf_v.1.1.yaml |    0
 .../workflow-base_v.2024.11.yaml              |    0
 workflow/{snakefiles => rules}/gevarli.smk    |  167 +-
 .../indexing_genomes.smk                      |    0
 .../{snakefiles => rules}/quality_control.smk |    0
 25 files changed, 1684 insertions(+), 133 deletions(-)
 rename workflow/{environments => envs}/bamclipper_v.1.0.0.yaml (100%)
 rename workflow/{environments => envs}/bcftools_v.1.20.yaml (100%)
 rename workflow/{environments => envs}/bedtools_v.2.31.1.yaml (100%)
 rename workflow/{environments => envs}/bowtie2_v.2.5.4.yaml (100%)
 rename workflow/{environments => envs}/bwa_v.0.7.18.yaml (100%)
 rename workflow/{environments => envs}/cutadapt_v.4.9.yaml (100%)
 rename workflow/{environments => envs}/fastq-screen_v.0.15.3.yaml (100%)
 rename workflow/{environments => envs}/fastqc_v.0.12.1.yaml (100%)
 rename workflow/{environments => envs}/gawk_v.5.3.0.yaml (100%)
 rename workflow/{environments => envs}/ivar_v.1.4.3.yaml (100%)
 rename workflow/{environments => envs}/minimap2_v.2.28.yaml (100%)
 rename workflow/{environments => envs}/multiqc_v.1.23.yaml (100%)
 rename workflow/{environments => envs}/nextclade_v.3.9.1.yaml (100%)
 rename workflow/{environments => envs}/pangolin_v.4.3.yaml (100%)
 rename workflow/{environments => envs}/samtools_v.1.20.yaml (100%)
 rename workflow/{environments => envs}/sickle-trim_v.1.33.yaml (100%)
 rename workflow/{environments => envs}/tsv2vcf_v.1.1.yaml (100%)
 rename workflow/{environments => envs}/workflow-base_v.2024.11.yaml (100%)
 rename workflow/{snakefiles => rules}/gevarli.smk (90%)
 rename workflow/{snakefiles => rules}/indexing_genomes.smk (100%)
 rename workflow/{snakefiles => rules}/quality_control.smk (100%)

diff --git a/Run_GeVarLi.sh b/Run_GeVarLi.sh
index 95a7c63..c550985 100755
--- a/Run_GeVarLi.sh
+++ b/Run_GeVarLi.sh
@@ -10,13 +10,13 @@
 ###                                                                         ###
 ###I###R###D######U###2###3###3#######T###R###A###N###S###V###I###H###M###I####
 # Name ___________________ Run_GeVarLi.sh
-# Version ________________ v.2024.10
+# Version ________________ v.2025.01
 # Author _________________ Nicolas Fernandez
 # Affiliation ____________ IRD_U233_TransVIHMI
 # Aim ____________________ Bash script running gevarli.smk snakefile
 # Date ___________________ 2021.10.12
-# Latest modifications ___ 2024.11.15 (Update Snakemake)
-# Use ____________________ 'bash ./Run_GeVarLi.sh'
+# Latest modifications ___ 2025.01.08 (Prepare for Snakedeploy)
+# Use ____________________ 'bash Run_GeVarLi.sh'
 
 ###############################################################################
 ### COLORS ###
@@ -31,7 +31,7 @@ nc="\033[0m"       # no color
 ###############################################################################
 ### ABOUT ###
 #############
-version="2024.11"                                      # Version
+version="2025.01"                                      # Version
 workdir=$(cd "$(dirname "${BASH_SOURCE[0]}" )" && pwd) # Get working directory
 sample_test="SARS-CoV-2_Omicron-BA1_Covid-Seq-Lib-on-MiSeq_250000-reads"
 
@@ -47,8 +47,8 @@ ${blue}Author${nc} _________________ Nicolas Fernandez
 ${blue}Affiliation${nc} ____________ IRD_U233_TransVIHMI
 ${blue}Aim${nc} ____________________ Bash script for ${red}Ge${nc}nome assembling, ${red}Var${nc}iant calling and ${red}Li${nc}neage assignation
 ${blue}Date${nc} ___________________ 2021.10.12
-${blue}Latest modifications${nc} ___ 2024.10.01 (Dedicated installation scripts creation)
-${blue}Use${nc} ____________________ '${ylo}./Run_GeVarLi.sh${nc}'
+${blue}Latest modifications${nc} ___ 2025.01.08 (Prepare for Snakedeploy)
+${blue}Use${nc} ____________________ '${ylo}bash Run_GeVarLi.sh${nc}'
 "
 
 
@@ -158,9 +158,9 @@ ${green}#####${nc} ${red}CONDA ACTIVATION${nc} ${green}#####${nc}
 ${green}----------------------------${nc}
 "
 
-echo -e "conda activate ${ylo}workflow-base_v.${version}${nc}"
-
-conda activate workflow-base_v.${version}
+workflowbase_version="2024.11"
+echo -e "conda activate ${ylo}workflow-base_v.${workflowbase_version}${nc}"
+conda activate workflow-base_v.${workflowbase_version}
 
 
 ###############################################################################
@@ -179,6 +179,7 @@ yq_version=$(yq --version | sed 's/yq //')                      # Yq version
 rename_version="1.601"                                          # Rename version    (ver. 1.601  from 2024-02)
 graphviz_version="11.0.0"                                       # GraphViz version  (ver. 11.0.0 from 2024-02)
 #graphviz_version=$#(dot -V | sed 's/dot - graphviz version //')  # GraphViz version  (ver. 11.0.0 from 2024-02)
+nextclade_version=""
 
 fastq=$(expr $(ls -l ${workdir}/resources/reads/*.fastq.gz 2> /dev/null | wc -l)) # Get fastq.gz files count
 if [[ "${fastq}" == "0" ]]                                                         # If no sample,
@@ -241,9 +242,7 @@ ${blue}Samples processed${nc} ______ ${red}${samples}${nc} samples (${ylo}${fast
 
 ${blue}Snakemake version${nc} ______ ${ylo}${snakemake_version}${nc}
 ${blue}Conda version${nc} __________ ${ylo}${conda_version}${nc}
-${blue}Conda frontend${nc} _________ ${ylo}${conda_frontend}${nc}
 ${blue}Mamba version${nc} __________ ${ylo}${mamba_version}${nc}  
-${blue}Nextclade version${nc} ______ ${ylo}${nextclade_version}${nc}
 
 ${blue}Quality Ccontrol${nc} _______ [ ${red}${quality}${nc} ]
 ${blue}Trimming${nc} _______________ [ ${red}${trimming}${nc} ]
@@ -258,6 +257,7 @@ ${blue}Reference genome${nc} _______ ${ylo}${reference}${nc}
 ${blue}Aligner${nc} ________________ ${ylo}${aligner}${nc}
 ${blue}Min coverage${nc} ___________ ${red}${min_cov}${nc} X
 ${blue}Min allele frequency${nc} ___ ${red}${min_freq}${nc}
+
 ${blue}Nextclade dataset${nc} ______ ${red}${nextclade_dataset}${nc}
 "
 
@@ -324,22 +324,26 @@ fi
  
 
 ###############################################################################
-### RENAME SAMPLES ###
-######################
+### RENAME SYMLINKS ###
+#######################
 echo -e "
 ${green}------------------------------------------------------------------------${nc}
-${green}#####${nc} ${red}RENAME FASTQ FILES${nc} ${green}#####${nc}
-${green}------------------------------${nc}
+${green}#####${nc} ${red}RENAME FASTQ SYMLINKS${nc} ${green}#####${nc}
+${green}---------------------------------${nc}
 "
 
-# Rename fastq files to remove "_001" Illumina pattern (mandatory)
-## De/comment line (#) if you want keep Illumina barcode-ID and/or Illumina line-ID
-echo -e "Removing ${red}'_S'${nc} index tag ID"
-rename "s/_S\d+_/_/" ${workdir}/resources/reads/*.fastq.gz 2> /dev/null                # Remove barcode-ID like {_S001_}
-echo -e "Removing ${red}'_L'${nc} line tag ID"
-rename "s/_L\d+_/_/" ${workdir}/resources/reads/*.fastq.gz 2> /dev/null                # Remove line-ID ID like {_L001_}
-echo -e "Removing ${red}'_001'${nc} illumina tag ID"
-rename "s/_001.fastq.gz/.fastq.gz/" ${workdir}/resources/reads/*.fastq.gz 2> /dev/null # Remove end-name ID like {_001}.fastq.gz
+# Remove tags from symlinks:
+## barcode-ID like {_S001_}
+## line-ID ID like {_L001_}
+## end-name ID like {_001}.fastq.gz
+mkdir -p ${workdir}/resources/symlinks/
+for fastq in ${workdir}/resources/reads/*.fastq.gz; do
+    symlinks=$(echo $(basename "${fastq}") | \
+		   sed -E "s/_S[0-9]+_//" | \
+		   sed -E "s/_L[0-9]+_//" | \
+		   sed -E "s/_001\.fastq\.gz/.fastq.gz/")
+    ln -s "${fastq}" "${workdir}/resources/symlinks/${symlinks}"
+done
 
 echo -e "
 If you want to keep Illumina ${blue}barcode-ID${nc} and/or Illumina ${blue}line-ID${nc}, please edit ${ylo}Run_GeVarLi.sh${nc} script (l.335).
@@ -355,9 +359,6 @@ ${green}#####${nc} ${red}SNAKEMAKE PIPELINES${nc} ${green}#####${nc}
 ${green}-------------------------------${nc}
 "
 
-# MODULES
-snakefiles_list="indexing_genomes gevarli"
-
 echo -e "
 ${blue}## Unlocking Working Directory ##${nc}
 ${blue}---------------------------------${nc}
@@ -367,14 +368,11 @@ ${blue}---------------------------------${nc}
 # Re-run all jobs the output of which is recognized as incomplete.
 # Remove a lock on the working directory.
 
-for snakefile in ${snakefiles_list} ; do
-    echo -e "${blue}-- ${snakefile} --${nc}" ;
-    snakemake \
-	--directory ${workdir}/ \
-        --snakefile ${workdir}/workflow/snakefiles/${snakefile}.smk \
-        --rerun-incomplete \
-        --unlock ;
-done
+snakemake \
+    --directory ${workdir}/ \
+    --snakefile ${workdir}/workflow/Snakefile \
+    --rerun-incomplete \
+    --unlock
 
 echo -e "
 ${blue}## Let's Run! ##${nc}
@@ -390,22 +388,17 @@ ${blue}----------------${nc}
 # If defined in the rule, run job in a conda environment.
 # Print out the shell commands that will be executed.
 
-for snakefile in ${snakefiles_list} ; do
-    echo -e "${blue}-- ${snakefile} --${nc}" ;
-    snakemake \
-        --directory ${workdir}/ \
-        --snakefile ${workdir}/workflow/snakefiles/${snakefile}.smk \
-        --cores ${max_threads} \
-        --max-threads ${max_threads} \
-        --resources mem_gb=${max_memory} \
-        --rerun-incomplete \
-        --keep-going \
-        --use-conda ;
-#        --quiet host \
-#        --quiet progress \
-#        --quiet rules ;
-done
-
+snakemake \
+    --directory ${workdir}/ \
+    --snakefile ${workdir}/workflow/Snakefile \
+    --cores ${max_threads} \
+    --max-threads ${max_threads} \
+    --resources mem_gb=${max_memory} \
+    --rerun-incomplete \
+    --keep-going \
+    --use-conda \
+    --quiet host
+# Possible choices: all, host, progress, rules
 
 ###############################################################################
 ### CONCATENATE RESULTS ###
@@ -477,27 +470,23 @@ mkdir -p ${workdir}/results/10_Reports/files-summaries/ 2> /dev/null
 graph_list="dag rulegraph filegraph"
 extention_list="pdf png"
 
-for snakefile in ${snakefiles_list} ; do
-    for graph in ${graph_list} ; do
-	for extention in ${extention_list} ; do
-	    snakemake \
-		--directory ${workdir}/ \
-                --snakefile ${workdir}/workflow/snakefiles/${snakefile}.smk \
-                --${graph} \
-	    | dot -T${extention} \
-            2> /dev/null \
-	    1> ${workdir}/results/10_Reports/graphs/${snakefile}_${graph}.${extention} ;
-	done ;
+for graph in ${graph_list} ; do
+    for extention in ${extention_list} ; do
+        snakemake \
+            --directory ${workdir}/ \
+            --snakefile ${workdir}/workflow/Snakefile \
+            --${graph} \
+	| dot -T${extention} \
+        2> /dev/null \
+	1> ${workdir}/results/10_Reports/graphs/${graph}.${extention} ;
     done ;
 done
 
-for snakefile in ${snakefiles_list} ; do
-    snakemake \
-        --directory ${workdir} \
-        --snakefile ${workdir}/workflow/snakefiles/${snakefile}.smk \
-        --summary > ${workdir}/results/10_Reports/files-summaries/${snakefile}_files-summary.txt \
-    2> /dev/null ;
-done
+snakemake \
+    --directory ${workdir} \
+    --snakefile ${workdir}/workflow/Snakefile \
+    --summary > ${workdir}/results/10_Reports/files-summaries/files-summary.txt \
+    2> /dev/null
 
 cp ${config_file} ${workdir}/results/10_Reports/config.log 2> /dev/null
 
@@ -512,14 +501,15 @@ ${green}------------------------${nc}
 "
 
 # Save and deactive environments
-mkdir -p ${workdir}/results/10_Reports/conda_env/ 2> /dev/null
-cp ${workdir}/workflow/environments/*.yaml ${workdir}/results/10_Reports/conda_env/
+mkdir -p ${workdir}/results/10_Reports/conda_envs/ 2> /dev/null
+cp ${workdir}/workflow/envs/*.yaml ${workdir}/results/10_Reports/conda_envs/
 conda deactivate
 
 # Cleanup
 find ${workdir}/results/ -type f -empty -delete # Remove empty file (like empty log)
 find ${workdir}/results/ -type d -empty -delete # Remove empty directory
 rm -f ${workdir}/resources/reads/${sample_test}_R*.fastq.gz 2> /dev/null
+rm -rf ${workdir}/resources/symlinks/ 2> /dev/null
 
 # Timer
 time_stamp_end=$(date +"%Y-%m-%d %H:%M") # Get date / hour ending analyzes
@@ -542,8 +532,8 @@ Author _________________ Nicolas Fernandez
 Affiliation ____________ IRD_U233_TransVIHMI
 Aim ____________________ Bash script for GeVarLi
 Date ___________________ 2021.10.12
-Latest modifications ___ 2024.10.01 (Dedicated installation scripts creation)
-Run ____________________ ./Run_GeVarLi.sh
+Latest modifications ___ 2025.01.08 (Prepare for Snakedeploy)
+Run ____________________ 'bash Run_GeVarLi.sh'
 
 Operating System _______ ${os}
 Shell __________________ ${shell}
@@ -567,9 +557,8 @@ Samples processed _______ ${samples} samples (${ylo}${fastq} fastq files)
 
 Snakemake version _______ ${snakemake_version}
 Conda version ___________ ${conda_version}
-Conda frontend __________ ${conda_frontend}
 Mamba version ___________ ${mamba_version}  
-Nextclade version _______ ${nextclade_version}
+
 
 Quality Ccontrol ________ [ ${quality} ]
 Trimming ________________ [ ${trimming} ]
@@ -584,16 +573,17 @@ Reference genome ________ ${reference}
 Aligner _________________ ${aligner}
 Min coverage ____________ ${min_cov} X
 Min allele frequency ____ ${min_freq}
+
 Nextclade dataset _______ ${nextclade_dataset}
 " > ${workdir}/results/10_Reports/settings.log
 
 # Gzip reports directory
 cd ${workdir}/results/
 tar -zcf 10_Reports_archive.tar.gz 10_Reports
+cd ${workdir}
 
 # Gzip results directory
 #mkdir -p ${workdir}/archives/ 2> /dev/null
-#cd ${workdir}/
 #tar -zcf archives/Results_${time_stamp_archive}_${reference}_${aligner}-${min_cov}X_${samples}sp_archive.tar.gz results/
 
 echo -e "
diff --git a/configuration/config.yaml b/configuration/config.yaml
index c636b9d..6b1b4c6 100755
--- a/configuration/config.yaml
+++ b/configuration/config.yaml
@@ -36,12 +36,12 @@ resources:
 ###############################################################################
 modules: # GeVarLi modules analysis
   # Available options, set at least one: 
-  - 'quality'   # Perform reads quality control (FastQC, Fastq-Screen, MultiQC)      default: ON
+  #- 'quality'   # Perform reads quality control (FastQC, Fastq-Screen, MultiQC)      default: ON
   #- 'trimming'  # Keep trimmed reads files after alignment (Cutadapt / Sickle-trim) default: OFF
   #- 'cleapping' # Perform reads clipping (Bamclipper)                               default: OFF
   - 'consensus' # Perform reads mapping to reference (BWA, Bowtie2, Minimap2)        default: ON
-  - 'covstats'  # Perform coverage statistics (Fagstat, Covstats)                    default: ON
-  - 'nextclade' # Perform nextclade clade assignation (Nexclade)                     default: ON
+  #- 'covstats'  # Perform coverage statistics (Fagstat, Covstats)                    default: ON
+  #- 'nextclade' # Perform nextclade clade assignation (Nexclade)                     default: ON
   #- 'pangolin'  # Perform pangolin lineage assignation (Pangolin)                   default: OFF
   ###- 'gisaid'    # Perform gisaid submission (GisAid : TODO)                       default: OFF
   
@@ -51,7 +51,7 @@ consensus:
   reference: # Your reference, in fasta format (default: "SARS-CoV-2_Wuhan_MN-908947-3")
   # Available options (not exhaustive), set one:
     - 'SARS-CoV-2_Wuhan_MN-908947-3'            # SARS-CoV-2  (Nextclade and Pangolin)
-    #- 'Monkeypox-virus_Zaire_AF-380138-1'       # Monkeypox   (Nextclade and Pangolin)
+    - 'Monkeypox-virus_Zaire_AF-380138-1'       # Monkeypox   (Nextclade and Pangolin)
     #- 'Monkeypox-virus_UK_MT-903345-1'          # Monkeypox   (Nextclade and Pangolin)
     #- 'Swinepox-virus_India_MW-036632-1'        # Swinepox    (Nextclade)
     #- 'Ebola-virus_Zaire_AF-272001-1'           # Ebola       (na)
@@ -96,8 +96,8 @@ consensus:
   aligner: # Select your favorite aligner (default: 'bwa')
   # Available options, set one:
     - 'bwa'      # Better, Faster, (Stronger, Harder...)
-    #- 'bowtie2'  # Depreciated (slower),"sensitivity" requiried (see below "Bowtie2" options)
-    #- 'minimap2' # A versatile sequence alignment program
+    - 'bowtie2'  # Depreciated (slower),"sensitivity" requiried (see below "Bowtie2" options)
+    - 'minimap2' # A versatile sequence alignment program
   caller: 'ivar'
 
 ###############################################################################
@@ -275,24 +275,24 @@ fastq_screen:
 
 ###############################################################################
 conda:
-  yaml: # Conda environement yaml files:
-    bamclipper:    '../environments/bamclipper_v.1.0.0.yaml'      # Bamclipper    ver. 1.0.0
-    bcftools:      '../environments/bcftools_v.1.20.yaml'         # Bcftools      ver. 1.20
-    bedtools:      '../environments/bedtools_v.2.31.1.yaml'       # Bedtools      ver. 2.31.1
-    bowtie2:       '../environments/bowtie2_v.2.5.4.yaml'         # Bowtie2       ver. 2.5.4
-    bwa:           '../environments/bwa_v.0.7.18.yaml'            # BWA           ver. 0.7.18
-    cutadapt:      '../environments/cutadapt_v.4.9.yaml'          # Cutadapt      ver. 4.9
-    fastq_screen:  '../environments/fastq-screen_v.0.15.3.yaml'   # Fastq-Screen  ver. 0.15.3 (with BWA ver. 0.7.18)
-    fastqc:        '../environments/fastqc_v.0.12.1.yaml'         # FastQC        ver. 0.12.1
-    gawk:          '../environments/gawk_v.5.3.0.yaml'            # Awk (GNU)     ver. 5.3.0
-    ivar:          '../environments/ivar_v.1.4.3.yaml'            # iVar          ver. 1.4.3 (with Samtools ver. 1.20)
-    minimap2:      '../environments/minimap2_v.2.28.yaml'         # Minimap2      ver. 2.28
-    multiqc:       '../environments/multiqc_v.1.23.yaml'          # MultiQC       ver. 1.23 (with Pandoc ver. 3.3)
-    nextclade:     '../environments/nextclade_v.3.9.1.yaml'       # Nextclade     ver. 3.9.1
-    pangolin:      '../environments/pangolin_v.4.3.yaml'          # Pangolin      ver. 4.3
-    samtools:      '../environments/samtools_v.1.20.yaml'         # Samtools      ver. 1.20
-    sickle_trim:   '../environments/sickle-trim_v.1.33.yaml'      # Sickle-trim   ver. 1.33
-    tsv2vcf:       '../environments/tsv2vcf_v.1.1.yaml'           # tsv2vcf       ver. 1.1 (bcftools biopython numpy)
-    workflow_base: '../environments/workflow-base_v.2024.11.yaml' # Workflow-Base ver. 2024.11
+  envs: # Conda environement yaml files:
+    bamclipper:    'envs/bamclipper_v.1.0.0.yaml'      # Bamclipper    ver. 1.0.0
+    bcftools:      'envs/bcftools_v.1.20.yaml'         # Bcftools      ver. 1.20
+    bedtools:      'envs/bedtools_v.2.31.1.yaml'       # Bedtools      ver. 2.31.1
+    bowtie2:       'envs/bowtie2_v.2.5.4.yaml'         # Bowtie2       ver. 2.5.4
+    bwa:           'envs/bwa_v.0.7.18.yaml'            # BWA           ver. 0.7.18
+    cutadapt:      'envs/cutadapt_v.4.9.yaml'          # Cutadapt      ver. 4.9
+    fastq_screen:  'envs/fastq-screen_v.0.15.3.yaml'   # Fastq-Screen  ver. 0.15.3 (with BWA ver. 0.7.18)
+    fastqc:        'envs/fastqc_v.0.12.1.yaml'         # FastQC        ver. 0.12.1
+    gawk:          'envs/gawk_v.5.3.0.yaml'            # Awk (GNU)     ver. 5.3.0
+    ivar:          'envs/ivar_v.1.4.3.yaml'            # iVar          ver. 1.4.3 (with Samtools ver. 1.20)
+    minimap2:      'envs/minimap2_v.2.28.yaml'         # Minimap2      ver. 2.28
+    multiqc:       'envs/multiqc_v.1.23.yaml'          # MultiQC       ver. 1.23 (with Pandoc ver. 3.3)
+    nextclade:     'envs/nextclade_v.3.9.1.yaml'       # Nextclade     ver. 3.9.1
+    pangolin:      'envs/pangolin_v.4.3.yaml'          # Pangolin      ver. 4.3
+    samtools:      'envs/samtools_v.1.20.yaml'         # Samtools      ver. 1.20
+    sickle_trim:   'envs/sickle-trim_v.1.33.yaml'      # Sickle-trim   ver. 1.33
+    tsv2vcf:       'envs/tsv2vcf_v.1.1.yaml'           # tsv2vcf       ver. 1.1 (bcftools biopython numpy)
+    workflow_base: 'envs/workflow-base_v.2024.11.yaml' # Workflow-Base ver. 2024.11
 
 ###############################################################################
diff --git a/version.txt b/version.txt
index 424805b..eb11b6f 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-2024.11
+2025.01
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 118850c..e589831 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -1,4 +1,1454 @@
-include: "workflow/snakefiles/quality_control.smk"
-include: "workflow/snakefiles/indexing_genomes.smk"
-include: "workflow/snakefiles/gevarli.smk"
-conda_prefix: "workflow/environments/"
\ No newline at end of file
+###I###R###D######U###2###3###3#######T###R###A###N###S###V###I###H###M###I####
+###                                                                         ###
+###    /\  ______      ___ ____ _  _ __   ____ __   ____     ______  /\     ###
+###    ||  \ \ \ \    / __( ___( \/ )__\ (  _ (  ) (_  _)   / / / /  ||     ###
+###    ||   > > > >  ( (_-.)__) \  /(__)\ )   /)(__ _)(_   < < < <   ||     ###
+###    ||  /_/_/_/    \___(____) \(__)(__(_)\_(____(____)   \_\_\_\  ||     ###
+###    \/                                                            \/     ###
+###                                                                         ###
+###I###R###D######U###2###3###3#######T###R###A###N###S###V###I###H###M###I####
+# Name ___________________ Snakefile
+# Version ________________ v.2025.01
+# Author _________________ Nicolas Fernandez
+# Affiliation ____________ IRD_U233_TransVIHMI
+# Aim ____________________ Snakefile with GeVarLi rules
+# Date ___________________ 2021.10.12
+# Latest modifications ___ 2025.01.08 (Prepare for Snakedeploy)
+# Use ____________________ snakemake -s Snakefile --use-conda
+
+###############################################################################
+### CONFIGURATION ###
+#####################
+
+configfile: "configuration/config.yaml"
+
+###############################################################################
+### FUNCTIONS ###
+#################
+
+def get_memory_per_thread(wildcards):
+    memory_per_thread = int(RAM) // int(CPUS)
+    if memory_per_thread < 1:
+        memory_per_thread = 1
+    return memory_per_thread
+
+def get_quality_input(wildcards):
+    quality_list = []
+    if "quality" in MODULES:
+        quality_list = "results/00_Quality_Control/multiqc/"
+    return quality_list
+
+def get_trimming_input(wildcards):
+    trimming_list = []
+    if "trimming" in MODULES:
+        trimming_list = expand("results/01_Trimming/sickle/{sample}_sickle-trimmed_SE.fastq.gz",
+                               sample = SAMPLE)
+    return trimming_list
+
+def stash_or_trash(path):
+    if "trimming" in MODULES:
+        return path
+    else:
+        return temp(path)
+
+def get_cleapping_input(wildcards):
+    cleapping_list = []
+    if "cleapping" in MODULES:
+        cleapping_list = ""
+    return cleapping_list
+
+def get_bam_input(wildcards):
+    markdup = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam"
+    if "cleapping" in MODULES:
+        markdup = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.primerclipped.bam"
+    return markdup
+
+def get_bai_input(wildcards):
+    index = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam.bai"
+    if "cleapping" in MODULES:
+        index = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.primerclipped.bam.bai"
+    return index
+
+def get_flagstat_input(wildcards):
+    flagstat_list = []
+    if "covstats" in MODULES:
+        flagstat_list = expand(
+            "results/03_Coverage/{reference}/flagstat/{sample}_{aligner}_flagstat.{ext}",
+            reference = REFERENCE,
+            sample = SAMPLE,
+            aligner = ALIGNER,
+            ext = ["txt", "tsv", "json"]
+        )
+    return flagstat_list
+
+def get_covstats_input(wildcards):
+    covstats_list = []
+    if "covstats" in MODULES:
+        covstats_list = expand(
+            "results/03_Coverage/{reference}/{sample}_{aligner}_{min_cov}X_coverage-stats.tsv",
+            reference = REFERENCE,
+            sample = SAMPLE,
+            aligner = ALIGNER,
+            min_cov = MIN_COV
+        )
+    return covstats_list
+
+def get_consensus_input(wildcards):
+    consensus_list = []
+    if "consensus" in MODULES:
+        consensus_list = expand(
+            "results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_consensus.fasta",
+            reference = REFERENCE,
+            sample = SAMPLE,
+            aligner = ALIGNER,
+            min_cov = MIN_COV,
+            caller = CALLER
+        )
+    return consensus_list
+
+def get_vcf_input(wildcards):
+    vcf_list = []
+    if "consensus" in MODULES:
+        vcf_list = expand(
+            "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_variant-filt.vcf",
+            reference = REFERENCE,
+            sample = SAMPLE,
+            aligner = ALIGNER,
+            min_cov = MIN_COV,
+            caller = CALLER
+        )
+    return vcf_list
+
+def get_pangolin_input(wildcards):
+    pangolin_list = []
+    if "pangolin" in MODULES:
+        pangolin_list = expand(
+            "results/06_Lineages/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_pangolin-report.csv",
+            reference = REFERENCE,
+            sample = SAMPLE,
+            aligner = ALIGNER,
+            min_cov = MIN_COV,
+            caller = CALLER
+        )
+    return pangolin_list
+
+def get_nextclade_input(wildcards):
+    nextclade_list = []
+    if "nextclade" in MODULES:
+        nextclade_list = expand(
+            "results/06_Lineages/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_nextclade-report.tsv",
+            reference = REFERENCE,
+            sample = SAMPLE,
+            aligner = ALIGNER,
+            min_cov = MIN_COV,
+            caller = CALLER
+        )
+    return nextclade_list
+
+def get_gisaid_input(wildcards):
+    gisaid_list = []
+    if "gisaid" in MODULES:
+        gisaid_list = expand(
+            ""
+        )
+    return gisaid_list
+
+###############################################################################
+### WILDCARDS ###
+#################
+
+FASTQ, = glob_wildcards("resources/symlinks/{fastq}.fastq.gz")
+SAMPLE, = glob_wildcards("resources/symlinks/{sample}_R1.fastq.gz")
+
+###############################################################################
+### RESOURCES ###
+#################
+
+CPUS = config["resources"]["cpus"]       # Threads (maximum)
+RAM = config["resources"]["ram"]         # Memory (RAM) in Gb (maximum)
+MEM_GB = get_memory_per_thread           # Memory per thread in GB (maximum)
+TMP_DIR = config["resources"]["tmp_dir"] # Temporary directory
+
+###############################################################################
+### ENVIRONMENTS ###
+####################
+
+MULTIQC = config["conda"]["envs"]["multiqc"]           # Multi-QC conda env
+FASTQ_SCREEN = config["conda"]["envs"]["fastq_screen"] # Fastq-Screen conda env
+FASTQC = config["conda"]["envs"]["fastqc"]             # FastQC conda env
+CUTADAPT = config["conda"]["envs"]["cutadapt"]         # Cutadapt conda environment
+SICKLE_TRIM = config["conda"]["envs"]["sickle_trim"]   # Sickle-Trim conda environment
+MINIMAP2 = config["conda"]["envs"]["minimap2"]         # Minimap2 conda environment
+BWA = config["conda"]["envs"]["bwa"]                   # BWA conda environment
+BOWTIE2 = config["conda"]["envs"]["bowtie2"]           # Bowtie2 conda environment
+SAMTOOLS = config["conda"]["envs"]["samtools"]         # SamTools conda environment
+BEDTOOLS = config["conda"]["envs"]["bedtools"]         # BedTools conda environment
+BAMCLIPPER = config["conda"]["envs"]["bamclipper"]     # BAMClipper
+GAWK = config["conda"]["envs"]["gawk"]                 # Awk (GNU) conda environment
+IVAR = config["conda"]["envs"]["ivar"]                 # iVar conda environment
+TSV2VCF = config["conda"]["envs"]["tsv2vcf"]           # tsv2vcf conda environment
+BCFTOOLS = config["conda"]["envs"]["bcftools"]         # BcfTools conda environment
+PANGOLIN = config["conda"]["envs"]["pangolin"]         # Pangolin conda environment
+NEXTCLADE = config["conda"]["envs"]["nextclade"]       # Nextclade conda environment
+
+###############################################################################
+### PARAMETERS ###
+##################
+
+MODULES = config["modules"] # Modules
+
+SUBSET = config["fastq_screen"]["subset"]     # Fastq-Screen --subset
+FQC_CONFIG = config["fastq_screen"]["config"] # Fastq-Screen --conf
+#MQC_PATH = config["multiqc"]["path"]          # MultiQC --conf
+#MQC_CONF = config["multiqc"]["config"]        # MultiQC --conf
+#TAG = config["multiqc"]["tag"]                # MultiQC --tag
+
+KMER_SIZE = config["minimap2"]["algorithm"]["k-mer_size"]          # MM2 k-mer size
+MINIMIZER_SIZE = config["minimap2"]["algorithm"]["minimizer_size"] # MM2 minimizer window size
+SPLIT_SIZE = config["minimap2"]["algorithm"]["split_size"]         # MM2 split index
+#HOMOPOLYMER = config["minimap2"]["algorithm"]["homopolymer"]       # MM2 if PacBio
+
+REFERENCE = config["consensus"]["reference"] # Genome reference sequence, in fasta format
+REF_PATH = config["consensus"]["path"]       # Path to genomes references
+MIN_COV = config["consensus"]["min_cov"]     # Minimum coverage, mask lower regions with 'N'
+IUPAC = config["consensus"]["iupac"]         # Output variants in the form of IUPAC ambiguity codes
+ALIGNER = config["consensus"]["aligner"]     # Aligner ('minimap2', 'bwa' or 'bowtie2')
+CALLER = config["consensus"]["caller"]       # Variant Caller ('lofreq' or 'ivar')
+
+CUT_LENGTH = config["cutadapt"]["length"]           # Cutadapt --minimum-length
+CUT_TRUSEQ = config["cutadapt"]["kits"]["truseq"]   # Cutadapt --adapter Illumina TruSeq
+CUT_NEXTERA = config["cutadapt"]["kits"]["nextera"] # Cutadapt --adapter Illumina Nextera
+CUT_SMALL = config["cutadapt"]["kits"]["small"]     # Cutadapt --adapter Illumina Small
+CUT_CLIPPING = config["cutadapt"]["clipping"]       # Cutadapt --cut
+
+SIC_COMMAND = config["sickle_trim"]["command"]   # Sickle-trim command
+SIC_ENCODING = config["sickle_trim"]["encoding"] # Sickle-trim --qual-type
+SIC_QUALITY = config["sickle_trim"]["quality"]   # Sickle-trim --qual-threshold
+SIC_LENGTH = config["sickle_trim"]["length"]     # Sickle-trim --length-threshold
+
+MM2_PATH = config["minimap2"]["path"]     # Minimap2 path to indexes
+MM2_PRESET = config["minimap2"]["preset"] # Minimap2 preset
+#MM2_LENGTH = config["minimap2"]["length"]                              # Minimap2 length
+#MM2_KMER_SIZE = config["minimap2"]["algorithm"]["k-mer_size"]          # Minimap2 k-mer size
+#MM2_MINIMIZER_SIZE = config["minimap2"]["algorithm"]["minimizer_size"] # Minimap2 minimizer window size
+#MM2_SPLIT_SIZE = config["minimap2"]["algorithm"]["split_size"]         # Minimap2 split index
+#MM2_HOMOPOLYMER = config["minimap2"]["algorithm"]["homopolymer"]       # Minimap2 for PacBio
+
+BWA_PATH = config["bwa"]["path"]      # BWA path to indexes
+BWA_ALGO = config["bwa"]["algorithm"] # BWA indexing algorithm
+
+BT2_PATH = config["bowtie2"]["path"]               # Bowtie2 path to indexes
+BT2_ALGO = config["bowtie2"]["algorithm"]          # Bowtie2 indexing algorithm
+BT2_SENSITIVITY = config["bowtie2"]["sensitivity"] # Bowtie2 sensitivity preset
+
+CLIPPATH = config["bamclipper"]["path"]         # Bamclipper path to primers
+PRIMERS = config["bamclipper"]["primers"]       # Bamclipper primers bed files
+UPSTREAM = config["bamclipper"]["upstream"]     # Bamclipper upstream nucleotides
+DOWNSTREAM = config["bamclipper"]["downstream"] # Bamclipper downstream nucleotides
+
+# iVar thresholds currently reuse the 'consensus' section of the config; the
+# commented lines below would read them from a dedicated 'ivar' section instead.
+IVAR_MIN_DEPTH = config["consensus"]["min_cov"]   # iVar -m: minimum read depth
+IVAR_MIN_FREQ = config["consensus"]["min_freq"]   # iVar -t: minimum allele frequency allowed
+IVAR_MIN_INSERT = config["consensus"]["min_freq"] # iVar -c: NOTE(review): reads 'min_freq', not a dedicated insertion key — confirm this is intended
+#IVAR_MIN_DEPTH = config["ivar"]["min_depth"]      # iVar
+#IVAR_MIN_FREQ = config["ivar"]["min_freq"]        # iVar minimum allele frequency allowed
+#IVAR_MIN_INSERT = config["ivar"]["min_insert"]    # iVar minimum insertion frequency allowed
+IVAR_MAX_DEPTH = config["ivar"]["max_depth"]      # iVar: samtools mpileup --max-depth
+IVAR_MIN_BQ = config["ivar"]["min_bq"]            # iVar: samtools mpileup --min-BQ
+IVAR_MIN_QUAL = config["ivar"]["min_qual"]        # iVar -q: minimum quality score to count a base
+IVAR_MAP_QUAL = config["ivar"]["map_qual"]        # iVar: raw mpileup flag (presumably '--no-BAQ'/'-B' — TODO confirm in config)
+
+NEXT_PATH = config["nextclade"]["path"]       # Path to nextclade dataset
+NEXT_DATASET = config["nextclade"]["dataset"] # Nextclade dataset
+###############################################################################
+### RULES ###
+#############
+rule all:
+    # Aim: default target rule — requesting these inputs drives the whole
+    # workflow. The get_*_input functions are defined elsewhere in this
+    # Snakefile and expand the expected final files from samples and config.
+    input:
+        multiqc = get_quality_input,
+        trimming = get_trimming_input,
+        consensus = get_consensus_input,
+        flagstat = get_flagstat_input,
+        covstats = get_covstats_input,
+        vcf = get_vcf_input,
+        pangolin = get_pangolin_input,
+        nextclade = get_nextclade_input,
+        gisaid = get_gisaid_input
+
+
+###############################################################################
+################################## LINEAGES ###################################
+###############################################################################
+
+###############################################################################
+rule nextclade_lineage:
+    # Aim: nextclade lineage assignation of each consensus sequence
+    # Use: nextclade run --input-dataset [DATASET] --output-tsv [NAME.tsv] --output-all [DIR] [QUERY.fasta]
+    message:
+        """
+        ~ Nextclade ∞ Assign Lineage ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        Variant caller: __ {wildcards.caller}
+        """
+    conda:
+        NEXTCLADE
+    resources:
+        cpus = CPUS
+    params:
+        path = NEXT_PATH,       # directory holding nextclade datasets
+        dataset = NEXT_DATASET  # dataset name, appended to 'path' below
+    input:
+        consensus = "results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_consensus.fasta"
+    output:
+        lineage = "results/06_Lineages/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_nextclade-report.tsv",
+        alignment = directory("results/06_Lineages/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_nextclade-all/")
+    log:
+        "results/10_Reports/tools-log/nextclade/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_lineage.log"
+    shell:
+        "nextclade "                                    # Nextclade, assign query sequences to clades and report potential quality issues
+        "run "                                           # Run analysis
+        "--input-dataset {params.path}{params.dataset} " # Path to a directory containing a dataset; NOTE(review): no '/' is inserted between path and dataset — config 'path' must end with '/'
+        "--output-tsv {output.lineage} "                 # Path to output TSV results file
+        "--output-all {output.alignment} "               # Produce all of the output files into this directory, using default basename
+        "{input.consensus} "                             # Path to a .fasta file with input sequences
+        "&> {log}"                                       # Log redirection
+
+###############################################################################
+rule pangolin_lineage:
+    # Aim: pangolin lineage assignation of each consensus sequence
+    # Use: pangolin [QUERY.fasta] -t [THREADS] --outfile [NAME.csv]
+    message:
+        """
+        ~ Pangolin ∞ Assign Lineage ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        Variant caller: __ {wildcards.caller}
+        """
+    conda:
+        PANGOLIN
+    resources:
+        cpus = CPUS,
+        tmp_dir = TMP_DIR
+    input:
+        consensus = "results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_consensus.fasta"
+    output:
+        lineage = "results/06_Lineages/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_pangolin-report.csv"
+    log:
+        "results/10_Reports/tools-log/pangolin/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_lineage.log"
+    shell:
+        "pangolin "                     # Pangolin, Phylogenetic Assignment of Named Global Outbreak LINeages
+        "{input.consensus} "             # Query fasta file of sequences to analyse
+        "--threads {resources.cpus} "    # -t: Number of threads
+        "--tempdir {resources.tmp_dir} " # Where temporary files go (default: $TMPDIR)
+        "--outfile {output.lineage} "    # Output file name (default: lineage_report.csv)
+        "&> {log}"                       # Log redirection
+
+###############################################################################
+################################## CONSENSUS ##################################
+###############################################################################
+
+###############################################################################
+rule sed_rename_headers:
+    # Aim: rename fasta headers with the sample name and workflow parameters
+    # Use: sed 's/[OLD]/[NEW]/' [IN] > [OUT]
+    # Note: EVERY header line ('>...') is rewritten to the same
+    #       '>{sample}_{aligner}_{min_cov}X_{caller}' string; a multi-record
+    #       fasta would end up with identical headers — presumably the
+    #       consensus file holds a single record (TODO confirm).
+    message:
+        """
+        ~ Sed ∞ Rename Fasta Header ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        Variant caller: __ {wildcards.caller}
+        """
+    conda:
+        GAWK
+    input:
+        cons_tmp = "results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_consensus.fasta.tmp"
+    output:
+        consensus = "results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_consensus.fasta"
+    log:
+        "results/10_Reports/tools-log/sed/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_fasta-header.log"
+    shell:
+        "sed " # Sed, a Stream EDitor used to perform basic text transformations on an input stream
+        "'s/^>.*$/>{wildcards.sample}_{wildcards.aligner}_{wildcards.min_cov}X_{wildcards.caller}/' " # replace each whole header line
+        "{input.cons_tmp} "      # Input file
+        "1> {output.consensus} " # Output file
+        "2> {log}"               # Log redirection
+
+###############################################################################
+########################### VARIANTS CALLING - IVAR ###########################
+###############################################################################
+
+###############################################################################
+rule convert_tsv2vcf:
+    # Aim: convert the iVar variant table (TSV) into a VCF file.
+    # Use: python3 workflow/scripts/ivar_variants_to_vcf.py [IN.tsv] [OUT.vcf]
+    message:
+        """
+        ~ iVar ∞ Convert TSV to VCF file ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        Variant caller: __ iVar
+        """
+    conda:
+        TSV2VCF
+    input:
+        tsv = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_ivar_variant-call.tsv"
+    output:
+        vcf = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_ivar_variant-filt.vcf"
+    log:
+        "results/10_Reports/tools-log/tsv2vcf/{reference}/{sample}_{aligner}_{min_cov}X_ivar_tsv2vcf.log"
+    shell:
+        "python3 "                                  # Python 3
+        "workflow/scripts/ivar_variants_to_vcf.py "  # Conversion script (from viralrecon)
+        "{input.tsv} "                               # TSV input
+        "{output.vcf} "                              # VCF output
+        "&> {log}"                                   # Log redirection
+
+###############################################################################
+rule ivar_consensus:
+    # Aim: call a consensus sequence from the aligned, duplicate-marked BAM.
+    # Use: samtools mpileup -aa -A -d [DEPTH] -Q [BQ] [BAM] | ivar consensus -p [PREFIX] ...
+    # Note: samtools mpileup output must be piped into ivar consensus; stderr
+    #       is merged into the pipe only so the known mpileup notice can be
+    #       filtered out below.
+    message:
+        """
+        ~ iVar ∞ Call Consensus ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        Variant caller: __ iVar
+        """
+    conda:
+        IVAR
+    params:
+        min_depth = IVAR_MIN_DEPTH,   # -m: minimum read depth
+        min_freq = IVAR_MIN_FREQ,     # -t: minimum frequency threshold
+        min_insert = IVAR_MIN_INSERT, # -c: minimum insertion frequency threshold
+        max_depth = IVAR_MAX_DEPTH,   # mpileup --max-depth
+        min_bq = IVAR_MIN_BQ,         # mpileup --min-BQ
+        min_qual = IVAR_MIN_QUAL,     # -q: minimum quality score to count a base
+        baq = IVAR_MAP_QUAL           # raw mpileup flag from config (presumably '--no-BAQ'/'-B' — TODO confirm)
+    input:
+        mark_dup = get_bam_input,
+        variant_call = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_ivar_variant-call.tsv"
+    output:
+        prefix = temp("results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_ivar_consensus"),
+        cons_tmp = temp("results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_ivar_consensus.fasta.tmp"),
+        qual_txt = "results/05_Consensus/{reference}/ivar_consensus-quality/{sample}_{aligner}_{min_cov}X_ivar_consensus.qual.txt",
+    log:
+        "results/10_Reports/tools-log/ivar/{reference}/{sample}_{aligner}_{min_cov}X_ivar_consensus.log"
+    shell:
+        "samtools mpileup "              # Samtools mpileup, multi-way pileup from an alignment in SAM/BAM format
+        "--verbosity 0 "                  # Set level of verbosity [INT]
+        "-a "                             # -a: output all positions (including zero depth)
+        "-a "                             # -a -a / -aa: output absolutely all positions, including unused ref. sequences
+        "--count-orphans "                # -A: do not discard anomalous read pairs
+        "--max-depth {params.max_depth} " # -d: max per-file depth; avoids excessive memory usage [INT] (default: 8000)
+        "{params.baq} "                   # extra mpileup flag from config (e.g. --no-BAQ / -B to disable per-Base Alignment Quality)
+        "--min-BQ {params.min_bq} "       # -Q: skip bases with baseQ/BAQ smaller than [INT] (default: 13)
+        #"--reference {input.masked_ref} " # Reference sequence FASTA FILE (disabled; this rule declares no masked_ref input)
+        "{input.mark_dup} "               # Markdup BAM input
+        "2>&1 | grep -v -F '[mpileup] 1 samples in 1 input files' " # -F: match as a fixed string; without it '[mpileup]' is a bracket expression, the notice is NOT filtered and leaks into the pileup stream
+        "| "                               ### PIPE to iVar
+        "ivar consensus "                # iVar, with command 'consensus': Call consensus from the piped pileup
+        "-p {output.prefix} "             # -p: prefix
+        "-q {params.min_qual} "           # -q: Minimum quality score threshold to count base [INT] (Default: 20)
+        "-t {params.min_freq} "           # -t: Minimum frequency threshold to call variants [FLOAT] (Default: 0.03)
+        "-c {params.min_insert} "         # -c: Minimum insertion frequency threshold to call consensus [FLOAT] (Default: 0.8)
+        "-m {params.min_depth} "          # -m: Minimum read depth to call variants [INT] (Default: 0)
+        "-n N "                           # -n: Character to print in regions with less than minimum coverage (Default: N)
+        #"-i {wildcards.sample} "          # -i: Name of fasta header (default: Consensus_<prefix>_threshold_<min_freq>_quality_<min_qual>_<min_insert>
+        "&> {log} "                       # Log redirection
+        "&& mv {output.prefix}.fa {output.cons_tmp} "       # move consensus.fa (temp) to consensus.fasta.tmp (tmp)
+        "&& mv {output.prefix}.qual.txt {output.qual_txt} " # move consensus.qual.txt into the ivar_consensus-quality/ directory
+        "&& touch {output.prefix}"                          # create the declared 'prefix' output so Snakemake sees it
+
+###############################################################################
+rule ivar_variant_calling:
+    # Aim: SNVs and Indels calling
+    # Use: samtools mpileup -aa -A -d 0 -B -Q 0 --reference [<reference-fasta] <input.bam> | ivar variants -p <prefix> [-q <min-quality>] [-t <min-frequency-threshold>] [-m <minimum depth>] [-r <reference-fasta>] [-g GFF file]
+    # Note: samtools mpileup output must be piped into ivar variants; stderr
+    #       is merged into the pipe only so the known mpileup notice can be
+    #       filtered out below.
+    message:
+        """
+        ~ iVar ∞ Call Variants ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        Variant caller: __ iVar
+        """
+    conda:
+        IVAR
+    params:
+        min_depth = IVAR_MIN_DEPTH,   # -m: minimum read depth
+        min_freq = IVAR_MIN_FREQ,     # -t: minimum frequency threshold
+        min_insert = IVAR_MIN_INSERT, # declared but unused in this rule's shell
+        max_depth = IVAR_MAX_DEPTH,   # mpileup --max-depth
+        min_bq = IVAR_MIN_BQ,         # mpileup --min-BQ
+        min_qual = IVAR_MIN_QUAL,     # -q: minimum quality score to count a base
+        baq = IVAR_MAP_QUAL           # raw mpileup flag from config (presumably '--no-BAQ'/'-B' — TODO confirm)
+    input:
+        masked_ref = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_masked-ref.fasta",
+        mark_dup = get_bam_input
+    output:
+        variant_call = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_ivar_variant-call.tsv"
+    log:
+        "results/10_Reports/tools-log/ivar/{reference}/{sample}_{aligner}_{min_cov}X_ivar_variant-call.log"
+    shell:
+        "samtools mpileup "              # Samtools mpileup, multi-way pileup from an alignment in SAM/BAM format
+        "--verbosity 0 "                  # Set level of verbosity [INT]
+        "-a "                             # -a: output all positions (including zero depth)
+        "-a "                             # -a -a / -aa: output absolutely all positions, including unused ref. sequences
+        "--count-orphans "                # -A: do not discard anomalous read pairs
+        "--max-depth {params.max_depth} " # -d: max per-file depth; avoids excessive memory usage (default: 8000) [INT]
+        "{params.baq} "                   # extra mpileup flag from config (e.g. --no-BAQ / -B to disable per-Base Alignment Quality)
+        "--min-BQ {params.min_bq} "       # -Q: skip bases with baseQ/BAQ smaller than (default: 13) [INT]
+        "--reference {input.masked_ref} " # Reference sequence FASTA FILE
+        "{input.mark_dup} "               # Markdup BAM input
+        "2>&1 | grep -v -F '[mpileup] 1 samples in 1 input files' " # -F: match as a fixed string; without it '[mpileup]' is a bracket expression, the notice is NOT filtered and leaks into the pileup stream
+        "| "                               ### pipe to iVar
+        "ivar variants "                 # iVar, with command 'variants': Call variants from the piped pileup
+        "-p {output.variant_call} "       # -p: prefix; NOTE(review): ivar appends '.tsv' to the prefix — passing the full .tsv filename may produce '...tsv.tsv' and leave the declared output missing; confirm against a real run
+        "-q {params.min_qual} "           # -q: Minimum quality score threshold to count base (Default: 20) [INT]
+        "-t {params.min_freq} "           # -t: Minimum frequency threshold to call variants (Default: 0.03) [FLOAT]
+        "-m {params.min_depth} "          # -m: Minimum read depth to call variants (Default: 0) [INT]
+        "-r {input.masked_ref} "          # -r: Reference file used for alignment (translate the nuc. sequences and identify intra host single nuc. variants)
+        #"-g "                            # -g: A GFF file in the GFF3 format can be supplied to specify coordinates of open reading frames (ORFs)
+        "&> {log}"                        # Log redirection
+
+
+###############################################################################
+############################ MASKING LOW COVERAGE #############################
+###############################################################################
+
+###############################################################################
+rule bedtools_masking:
+    # Aim: masking low coverage regions of the reference with 'N'
+    # Use: bedtools maskfasta [OPTIONS] -fi [REFERENCE.fasta] -bed [RANGE.bed] -fo [MASKEDREF.fasta]
+    message:
+        """
+        ~ BedTools ∞ Mask Low Coverage Regions ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        """
+    conda:
+        BEDTOOLS
+    params:
+        path = REF_PATH  # directory holding the reference fasta files
+    input:
+        low_cov_mask = "results/03_Coverage/{reference}/bed/{sample}_{aligner}_{min_cov}X_low-cov-mask.bed"
+    output:
+        masked_ref = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_masked-ref.fasta"
+    log:
+        "results/10_Reports/tools-log/bedtools/{reference}/{sample}_{aligner}_{min_cov}X_masking.log"
+    shell:
+        "bedtools maskfasta "                          # Bedtools maskfasta, mask a fasta file based on feature coordinates
+        "-fi {params.path}{wildcards.reference}.fasta " # Input FASTA file; NOTE(review): no '/' inserted between path and reference — config 'path' must end with '/'
+        "-bed {input.low_cov_mask} "                    # BED/GFF/VCF file of ranges to mask in -fi
+        "-fo {output.masked_ref} "                      # Output masked FASTA file
+        "&> {log}"                                      # Log redirection 
+
+###############################################################################
+rule bedtools_merged_mask:
+    # Aim: collapse overlapping/adjacent low-coverage intervals into single
+    #      regions, producing the final mask BED.
+    # Use: bedtools merge [OPTIONS] -i [FILTERED.bed] -g [GENOME.fasta] 
+    message:
+        """
+        ~ BedTools ∞ Merge Overlaps ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        """
+    conda:
+        BEDTOOLS
+    input:
+        min_cov_filt = "results/03_Coverage/{reference}/bed/{sample}_{aligner}_{min_cov}X_min-cov-filt.bed"
+    output:
+        low_cov_mask = temp("results/03_Coverage/{reference}/bed/{sample}_{aligner}_{min_cov}X_low-cov-mask.bed")
+    log:
+        "results/10_Reports/tools-log/bedtools/{reference}/{sample}_{aligner}_{min_cov}X_merging.log"
+    shell:
+        # Merged intervals go to stdout (the mask file); stderr goes to the log.
+        "bedtools merge -i {input.min_cov_filt} " # merge BED/GFF/VCF entries into single intervals
+        "1> {output.low_cov_mask} "               # merged output
+        "2> {log}"                                # Log redirection
+
+###############################################################################
+rule awk_min_covfilt:
+    # Aim: keep only the BedGraph intervals whose depth (column 4) is below
+    #      the consensus masking threshold.
+    # Use: awk '$4 < [MIN_COV]' [BEDGRAPH.bed] 1> [FILTERED.bed]
+    message:
+        """
+        ~ Awk ∞ Minimum Coverage Filtration ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        """
+    conda:
+        GAWK
+    input:
+        genome_cov = "results/02_Mapping/{reference}/{sample}_{aligner}_genome-cov.bed"
+    output:
+        min_cov_filt = temp("results/03_Coverage/{reference}/bed/{sample}_{aligner}_{min_cov}X_min-cov-filt.bed")
+    log:
+        "results/10_Reports/tools-log/awk/{reference}/{sample}_{aligner}_{min_cov}X_min-cov-filt.log"
+    shell:
+        # awk prints a BedGraph line when its 4th field (depth) is strictly
+        # below the threshold; stdout -> filtered BED, stderr -> log.
+        "awk '$4 < {wildcards.min_cov}' " # select sub-threshold intervals
+        "{input.genome_cov} "             # BedGraph coverage input
+        "1> {output.min_cov_filt} "       # Minimum coverage filtered bed output
+        "2> {log} "                       # Log redirection
+
+###############################################################################
+################################## STATISTICS #################################
+###############################################################################
+
+###############################################################################
+rule awk_coverage_statistics:
+    # Aim: compute genome coverage statistics and assemble a one-line TSV
+    #      summary per sample from the upstream tool logs and the BedGraph.
+    # Use: awk {FORMULA} END {{print [RESULTS.tsv] [BEDGRAPH.bed]
+    message:
+        """
+        ~ Awk ∞ Compute Genome Coverage Statistics from BED file ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        GAWK
+    input:
+        cutadapt = "results/10_Reports/tools-log/cutadapt/{sample}.log",
+        sickle = "results/10_Reports/tools-log/sickle-trim/{sample}.log",
+        samtools = "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_mark-dup.log",
+        flagstat = "results/03_Coverage/{reference}/flagstat/{sample}_{aligner}_flagstat.json",
+        histogram = "results/03_Coverage/{reference}/histogram/{sample}_{aligner}_coverage-histogram.txt",
+        genome_cov = "results/02_Mapping/{reference}/{sample}_{aligner}_genome-cov.bed"
+    output:
+        cov_stats = "results/03_Coverage/{reference}/{sample}_{aligner}_{min_cov}X_coverage-stats.tsv"
+    log:
+        "results/10_Reports/tools-log/awk/{reference}/{sample}_{aligner}_{min_cov}X_coverage-stats.log"
+    shell:
+        r""" rawReads=$(grep -o -E  """                                  # Get raw reads 
+        r""" 'Total read pairs processed:.+' {input.cutadapt}  """       #
+        r""" | sed -E 's/Total read pairs processed:\ +//'  """          #
+        r""" | sed 's/,//g') ; """                                       #
+        #
+        r""" cutadaptPF=$(grep -o -E """                                 # Get cutadapt Passing Filters reads
+        r""" 'Pairs written \(passing filters\):.+' {input.cutadapt} """ #
+        r""" | sed -E 's/Pairs written \(passing filters\):\ +//' """    #
+        r""" | sed 's/,//g') ; """                                       #
+        #
+        r""" sicklePF=$(grep -o -E """                                   # Get sickle Passing Filters reads
+        r""" 'FastQ paired records kept:.+' {input.sickle} """           #
+        r""" | sed -E 's/FastQ paired records kept:\ +//') ; """         #
+        #
+        r""" totalDuplicate=$(grep -o -E """                             # Get total duplicated reads
+        r""" 'DUPLICATE TOTAL:.+' {input.samtools} """                   #
+        r""" | sed -E 's/DUPLICATE TOTAL:\ +//') ; """                   #
+        #
+        r""" estimatedLibrarySize=$(grep -o -E """                       # Get estimated library size
+        r""" 'ESTIMATED_LIBRARY_SIZE:.+' {input.samtools} """            #
+        r""" | sed -E 's/ESTIMATED_LIBRARY_SIZE:\ +//') ; """            #
+        #
+        r""" samtoolsPF=$(grep -o -E """                                 # Get samtools Passing Filter reads
+        r""" 'WRITTEN: .+' {input.samtools} """                          #
+        r""" | sed -E 's/WRITTEN:\ +//') ; """                           #
+        #
+        r""" mappedReads=$(grep -o -E -m 1 """                           # Get mapped reads
+        r""" '"mapped": .+' {input.flagstat} """                         #
+        r""" | sed -E 's/"mapped":\ +//' """                             #
+        r""" | sed 's/,//g') ; """                                       #
+        #
+        r""" mappedPercentReads=$(grep -o -E -m 1 """                    # Get mapped percent reads
+        r""" '"mapped %": .+' {input.flagstat} """                       #
+        r""" | sed -E 's/"mapped %":\ +//' """                           #
+        r""" | sed 's/,//g') ; """                                       #
+        #
+        r""" covPercentAt1X=$(grep -o -E """                             # Get coverage percent @1X
+        r""" 'Percent covered:.+' {input.histogram} """                  #
+        r""" | sed -E 's/Percent covered:\ +//') ; """                   #
+        #
+        r""" awk """                                                   # Awk, a program to select particular records in a file and perform operations upon them
+        r""" -v rawReads="${{rawReads}}" """                             # Define external variable
+        r""" -v cutadaptPF="${{cutadaptPF}}" """                         # Define external variable
+        r""" -v sicklePF="${{sicklePF}}" """                             # Define external variable
+        r""" -v totalDuplicate="${{totalDuplicate}}" """                 # Define external variable
+        r""" -v estimatedLibrarySize="${{estimatedLibrarySize}}" """     # Define external variable
+        r""" -v samtoolsPF="${{samtoolsPF}}" """                         # Define external variable
+        r""" -v mappedReads="${{mappedReads}}" """                       # Define external variable
+        r""" -v mappedPercentReads="${{mappedPercentReads}}" """         # Define external variable
+        r""" -v covPercentAt1X="${{covPercentAt1X}}" """                 # Define external variable
+        r""" '$4 >= {wildcards.min_cov} {{supMin_Cov+=$3-$2}} ; """      # Genome size (>= min_cov @X)
+        r""" {{genomeSize+=$3-$2}} ; """                                 # Genome size (total)
+        r""" {{totalBases+=($3-$2)*$4}} ; """                            # Total bases @1X
+        r""" {{totalBasesSq+=(($3-$2)*$4)**2}} """                       # Total bases square @1X
+        r""" END """                                                    # END
+        r""" {{print """                                                # Print
+        r""" "sample_id", "\t", """                                      # header: Sample ID
+        r""" "raw_paired_reads", "\t", """                               # header: Raw paired reads
+        r""" "cutadapt_pairs_PF", "\t", """                              # header: Cutadapt Passing Filters
+        r""" "sickle_reads_PF", "\t", """                                # header: Sickle-trim Passing Filters
+        r""" "duplicated_reads", "\t", """                               # header: duplicated reads
+        r""" "duplicated_percent_%","\t", """                            # header: duplicated percentage
+        r""" "estimated_library_size*", "\t", """                        # header: estimated library size
+        r""" "samtools_pairs_PF", "\t", """                              # header: samtools Passing Filters
+        r""" "mapped_with", "\t",  """                                   # header: aligner
+        r""" "mapped_on", "\t",  """                                     # header: reference
+        r""" "mapped_reads", "\t", """                                   # header: mapped reads
+        r""" "mapped_percent_%", "\t", """                               # header: mapped percentage
+        r""" "mean_depth", "\t", """                                     # header: mean depth
+        r""" "standard_deviation", "\t", """                             # header: standard deviation
+        r""" "cov_percent_%_@1X", "\t", """                              # header: coverage percentage @1X
+        r""" "cov_percent_%", "\t", """                                  # header: coverage percentage (comma added: was string concatenation, inconsistent with every other column separator)
+        r""" "@_min_cov" """                                             # header: @_[min_cov]_X
+        r""" ORS """                                                      # \n newline
+        r""" "{wildcards.sample}", "\t", """                             # value: Sample ID
+        r""" rawReads, "\t", """                                         # value: Raw sequences
+        r""" cutadaptPF, "\t", """                                       # value: Cutadapt Passing Filter
+        r""" sicklePF, "\t", """                                         # value: Sickle Passing Filter
+        r""" totalDuplicate, "\t", """                                   # value: duplicated reads
+        r""" int(((totalDuplicate)/(rawReads*2))*100), "%", "\t", """    # value: (divided by 2 to estimated pairs)
+        r""" estimatedLibrarySize, "\t", """                             # value: estimated library size
+        r""" samtoolsPF, "\t", """                                       # value: samtools Passing Filter
+        r""" "{wildcards.aligner}", "\t",  """                           # value: aligner
+        r""" "{wildcards.reference}", "\t",  """                         # value: reference
+        r""" mappedReads, "\t", """                                      # value: mapped reads
+        r""" mappedPercentReads, "%", "\t", """                          # value: mapped percentage
+        r""" int(totalBases/genomeSize), "\t", """                       # value: mean depth
+        r""" int(sqrt((totalBasesSq/genomeSize)-(totalBases/genomeSize)**2)), "\t", """ # Standard deviation value
+        r""" covPercentAt1X, "\t", """                                   # value
+        r""" supMin_Cov/genomeSize*100, "%", "\t", """                   # Coverage percent (@ min_cov X) value
+        r""" "@{wildcards.min_cov}X" """                                 # @ min_cov X value
+        r""" }}' """                                                     # Close print
+        r""" {input.genome_cov} """                                      # BedGraph coverage input
+        r""" 1> {output.cov_stats} """                                   # Mean depth output
+        r""" 2> {log}"""                                                 # Log redirection
+        
+###############################################################################
+rule bedtools_genome_coverage:
+    # Aim: compute per-interval sequencing depth across the whole genome.
+    # Use: bedtools genomecov [OPTIONS] -ibam [MARK-DUP.bam] 1> [BEDGRAPH.bed]
+    message:
+        """
+        ~ BedTools ∞ Compute Genome Coverage ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        BEDTOOLS
+    input:
+        mark_dup = get_bam_input,  # position-sorted, duplicate-marked BAM
+        index = get_bai_input      # BAM index; not referenced in the command, declared so it is built first
+    output:
+        genome_cov = "results/02_Mapping/{reference}/{sample}_{aligner}_genome-cov.bed"
+    log:
+        "results/10_Reports/tools-log/bedtools/{reference}/{sample}_{aligner}_genome-cov.log"
+    shell:
+        # -bga reports depth in BedGraph format, including zero-coverage regions.
+        "bedtools genomecov -bga " # Bedtools genomecov, compute the coverage of a feature file among a genome
+        "-ibam {input.mark_dup} "  # BAM input, must be sorted by position
+        "1> {output.genome_cov} "  # BedGraph output
+        "2> {log} "                # Log redirection
+
+###############################################################################
+rule samtools_coverage_histogram:
+    # Aim: alignment depth and percent coverage histogram
+    # Use: samtools coverage --histogram [INPUT.bam]
+    # Note: runs 'samtools coverage' twice on the same BAM — first the ASCII
+    #       histogram, then (after an empty separator line) the tabular
+    #       summary appended to the same output file. Only the second run's
+    #       stderr is redirected to the log.
+    message:
+        """
+        ~ SamTools ∞ Calcul Depth and Coverage from BAM file ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        SAMTOOLS
+    resources:
+       cpus = CPUS
+       #bins = BINS,
+       #depth = DEPTH
+    input:
+        mark_dup = get_bam_input
+    output:
+        histogram = "results/03_Coverage/{reference}/histogram/{sample}_{aligner}_coverage-histogram.txt"
+    log:
+        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_coverage-histogram.log"
+    shell:
+        "samtools coverage "          # Samtools coverage, alignment depth and percent coverage
+        "--histogram "                 # -m: show histogram instead of tabular output
+        "--verbosity 4 "               # Set level of verbosity [INT] (default: 3)
+        "--n-bins 149 "                # -w: number of bins in histogram (default: terminal width - 40) (todo: {params.bins}) 
+        "--depth 0 "                   # -d maximum allowed coverage depth [INT] (default: 1000000 ; 0 removing any depth limit) (todo: {params.depth}) 
+        "--output {output.histogram} " # write output to FILE (default: stdout)
+        "{input.mark_dup} ; "          # Mark_dup bam input (first run: histogram; stderr not captured here)
+        "echo >> {output.histogram} ; " # Newline separator between the two reports
+        "samtools coverage "    # Second run: default tabular summary of the same BAM
+        "--verbosity 4 "         # Set level of verbosity [INT] (default: 3)
+        "{input.mark_dup} "      # Mark_dup bam input
+        ">> {output.histogram} " # append to the same output FILE
+        "2> {log}"               # Log redirection
+
+###############################################################################
+rule samtools_flagstat_ext:
+    # Aim: simple alignment statistics; {ext} wildcard selects the output format
+    # Use: samtools flagstat -@ [THREADS] [INPUT.bam]
+    message:
+        """
+        ~ SamTools ∞ Calcul simple Stats from BAM file ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        SAMTOOLS
+    resources:
+       cpus = CPUS
+    input:
+        mark_dup = get_bam_input
+    output:
+        flagstat = "results/03_Coverage/{reference}/flagstat/{sample}_{aligner}_flagstat.{ext}"
+    log:
+        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_flagstat-{ext}.log"
+    shell:
+        "samtools flagstat "            # Samtools flagstat: simple alignment statistics
+        "--threads {resources.cpus} "   # -@: Number of additional threads to use (default: 1)
+        "--verbosity 4 "                # Set level of verbosity [INT] (default: 3)
+        "--output-fmt {wildcards.ext} " # -O: Specify output format (none, tsv and json)
+        "{input.mark_dup} "             # Mark_dup bam input
+        "1> {output.flagstat} "         # Flagstat report output
+        "2> {log}"                      # Log redirection
+
+###############################################################################
+############################## PRIMER CLIPPING  ###############################
+###############################################################################
+
+###############################################################################
+rule bamclipper_amplicon_primers:
+    # Aim: soft-clip amplicon PCR primers from BAM alignments
+    # Use: bamclipper.sh -n [THREADS] -b [MARKDUP.bam] -p [PRIMER.bed] -u [UPSTREAM] -d [DOWNSTREAM]
+    # NOTE(review): BamClipper v1 writes its outputs into the current working
+    # directory, hence the trailing 'mv' commands — this assumes Snakemake runs
+    # from the workflow's top-level directory; confirm before changing workdir.
+    message:
+        """
+        ~ BAMClipper ∞ soft-clipping amplicon PCR primers from BAM alignments ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        BAMCLIPPER
+    resources:
+       cpus = CPUS
+    params:
+        path = CLIPPATH,
+        primers = PRIMERS,
+        upstream = UPSTREAM,
+        downstream = DOWNSTREAM
+    input:
+        markdup = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam",
+        index = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam.bai"
+    output:
+        bamclip = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.primerclipped.bam",
+        baiclip = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.primerclipped.bam.bai"
+    log:
+        "results/10_Reports/tools-log/bamclipper/{reference}/{sample}_{aligner}_primers-clip.log"
+    shell:
+        "bamclipper.sh "                          # BAMClipper: soft-clip primer sequences from BAM alignments of PCR amplicons
+        "-b {input.markdup} "                     # Indexed BAM alignment file
+        "-p {params.path}{params.primers}.bedpe " # BEDPE of primer pair locations
+        "-n {resources.cpus} "                    # Number of threads (default: 1)
+        "-u {params.upstream} "                   # Number of nuc. upstream for assigning alignments to primers (default: 5)
+        "-d {params.downstream} "                 # Number of nuc. downstream for assigning alignments to primers (default: 5)
+        #"-o results/02_Mapping/ "                 # Path to write output (if BamClipper v.1.1.2) (todo)
+        "&> {log} "                               # Log redirection
+        "&& mv {wildcards.sample}_{wildcards.aligner}_mark-dup.primerclipped.bam {output.bamclip} "    # because BamClipper v.1 default output system...
+        "&& mv {wildcards.sample}_{wildcards.aligner}_mark-dup.primerclipped.bam.bai {output.baiclip}" # because BamClipper v.1 default output system...
+
+###############################################################################
+############################# REMOVE DUPLICATES ###############################
+###############################################################################
+
+###############################################################################
+rule samtools_index_markdup:
+    # Aim: index the duplicate-marked BAM file (BAI format)
+    # Use: samtools index -@ [THREADS] -b [MARK-DUP.bam] [INDEX.bai]
+    message:
+        """
+        ~ SamTools ∞ Index 'Marked as Duplicate' BAM file ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        SAMTOOLS
+    resources:
+       cpus = CPUS
+    input:
+        mark_dup = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam"
+    output:
+        index = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam.bai"
+    log:
+        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_mark-dup-index.log"
+    shell:
+        "samtools index "      # Samtools index: index an alignment file
+        "-@ {resources.cpus} " # Number of additional threads (default: 1) (NB: the '--threads' long form doesn't work for this subcommand)
+        "-b "                  # -b: Generate BAI-format index for BAM files (default)
+        "{input.mark_dup} "    # Mark_dup bam input
+        "{output.index} "      # Mark_dup index output
+        "&> {log}"             # Log redirection
+
+###############################################################################
+rule samtools_markdup:
+    # Aim: duplicate handling — note that with '-r' duplicates are REMOVED,
+    #      not merely marked, despite the rule name
+    # Use: samtools markdup -@ [THREADS] -r -s -O BAM [SORTED.bam] [MARK-DUP.bam]
+    message:
+        """
+        ~ SamTools ∞ Mark Duplicate Alignments ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        SAMTOOLS
+    resources:
+       cpus = CPUS
+    input:
+        sorted = "results/02_Mapping/{reference}/{sample}_{aligner}_sorted.bam"
+    output:
+        mark_dup = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam"
+    log:
+        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_mark-dup.log"
+    shell:
+        "samtools markdup "           # Samtools markdup: mark (or remove) duplicate alignments
+        "--threads {resources.cpus} " # -@: Number of additional threads to use (default: 1)
+        "-r "                         # -r: Remove duplicate reads (not just mark them)
+        "-s "                         # -s: Report stats
+        "--output-fmt BAM "           # -O: Specify output format: SAM, BAM, CRAM (here, BAM)
+        "{input.sorted} "             # Sorted bam input
+        "{output.mark_dup} "          # Mark_dup bam output
+        "&> {log}"                    # Log redirection
+
+###############################################################################
+rule samtools_sorting:
+    # Aim: coordinate-sort the fixmate BAM (required downstream by markdup/index)
+    # Use: samtools sort -@ [THREADS] -m [MEM_GB] -T [TMP_DIR] -O BAM -o [SORTED.bam] [FIX-MATE.bam]
+    message:
+        """
+        ~ SamTools ∞ Sort BAM file ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        SAMTOOLS
+    resources:
+       cpus = CPUS,
+       mem_gb = MEM_GB,
+       tmp_dir = TMP_DIR
+    input:
+        fix_mate = "results/02_Mapping/{reference}/{sample}_{aligner}_fix-mate.bam"
+    output:
+        sorted = temp("results/02_Mapping/{reference}/{sample}_{aligner}_sorted.bam")
+    log:
+        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_sorted.log"
+    shell:
+        "samtools sort "              # Samtools sort: sort an alignment file
+        "--threads {resources.cpus} " # -@: Number of additional threads to use (default: 1)
+        "-m {resources.mem_gb}G "     # -m: Maximum memory per thread, suffix K/M/G recognized (default: 768M)
+        "-T {resources.tmp_dir} "     # -T: Write temporary files to PREFIX.nnnn.bam
+        "--output-fmt BAM "           # -O: Specify output format: SAM, BAM, CRAM (here, BAM)
+        "-o {output.sorted} "         # Sorted bam output
+        "{input.fix_mate} "           # Fixmate bam input
+        "&> {log}"                    # Log redirection
+
+###############################################################################
+rule samtools_fixmate:
+    # Aim: fill in mate coordinates and add mate score tags (input must be
+    #      name-sorted; the '-m' tag is what markdup uses downstream)
+    # Use: samtools fixmate -@ [THREADS] -m -O BAM [SORT-BY-NAMES.bam] [FIX-MATE.bam]
+    message:
+        """
+        ~ SamTools ∞ Fil Mate Coordinates ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        SAMTOOLS
+    resources:
+       cpus = CPUS
+    input:
+        sort_by_names = "results/02_Mapping/{reference}/{sample}_{aligner}_sort-by-names.bam"
+    output:
+        fix_mate = temp("results/02_Mapping/{reference}/{sample}_{aligner}_fix-mate.bam")
+    log:
+        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_fix-mate.log"
+    shell:
+        "samtools fixmate "           # Samtools fixmate: fix mate information
+        "--threads {resources.cpus} " # -@: Number of additional threads to use (default: 1)
+        "-m "                         # -m: Add mate score tag (required by 'samtools markdup')
+        "--output-fmt BAM "           # -O: Specify output format: SAM, BAM, CRAM (here, BAM)
+        "{input.sort_by_names} "      # Sort_by_names bam input
+        "{output.fix_mate} "          # Fix_mate bam output
+        "&> {log}"                    # Log redirection
+
+###############################################################################
+rule samtools_sortbynames:
+    # Aim: name-sort the mapped SAM (prerequisite for 'samtools fixmate')
+    # Use: samtools sort -@ [THREADS] -m [MEM_GB] -n -O BAM -o [SORT-BY-NAMES.bam] [MAPPED.sam]
+    message:
+        """
+        ~ SamTools ∞ Sort by Names BAM file ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        SAMTOOLS
+    resources:
+       cpus = CPUS,
+       mem_gb = MEM_GB
+    input:
+        mapped = "results/02_Mapping/{reference}/{sample}_{aligner}-mapped.sam"
+    output:
+        sort_by_names = temp("results/02_Mapping/{reference}/{sample}_{aligner}_sort-by-names.bam")
+    log:
+        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_sort-by-names.log"
+    shell:
+        "samtools sort "              # Samtools sort: sort an alignment file
+        "--threads {resources.cpus} " # -@: Number of additional threads to use (default: 1)
+        "-m {resources.mem_gb}G "     # -m: Maximum memory per thread, suffix K/M/G recognized (default: 768M)
+        "-n "                         # -n: Sort by read name (output is not usable with 'samtools index')
+        "--output-fmt BAM "           # -O: Specify output format: SAM, BAM, CRAM (here, BAM)
+        "-o {output.sort_by_names} "  # -o: Write final output to FILE rather than standard output
+        "{input.mapped} "             # Mapped reads input
+        "&> {log}"                    # Log redirection
+
+
+###############################################################################
+################################### MAPPING ###################################
+###############################################################################
+
+###############################################################################
+rule minimap2_mapping:
+    # Aim: map trimmed paired-end reads against the reference with Minimap2
+    # Use: minimap2 -x [PRESET] -t [THREADS] -a [INDEX.mmi] [FWD_R1.fq] [REV_R2.fq] 1> [MAPPED.sam]
+    message:
+        """
+        ~ Minimap2 ∞ Map Reads ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ MiniMap2
+        """
+    conda:
+        MINIMAP2
+    resources:
+        cpus = CPUS
+    params:
+        preset = MM2_PRESET
+        #length = LENGTH
+    input:
+        mm2_indexes = "resources/indexes/minimap2/{reference}.mmi",
+        fwd_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R1.fastq.gz",
+        rev_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R2.fastq.gz"
+    output:
+        mapped = temp("results/02_Mapping/{reference}/{sample}_minimap2-mapped.sam")
+    log:
+        "results/10_Reports/tools-log/minimap2/{sample}_{reference}.log"
+    shell:
+        "minimap2 "            # Minimap2, a versatile sequence alignment program
+        "-x {params.preset} "  # -x: preset (always applied before other options)
+        "-t {resources.cpus} " # -t: Number of threads (default: 3)
+        "-a "                  # -a: output in the SAM format (PAF by default)
+        #"-F {params.length} "  # -F: max fragment length, effective with -x sr mode (default: 800)
+        "{input.mm2_indexes} " # Reference index filename prefix.mmi
+                               # (-k, -w, -I and -H can't be changed during mapping)
+        #"resources/genomes/{wildcards.reference}.fasta " # Reference genome fasta format (for custom -kwIH)
+        #"-k {params.kmer_size} "      # -k: k-mer size (default: "21", no larger than "28") [INT]
+        #"-w {params.minimizer_size} " # -w: minimizer window size (default: "11") [INT]
+        #"-I {params.split_size} "     # -I: split index for every {NUM} input bases (default: "8G") [INT]
+        #"{params.homopolymer} "       # -H: use homopolymer-compressed k-mer (preferable for PacBio)
+        "{input.fwd_reads} "   # Forward input reads
+        "{input.rev_reads} "   # Reverse input reads
+        "1> {output.mapped} "  # SAM output
+        "2> {log}"             # Log redirection
+
+###############################################################################
+rule bwa_mapping:
+    # Aim: map trimmed paired-end reads against the reference with BWA-MEM
+    # Use: bwa mem -t [THREADS] -x [REFERENCE] [FWD_R1.fq] [REV_R2.fq] 1> [MAPPED.sam]
+    message:
+        """
+        ~ BWA-MEM ∞ Map Reads ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ BWA
+        """
+    conda:
+        BWA
+    resources:
+        cpus = CPUS
+    input:
+        bwa_indexes = "resources/indexes/bwa/{reference}",
+        fwd_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R1.fastq.gz",
+        rev_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R2.fastq.gz"
+    output:
+        mapped = temp("results/02_Mapping/{reference}/{sample}_bwa-mapped.sam")
+    log:
+        "results/10_Reports/tools-log/bwa/{sample}_{reference}.log"
+    shell:
+        "bwa mem "             # BWA-MEM algorithm, performs local alignment
+        "-t {resources.cpus} " # -t: Number of threads (default: 1)
+        "-v 1 "                # -v: Verbosity level: 1=error, 2=warning, 3=message, 4+=debugging
+        "{input.bwa_indexes} " # Reference index filename prefix
+        "{input.fwd_reads} "   # Forward input reads
+        "{input.rev_reads} "   # Reverse input reads
+        "1> {output.mapped} "  # SAM output
+        "2> {log}"             # Log redirection
+
+###############################################################################
+rule bowtie2_mapping:
+    # Aim: map trimmed paired-end reads against the reference with Bowtie2
+    # Use: bowtie2 -p [THREADS] -x [REFERENCE] -1 [FWD_R1.fq] -2 [REV_R2.fq] -S [MAPPED.sam]
+    message:
+        """
+        ~ Bowtie2 ∞ Map Reads ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ Bowtie2
+        """
+    conda:
+        BOWTIE2
+    resources:
+        cpus = CPUS
+    params:
+        sensitivity = BT2_SENSITIVITY
+    input:
+        bt2_indexes = "resources/indexes/bowtie2/{reference}",
+        fwd_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R1.fastq.gz",
+        rev_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R2.fastq.gz"
+    output:
+        mapped = temp("results/02_Mapping/{reference}/{sample}_bowtie2-mapped.sam")
+    log:
+        "results/10_Reports/tools-log/bowtie2/{sample}_{reference}.log"
+    shell:
+        "bowtie2 "                    # Bowtie2, memory-efficient aligner for reads against long references
+        "--threads {resources.cpus} " # -p: Number of alignment threads to launch (default: 1)
+        "--reorder "                  # Keep the original read order (when multithreaded with -p)
+        "-x {input.bt2_indexes} "     # -x: Reference index filename prefix (minus trailing .X.bt2)
+        "{params.sensitivity} "       # Preset (default: "--sensitive")
+                                       # sensitive: same as [-D 15 -R 2 -N 0 -L 22 -i S,1,1.15]
+        "-q "                         # -q: Query input files are FASTQ .fq/.fastq (default)
+        "-1 {input.fwd_reads} "       # Forward input reads
+        "-2 {input.rev_reads} "       # Reverse input reads
+        "1> {output.mapped} "         # SAM written to stdout and redirected (instead of -S FILE)
+        "2> {log}"                    # Log redirection
+
+###############################################################################
+################################## INDEXING ###################################
+###############################################################################
+
+###############################################################################
+rule minimap2_genome_indexing:
+    # Aim: build the Minimap2 .mmi index from the reference FASTA
+    # Use: minimap2 [OPTIONS] -d [INDEX.mmi] <query.fasta>
+    message:
+        """
+        ~ Minimap2 ∞ Index Genome ~
+        Reference: _______ {wildcards.reference}
+        """
+    conda:
+        MINIMAP2
+    params:
+        kmer_size = KMER_SIZE,
+        minimizer_size = MINIMIZER_SIZE,
+        split_size = SPLIT_SIZE
+        #homopolymer = HOMOPOLYMER
+    input:
+        fasta = "resources/genomes/{reference}.fasta"
+    output:
+        mm2_indexes = multiext("resources/indexes/minimap2/{reference}",
+                               ".mmi")
+    log:
+        "results/10_Reports/tools-log/minimap2-indexes/{reference}.log"
+    shell:
+        "minimap2 "                   # Minimap2, index sequences
+        "-k {params.kmer_size} "      # -k: k-mer size (default: "21", no larger than "28") [INT]
+        "-w {params.minimizer_size} " # -w: minimizer window size (default: "11") [INT]
+        "-I {params.split_size} "     # -I: split index for every {NUM} input bases (default: "8G") [INT]
+        #"{params.homopolymer} "       # -H: use homopolymer-compressed k-mer (preferable for PacBio)
+        "-d {output.mm2_indexes} "    # -d: dump index to FILE
+        "{input.fasta} "              # Reference sequences in the FASTA format
+        "&> {log}"                    # Log redirection
+
+###############################################################################
+rule bwa_genome_indexing:
+    # Aim: build BWA index files (.amb/.ann/.bwt/.pac/.sa) from the reference FASTA
+    # Use: bwa index -a [ALGO] -p [PREFIX] <genome.fasta>
+    # NOTE: 'output.prefix' is a placeholder file touched at the end so Snakemake
+    # can track the index prefix path itself (bwa only creates the suffixed files).
+    message:
+        """
+        ~ BWA-SW ∞ Index Genome ~
+        Reference: _______ {wildcards.reference}
+        """
+    conda:
+        BWA
+    params:
+        algorithm = BWA_ALGO
+    input:
+        fasta = "resources/genomes/{reference}.fasta"
+    output:
+        prefix = "resources/indexes/bwa/{reference}",
+        bwa_indexes = multiext("resources/indexes/bwa/{reference}",
+                               ".amb", ".ann", ".bwt", ".pac", ".sa")
+    log:
+        "results/10_Reports/tools-log/bwa-indexes/{reference}.log"
+    shell:
+        "bwa index "               # BWA index, build FM-index from the reference
+        "{params.algorithm} "      # -a: Algorithm for constructing BWT index (default: auto)
+        "-p {output.prefix} "      # -p: Prefix of the output database
+        "{input.fasta} "           # Reference sequences in the FASTA format
+        "&> {log} "                # Log redirection
+        "&& touch {output.prefix}" # Create the placeholder prefix file for Snakemake tracking
+
+###############################################################################
+rule bowtie2_genome_indexing:
+    # Aim: build Bowtie2 .bt2 index files from the reference FASTA
+    # Use: bowtie2-build [OPTIONS] <reference_in> <bt2_index_base>
+    # NOTE: 'output.prefix' is a placeholder file touched at the end so Snakemake
+    # can track the index prefix path itself (bowtie2-build only creates .bt2 files).
+    message:
+        """
+        ~ Bowtie2-build ∞ Index Genome ~
+        Reference: _______ {wildcards.reference}
+        """
+    conda:
+        BOWTIE2
+    resources:
+        cpus = CPUS
+    params:
+        algorithm = BT2_ALGO
+    input:
+        fasta = "resources/genomes/{reference}.fasta"
+    output:
+        prefix = "resources/indexes/bowtie2/{reference}",
+        bt2_indexes = multiext("resources/indexes/bowtie2/{reference}",
+                               ".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2",
+                               ".rev.1.bt2", ".rev.2.bt2")
+    log:
+        "results/10_Reports/tools-log/bowtie2-indexes/{reference}.log"
+    shell:
+        "bowtie2-build "              # Bowtie2-build, index sequences
+        "--quiet "                    # -q: quiet
+        "--threads {resources.cpus} " # Number of threads
+        "{params.algorithm} "         # Force (or not, by default) the generated index to be 'large',
+                                       # even if ref has fewer than 4 billion nucleotides
+        "-f "                         # Reference files are FASTA (default)
+        "{input.fasta} "              # Reference sequence files (comma-separated list) in the FASTA format
+        "{output.prefix} "            # Write bt2 data to files with this dir/basename
+        "&> {log} "                   # Log redirection
+        "&& touch {output.prefix}"    # Create the placeholder prefix file for Snakemake tracking
+
+
+###############################################################################
+################################## TRIMMING ###################################
+###############################################################################
+
+###############################################################################
+rule sickle_trim_quality:
+    # Aim: windowed adaptive quality trimming of paired-end FASTQ files
+    # Use: sickle [COMMAND] [OPTIONS]
+    message:
+        """
+        ~ Sickle-trim ∞ Trim Low Quality Sequences ~
+        Sample: __________ {wildcards.sample}
+        """
+    conda:
+        SICKLE_TRIM
+    params:
+        command = SIC_COMMAND,
+        encoding = SIC_ENCODING,
+        quality = SIC_QUALITY,
+        length = SIC_LENGTH
+    input:
+        fwd_reads = "results/01_Trimming/cutadapt/{sample}_cutadapt-removed_R1.fastq.gz",
+        rev_reads = "results/01_Trimming/cutadapt/{sample}_cutadapt-removed_R2.fastq.gz"
+    output:
+        fwd_reads = stash_or_trash("results/01_Trimming/sickle/{sample}_sickle-trimmed_R1.fastq.gz"),
+        rev_reads = stash_or_trash("results/01_Trimming/sickle/{sample}_sickle-trimmed_R2.fastq.gz"),
+        single = stash_or_trash("results/01_Trimming/sickle/{sample}_sickle-trimmed_SE.fastq.gz")
+    log:
+        "results/10_Reports/tools-log/sickle-trim/{sample}.log"
+    shell:
+        "sickle "                # Sickle, windowed adaptive trimming tool for FASTQ files using quality
+        "{params.command} "      # 'pe' or 'se': paired-end or single-end trimming mode
+        "-t {params.encoding} "  # --qual-type: quality encoding: solexa (<1.3) ; illumina (1.3-1.7) ; sanger (CASAVA >= 1.8)
+        "-q {params.quality} "   # --qual-threshold: Threshold for trimming based on average quality in a window (default: 20)
+        "-l {params.length} "    # --length-threshold: Threshold to keep a read based on length after trimming (default: 20)
+        "-f {input.fwd_reads} "  # --pe-file1: Input paired-end forward fastq file
+        "-r {input.rev_reads} "  # --pe-file2: Input paired-end reverse fastq file
+        "-g "                    # --gzip-output: Output gzipped files
+        "-o {output.fwd_reads} " # --output-pe1: Output trimmed forward fastq file
+        "-p {output.rev_reads} " # --output-pe2: Output trimmed reverse fastq file (must use -s option)
+        "-s {output.single} "    # --output-single: Output trimmed singles fastq file (mates whose pair was discarded)
+        "&> {log} "              # Log redirection
+
+###############################################################################
+rule cutadapt_adapters_removing:
+    # Aim: find and remove adapter sequences, primers, poly-A tails and other
+    #      unwanted sequence from high-throughput sequencing reads
+    # Use: cutadapt [OPTIONS] -a/-A [ADAPTER] -o [OUT_FWD.fastq.gz] -p [OUT_REV.fastq.gz] [IN_FWD.fastq.gz] [IN_REV.fastq.gz]
+    # Rmq: multiple adapter sequences can be given with repeated -a/-A options,
+    #      but only the best-matching adapter is removed from each read
+    message:
+        """
+        ~ Cutadapt ∞ Remove Adapters ~
+        Sample: __________ {wildcards.sample}
+        """
+    conda:
+        CUTADAPT
+    resources:
+        cpus = CPUS
+    params:
+        length = CUT_LENGTH,
+        truseq = CUT_TRUSEQ,
+        nextera = CUT_NEXTERA,
+        small = CUT_SMALL,
+        cut = CUT_CLIPPING
+    input:
+        fwd_reads = "resources/reads/{sample}_R1.fastq.gz",
+        rev_reads = "resources/reads/{sample}_R2.fastq.gz"
+        #fwd_reads = "/users/illumina/local/data/run_1/FASTQ/{sample}_R1.fastq.gz",
+        #rev_reads = "/users/illumina/local/data/run_1/FASTQ/{sample}_R2.fastq.gz"
+    output:
+        fwd_reads = temp("results/01_Trimming/cutadapt/{sample}_cutadapt-removed_R1.fastq.gz"),
+        rev_reads = temp("results/01_Trimming/cutadapt/{sample}_cutadapt-removed_R2.fastq.gz")
+    log:
+        "results/10_Reports/tools-log/cutadapt/{sample}.log"
+    shell:
+        "cutadapt "                           # Cutadapt, removes unwanted sequence from HT-seq reads
+        "--cores {resources.cpus} "           # -j: Number of CPU cores to use. Use 0 to auto-detect (default: 1)
+        "--cut {params.cut} "                 # -u: Remove 'n' first bases (5') from forward R1 (hard-clipping, default: 0)
+        "-U {params.cut} "                    # -U: Remove 'n' first bases (5') from reverse R2 (hard-clipping, default: 0)
+        "--trim-n "                           # --trim-n: Trim N's on ends (3') of reads
+        "--minimum-length {params.length} "   # -m: Discard reads shorter than length
+        "--adapter {params.truseq} "          # -a: TruSeq adapter ligated to the 3' end of the first read
+        "-A {params.truseq} "                 # -A: TruSeq 3' adapter to be removed from second read in a pair
+        "--adapter {params.nextera} "         # -a: Nextera adapter ligated to the 3' end of the first read
+        "-A {params.nextera} "                # -A: Nextera 3' adapter to be removed from second read in a pair
+        "--adapter {params.small} "           # -a: small-RNA adapter ligated to the 3' end of the first read
+        "-A {params.small} "                  # -A: small-RNA 3' adapter to be removed from second read in a pair
+        "--output {output.fwd_reads} "        # -o: Write trimmed reads to FILE
+        "--paired-output {output.rev_reads} " # -p: Write second read in a pair to FILE
+        "{input.fwd_reads} "                  # Input forward reads R1.fastq
+        "{input.rev_reads} "                  # Input reverse reads R2.fastq
+        "&> {log} "                           # Log redirection
+
+###############################################################################
+############################### QUALITY CONTROL ###############################
+###############################################################################
+
+###############################################################################
+rule multiqc_reports_aggregation:
+    # Aim: aggregate all QC analysis results into a single HTML report
+    # Use: multiqc [OPTIONS] --output [MULTIQC/] [FASTQC/] [MULTIQC/]
+    priority: 999 # Explicit high priority: build the report as soon as its inputs exist
+    message:
+        """
+        ~ MultiQC ∞ Aggregat HTML Qualities Reports ~
+        """
+    conda:
+        MULTIQC
+    # NOTE: a 'params:' directive whose entries are all commented out is an empty
+    # directive, which the Snakemake parser rejects; keep the keyword commented
+    # out until at least one entry below is re-enabled.
+    #params:
+        #config = MQC_CONFIG,
+        #tag = TAG
+    input:
+        fastqc = expand("results/00_Quality_Control/fastqc/{fastq}/",
+                        fastq = FASTQ),
+        fastq_screen = expand("results/00_Quality_Control/fastq-screen/{fastq}/",
+                             fastq = FASTQ)
+    output:
+        multiqc = directory("results/00_Quality_Control/multiqc/")
+    log:
+        "results/10_Reports/tools-log/multiqc.log"
+    shell:
+        "multiqc "                   # Multiqc, searches given directories for analyses & compiles an HTML report
+        "--quiet "                   # -q: Only show log warnings
+        "--no-ansi "                 # Disable coloured log output
+        #"--config {params.config} "  # Specific config file to load
+        #"--tag {params.tag} "        # Use only modules tagged with this keyword
+        #"--pdf "                     # Creates PDF report with 'simple' template (requires xelatex)
+        "--export "                  # Export plots as static images in addition to the report
+        "--outdir {output.multiqc} " # -o: Create report in the specified output directory
+        "{input.fastqc} "            # Input FastQC directories
+        "{input.fastq_screen} "      # Input Fastq-Screen directories
+        "&> {log}"                   # Log redirection
+
+###############################################################################
+rule fastqscreen_contamination_checking:
+    # Aim: screen whether the composition of the library matches expectations
+    #      (i.e. detect contamination against a panel of reference genomes)
+    # Use: fastq_screen [OPTIONS] --outdir [DIR/] [FASTQ.GZ]
+    message:
+        """
+        ~ Fasts-Screen ∞ Screen Contamination ~
+        Fastq: __________ {wildcards.fastq}
+        """
+    conda:
+        FASTQ_SCREEN
+    resources:
+        cpus = CPUS
+    params:
+        config = FQC_CONFIG,
+        subset = SUBSET
+    input:
+        fastq = "resources/reads/{fastq}.fastq.gz"
+    output:
+        fastq_screen = directory("results/00_Quality_Control/fastq-screen/{fastq}/")
+    log:
+        "results/10_Reports/tools-log/fastq-screen/{fastq}.log"
+    shell:
+        "fastq_screen "                   # FastQ-Screen, screen a library against a set of reference genomes
+        "-q "                             # --quiet: Only show log warnings
+        "--threads {resources.cpus} "     # --threads: How many aligner instances may run concurrently
+        "--aligner 'bwa' "                # -a: choose aligner 'bowtie', 'bowtie2', 'bwa'
+        "--conf {params.config} "         # Path to the configuration file (lists the screening genomes)
+        "--subset {params.subset} "       # Screen only a subset of this size instead of the whole file
+        "--outdir {output.fastq_screen} " # Output directory
+        "{input.fastq} "                  # Input file.fastq
+        "&> {log}"                        # Log redirection
+
+###############################################################################
+rule fastqc_quality_control:
+    # Aim: reads sequence files and produces a quality control report
+    # Use: fastqc [OPTIONS] --output [DIR/] [FASTQ.GZ]
+    message:
+        """
+        ~ FastQC ∞ Quality Control ~
+        Fastq: __________ {wildcards.fastq}
+        """
+    conda:
+        FASTQC
+    resources:
+        cpus = CPUS
+    input:
+        fastq = "resources/reads/{fastq}.fastq.gz"
+    output:
+        fastqc = directory("results/00_Quality_Control/fastqc/{fastq}/")
+    log:
+        "results/10_Reports/tools-log/fastqc/{fastq}.log"
+    shell:
+        "mkdir -p {output.fastqc} "    # (*) this directory must exist as the program will not create it
+        "2> /dev/null && "             # silence mkdir, then run FastQC only if it succeeded
+        "fastqc "                    # FastQC, a high throughput sequence QC analysis tool
+        "--quiet "                    # -q: Suppress all progress messages on stdout and only report errors
+        "--threads {resources.cpus} " # -t: Number of files that can be processed simultaneously
+        "--outdir {output.fastqc} "   # -o: Create all output files in the specified output directory (*)
+        "{input.fastq} "              # Input file.fastq
+        "&> {log}"                    # Log redirection
+
+###############################################################################
diff --git a/workflow/environments/bamclipper_v.1.0.0.yaml b/workflow/envs/bamclipper_v.1.0.0.yaml
similarity index 100%
rename from workflow/environments/bamclipper_v.1.0.0.yaml
rename to workflow/envs/bamclipper_v.1.0.0.yaml
diff --git a/workflow/environments/bcftools_v.1.20.yaml b/workflow/envs/bcftools_v.1.20.yaml
similarity index 100%
rename from workflow/environments/bcftools_v.1.20.yaml
rename to workflow/envs/bcftools_v.1.20.yaml
diff --git a/workflow/environments/bedtools_v.2.31.1.yaml b/workflow/envs/bedtools_v.2.31.1.yaml
similarity index 100%
rename from workflow/environments/bedtools_v.2.31.1.yaml
rename to workflow/envs/bedtools_v.2.31.1.yaml
diff --git a/workflow/environments/bowtie2_v.2.5.4.yaml b/workflow/envs/bowtie2_v.2.5.4.yaml
similarity index 100%
rename from workflow/environments/bowtie2_v.2.5.4.yaml
rename to workflow/envs/bowtie2_v.2.5.4.yaml
diff --git a/workflow/environments/bwa_v.0.7.18.yaml b/workflow/envs/bwa_v.0.7.18.yaml
similarity index 100%
rename from workflow/environments/bwa_v.0.7.18.yaml
rename to workflow/envs/bwa_v.0.7.18.yaml
diff --git a/workflow/environments/cutadapt_v.4.9.yaml b/workflow/envs/cutadapt_v.4.9.yaml
similarity index 100%
rename from workflow/environments/cutadapt_v.4.9.yaml
rename to workflow/envs/cutadapt_v.4.9.yaml
diff --git a/workflow/environments/fastq-screen_v.0.15.3.yaml b/workflow/envs/fastq-screen_v.0.15.3.yaml
similarity index 100%
rename from workflow/environments/fastq-screen_v.0.15.3.yaml
rename to workflow/envs/fastq-screen_v.0.15.3.yaml
diff --git a/workflow/environments/fastqc_v.0.12.1.yaml b/workflow/envs/fastqc_v.0.12.1.yaml
similarity index 100%
rename from workflow/environments/fastqc_v.0.12.1.yaml
rename to workflow/envs/fastqc_v.0.12.1.yaml
diff --git a/workflow/environments/gawk_v.5.3.0.yaml b/workflow/envs/gawk_v.5.3.0.yaml
similarity index 100%
rename from workflow/environments/gawk_v.5.3.0.yaml
rename to workflow/envs/gawk_v.5.3.0.yaml
diff --git a/workflow/environments/ivar_v.1.4.3.yaml b/workflow/envs/ivar_v.1.4.3.yaml
similarity index 100%
rename from workflow/environments/ivar_v.1.4.3.yaml
rename to workflow/envs/ivar_v.1.4.3.yaml
diff --git a/workflow/environments/minimap2_v.2.28.yaml b/workflow/envs/minimap2_v.2.28.yaml
similarity index 100%
rename from workflow/environments/minimap2_v.2.28.yaml
rename to workflow/envs/minimap2_v.2.28.yaml
diff --git a/workflow/environments/multiqc_v.1.23.yaml b/workflow/envs/multiqc_v.1.23.yaml
similarity index 100%
rename from workflow/environments/multiqc_v.1.23.yaml
rename to workflow/envs/multiqc_v.1.23.yaml
diff --git a/workflow/environments/nextclade_v.3.9.1.yaml b/workflow/envs/nextclade_v.3.9.1.yaml
similarity index 100%
rename from workflow/environments/nextclade_v.3.9.1.yaml
rename to workflow/envs/nextclade_v.3.9.1.yaml
diff --git a/workflow/environments/pangolin_v.4.3.yaml b/workflow/envs/pangolin_v.4.3.yaml
similarity index 100%
rename from workflow/environments/pangolin_v.4.3.yaml
rename to workflow/envs/pangolin_v.4.3.yaml
diff --git a/workflow/environments/samtools_v.1.20.yaml b/workflow/envs/samtools_v.1.20.yaml
similarity index 100%
rename from workflow/environments/samtools_v.1.20.yaml
rename to workflow/envs/samtools_v.1.20.yaml
diff --git a/workflow/environments/sickle-trim_v.1.33.yaml b/workflow/envs/sickle-trim_v.1.33.yaml
similarity index 100%
rename from workflow/environments/sickle-trim_v.1.33.yaml
rename to workflow/envs/sickle-trim_v.1.33.yaml
diff --git a/workflow/environments/tsv2vcf_v.1.1.yaml b/workflow/envs/tsv2vcf_v.1.1.yaml
similarity index 100%
rename from workflow/environments/tsv2vcf_v.1.1.yaml
rename to workflow/envs/tsv2vcf_v.1.1.yaml
diff --git a/workflow/environments/workflow-base_v.2024.11.yaml b/workflow/envs/workflow-base_v.2024.11.yaml
similarity index 100%
rename from workflow/environments/workflow-base_v.2024.11.yaml
rename to workflow/envs/workflow-base_v.2024.11.yaml
diff --git a/workflow/snakefiles/gevarli.smk b/workflow/rules/gevarli.smk
similarity index 90%
rename from workflow/snakefiles/gevarli.smk
rename to workflow/rules/gevarli.smk
index 501a422..ba9ad8c 100755
--- a/workflow/snakefiles/gevarli.smk
+++ b/workflow/rules/gevarli.smk
@@ -203,6 +203,13 @@ FQC_CONFIG = config["fastq_screen"]["config"] # Fastq-Screen --conf
 #MQC_CONF = config["multiqc"]["config"]        # MultiQC --conf
 #TAG = config["multiqc"]["tag"]                # MultiQC --tag
 
+KMER_SIZE = config["minimap2"]["algorithm"]["k-mer_size"]          # MM2 k-mer size
+MINIMIZER_SIZE = config["minimap2"]["algorithm"]["minimizer_size"] # MM2 minimizer window size
+SPLIT_SIZE = config["minimap2"]["algorithm"]["split_size"]         # MM2 split index
+#HOMOPOLYMER = config["minimap2"]["algorithm"]["homopolymer"]       # MM2 if PacBio
+BWA_ALGO = config["bwa"]["algorithm"]     # BWA indexing algorithm
+BT2_ALGO = config["bowtie2"]["algorithm"] # BT2 indexing algorithm
+
 REFERENCE = config["consensus"]["reference"] # Genome reference sequence, in fasta format
 REF_PATH = config["consensus"]["path"]       # Path to genomes references
 MIN_COV = config["consensus"]["min_cov"]     # Minimum coverage, mask lower regions with 'N'
@@ -1046,10 +1053,10 @@ rule minimap2_mapping:
     resources:
         cpus = CPUS
     params:
-        mm2_path = MM2_PATH,
         preset = MM2_PRESET
         #length = LENGTH
     input:
+        mm2_indexes = "resources/indexes/minimap2/{reference}.mmi",
         fwd_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R1.fastq.gz",
         rev_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R2.fastq.gz"
     output:
@@ -1057,21 +1064,22 @@ rule minimap2_mapping:
     log:
         "results/10_Reports/tools-log/minimap2/{sample}_{reference}.log"
     shell:
-        "minimap2 "                                  # Minimap2, a versatile sequence alignment program
-        "-x {params.preset} "                         # -x: presets (always applied before other options)
-        "-t {resources.cpus} "                        # -t: Number of threads (default: 3)
-        "-a "                                         # -a: output in the SAM format (PAF by default)
-        #"-F {params.length} "                         # -F: max fragment length, effective with -x sr mode (default: 800)
-        "{params.mm2_path}{wildcards.reference}.mmi " # Reference index filename prefix.mmi (-k, -w, -I and -H can't be changed during mapping)
+        "minimap2 "           # Minimap2, a versatile sequence alignment program
+        "-x {params.preset} "  # -x: presets (always applied before other options)
+        "-t {resources.cpus} " # -t: Number of threads (default: 3)
+        "-a "                  # -a: output in the SAM format (PAF by default)
+        #"-F {params.length} "  # -F: max fragment length, effective with -x sr mode (default: 800)
+        "{input.mm2_indexes} " # Reference index filename prefix.mmi
+                               # (-k, -w, -I and -H can't be changed during mapping)
         #"resources/genomes/{wildcards.reference}.fasta " # Reference genome fasta format (for custom -kwIH)
-        #"-k {params.kmer_size} "                          # -k: k-mer size (default: "21", no larger than "28") [INT]
-        #"-w {params.minimizer_size} "                     # -w: minimizer window size (default: "11") [INT]
-        #"-I {params.split_size} "                         # -I: split index for every {NUM} input bases (default: "8G") [INT]
-        #"{params.homopolymer} "                           # -H: use homopolymer-compressed k-mer (preferrable for PacBio)
-        "{input.fwd_reads} "                          # Forward input reads
-        "{input.rev_reads} "                          # Reverse input reads
-        "1> {output.mapped} "                         # SAM output
-        "2> {log}"                                    # Log redirection 
+        #"-k {params.kmer_size} "      # -k: k-mer size (default: "21", no larger than "28") [INT]
+        #"-w {params.minimizer_size} " # -w: minimizer window size (default: "11") [INT]
+        #"-I {params.split_size} "     # -I: split index for every {NUM} input bases (default: "8G") [INT]
+        #"{params.homopolymer} "       # -H: use homopolymer-compressed k-mer (preferrable for PacBio)
+        "{input.fwd_reads} "   # Forward input reads
+        "{input.rev_reads} "   # Reverse input reads
+        "1> {output.mapped} "  # SAM output
+        "2> {log}"             # Log redirection 
 
 ###############################################################################
 rule bwa_mapping:
@@ -1088,9 +1096,8 @@ rule bwa_mapping:
         BWA
     resources:
         cpus = CPUS
-    params:
-        bwa_path = BWA_PATH
     input:
+        bwa_indexes = "resources/indexes/bwa/{reference}",
         fwd_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R1.fastq.gz",
         rev_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R2.fastq.gz"
     output:
@@ -1098,14 +1105,14 @@ rule bwa_mapping:
     log:
         "results/10_Reports/tools-log/bwa/{sample}_{reference}.log"
     shell:
-        "bwa mem "                               # BWA-MEM algorithm, performs local alignment
-        "-t {resources.cpus} "                    # -t: Number of threads (default: 12)
-        "-v 1 "                                   # -v: Verbosity level: 1=error, 2=warning, 3=message, 4+=debugging
-        "{params.bwa_path}{wildcards.reference} " # Reference index filename prefix
-        "{input.fwd_reads} "                      # Forward input reads
-        "{input.rev_reads} "                      # Reverse input reads
-        "1> {output.mapped} "                     # SAM output
-        "2> {log}"                                # Log redirection 
+        "bwa mem "            # BWA-MEM algorithm, performs local alignment
+        "-t {resources.cpus} " # -t: Number of threads (default: 12)
+        "-v 1 "                # -v: Verbosity level: 1=error, 2=warning, 3=message, 4+=debugging
+        "{input.bwa_indexes} " # Reference index filename prefix
+        "{input.fwd_reads} "   # Forward input reads
+        "{input.rev_reads} "   # Reverse input reads
+        "1> {output.mapped} "  # SAM output
+        "2> {log}"             # Log redirection 
 
 ###############################################################################
 rule bowtie2_mapping:
@@ -1123,9 +1130,9 @@ rule bowtie2_mapping:
     resources:
         cpus = CPUS
     params:
-        bt2_path = BT2_PATH,
         sensitivity = BT2_SENSITIVITY
     input:
+        bt2_indexes = "resources/indexes/bowtie2/{reference}",
         fwd_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R1.fastq.gz",
         rev_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R2.fastq.gz"
     output:
@@ -1136,14 +1143,118 @@ rule bowtie2_mapping:
         "bowtie2 "                   # Bowtie2, an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences
         "--threads {resources.cpus} " # -p: Number of alignment threads to launch (default: 1)
         "--reorder "                  # Keep the original read order (if multi-processor option -p is used)
-        "-x {params.bt2_path}{wildcards.reference} " # -x: Reference index filename prefix (minus trailing .X.bt2) [Bowtie-1 indexes are not compatible]
-        "{params.sensitivity} "       # Preset (default: "--sensitive", same as [-D 15 -R 2 -N 0 -L 22 -i S,1,1.15]) 
+        "-x {input.bt2_indexes} "     # -x: Reference index filename prefix (minus trailing .X.bt2)
+        "{params.sensitivity} "       # Preset (default: "--sensitive")
+                                       # sensitive: same as [-D 15 -R 2 -N 0 -L 22 -i S,1,1.15]
         "-q "                         # -q: Query input files are FASTQ .fq/.fastq (default)
         "-1 {input.fwd_reads} "       # Forward input reads
         "-2 {input.rev_reads} "       # Reverse input reads
         "1> {output.mapped} "         # -S: File for SAM output (default: stdout) 
         "2> {log}"                    # Log redirection 
 
+###############################################################################
+################################## INDEXING ###################################
+###############################################################################
+
+###############################################################################
+rule minimap2_genome_indexing:
+    # Aim: index sequences in the FASTA format
+    # Use: minimap2 [OPTIONS] -d [INDEX.mmi] <query.fasta>
+    message:
+        """
+        ~ Minimap2 ∞ Index Genome ~
+        Reference: _______ {wildcards.reference}
+        """
+    conda:
+        MINIMAP2
+    params:
+        kmer_size = KMER_SIZE,
+        minimizer_size = MINIMIZER_SIZE,
+        split_size = SPLIT_SIZE
+        #homopolymer = HOMOPOLYMER
+    input:
+        fasta = "resources/genomes/{reference}.fasta"
+    output:
+        mm2_indexes = multiext("resources/indexes/minimap2/{reference}",
+                           ".mmi")
+    log:
+        "results/10_Reports/tools-log/minimap2-indexes/{reference}.log"
+    shell:
+        "minimap2 "                  # Minimap2, index sequences
+        "-k {params.kmer_size} "      # -k: k-mer size (default: "21", no larger than "28") [INT]
+        "-w {params.minimizer_size} " # -w: minimizer window size (default: "11") [INT]
+        "-I {params.split_size} "     # -I: split index for every {NUM} input bases (default: "8G") [INT]
+        #"{params.homopolymer} "       # use homopolymer-compressed k-mer (preferrable for PacBio)
+        "-d {output.indexes} "        # -d: dump index to FILE []
+        "{input.fasta} "              # Reference sequences in the FASTA format
+        "&> {log}"                    # Log redirection
+
+###############################################################################
+rule bwa_genome_indexing:
+    # Aim: index sequences in the FASTA format
+    # Use: bwa index -a [ALGO] -p [PREFIX] <genome.fasta>
+    message:
+        """
+        ~ BWA-SW ∞ Index Genome ~
+        Reference: _______ {wildcards.reference}
+        """
+    conda:
+        BWA
+    params:
+        algorithm = BWA_ALGO
+    input:
+        fasta = "resources/genomes/{reference}.fasta"
+    output:
+        prefix = temp("resources/indexes/bwa/{reference}"),
+        indexes = multiext("resources/indexes/bwa/{ref_seq}",
+                           ".amb", ".ann", ".bwt", ".pac", ".sa")
+    log:
+        "results/10_Reports/tools-log/bwa-indexes/{reference}.log"
+    shell:
+        "bwa index "              # BWA-SW algorithm, index sequences
+        "{params.algorithm} "      # -a: Algorithm for constructing BWT index (default: auto)                                  
+        "-p {output.prefix} "      # -p: Prefix of the output database
+        "{input.fasta} "           # Reference sequences in the FASTA format
+        "&> {log} "                # Log redirection
+        "&& touch {output.prefix}" # Touch done
+
+###############################################################################
+rule bowtie2_genome_indexing:
+    # Aim: index sequences in the FASTA format
+    # Use: bowtie2-build [OPTIONS] <reference_in> <bt2_index_base>
+    message:
+        """
+        ~ Bowtie2-build ∞ Index Genome ~
+        Reference: _______ {wildcards.reference}
+        """
+    conda:
+        BOWTIE2
+    resources:
+        cpus = CPUS
+    params:
+        algorithm = BT2_ALGO
+    input:
+        fasta = "resources/genomes/{reference}.fasta"
+    output:
+        prefix = temp("resources/indexes/bowtie2/{reference}"),
+        indexes = multiext("resources/indexes/bowtie2/{ref_seq}",
+                           ".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2",
+                           ".rev.1.bt2", ".rev.2.bt2")
+    log:
+        "results/10_Reports/tools-log/bowtie2-indexes/{reference}.log"
+    shell:
+        "bowtie2-build "             # Bowtie2-build, index sequences
+        "--quiet "                    # -q: quiet
+        "--threads {resources.cpus} " # Number of threads
+        "{params.algorithm} "         # Force (or no by default) generated index to be 'large',
+                                       # even if ref has fewer than 4 billion nucleotides
+        "-f "                         # Reference files are FASTA (default)
+        "{input.fasta} "              # Reference sequences files (comma-separated list) in the FASTA format
+        "{output.prefix} "            # Write bt2 data to files with this dir/basename
+        "&> {log} "                   # Log redirection
+        "&& touch {output.prefix}"    # Touch done
+
+
 ###############################################################################
 ################################## TRIMMING ###################################
 ###############################################################################
diff --git a/workflow/snakefiles/indexing_genomes.smk b/workflow/rules/indexing_genomes.smk
similarity index 100%
rename from workflow/snakefiles/indexing_genomes.smk
rename to workflow/rules/indexing_genomes.smk
diff --git a/workflow/snakefiles/quality_control.smk b/workflow/rules/quality_control.smk
similarity index 100%
rename from workflow/snakefiles/quality_control.smk
rename to workflow/rules/quality_control.smk
-- 
GitLab