From a13f7f5df6b3ac131a9c3d6ce8e65d79f74d0694 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20FERNANDEZ=20NU=C3=91EZ?=
 <nicolas.fernandez@ird.fr>
Date: Mon, 13 Jan 2025 17:42:48 +0100
Subject: [PATCH] Feat: return to the modular rules.smk

---
 .gitlab-ci.yml => .gitlab-ci.yaml             |    0
 .snakemake-workflow-catalog.yml               |    4 +
 {resources/data_test => .test}/.gitkeep       |    0
 ...-Seq-Lib-on-MiSeq_250000-reads_R1.fastq.gz |  Bin
 ...-Seq-Lib-on-MiSeq_250000-reads_R2.fastq.gz |  Bin
 CHANGELOG.md                                  |    0
 Run_GeVarLi.sh                                |    4 +-
 config/README.md                              |    0
 {configuration => config}/config.yaml         |   34 +-
 {configuration => config}/fastq-screen.conf   |    0
 .../multiqc/default.yaml                      |    0
 workflow/Snakefile                            | 1354 +----------------
 .../{rules/gevarli.smk => Snakefile_SAVE}     |  107 +-
 workflow/report/workflow.rst                  |    0
 workflow/rules/consensus_calling.smk          |   33 +
 workflow/rules/coverage_stats.smk             |  221 +++
 workflow/rules/indexing_genomes.smk           |  161 +-
 workflow/rules/lineages_calling.smk           |   74 +
 workflow/rules/mask_lowcov.smk                |   86 ++
 workflow/rules/primer_cleaping.smk            |   45 +
 ...ality_control.smk => quality_controls.smk} |   69 +-
 workflow/rules/reads_cleaning.smk             |   91 ++
 workflow/rules/reads_mapping.smk              |  120 ++
 workflow/rules/remove_duplicates.smk          |  160 ++
 workflow/rules/variant_calling.smk            |  142 ++
 workflow/schemas/config.schema.yaml           |    0
 .../__pycache__/__init__.cpython-312.pyc      |  Bin 0 -> 154 bytes
 .../__pycache__/functions.cpython-312.pyc     |  Bin 0 -> 4247 bytes
 workflow/scripts/functions.py                 |  127 ++
 29 files changed, 1275 insertions(+), 1557 deletions(-)
 rename .gitlab-ci.yml => .gitlab-ci.yaml (100%)
 create mode 100644 .snakemake-workflow-catalog.yml
 rename {resources/data_test => .test}/.gitkeep (100%)
 rename {resources/data_test => .test}/SARS-CoV-2_Omicron-BA1_Covid-Seq-Lib-on-MiSeq_250000-reads_R1.fastq.gz (100%)
 rename {resources/data_test => .test}/SARS-CoV-2_Omicron-BA1_Covid-Seq-Lib-on-MiSeq_250000-reads_R2.fastq.gz (100%)
 create mode 100644 CHANGELOG.md
 create mode 100644 config/README.md
 rename {configuration => config}/config.yaml (90%)
 rename {configuration => config}/fastq-screen.conf (100%)
 rename {configuration => config}/multiqc/default.yaml (100%)
 rename workflow/{rules/gevarli.smk => Snakefile_SAVE} (95%)
 mode change 100755 => 100644
 create mode 100644 workflow/report/workflow.rst
 create mode 100644 workflow/rules/consensus_calling.smk
 create mode 100644 workflow/rules/coverage_stats.smk
 mode change 100755 => 100644 workflow/rules/indexing_genomes.smk
 create mode 100644 workflow/rules/lineages_calling.smk
 create mode 100644 workflow/rules/mask_lowcov.smk
 create mode 100644 workflow/rules/primer_cleaping.smk
 rename workflow/rules/{quality_control.smk => quality_controls.smk} (63%)
 mode change 100755 => 100644
 create mode 100644 workflow/rules/reads_cleaning.smk
 create mode 100644 workflow/rules/reads_mapping.smk
 create mode 100644 workflow/rules/remove_duplicates.smk
 create mode 100644 workflow/rules/variant_calling.smk
 create mode 100644 workflow/schemas/config.schema.yaml
 create mode 100644 workflow/scripts/__pycache__/__init__.cpython-312.pyc
 create mode 100644 workflow/scripts/__pycache__/functions.cpython-312.pyc
 create mode 100644 workflow/scripts/functions.py

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yaml
similarity index 100%
rename from .gitlab-ci.yml
rename to .gitlab-ci.yaml
diff --git a/.snakemake-workflow-catalog.yml b/.snakemake-workflow-catalog.yml
new file mode 100644
index 0000000..fc26f44
--- /dev/null
+++ b/.snakemake-workflow-catalog.yml
@@ -0,0 +1,4 @@
+usage:
+  software-stack-deployment:
+    conda: true
+  report: true
diff --git a/resources/data_test/.gitkeep b/.test/.gitkeep
similarity index 100%
rename from resources/data_test/.gitkeep
rename to .test/.gitkeep
diff --git a/resources/data_test/SARS-CoV-2_Omicron-BA1_Covid-Seq-Lib-on-MiSeq_250000-reads_R1.fastq.gz b/.test/SARS-CoV-2_Omicron-BA1_Covid-Seq-Lib-on-MiSeq_250000-reads_R1.fastq.gz
similarity index 100%
rename from resources/data_test/SARS-CoV-2_Omicron-BA1_Covid-Seq-Lib-on-MiSeq_250000-reads_R1.fastq.gz
rename to .test/SARS-CoV-2_Omicron-BA1_Covid-Seq-Lib-on-MiSeq_250000-reads_R1.fastq.gz
diff --git a/resources/data_test/SARS-CoV-2_Omicron-BA1_Covid-Seq-Lib-on-MiSeq_250000-reads_R2.fastq.gz b/.test/SARS-CoV-2_Omicron-BA1_Covid-Seq-Lib-on-MiSeq_250000-reads_R2.fastq.gz
similarity index 100%
rename from resources/data_test/SARS-CoV-2_Omicron-BA1_Covid-Seq-Lib-on-MiSeq_250000-reads_R2.fastq.gz
rename to .test/SARS-CoV-2_Omicron-BA1_Covid-Seq-Lib-on-MiSeq_250000-reads_R2.fastq.gz
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..e69de29
diff --git a/Run_GeVarLi.sh b/Run_GeVarLi.sh
index 26189c0..62d83eb 100755
--- a/Run_GeVarLi.sh
+++ b/Run_GeVarLi.sh
@@ -216,8 +216,8 @@ fastq=$(expr $(ls -l ${workdir}/resources/reads/*.fastq.gz 2> /dev/null | wc -l)
 if [[ "${fastq}" == "0" ]]                                                         # If no sample,
 then                                                                                # start GeVarLi with at least 1 sample
     echo -e "${red}¡${nc} No FASTQ files detected in ${ylo}resources/reads/${nc} ${red}!${nc}
-${red}${sample_test}${nc} in ${ylo}resources/data_test/${nc} FASTQ files were be used as sample example"
-    cp ${workdir}/resources/data_test/${sample_test}_R*.fastq.gz ${workdir}/resources/reads/ # use data_test fastq
+${red}${sample_test}${nc} FASTQ files will be used as test samples"
+    cp ${workdir}/.test/${sample_test}_R*.fastq.gz ${workdir}/resources/reads/ # use the .test FASTQ files
     fastq="2"
 fi
 samples=$(expr ${fastq} \/ 2) # {fastq.gz count} / 2 = samples count (paired-end)
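The fallback above guarantees at least one input: if resources/reads/ holds no FASTQ files, the bundled .test pair is copied in, and the sample count is then the FASTQ count divided by two (paired-end). A minimal Python sketch of the same logic, assuming only the paths shown in this patch:

    from pathlib import Path
    import shutil

    # Mirrors the Run_GeVarLi.sh fallback: paths as laid out in this patch.
    reads_dir = Path("resources/reads")
    fastqs = sorted(reads_dir.glob("*.fastq.gz"))
    if not fastqs:  # no user samples: seed the run with the bundled test pair
        for fq in Path(".test").glob("*_R*.fastq.gz"):
            shutil.copy(fq, reads_dir)
        fastqs = sorted(reads_dir.glob("*.fastq.gz"))
    samples = len(fastqs) // 2  # paired-end: one sample per R1/R2 pair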
diff --git a/config/README.md b/config/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/configuration/config.yaml b/config/config.yaml
similarity index 90%
rename from configuration/config.yaml
rename to config/config.yaml
index 1f00aaf..82f8d10 100755
--- a/configuration/config.yaml
+++ b/config/config.yaml
@@ -249,20 +249,20 @@ resources:
     # -''         # Custom (set your own)
 
 envs: # Conda environment YAML files:
-  bamclipper:    'envs/bamclipper_v.1.0.0.yaml'      # Bamclipper    ver. 1.0.0
-  bcftools:      'envs/bcftools_v.1.20.yaml'         # Bcftools      ver. 1.20
-  bedtools:      'envs/bedtools_v.2.31.1.yaml'       # Bedtools      ver. 2.31.1
-  bowtie2:       'envs/bowtie2_v.2.5.4.yaml'         # Bowtie2       ver. 2.5.4
-  bwa:           'envs/bwa_v.0.7.18.yaml'            # BWA           ver. 0.7.18
-  cutadapt:      'envs/cutadapt_v.4.9.yaml'          # Cutadapt      ver. 4.9
-  fastq_screen:  'envs/fastq-screen_v.0.15.3.yaml'   # Fastq-Screen  ver. 0.15.3 (with BWA ver. 0.7.18)
-  fastqc:        'envs/fastqc_v.0.12.1.yaml'         # FastQC        ver. 0.12.1
-  gawk:          'envs/gawk_v.5.3.0.yaml'            # Awk (GNU)     ver. 5.3.0
-  ivar:          'envs/ivar_v.1.4.3.yaml'            # iVar          ver. 1.4.3 (with Samtools ver. 1.20)
-  minimap2:      'envs/minimap2_v.2.28.yaml'         # Minimap2      ver. 2.28
-  multiqc:       'envs/multiqc_v.1.23.yaml'          # MultiQC       ver. 1.23 (with Pandoc ver. 3.3)
-  nextclade:     'envs/nextclade_v.3.9.1.yaml'       # Nextclade     ver. 3.9.1
-  pangolin:      'envs/pangolin_v.4.3.yaml'          # Pangolin      ver. 4.3
-  samtools:      'envs/samtools_v.1.20.yaml'         # Samtools      ver. 1.20
-  sickle_trim:   'envs/sickle-trim_v.1.33.yaml'      # Sickle-trim   ver. 1.33
-  tsv2vcf:       'envs/tsv2vcf_v.1.1.yaml'           # tsv2vcf       ver. 1.1 (bcftools biopython numpy)
+  bamclipper:    '../envs/bamclipper_v.1.0.0.yaml'      # Bamclipper    ver. 1.0.0
+  bcftools:      '../envs/bcftools_v.1.20.yaml'         # Bcftools      ver. 1.20
+  bedtools:      '../envs/bedtools_v.2.31.1.yaml'       # Bedtools      ver. 2.31.1
+  bowtie2:       '../envs/bowtie2_v.2.5.4.yaml'         # Bowtie2       ver. 2.5.4
+  bwa:           '../envs/bwa_v.0.7.18.yaml'            # BWA           ver. 0.7.18
+  cutadapt:      '../envs/cutadapt_v.4.9.yaml'          # Cutadapt      ver. 4.9
+  fastq_screen:  '../envs/fastq-screen_v.0.15.3.yaml'   # Fastq-Screen  ver. 0.15.3 (with BWA ver. 0.7.18)
+  fastqc:        '../envs/fastqc_v.0.12.1.yaml'         # FastQC        ver. 0.12.1
+  gawk:          '../envs/gawk_v.5.3.0.yaml'            # Awk (GNU)     ver. 5.3.0
+  ivar:          '../envs/ivar_v.1.4.3.yaml'            # iVar          ver. 1.4.3 (with Samtools ver. 1.20)
+  minimap2:      '../envs/minimap2_v.2.28.yaml'         # Minimap2      ver. 2.28
+  multiqc:       '../envs/multiqc_v.1.23.yaml'          # MultiQC       ver. 1.23 (with Pandoc ver. 3.3)
+  nextclade:     '../envs/nextclade_v.3.9.1.yaml'       # Nextclade     ver. 3.9.1
+  pangolin:      '../envs/pangolin_v.4.3.yaml'          # Pangolin      ver. 4.3
+  samtools:      '../envs/samtools_v.1.20.yaml'         # Samtools      ver. 1.20
+  sickle_trim:   '../envs/sickle-trim_v.1.33.yaml'      # Sickle-trim   ver. 1.33
+  tsv2vcf:       '../envs/tsv2vcf_v.1.1.yaml'           # tsv2vcf       ver. 1.1 (bcftools biopython numpy)
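The new '../' prefix is required, not cosmetic: Snakemake resolves a rule's conda: path relative to the file that defines the rule, and the rules now live in workflow/rules/, one level below workflow/envs/. A hypothetical excerpt of one of the new .smk files, showing how these entries are consumed:

    # Hypothetical rule in a workflow/rules/*.smk file; the configured env
    # path ('../envs/samtools_v.1.20.yaml') resolves relative to this file.
    rule samtools_version:
        conda:
            config["envs"]["samtools"]
        output:
            "results/10_Reports/samtools-version.txt"
        shell:
            "samtools --version 1> {output}"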
diff --git a/configuration/fastq-screen.conf b/config/fastq-screen.conf
similarity index 100%
rename from configuration/fastq-screen.conf
rename to config/fastq-screen.conf
diff --git a/configuration/multiqc/default.yaml b/config/multiqc/default.yaml
similarity index 100%
rename from configuration/multiqc/default.yaml
rename to config/multiqc/default.yaml
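The Snakefile itself shrinks from a ~1350-line monolith to a thin entry point, with the rule bodies moving to the per-step files listed in the diffstat. The new include section is not visible in this excerpt; given the files created, it plausibly reads like this sketch (ordering assumed to follow the pipeline):

    # Assumed include block for the slimmed-down workflow/Snakefile,
    # one include per modular rules file created by this patch:
    include: "rules/quality_controls.smk"
    include: "rules/reads_cleaning.smk"
    include: "rules/indexing_genomes.smk"
    include: "rules/reads_mapping.smk"
    include: "rules/remove_duplicates.smk"
    include: "rules/primer_cleaping.smk"
    include: "rules/coverage_stats.smk"
    include: "rules/mask_lowcov.smk"
    include: "rules/variant_calling.smk"
    include: "rules/consensus_calling.smk"
    include: "rules/lineages_calling.smk"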
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 4eff7a1..1fa20a2 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -15,6 +15,7 @@
 # Date ___________________ 2021.10.12
 # Latest modifications ___ 2025.01.08 (Prepare for Snakedeploy)
 # Use ____________________ snakemake -s Snakemake --use-conda -j
+###############################################################################
 
 ###############################################################################
 ### CONFIGURATION ###
@@ -26,132 +27,37 @@ configfile: "configuration/config.yaml"
 ### FUNCTIONS ###
 #################
 
-def get_memory_per_thread(wildcards):
-    memory_per_thread = int(RAM) // int(CPUS)
-    if memory_per_thread < 1:
-        memory_per_thread = 1
-    return memory_per_thread
-
-def get_quality_input(wildcards):
-    quality_list = []
-    if "quality" in MODULES:
-        quality_list = "results/00_Quality_Control/multiqc/"
-    return quality_list
-
-def get_trimming_input(wildcards):
-    trimming_list = []
-    if "trimming" in MODULES:
-        trimming_list = expand("results/01_Trimming/sickle/{sample}_sickle-trimmed_SE.fastq.gz",
-                               sample = SAMPLE)
-    return trimming_list
-
-def stash_or_trash(path):
-    if "keeptrim" in MODULES:
-        return path
-    else:
-        return temp(path)
-
-def get_cleapping_input(wildcards):
-    cleapping_list = []
-    if "cleapping" in MODULES:
-        cleapping_list = ""
-    return cleapping_list
+# Imports
+import sys
+import glob
+import pandas
+import snakemake.utils as snakutils
+
+# Make the bundled 'scripts' package importable, relative to this Snakefile
+sys.path.append(workflow.basedir)
+import scripts.functions as functions
+from scripts.functions import stash_or_trash, get_bam_input, get_bai_input
 
-def get_bam_input(wildcards):
-    markdup = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam"
-    if "cleapping" in MODULES:
-        markdup = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.primerclipped.bam"
-    return markdup
+# Export shared configuration to the functions module
 
-def get_bai_input(wildcards):
-    index = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam.bai"
-    if "cleapping" in MODULES:
-        index = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.primerclipped.bam.bai"
-    return index
+functions.CPUS = config["resources"]["cpus"] # Threads (maximum)
+functions.RAM = config["resources"]["ram"]   # Memory (RAM) in Gb (maximum)
 
-def get_flagstat_input(wildcards):
-    flagstat_list = []
-    if "covstats" in MODULES:
-        flagstat_list = expand(
-            "results/03_Coverage/{reference}/flagstat/{sample}_{aligner}_flagstat.{ext}",
-            reference = REFERENCE,
-            sample = SAMPLE,
-            aligner = ALIGNER,
-            ext = ["txt", "tsv", "json"]
-        )
-    return flagstat_list
+functions.MODULES = config["modules"] # Modules
 
-def get_covstats_input(wildcards):
-    covstats_list = []
-    if "covstats" in MODULES:
-        covstats_list = expand(
-            "results/03_Coverage/{reference}/{sample}_{aligner}_{min_cov}X_coverage-stats.tsv",
-            reference = REFERENCE,
-            sample = SAMPLE,
-            aligner = ALIGNER,
-            min_cov = MIN_COV
-        )
-    return covstats_list
+functions.SAMPLE, = glob_wildcards("resources/symlinks/{sample}_R1.fastq.gz") # Samples
 
-def get_consensus_input(wildcards):
-    consensus_list = []
-    if "consensus" in MODULES:
-        consensus_list = expand(
-            "results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_consensus.fasta",
-            reference = REFERENCE,
-            sample = SAMPLE,
-            aligner = ALIGNER,
-            min_cov = MIN_COV,
-            caller = CALLER
-        )
-    return consensus_list
+functions.REFERENCE = config["consensus"]["reference"] # Genome reference sequence, in fasta format
+functions.ALIGNER = config["consensus"]["aligner"]     # Aligner ('minimap2', 'bwa' or 'bowtie2')
+functions.MIN_COV = config["consensus"]["min_cov"]     # Minimum coverage, mask lower regions with 'N'
+functions.CALLER = config["consensus"]["caller"]       # Variant Caller ('lofreq' or 'ivar')
+#functions.IUPAC = config["consensus"]["iupac"]         # Output variants in the form of IUPAC ambiguity codes
 
-def get_vcf_input(wildcards):
-    vcf_list = []
-    if "consensus" in MODULES:
-        vcf_list = expand(
-            "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_variant-filt.vcf",
-            reference = REFERENCE,
-            sample = SAMPLE,
-            aligner = ALIGNER,
-            min_cov = MIN_COV,
-            caller = CALLER
-        )
-    return vcf_list
+# Snakemake version
+snakutils.min_version("8.27.0")
 
-def get_pangolin_input(wildcards):
-    pangolin_list = []
-    if "pangolin" in MODULES:
-        pangolin_list = expand(
-            "results/06_Lineages/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_pangolin-report.csv",
-            reference = REFERENCE,
-            sample = SAMPLE,
-            aligner = ALIGNER,
-            min_cov = MIN_COV,
-            caller = CALLER
-        )
-    return pangolin_list
-
-def get_nextclade_input(wildcards):
-    nextclade_list = []
-    if "nextclade" in MODULES:
-        nextclade_list = expand(
-            "results/06_Lineages/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_nextclade-report.tsv",
-            reference = REFERENCE,
-            sample = SAMPLE,
-            aligner = ALIGNER,
-            min_cov = MIN_COV,
-            caller = CALLER
-        )
-    return nextclade_list
-
-def get_gisaid_input(wildcards):
-    gisaid_list = []
-    if "gisaid" in MODULES:
-        gisaid_list = expand(
-            ""
-        )
-    return gisaid_list
+# Schemas
+#snakutils.validate(config, schema="schemas/config.schema.yaml")
 
 ###############################################################################
 ### WILDCARDS ###
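The helpers deleted above are not lost: per the diffstat they move to workflow/scripts/functions.py, and the Snakefile now injects their configuration as module globals (functions.CPUS, functions.MODULES, and so on) before any rule is parsed. A minimal sketch of that module, reconstructed from the removed definitions (the actual file may differ):

    # workflow/scripts/functions.py (sketch). Globals are populated by the
    # Snakefile at parse time; the defaults here are placeholders only.
    from snakemake.io import temp

    CPUS = RAM = None   # set via functions.CPUS / functions.RAM
    MODULES = []        # set via functions.MODULES

    def get_memory_per_thread(wildcards):
        # Split total RAM evenly across threads, never below 1 GB per thread.
        return max(1, int(RAM) // int(CPUS))

    def stash_or_trash(path):
        # Keep trimmed reads only when 'keeptrim' is enabled; otherwise mark
        # them temp() so Snakemake deletes them once no rule needs them.
        return path if "keeptrim" in MODULES else temp(path)

    def get_bam_input(wildcards):
        # Switch to primer-clipped BAMs when the 'cleapping' module is on.
        bam = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam"
        if "cleapping" in MODULES:
            bam = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.primerclipped.bam"
        return bam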
@@ -166,7 +72,7 @@ SAMPLE, = glob_wildcards("resources/symlinks/{sample}_R1.fastq.gz")
 
 CPUS = config["resources"]["cpus"]       # Threads (maximum)
 RAM = config["resources"]["ram"]         # Memory (RAM) in Gb (maximum)
-MEM_GB = get_memory_per_thread           # Memory per thread in GB (maximum)
+MEM_GB = functions.get_memory_per_thread  # Memory per thread in GB (maximum)
 TMP_DIR = config["resources"]["tmp_dir"] # Temporary directory
 
 ###############################################################################
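MEM_GB keeps a reference to the input function rather than a precomputed value, so each job can derive its per-thread budget at run time. A worked example with assumed totals:

    # Assumed totals: 16 GB of RAM shared across 8 threads.
    RAM, CPUS = 16, 8
    mem_gb = max(1, int(RAM) // int(CPUS))  # -> 2, used as 'samtools sort -m 2G'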
@@ -195,8 +101,6 @@ NEXTCLADE = config["envs"]["nextclade"]       # Nextclade conda environment
 ### PARAMETERS ###
 ##################
 
-MODULES = config["modules"] # Modules
-
 SUBSET = config["fastq_screen"]["subset"]     # Fastq-Screen --subset
 FQC_CONFIG = config["fastq_screen"]["config"] # Fastq-Screen --conf
 #MQC_PATH = config["multiqc"]["path"]          # MultiQC --conf
@@ -211,7 +115,7 @@ SPLIT_SIZE = config["minimap2"]["algorithm"]["split_size"]         # MM2 split i
 BWA_ALGO = config["bwa"]["algorithm"]     # BWA indexing algorithm
 BT2_ALGO = config["bowtie2"]["algorithm"] # BT2 indexing algorithm
 
-REFERENCE = config["consensus"]["reference"] # Genome reference sequence, in fasta format
+#REFERENCE = config["consensus"]["reference"] # Genome reference sequence, in fasta format
 MIN_COV = config["consensus"]["min_cov"]     # Minimum coverage, mask lower regions with 'N'
 IUPAC = config["consensus"]["iupac"]         # Output variants in the form of IUPAC ambiguity codes
 ALIGNER = config["consensus"]["aligner"]     # Aligner ('minimap2', 'bwa' or 'bowtie2')
@@ -260,1189 +164,35 @@ NEXT_PATH = config["nextclade"]["path"]       # Path to nextclade dataset
 NEXT_DATASET = config["nextclade"]["dataset"] # Nextclade dataset
 
 ###############################################################################
-### RULES ###
-#############
-rule all:
-    input:
-        multiqc = get_quality_input,
-        trimming = get_trimming_input,
-        consensus = get_consensus_input,
-        flagstat = get_flagstat_input,
-        covstats = get_covstats_input,
-        vcf = get_vcf_input,
-        pangolin = get_pangolin_input,
-        nextclade = get_nextclade_input,
-        gisaid = get_gisaid_input
-
-
-###############################################################################
-################################## LINEAGES ###################################
-###############################################################################
-
-###############################################################################
-rule nextclade_lineage:
-    # Aim: nextclade lineage assignation
-    # Use: nextclade [QUERY.fasta] -t [THREADS] --outfile [NAME.csv]
-    message:
-        """
-        ~ Nextclade ∞ Assign Lineage ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ {wildcards.aligner}
-        Min. cov.: _______ {wildcards.min_cov}X
-        Variant caller: __ {wildcards.caller}
-        """
-    conda:
-        NEXTCLADE
-    resources:
-        cpus = CPUS
-    params:
-        path = NEXT_PATH,
-        dataset = NEXT_DATASET
-    input:
-        consensus = "results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_consensus.fasta"
-    output:
-        lineage = "results/06_Lineages/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_nextclade-report.tsv",
-        alignment = directory("results/06_Lineages/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_nextclade-all/")
-    log:
-        "results/10_Reports/tools-log/nextclade/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_lineage.log"
-    shell:
-        "nextclade "                                    # Nextclade, assign queries sequences to clades and reports potential quality issues
-        "run "                                           # Run analyzis
-        "--jobs {resources.cpus} "                       # -j: Number of CPU threads used by the algorithm (default: all available threads)
-        "--input-dataset {params.path}{params.dataset} " # -raq: Path to a directory containing a dataset (root-seq, tree and qc-config required)
-        "--output-tsv {output.lineage} "                 # -t: Path to output TSV results file
-        "--output-all {output.alignment} "               # -O: Produce all of the output files into this directory, using default basename
-        "{input.consensus} "                             # Path to a .fasta file with input sequences
-        "&> {log}"                                       # Log redirection
-
-###############################################################################
-rule pangolin_lineage:
-    # Aim: lineage mapping
-    # Use: pangolin [QUERY.fasta] -t [THREADS] --outfile [NAME.csv]
-    message:
-        """
-        ~ Pangolin ∞ Assign Lineage ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ {wildcards.aligner}
-        Min. cov.: _______ {wildcards.min_cov}X
-        Variant caller: __ {wildcards.caller}
-        """
-    conda:
-        PANGOLIN
-    resources:
-        cpus = CPUS,
-        tmp_dir = TMP_DIR
-    input:
-        consensus = "results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_consensus.fasta"
-    output:
-        lineage = "results/06_Lineages/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_pangolin-report.csv"
-    log:
-        "results/10_Reports/tools-log/pangolin/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_lineage.log"
-    shell:
-        "pangolin "                     # Pangolinn, Phylogenetic Assignment of Named Global Outbreak LINeages
-        "{input.consensus} "             # Query fasta file of sequences to analyse
-        "--threads {resources.cpus} "    # -t: Number of threads
-        "--tempdir {resources.tmp_dir} " # Specify where you want the temp stuff to go (default: $TMPDIR)
-        "--outfile {output.lineage} "    # Optional output file name (default: lineage_report.csv)
-        "&> {log}"                       # Log redirection
-
-###############################################################################
-################################## CONSENSUS ##################################
-###############################################################################
-
-###############################################################################
-rule sed_rename_headers:
-    # Aim: rename all fasta header with sample name
-    # Use: sed 's/[OLD]/[NEW]/' [IN] > [OUT]
-    message:
-        """
-        ~ Sed ∞ Rename Fasta Header ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ {wildcards.aligner}
-        Min. cov.: _______ {wildcards.min_cov}X
-        Variant caller: __ {wildcards.caller}
-        """
-    conda:
-        GAWK
-    input:
-        cons_tmp = "results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_consensus.fasta.tmp"
-    output:
-        consensus = "results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_consensus.fasta"
-    log:
-        "results/10_Reports/tools-log/sed/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_fasta-header.log"
-    shell:
-        "sed " # Sed, a Stream EDitor used to perform basic text transformations on an input stream
-        "'s/^>.*$/>{wildcards.sample}_{wildcards.aligner}_{wildcards.min_cov}X_{wildcards.caller}/' "
-        "{input.cons_tmp} "      # Input file
-        "1> {output.consensus} " # Output file
-        "2> {log}"               # Log redirection
-
-###############################################################################
-########################### VARIANTS CALLING - IVAR ###########################
-###############################################################################
-
-###############################################################################
-rule convert_tsv2vcf:
-    message:
-        """
-        ~ iVar ∞ Convert TSV to VCF file ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ {wildcards.aligner}
-        Min. cov.: _______ {wildcards.min_cov}X
-        Variant caller: __ iVar
-        """
-    conda:
-        TSV2VCF
-    input:
-        tsv = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_ivar_variant-call.tsv"
-    output:
-        vcf = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_ivar_variant-filt.vcf"
-    log:
-        "results/10_Reports/tools-log/tsv2vcf/{reference}/{sample}_{aligner}_{min_cov}X_ivar_tsv2vcf.log"
-    shell:
-        "python3 "                                  # Python 3
-        "workflow/scripts/ivar_variants_to_vcf.py "  # Script (from viralrecon)
-        "{input.tsv} "                               # TSV input
-        "{output.vcf} "                              # VCF output
-        "&> {log}"                                   # Log redirection
-
-###############################################################################
-rule ivar_consensus:
-    # Aim:
-    # Use:
-    message:
-        """
-        ~ iVar ∞ Call Consensus ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ {wildcards.aligner}
-        Min. cov.: _______ {wildcards.min_cov}X
-        Variant caller: __ iVar
-        """
-    conda:
-        IVAR
-    params:
-        min_depth = IVAR_MIN_DEPTH,
-        min_freq = IVAR_MIN_FREQ,
-        min_insert = IVAR_MIN_INSERT,
-        max_depth = IVAR_MAX_DEPTH,
-        min_bq = IVAR_MIN_BQ,
-        min_qual = IVAR_MIN_QUAL,
-        baq = IVAR_MAP_QUAL
-    input:
-        mark_dup = get_bam_input,
-        variant_call = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_ivar_variant-call.tsv"
-    output:
-        prefix = temp("results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_ivar_consensus"),
-        cons_tmp = temp("results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_ivar_consensus.fasta.tmp"),
-        qual_txt = "results/05_Consensus/{reference}/ivar_consensus-quality/{sample}_{aligner}_{min_cov}X_ivar_consensus.qual.txt",
-    log:
-        "results/10_Reports/tools-log/ivar/{reference}/{sample}_{aligner}_{min_cov}X_ivar_consensus.log"
-    shell:
-        "samtools mpileup "              # Samtools mpileup, tools for alignments in the SAM format with command multi-way pileup
-        "--verbosity 0 "                  # Set level of verbosity [INT]
-        "-a "                             # -a: output all positions (including zero depth)
-        "-a "                             # -a -a / -aa: output absolutely all positions, including unused ref. sequences
-        "--count-orphans "                # -A: do not discard anomalous read pairs
-        "--max-depth {params.max_depth} " # -d: max per-file depth; avoids excessive memory usage [INT] (default: 8000)
-        "{params.baq} "                   # --no-BAQ / -B: disable BAQ (per-Base Alignment Quality)
-        "--min-BQ {params.min_bq} "       # -Q: skip bases with baseQ/BAQ smaller than [INT] (default: 13)
-        #"--reference {input.masked_ref} " # Reference sequence FASTA FILE
-        "{input.mark_dup} "               # Markdup BAM input
-        "2>&1 | grep -v '[mpileup] 1 samples in 1 input files' " # Remove this stdout
-        "| "                               ### PIPE to iVar
-        "ivar consensus "                # iVar, with command 'consensus': Call consensus from aligned BAM file
-        "-p {output.prefix} "             # -p: prefix
-        "-q {params.min_qual} "           # -q: Minimum quality score threshold to count base [INT] (Default: 20)
-        "-t {params.min_freq} "           # -t: Minimum frequency threshold to call variants [FLOAT] (Default: 0.03)
-        "-c {params.min_insert} "         # -c: Minimum insertion frequency threshold to call consensus [FLOAT] (Default: 0.8)    
-        "-m {params.min_depth} "          # -m: Minimum read depth to call variants [INT] (Default: 0)
-        "-n N "                           # -n: Character to print in regions with less than minimum coverage (Default: N)
-        #"-i {wildcards.sample} "          # -i: Name of fasta header (default: Consensus_<prefix>_threshold_<min_freq>_quality_<min_qual>_<min_insert>
-        "&> {log} "                       # Log redirection
-        "&& mv {output.prefix}.fa {output.cons_tmp} "       # copy consensus.fa (temp) to consensus.fasta.tmp (tmp)
-        "&& mv {output.prefix}.qual.txt {output.qual_txt} " # cppty consensus.qual.txt (tmp) to ivar_consensus-quality/ directory
-        "&& touch {output.prefix}"                          # Touch done
-
-###############################################################################
-rule ivar_variant_calling:
-    # Aim: SNVs and Indels calling 
-    # Use: samtools mpileup -aa -A -d 0 -B -Q 0 --reference [<reference-fasta] <input.bam> | ivar variants -p <prefix> [-q <min-quality>] [-t <min-frequency-threshold>] [-m <minimum depth>] [-r <reference-fasta>] [-g GFF file]
-    # Note: samtools mpileup output must be piped into ivar variants
-    message:
-        """
-        ~ iVar ∞ Call Variants ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ {wildcards.aligner}
-        Min. cov.: _______ {wildcards.min_cov}X
-        Variant caller: __ iVar
-        """
-    conda:
-        IVAR
-    params:
-        min_depth = IVAR_MIN_DEPTH,
-        min_freq = IVAR_MIN_FREQ,
-        min_insert = IVAR_MIN_INSERT,
-        max_depth = IVAR_MAX_DEPTH,
-        min_bq = IVAR_MIN_BQ,
-        min_qual = IVAR_MIN_QUAL,
-        baq = IVAR_MAP_QUAL
-    input:
-        masked_ref = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_masked-ref.fasta",
-        mark_dup = get_bam_input
-    output:
-        variant_call = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_ivar_variant-call.tsv"
-    log:
-        "results/10_Reports/tools-log/ivar/{reference}/{sample}_{aligner}_{min_cov}X_ivar_variant-call.log"
-    shell:
-        "samtools mpileup "              # Samtools mpileup, tools for alignments in the SAM format with command multi-way pileup
-        "--verbosity 0 "                  # Set level of verbosity [INT]
-        "-a "                             # -a: output all positions (including zero depth)
-        "-a "                             # -a -a / -aa: output absolutely all positions, including unused ref. sequences
-        "--count-orphans "                # -A: do not discard anomalous read pairs
-        "--max-depth {params.max_depth} " # -d: max per-file depth; avoids excessive memory usage (default: 8000) [INT]
-        "{params.baq} "                   # --no-BAQ / -B: disable BAQ (per-Base Alignment Quality)
-        "--min-BQ {params.min_bq} "       # -Q: skip bases with baseQ/BAQ smaller than (default: 13) [INT]
-        "--reference {input.masked_ref} " # Reference sequence FASTA FILE
-        "{input.mark_dup} "               # Markdup BAM input
-        "2>&1 | grep -v '[mpileup] 1 samples in 1 input files' " # Remove this stdout
-        "| "                               ### pipe to iVar
-        "ivar variants "                 # iVar, with command 'variants': Call variants from aligned BAM file
-        "-p {output.variant_call} "       # -p: prefix
-        "-q {params.min_qual} "           # -q: Minimum quality score threshold to count base (Default: 20) [INT]
-        "-t {params.min_freq} "           # -t: Minimum frequency threshold to call variants (Default: 0.03) [FLOAT]
-        "-m {params.min_depth} "          # -m: Minimum read depth to call variants (Default: 0) [INT]
-        "-r {input.masked_ref} "          # -r: Reference file used for alignment (translate the nuc. sequences and identify intra host single nuc. variants) 
-        #"-g "                            # -g: A GFF file in the GFF3 format can be supplied to specify coordinates of open reading frames (ORFs)
-        "&> {log}"                        # Log redirection 
-
-
-###############################################################################
-############################ MASKING LOW COVERAGE #############################
-###############################################################################
-
-###############################################################################
-rule bedtools_masking:
-    # Aim: masking low coverage regions
-    # Use: bedtools maskfasta [OPTIONS] -fi [REFERENCE.fasta] -bed [RANGE.bed] -fo [MASKEDREF.fasta]
-    message:
-        """
-        ~ BedTools ∞ Mask Low Coverage Regions ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ {wildcards.aligner}
-        Min. cov.: _______ {wildcards.min_cov}X
-        """
-    conda:
-        BEDTOOLS
-    input:
-        reference = "resources/genomes/{reference}.fasta",
-        low_cov_mask = "results/03_Coverage/{reference}/bed/{sample}_{aligner}_{min_cov}X_low-cov-mask.bed"
-    output:
-        masked_ref = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_masked-ref.fasta"
-    log:
-        "results/10_Reports/tools-log/bedtools/{reference}/{sample}_{aligner}_{min_cov}X_masking.log"
-    shell:
-        "bedtools maskfasta "       # Bedtools maskfasta, mask a fasta file based on feature coordinates
-        "-fi {input.reference} "     # Input FASTA file 
-        "-bed {input.low_cov_mask} " # BED/GFF/VCF file of ranges to mask in -fi
-        "-fo {output.masked_ref} "   # Output masked FASTA file
-        "&> {log}"                   # Log redirection 
-
-###############################################################################
-rule bedtools_merged_mask:
-    # Aim: merging overlaps
-    # Use: bedtools merge [OPTIONS] -i [FILTERED.bed] -g [GENOME.fasta] 
-    message:
-        """
-        ~ BedTools ∞ Merge Overlaps ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ {wildcards.aligner}
-        Min. cov.: _______ {wildcards.min_cov}X
-        """
-    conda:
-        BEDTOOLS
-    input:
-        min_cov_filt = "results/03_Coverage/{reference}/bed/{sample}_{aligner}_{min_cov}X_min-cov-filt.bed"
-    output:
-        low_cov_mask = temp("results/03_Coverage/{reference}/bed/{sample}_{aligner}_{min_cov}X_low-cov-mask.bed")
-    log:
-        "results/10_Reports/tools-log/bedtools/{reference}/{sample}_{aligner}_{min_cov}X_merging.log"
-    shell:
-        "bedtools merge "          # Bedtools merge, merges overlapping BED/GFF/VCF entries into a single interval
-        "-i {input.min_cov_filt} "  # -i: BED/GFF/VCF input to merge 
-        "1> {output.low_cov_mask} " # merged output
-        "2> {log}"                  # Log redirection
-
-###############################################################################
-rule awk_min_covfilt:
-    # Aim: minimum coverage filtration
-    # Use: awk '$4 < [MIN_COV]' [BEDGRAPH.bed] 1> [FILTERED.bed]
-    message:
-        """
-        ~ Awk ∞ Minimum Coverage Filtration ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ {wildcards.aligner}
-        Min. cov.: _______ {wildcards.min_cov}X
-        """
-    conda:
-        GAWK
-    input:
-        genome_cov = "results/02_Mapping/{reference}/{sample}_{aligner}_genome-cov.bed"
-    output:
-        min_cov_filt = temp("results/03_Coverage/{reference}/bed/{sample}_{aligner}_{min_cov}X_min-cov-filt.bed")
-    log:
-        "results/10_Reports/tools-log/awk/{reference}/{sample}_{aligner}_{min_cov}X_min-cov-filt.log"
-    shell:
-        "awk "                      # Awk, a program that you can use to select particular records in a file and perform operations upon them
-        "'$4 < {wildcards.min_cov}' " # Minimum coverage for masking regions in consensus sequence
-        "{input.genome_cov} "         # BedGraph coverage input
-        "1> {output.min_cov_filt} "   # Minimum coverage filtered bed output
-        "2> {log} "                   # Log redirection
-
-###############################################################################
-################################## STATISTICS #################################
-###############################################################################
-
-###############################################################################
-rule awk_coverage_statistics:
-    # Aim: computing genomme coverage stats
-    # Use: awk {FORMULA} END {{print [RESULTS.tsv] [BEDGRAPH.bed]
-    message:
-        """
-        ~ Awk ∞ Compute Genome Coverage Statistics from BED file ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ {wildcards.aligner}
-        """
-    conda:
-        GAWK
-    input:
-        cutadapt = "results/10_Reports/tools-log/cutadapt/{sample}.log",
-        sickle = "results/10_Reports/tools-log/sickle-trim/{sample}.log",
-        samtools = "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_mark-dup.log",
-        flagstat = "results/03_Coverage/{reference}/flagstat/{sample}_{aligner}_flagstat.json",
-        histogram = "results/03_Coverage/{reference}/histogram/{sample}_{aligner}_coverage-histogram.txt",
-        genome_cov = "results/02_Mapping/{reference}/{sample}_{aligner}_genome-cov.bed"
-    output:
-        cov_stats = "results/03_Coverage/{reference}/{sample}_{aligner}_{min_cov}X_coverage-stats.tsv"
-    log:
-        "results/10_Reports/tools-log/awk/{reference}/{sample}_{aligner}_{min_cov}X_coverage-stats.log"
-    shell:
-        r""" rawReads=$(grep -o -E  """                                  # Get raw reads 
-        r""" 'Total read pairs processed:.+' {input.cutadapt}  """       #
-        r""" | sed -E 's/Total read pairs processed:\ +//'  """          #
-        r""" | sed 's/,//g') ; """                                       #
-        #
-        r""" cutadaptPF=$(grep -o -E """                                 # Get cutadapt Passing Filtes reads
-        r""" 'Pairs written \(passing filters\):.+' {input.cutadapt} """ #
-        r""" | sed -E 's/Pairs written \(passing filters\):\ +//' """    #
-        r""" | sed 's/,//g') ; """                                       #
-        #
-        r""" sicklePF=$(grep -o -E """                                   # Get sickle Passing Filtes reads
-        r""" 'FastQ paired records kept:.+' {input.sickle} """           #
-        r""" | sed -E 's/FastQ paired records kept:\ +//') ; """         #
-        #
-        r""" totalDuplicate=$(grep -o -E """                             # Get total duplicated reads
-        r""" 'DUPLICATE TOTAL:.+' {input.samtools} """                   #
-        r""" | sed -E 's/DUPLICATE TOTAL:\ +//') ; """                   #
-        #
-        r""" estimatedLibrarySize=$(grep -o -E """                       # Get estimated library size
-        r""" 'ESTIMATED_LIBRARY_SIZE:.+' {input.samtools} """            #
-        r""" | sed -E 's/ESTIMATED_LIBRARY_SIZE:\ +//') ; """            #
-        #
-        r""" samtoolsPF=$(grep -o -E """                                 # Get samtool Passing Filter reads
-        r""" 'WRITTEN: .+' {input.samtools} """                          #
-        r""" | sed -E 's/WRITTEN:\ +//') ; """                           #
-        #
-        r""" mappedReads=$(grep -o -E -m 1 """                           # Get mapped reads
-        r""" '"mapped": .+' {input.flagstat} """                         #
-        r""" | sed -E 's/"mapped":\ +//' """                             #
-        r""" | sed 's/,//g') ; """                                       #
-        #
-        r""" mappedPercentReads=$(grep -o -E -m 1 """                    # Get mapped precent reads
-        r""" '"mapped %": .+' {input.flagstat} """                       #
-        r""" | sed -E 's/"mapped %":\ +//' """                           #
-        r""" | sed 's/,//g') ; """                                       #
-        #
-        r""" covPercentAt1X=$(grep -o -E """                             # Get coverage percent @1X
-        r""" 'Percent covered:.+' {input.histogram} """                  #
-        r""" | sed -E 's/Percent covered:\ +//') ; """                   #
-        #
-        r""" awk """                                                   # Awk, a program to select particular records in a file and perform operations upon them
-        r""" -v rawReads="${{rawReads}}" """                             # Define external variable
-        r""" -v cutadaptPF="${{cutadaptPF}}" """                         # Define external variable
-        r""" -v sicklePF="${{sicklePF}}" """                             # Define external variable
-        r""" -v totalDuplicate="${{totalDuplicate}}" """                 # Define external variable
-        r""" -v estimatedLibrarySize="${{estimatedLibrarySize}}" """     # Define external variable
-        r""" -v samtoolsPF="${{samtoolsPF}}" """                         # Define external variable
-        r""" -v mappedReads="${{mappedReads}}" """                       # Define external variable
-        r""" -v mappedPercentReads="${{mappedPercentReads}}" """         # Define external variable
-        r""" -v covPercentAt1X="${{covPercentAt1X}}" """                 # Define external variable
-        r""" '$4 >= {wildcards.min_cov} {{supMin_Cov+=$3-$2}} ; """      # Genome size (>= min_cov @X)
-        r""" {{genomeSize+=$3-$2}} ; """                                 # Genome size (total)
-        r""" {{totalBases+=($3-$2)*$4}} ; """                            # Total bases @1X
-        r""" {{totalBasesSq+=(($3-$2)*$4)**2}} """                       # Total bases square @1X
-        r""" END """                                                    # END
-        r""" {{print """                                                # Print
-        r""" "sample_id", "\t", """                                      # header: Sample ID
-        r""" "raw_paired_reads", "\t", """                               # header: Raw paired reads
-        r""" "cutadapt_pairs_PF", "\t", """                              # header: Cutadapt Passing Filters
-        r""" "sickle_reads_PF", "\t", """                                # header: Sickle-trim Passing Filters
-        r""" "duplicated_reads", "\t", """                               # header:
-        r""" "duplicated_percent_%","\t", """                            # header:
-        r""" "estimated_library_size*", "\t", """                        # header:
-        r""" "samtools_pairs_PF", "\t", """                              # header:
-        r""" "mapped_with", "\t",  """                                   # header: aligner
-        r""" "mapped_on", "\t",  """                                     # header: reference
-        r""" "mapped_reads", "\t", """                                   # header:
-        r""" "mapped_percent_%", "\t", """                               # header:
-        r""" "mean_depth", "\t", """                                     # header: mean depth
-        r""" "standard_deviation", "\t", """                             # header: standard deviation
-        r""" "cov_percent_%_@1X", "\t", """                              # header: coverage percentage @1X
-        r""" "cov_percent_%" "\t", """                                   # header: coverage percentage
-        r""" "@_min_cov" """                                             # header: @_[min_cov]_X
-        r""" ORS """                                                      # \n newline
-        r""" "{wildcards.sample}", "\t", """                             # value: Sample ID
-        r""" rawReads, "\t", """                                         # value: Raw sequences
-        r""" cutadaptPF, "\t", """                                       # value: Cutadapt Passing Filter
-        r""" sicklePF, "\t", """                                         # value: Sickle Passing Filter
-        r""" totalDuplicate, "\t", """                                   # value:
-        r""" int(((totalDuplicate)/(rawReads*2))*100), "%", "\t", """    # value: (divided by 2 to estimated pairs)
-        r""" estimatedLibrarySize, "\t", """                             # value:
-        r""" samtoolsPF, "\t", """                                       # value:
-        r""" "{wildcards.aligner}", "\t",  """                           # value: aligner
-        r""" "{wildcards.reference}", "\t",  """                         # value: reference
-        r""" mappedReads, "\t", """                                      # value:
-        r""" mappedPercentReads, "%", "\t", """                          # value:
-        r""" int(totalBases/genomeSize), "\t", """                       # value: mean depth
-        r""" int(sqrt((totalBasesSq/genomeSize)-(totalBases/genomeSize)**2)), "\t", """ # Standard deviation value
-        r""" covPercentAt1X, "\t", """                                   # value
-        r""" supMin_Cov/genomeSize*100, "%", "\t", """                   # Coverage percent (@ min_cov X) value
-        r""" "@{wildcards.min_cov}X" """                                 # @ min_cov X value
-        r""" }}' """                                                     # Close print
-        r""" {input.genome_cov} """                                      # BedGraph coverage input
-        r""" 1> {output.cov_stats} """                                   # Mean depth output
-        r""" 2> {log}"""                                                 # Log redirection
-        
-###############################################################################
-rule bedtools_genome_coverage:
-    # Aim: computing genome coverage sequencing
-    # Use: bedtools genomecov [OPTIONS] -ibam [MARK-DUP.bam] 1> [BEDGRAPH.bed]
-    message:
-        """
-        ~ BedTools ∞ Compute Genome Coverage ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ {wildcards.aligner}
-        """
-    conda:
-        BEDTOOLS
-    input:
-        mark_dup = get_bam_input,
-        index = get_bai_input
-    output:
-        genome_cov = "results/02_Mapping/{reference}/{sample}_{aligner}_genome-cov.bed"
-    log:
-        "results/10_Reports/tools-log/bedtools/{reference}/{sample}_{aligner}_genome-cov.log"
-    shell:
-        "bedtools genomecov "    # Bedtools genomecov, compute the coverage of a feature file among a genome
-        "-bga "                   # Report depth in BedGraph format, regions with zero coverage are also reported
-        "-ibam {input.mark_dup} " # The input file is in BAM format, must be sorted by position
-        "1> {output.genome_cov} " # BedGraph output
-        "2> {log} "               # Log redirection
-
-###############################################################################
-rule samtools_coverage_histogram:
-    # Aim: alignment depth and percent coverage histogram
-    # Use: samtools coverage --histogram [INPUT.bam]
-    message:
-        """
-        ~ SamTools ∞ Calcul Depth and Coverage from BAM file ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ {wildcards.aligner}
-        """
-    conda:
-        SAMTOOLS
-    resources:
-       cpus = CPUS
-       #bins = BINS,
-       #depth = DEPTH
-    input:
-        mark_dup = get_bam_input
-    output:
-        histogram = "results/03_Coverage/{reference}/histogram/{sample}_{aligner}_coverage-histogram.txt"
-    log:
-        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_coverage-histogram.log"
-    shell:
-        "samtools coverage "          # Samtools coverage, tools for alignments in the SAM format with command to alignment depth and percent coverage
-        "--histogram "                 # -m: show histogram instead of tabular output
-        "--verbosity 4 "               # Set level of verbosity [INT] (default: 3)
-        "--n-bins 149 "                # -w: number of bins in histogram (default: terminal width - 40) (todo: {params.bins}) 
-        "--depth 0 "                   # -d maximum allowed coverage depth [INT] (default: 1000000 ; 0 removing any depth limit) (todo: {params.depth}) 
-        "--output {output.histogram} " # write output to FILE (default: stdout)
-        "{input.mark_dup} ; "          # Mark_dup bam input
-        "echo >> {output.histogram} ; " # Newline
-        "samtools coverage "    # Samtools coverage, tools for alignments in the SAM format with command to alignment depth and percent coverage
-        "--verbosity 4 "         # Set level of verbosity [INT] (default: 3)
-        "{input.mark_dup} "      # Mark_dup bam input
-        ">> {output.histogram} " # write output to FILE (default: stdout)
-        "2> {log}"               # Log redirection
-
-###############################################################################
-rule samtools_flagstat_ext:
-    # Aim: simple stats
-    # Use: samtools flagstat -@ [THREADS] [INPUT.bam]
-    message:
-        """
-        ~ SamTools ∞ Calcul simple Stats from BAM file ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ {wildcards.aligner}
-        """
-    conda:
-        SAMTOOLS
-    resources:
-       cpus = CPUS
-    input:
-        mark_dup = get_bam_input
-    output:
-        flagstat = "results/03_Coverage/{reference}/flagstat/{sample}_{aligner}_flagstat.{ext}"
-    log:
-        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_flagstat-{ext}.log"
-    shell:
-        "samtools flagstat "           # Samtools flagstat, tools for alignments in the SAM format with command to simple stat
-        "--threads {resources.cpus} "   # -@: Number of additional threads to use (default: 1)
-        "--verbosity 4 "                # Set level of verbosity [INT] (default: 3)
-        "--output-fmt {wildcards.ext} " # -O Specify output format (none, tsv and json)
-        "{input.mark_dup} "             # Mark_dup bam input
-        "1> {output.flagstat} "         # Mark_dup index output
-        "2> {log}"                      # Log redirection
-
-###############################################################################
-############################## PRIMER CLEAPING  ###############################
-###############################################################################
-
-###############################################################################
-rule bamclipper_amplicon_primers:
-    # Aim: soft-clip amplicon PCR primers from BAM alignments
-    # Use: bamclipper.sh -n [THREADS] -b [MARKDUP.bam] -p [PRIMER.bed] -u [UPSTREAM] -d [DOWNSTREAM]
-    message:
-        """
-        ~ BAMClipper ∞ soft-clipping amplicon PCR primers from BAM alignments ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ {wildcards.aligner}
-        """
-    conda:
-        BAMCLIPPER
-    resources:
-       cpus = CPUS
-    params:
-        path = CLIPPATH,
-        primers = PRIMERS,
-        upstream = UPSTREAM,
-        downstream = DOWNSTREAM
-    input:
-        markdup = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam",
-        index = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam.bai"
-    output:
-        bamclip = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.primerclipped.bam",
-        baiclip = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.primerclipped.bam.bai"
-    log:
-        "results/10_Reports/tools-log/bamclipper/{reference}/{sample}_{aligner}_primers-clip.log"
-    shell:
-        "bamclipper.sh "                         # BAMClipper, remove primer sequences from BAM alignments of PCR amplicons by soft-clipping
-        "-b {input.markdup} "                     # Indexed BAM alignment file
-        "-p {params.path}{params.primers}.bedpe " # BEDPE of primer pair locations
-        "-n {resources.cpus} "                    # Number of threads (default: 1)
-        "-u {params.upstream} "                   # Number of nuc. upstream for assigning alignments to primers (default: 5)
-        "-d {params.downstream} "                 # Number of nuc. downstream for assigning alignments to primers (default: 5)
-        #"-o results/02_Mapping/ "                 # Path to write output (if BamClipper v.1.1.2) (todo)
-        "&> {log} "                               # Log redirection
-        "&& mv {wildcards.sample}_{wildcards.aligner}_mark-dup.primerclipped.bam {output.bamclip} "    # because BamClipper v.1 default output system...
-        "&& mv {wildcards.sample}_{wildcards.aligner}_mark-dup.primerclipped.bam.bai {output.baiclip}" # because BamClipper v.1 default output system...
-
-###############################################################################
-############################## REMOVE DUPLICATS ###############################
-###############################################################################
-
-###############################################################################
-rule samtools_index_markdup:
-    # Aim: indexing marked as duplicate BAM file
-    # Use: samtools index -@ [THREADS] -b [MARK-DUP.bam] [INDEX.bai]
-    message:
-        """
-        ~ SamTools ∞ Index 'Marked as Duplicate' BAM file ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ {wildcards.aligner}
-        """
-    conda:
-        SAMTOOLS
-    resources:
-       cpus = CPUS
-    input:
-        mark_dup = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam"
-    output:
-        index = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam.bai"
-    log:
-        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_mark-dup-index.log"
-    shell:
-        "samtools index "     # Samtools index, tools for alignments in the SAM format with command to index alignment
-        "-@ {resources.cpus} " # --threads: Number of additional threads to use (default: 1)(NB, --threads form dose'nt work)
-        "-b "                  # -b: Generate BAI-format index for BAM files (default)
-        "{input.mark_dup} "    # Mark_dup bam input
-        "{output.index} "      # Mark_dup index output
-        "&> {log}"             # Log redirection
-
-###############################################################################
-rule samtools_markdup:
-    # Aim: marking duplicate alignments
-    # Use: samtools markdup -@ [THREADS] -r -s -O BAM [SORTED.bam] [MARK-DUP.bam] 
-    message:
-        """
-        ~ SamTools ∞ Mark Duplicate Alignments ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ {wildcards.aligner}
-        """
-    conda:
-        SAMTOOLS
-    resources:
-       cpus = CPUS
-    input:
-        sorted = "results/02_Mapping/{reference}/{sample}_{aligner}_sorted.bam"
-    output:
-        mark_dup = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam"
-    log:
-        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_mark-dup.log"
-    shell:
-        "samtools markdup "          # Samtools markdup, tools for alignments in the SAM format with command mark duplicates
-        "--threads {resources.cpus} " # -@: Number of additional threads to use (default: 1)
-        "-r "                         # -r: Remove duplicate reads
-        "-s "                         # -s: Report stats
-        "--output-fmt BAM "           # -O: Specify output format: SAM, BAM, CRAM (here, BAM format)
-        "{input.sorted} "             # Sorted bam input
-        "{output.mark_dup} "          # Mark_dup bam output
-        "&> {log}"                    # Log redirection 
-
-###############################################################################
-rule samtools_sorting:
-    # Aim: sorting
-    # Use: samtools sort -@ [THREADS] -m [MEM_GB] -T [TMP_DIR] -O BAM -o [SORTED.bam] [FIX-MATE.bam] 
-    message:
-        """
-        ~ SamTools ∞ Sort BAM file ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ {wildcards.aligner}
-        """
-    conda:
-        SAMTOOLS
-    resources:
-       cpus = CPUS,
-       mem_gb = MEM_GB,
-       tmp_dir = TMP_DIR
-    input:
-        fix_mate = "results/02_Mapping/{reference}/{sample}_{aligner}_fix-mate.bam"
-    output:
-        sorted = temp("results/02_Mapping/{reference}/{sample}_{aligner}_sorted.bam")
-    log:
-        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_sorted.log"
-    shell:
-        "samtools sort "             # Samtools sort, tools for alignments in the SAM format with command to sort alignment file
-        "--threads {resources.cpus} " # -@: Number of additional threads to use (default: 1)
-        "-m {resources.mem_gb}G "     # -m: Set maximum memory per thread, suffix K/M/G recognized (default: 768M)
-        "-T {resources.tmp_dir} "     # -T: Write temporary files to PREFIX.nnnn.bam
-        "--output-fmt BAM "           # -O: Specify output format: SAM, BAM, CRAM (here, BAM format)
-        "-o {output.sorted} "         # Sorted bam output
-        "{input.fix_mate} "           # Fixmate bam input
-        "&> {log}"                    # Log redirection 
-
-###############################################################################
-rule samtools_fixmate:
-    # Aim: filling in mate coordinates
-    # Use: samtools fixmate -@ [THREADS] -m -O BAM [SORT-BY-NAMES.bam] [FIX-MATE.bam] 
-    message:
-        """
-        ~ SamTools ∞ Fill Mate Coordinates ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ {wildcards.aligner}
-        """
-    conda:
-        SAMTOOLS
-    resources:
-       cpus = CPUS
-    input:
-        sort_by_names = "results/02_Mapping/{reference}/{sample}_{aligner}_sort-by-names.bam"
-    output:
-        fix_mate = temp("results/02_Mapping/{reference}/{sample}_{aligner}_fix-mate.bam")
-    log:
-        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_fix-mate.log"
-    shell:
-        "samtools fixmate "          # Samtools fixmate, tools for alignments in the SAM format with command to fix mate information
-        "--threads {resources.cpus} " # -@: Number of additional threads to use (default: 1)
-        "-m "                         # -m: Add mate score tag 
-        "--output-fmt BAM "           # -O: Specify output format: SAM, BAM, CRAM (here, BAM format)
-        "{input.sort_by_names} "      # Sort_by_names bam input
-        "{output.fix_mate} "          # Fix_mate bam output 
-        "&> {log}"                    # Log redirection 
-
-###############################################################################
-rule samtools_sortbynames:
-    # Aim: sorting by names
-    # Use: samtools sort -t [THREADS] -m [MEM_GB] -n -O BAM -o [SORT-BY-NAMES.bam] [MAPPED.sam]
-    message:
-        """
-        ~ SamTools ∞ Sort by Names BAM file ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ {wildcards.aligner}
-        """
-    conda:
-        SAMTOOLS
-    resources:
-       cpus = CPUS,
-       mem_gb = MEM_GB
-    input:
-        mapped = "results/02_Mapping/{reference}/{sample}_{aligner}-mapped.sam"
-    output:
-        sort_by_names = temp("results/02_Mapping/{reference}/{sample}_{aligner}_sort-by-names.bam")
-    log:
-        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_sort-by-names.log"
-    shell:
-        "samtools sort "             # Samtools sort, tools for alignments in the SAM format with command to sort alignment file
-        "--threads {resources.cpus} " # -@: Number of additional threads to use (default: 1)
-        "-m {resources.mem_gb}G "     # -m: Set maximum memory per thread, suffix K/M/G recognized (default: 768M)
-        "-n "                         # -n: Sort by read name (not compatible with samtools index command) 
-        "--output-fmt BAM "           # -O: Specify output format: SAM, BAM, CRAM (here, BAM format)
-        "-o {output.sort_by_names} "  # -o: Write final output to FILE rather than standard output
-        "{input.mapped} "             # Mapped reads input
-        "&> {log}"                    # Log redirection 
-
-
-###############################################################################
-################################### MAPPING ###################################
-###############################################################################
-
-###############################################################################
-rule minimap2_mapping:
-    # Aim: reads mapping against reference sequence
-    # Use: minimap2
-    message:
-        """
-        ~ Minimap2 ∞ Map Reads ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ MiniMap2
-        """
-    conda:
-        MINIMAP2
-    resources:
-        cpus = CPUS
-    params:
-        preset = MM2_PRESET
-        #length = LENGTH
-    input:
-        mm2_indexes = "resources/indexes/minimap2/{reference}.mmi",
-        fwd_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R1.fastq.gz",
-        rev_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R2.fastq.gz"
-    output:
-        mapped = temp("results/02_Mapping/{reference}/{sample}_minimap2-mapped.sam")
-    log:
-        "results/10_Reports/tools-log/minimap2/{sample}_{reference}.log"
-    shell:
-        "minimap2 "           # Minimap2, a versatile sequence alignment program
-        "-x {params.preset} "  # -x: presets (always applied before other options)
-        "-t {resources.cpus} " # -t: Number of threads (default: 3)
-        "-a "                  # -a: output in the SAM format (PAF by default)
-        #"-F {params.length} "  # -F: max fragment length, effective with -x sr mode (default: 800)
-        "{input.mm2_indexes} " # Reference index filename prefix.mmi
-                               # (-k, -w, -I and -H can't be changed during mapping)
-        #"resources/genomes/{wildcards.reference}.fasta " # Reference genome fasta format (for custom -kwIH)
-        #"-k {params.kmer_size} "      # -k: k-mer size (default: "21", no larger than "28") [INT]
-        #"-w {params.minimizer_size} " # -w: minimizer window size (default: "11") [INT]
-        #"-I {params.split_size} "     # -I: split index for every {NUM} input bases (default: "8G") [INT]
-        #"{params.homopolymer} "       # -H: use homopolymer-compressed k-mer (preferrable for PacBio)
-        "{input.fwd_reads} "   # Forward input reads
-        "{input.rev_reads} "   # Reverse input reads
-        "1> {output.mapped} "  # SAM output
-        "2> {log}"             # Log redirection 
-
-###############################################################################
-rule bwa_mapping:
-    # Aim: reads mapping against reference sequence
-    # Use: bwa mem -t [THREADS] -x [REFERENCE] [FWD_R1.fq] [REV_R2.fq] 1> [MAPPED.sam]
-    message:
-        """
-        ~ BWA-MEM ∞ Map Reads ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ BWA
-        """
-    conda:
-        BWA
-    resources:
-        cpus = CPUS
-    input:
-        bwa_indexes = "resources/indexes/bwa/{reference}",
-        fwd_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R1.fastq.gz",
-        rev_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R2.fastq.gz"
-    output:
-        mapped = temp("results/02_Mapping/{reference}/{sample}_bwa-mapped.sam")
-    log:
-        "results/10_Reports/tools-log/bwa/{sample}_{reference}.log"
-    shell:
-        "bwa mem "            # BWA-MEM algorithm, performs local alignment
-        "-t {resources.cpus} " # -t: Number of threads (default: 12)
-        "-v 1 "                # -v: Verbosity level: 1=error, 2=warning, 3=message, 4+=debugging
-        "{input.bwa_indexes} " # Reference index filename prefix
-        "{input.fwd_reads} "   # Forward input reads
-        "{input.rev_reads} "   # Reverse input reads
-        "1> {output.mapped} "  # SAM output
-        "2> {log}"             # Log redirection 
-
-###############################################################################
-rule bowtie2_mapping:
-    # Aim: reads mapping against reference sequence
-    # Use: bowtie2 -p [THREADS] -x [REFERENCE] -1 [FWD_R1.fq] -2 [REV_R2.fq] -S [MAPPED.sam]
-    message:
-        """
-        ~ Bowtie2 ∞ Map Reads ~
-        Sample: __________ {wildcards.sample}
-        Reference: _______ {wildcards.reference}
-        Aligner: _________ Bowtie2
-        """
-    conda:
-        BOWTIE2
-    resources:
-        cpus = CPUS
-    params:
-        sensitivity = BT2_SENSITIVITY
-    input:
-        bt2_indexes = "resources/indexes/bowtie2/{reference}",
-        fwd_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R1.fastq.gz",
-        rev_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R2.fastq.gz"
-    output:
-        mapped = temp("results/02_Mapping/{reference}/{sample}_bowtie2-mapped.sam")
-    log:
-        "results/10_Reports/tools-log/bowtie2/{sample}_{reference}.log"
-    shell:
-        "bowtie2 "                   # Bowtie2, an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences
-        "--threads {resources.cpus} " # -p: Number of alignment threads to launch (default: 1)
-        "--reorder "                  # Keep the original read order (if multi-processor option -p is used)
-        "-x {input.bt2_indexes} "     # -x: Reference index filename prefix (minus trailing .X.bt2)
-        "{params.sensitivity} "       # Preset (default: "--sensitive")
-                                       # sensitive: same as [-D 15 -R 2 -N 0 -L 22 -i S,1,1.15]
-        "-q "                         # -q: Query input files are FASTQ .fq/.fastq (default)
-        "-1 {input.fwd_reads} "       # Forward input reads
-        "-2 {input.rev_reads} "       # Reverse input reads
-        "1> {output.mapped} "         # -S: File for SAM output (default: stdout) 
-        "2> {log}"                    # Log redirection 
-
-###############################################################################
-################################## INDEXING ###################################
-###############################################################################
-
-###############################################################################
-rule minimap2_genome_indexing:
-    # Aim: index sequences in the FASTA format
-    # Use: minimap2 [OPTIONS] -d [INDEX.mmi] <query.fasta>
-    message:
-        """
-        ~ Minimap2 ∞ Index Genome ~
-        Reference: _______ {wildcards.reference}
-        """
-    conda:
-        MINIMAP2
-    params:
-        kmer_size = KMER_SIZE,
-        minimizer_size = MINIMIZER_SIZE,
-        split_size = SPLIT_SIZE
-        #homopolymer = HOMOPOLYMER
-    input:
-        fasta = "resources/genomes/{reference}.fasta"
-    output:
-        mm2_indexes = multiext("resources/indexes/minimap2/{reference}",
-                               ".mmi")
-    log:
-        "results/10_Reports/tools-log/minimap2-indexes/{reference}.log"
-    shell:
-        "minimap2 "                  # Minimap2, index sequences
-        "-k {params.kmer_size} "      # -k: k-mer size (default: "21", no larger than "28") [INT]
-        "-w {params.minimizer_size} " # -w: minimizer window size (default: "11") [INT]
-        "-I {params.split_size} "     # -I: split index for every {NUM} input bases (default: "8G") [INT]
-        #"{params.homopolymer} "       # use homopolymer-compressed k-mer (preferrable for PacBio)
-        "-d {output.mm2_indexes} "    # -d: dump index to FILE []
-        "{input.fasta} "              # Reference sequences in the FASTA format
-        "&> {log}"                    # Log redirection
-
-###############################################################################
-rule bwa_genome_indexing:
-    # Aim: index sequences in the FASTA format
-    # Use: bwa index -a [ALGO] -p [PREFIX] <genome.fasta>
-    message:
-        """
-        ~ BWA-SW ∞ Index Genome ~
-        Reference: _______ {wildcards.reference}
-        """
-    conda:
-        BWA
-    params:
-        algorithm = BWA_ALGO
-    input:
-        fasta = "resources/genomes/{reference}.fasta"
-    output:
-        prefix = "resources/indexes/bwa/{reference}",
-        bwa_indexes = multiext("resources/indexes/bwa/{reference}",
-                               ".amb", ".ann", ".bwt", ".pac", ".sa")
-    log:
-        "results/10_Reports/tools-log/bwa-indexes/{reference}.log"
-    shell:
-        "bwa index "              # BWA-SW algorithm, index sequences
-        "{params.algorithm} "      # -a: Algorithm for constructing BWT index (default: auto)
-        "-p {output.prefix} "      # -p: Prefix of the output database
-        "{input.fasta} "           # Reference sequences in the FASTA format
-        "&> {log} "                # Log redirection
-        "&& touch {output.prefix}" # Touch done
-
-###############################################################################
-rule bowtie2_genome_indexing:
-    # Aim: index sequences in the FASTA format
-    # Use: bowtie2-build [OPTIONS] <reference_in> <bt2_index_base>
-    message:
-        """
-        ~ Bowtie2-build ∞ Index Genome ~
-        Reference: _______ {wildcards.reference}
-        """
-    conda:
-        BOWTIE2
-    resources:
-        cpus = CPUS
-    params:
-        algorithm = BT2_ALGO
-    input:
-        fasta = "resources/genomes/{reference}.fasta"
-    output:
-        prefix = "resources/indexes/bowtie2/{reference}",
-        bt2_indexes = multiext("resources/indexes/bowtie2/{reference}",
-                               ".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2",
-                               ".rev.1.bt2", ".rev.2.bt2")
-    log:
-        "results/10_Reports/tools-log/bowtie2-indexes/{reference}.log"
-    shell:
-        "bowtie2-build "             # Bowtie2-build, index sequences
-        "--quiet "                    # -q: quiet
-        "--threads {resources.cpus} " # Number of threads
-        "{params.algorithm} "         # Force (or no by default) generated index to be 'large',
-                                       # even if ref has fewer than 4 billion nucleotides
-        "-f "                         # Reference files are FASTA (default)
-        "{input.fasta} "              # Reference sequences files (comma-separated list) in the FASTA format
-        "{output.prefix} "            # Write bt2 data to files with this dir/basename
-        "&> {log} "                   # Log redirection
-        "&& touch {output.prefix}"    # Touch done
-
-
-###############################################################################
-################################## TRIMMING ###################################
-###############################################################################
-
-###############################################################################
-rule sickle_trim_quality:
-    # Aim: windowed adaptive trimming tool for FASTQ files using quality
-    # Use: sickle [COMMAND] [OPTIONS]
-    message:
-        """
-        ~ Sickle-trim ∞ Trim Low Quality Sequences ~
-        Sample: __________ {wildcards.sample}
-        """
-    conda:
-        SICKLE_TRIM
-    params:
-        command = SIC_COMMAND,
-        encoding = SIC_ENCODING,
-        quality = SIC_QUALITY,
-        length = SIC_LENGTH
-    input:
-        fwd_reads = "results/01_Trimming/cutadapt/{sample}_cutadapt-removed_R1.fastq.gz",
-        rev_reads = "results/01_Trimming/cutadapt/{sample}_cutadapt-removed_R2.fastq.gz"
-    output:
-        fwd_reads = stash_or_trash("results/01_Trimming/sickle/{sample}_sickle-trimmed_R1.fastq.gz"),
-        rev_reads = stash_or_trash("results/01_Trimming/sickle/{sample}_sickle-trimmed_R2.fastq.gz"),
-        single = stash_or_trash("results/01_Trimming/sickle/{sample}_sickle-trimmed_SE.fastq.gz")
-    log:
-        "results/10_Reports/tools-log/sickle-trim/{sample}.log"
-    shell:
-        "sickle "               # Sickle, a windowed adaptive trimming tool for FASTQ files using quality
-        "{params.command} "      # Paired-end or single-end sequence trimming
-        "-t {params.encoding} "  # --qual-type: Type of quality values, solexa ; illumina ; sanger ; CASAVA, < 1.3 ; 1.3 to 1.7 ; >= 1.8
-        "-q {params.quality} "   # --qual-threshold: Threshold for trimming based on average quality in a window (default: 20)
-        "-l {params.length} "    # --length-threshold: Threshold to keep a read based on length after trimming (default: 20)
-        "-f {input.fwd_reads} "  # --pe-file1: Input paired-end forward fastq file
-        "-r {input.rev_reads} "  # --pe-file2: Input paired-end reverse fastq file
-        "-g "                    # --gzip-output: Output gzipped files
-        "-o {output.fwd_reads} " # --output-pe1: Output trimmed forward fastq file
-        "-p {output.rev_reads} " # --output-pe2: Output trimmed reverse fastq file (must use -s option)
-        "-s {output.single} "    # --output-single: Output trimmed singles fastq file
-        "&> {log} "              # Log redirection
-
-###############################################################################
-rule cutadapt_adapters_removing:
-    # Aim: finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence from your high-throughput sequencing reads
-    # Use: cutadapt [OPTIONS] -a/-A [ADAPTER] -o [OUT_FWD.fastq.gz] -p [OUT_REV.fastq.gz] [IN_FWD.fastq.gz] [IN_REV.fastq.gz]
-    # Rmq: multiple adapter sequences can be given using further -a options, but only the best-matching adapter will be removed
-    message:
-        """
-        ~ Cutadapt ∞ Remove Adapters ~
-        Sample: __________ {wildcards.sample}
-        """
-    conda:
-        CUTADAPT
-    resources:
-        cpus = CPUS
-    params:
-        length = CUT_LENGTH,
-        truseq = CUT_TRUSEQ,
-        nextera = CUT_NEXTERA,
-        small = CUT_SMALL,
-        cut = CUT_CLIPPING
-    input:
-        fwd_reads = "resources/symlinks/{sample}_R1.fastq.gz",
-        rev_reads = "resources/symlinks/{sample}_R2.fastq.gz"
-    output:
-        fwd_reads = temp("results/01_Trimming/cutadapt/{sample}_cutadapt-removed_R1.fastq.gz"),
-        rev_reads = temp("results/01_Trimming/cutadapt/{sample}_cutadapt-removed_R2.fastq.gz")
-    log:
-        "results/10_Reports/tools-log/cutadapt/{sample}.log"
-    shell:
-        "cutadapt "                          # Cutadapt, finds and removes unwanted sequence from your HT-seq reads
-        "--cores {resources.cpus} "           # -j: Number of CPU cores to use. Use 0 to auto-detect (default: 1)
-        "--cut {params.cut} "                 # -u: Remove 'n' first bases (5') from forward R1 (hard-clipping, default: 0)
-        "-U {params.cut} "                    # -U: Remove 'n' first bases (5') from reverse R2 (hard-clipping, default: 0)
-        "--trim-n "                           # --trim-n: Trim N's on ends (3') of reads
-        "--minimum-length {params.length} "   # -m: Discard reads shorter than length
-        "--adapter {params.truseq} "          # -a: Sequence of an adapter ligated to the 3' end of the first read
-        "-A {params.truseq} "                 # -A: 3' adapter to be removed from second read in a pair
-        "--adapter {params.nextera} "         # -a: Sequence of an adapter ligated to the 3' end of the first read
-        "-A {params.nextera} "                # -A: 3' adapter to be removed from second read in a pair
-        "--adapter {params.small} "           # -a: Sequence of an adapter ligated to the 3' end of the first read
-        "-A {params.small} "                  # -A: 3' adapter to be removed from second read in a pair
-        "--output {output.fwd_reads} "        # -o: Write trimmed reads to FILE
-        "--paired-output {output.rev_reads} " # -p: Write second read in a pair to FILE
-        "{input.fwd_reads} "                  # Input forward reads R1.fastq
-        "{input.rev_reads} "                  # Input reverse reads R2.fastq
-        "&> {log} "                           # Log redirection
+### LOAD RULES ###
+##################
 
-###############################################################################
-############################### QUALITY CONTROL ###############################
-###############################################################################
+include: "rules/quality_controls.smk"
+include: "rules/reads_cleaning.smk"
+include: "rules/indexing_genomes.smk"
+include: "rules/reads_mapping.smk"
+include: "rules/primer_cleaping.smk"
+include: "rules/remove_duplicates.smk"
+include: "rules/coverage_stats.smk"
+include: "rules/mask_lowcov.smk"
+include: "rules/variant_calling.smk"
+include: "rules/consensus_calling.smk"
+include: "rules/lineages_calling.smk"
 
 ###############################################################################
-rule multiqc_reports_aggregation:
-    # Aim: aggregates bioinformatics analyses results into a single report
-    # Use: multiqc [OPTIONS] --outdir [MULTIQC/] [FASTQC/] [FASTQ-SCREEN/]
-    priority: 999 # Explicit high priority
-    message:
-        """
-        ~ MultiQC ∞ Aggregate HTML Quality Reports ~
-        """
-    conda:
-        MULTIQC
-    params:
-        #config = MQC_CONFIG,
-        #tag = TAG
-    input:
-        fastqc = expand("results/00_Quality_Control/fastqc/{fastq}/",
-                        fastq = FASTQ),
-        fastq_screen = expand("results/00_Quality_Control/fastq-screen/{fastq}/",
-                             fastq = FASTQ)
-    output:
-        multiqc = directory("results/00_Quality_Control/multiqc/")
-    log:
-        "results/10_Reports/tools-log/multiqc.log"
-    shell:
-        "multiqc "                  # Multiqc, searches in given directories for analysis & compiles a HTML report
-        "--quiet "                   # -q: Only show log warning
-        "--no-ansi "                 # Disable coloured log
-        #"--config {params.config} "  # Specific config file to load
-        #"--tag {params.tag} "        # Use only modules which tagged with this keyword
-        #"--pdf "                     # Creates PDF report with 'simple' template (require xelatex)
-        "--export "                  # Export plots as static images in addition to the report
-        "--outdir {output.multiqc} " # -o: Create report in the specified output directory
-        "{input.fastqc} "            # Input FastQC files
-        "{input.fastq_screen} "      # Input Fastq-Screen
-        "&> {log}"                   # Log redirection
+### TARGET RULES ###
+####################
 
-###############################################################################
-rule fastqscreen_contamination_checking:
-    # Aim: check whether the composition of the library matches what you expect
-    # Use: fastq_screen [OPTIONS] --outdir [DIR/] [FASTQ.GZ]
-    message:
-        """
-        ~ Fastq-Screen ∞ Screen Contamination ~
-        Fastq: __________ {wildcards.fastq}
-        """
-    conda:
-        FASTQ_SCREEN
-    resources:
-        cpus = CPUS
-    params:
-        config = FQC_CONFIG,
-        subset = SUBSET
+rule all:
     input:
-        fastq = "resources/symlinks/{fastq}.fastq.gz"
-    output:
-        fastq_screen = directory("results/00_Quality_Control/fastq-screen/{fastq}/")
-    log:
-        "results/10_Reports/tools-log/fastq-screen/{fastq}.log"
-    shell:
-        "fastq_screen "                  # FastqScreen, what did you expect ?
-        "-q "                             # --quiet: Only show log warning
-        "--threads {resources.cpus} "     # --threads: Specifies across how many aligner  will be allowed to run
-        "--aligner 'bwa' "                # -a: choose aligner 'bowtie', 'bowtie2', 'bwa'
-        "--conf {params.config} "         # path to configuration file
-        "--subset {params.subset} "       # Don't use the whole sequence file, but create a subset of specified size
-        "--outdir {output.fastq_screen} " # Output directory
-        "{input.fastq} "                  # Input file.fastq
-        "&> {log}"                        # Log redirection
+        multiqc = functions.get_quality_input,
+        trimming = functions.get_trimming_input,
+        consensus = functions.get_consensus_input,
+        flagstat = functions.get_flagstat_input,
+        covstats = functions.get_covstats_input,
+        vcf = functions.get_vcf_input,
+        pangolin = functions.get_pangolin_input,
+        nextclade = functions.get_nextclade_input,
+        gisaid = functions.get_gisaid_input
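+        # NB: each entry above is a Snakemake input function (defined in
+        # workflow/scripts/functions.py), evaluated lazily at DAG-build time;
+        # each returns the list of final files for the modules enabled in
+        # config.yaml, so disabled modules simply contribute no targets.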
 
-###############################################################################
-rule fastqc_quality_control:
-    # Aim: reads sequence files and produces a quality control report
-    # Use: fastqc [OPTIONS] --output [DIR/] [FASTQ.GZ]
-    message:
-        """
-        ~ FastQC ∞ Quality Control ~
-        Fastq: __________ {wildcards.fastq}
-        """
-    conda:
-        FASTQC
-    resources:
-        cpus = CPUS
-    input:
-        fastq = "resources/symlinks/{fastq}.fastq.gz"
-    output:
-        fastqc = directory("results/00_Quality_Control/fastqc/{fastq}/")
-    log:
-        "results/10_Reports/tools-log/fastqc/{fastq}.log"
-    shell:
-        "mkdir -p {output.fastqc} "    # (*) this directory must exist as the program will not create it
-        "2> /dev/null && "             # in silence and then... 
-        "fastqc "                    # FastQC, a high throughput sequence QC analysis tool
-        "--quiet "                    # -q: Supress all progress messages on stdout and only report errors
-        "--threads {resources.cpus} " # -t: Specifies files number which can be processed simultaneously
-        "--outdir {output.fastqc} "   # -o: Create all output files in the specified output directory (*)
-        "{input.fastq} "              # Input file.fastq
-        "&> {log}"                    # Log redirection
 
-###############################################################################
diff --git a/workflow/rules/gevarli.smk b/workflow/Snakefile_SAVE
old mode 100755
new mode 100644
similarity index 95%
rename from workflow/rules/gevarli.smk
rename to workflow/Snakefile_SAVE
index ba9ad8c..fed2347
--- a/workflow/rules/gevarli.smk
+++ b/workflow/Snakefile_SAVE
@@ -8,18 +8,21 @@
 ###                                                                         ###
 ###I###R###D######U###2###3###3#######T###R###A###N###S###V###I###H###M###I####
 # Name ___________________ gevarli.smk
-# Version ________________ v.2024.08
+# Version ________________ v.2025.01
 # Author _________________ Nicolas Fernandez
 # Affiliation ____________ IRD_U233_TransVIHMI
 # Aim ____________________ Snakefile with GeVarLi rules
 # Date ___________________ 2021.10.12
-# Latest modifications ___ 2024.08.05 ('Noarch' conda environment yaml files)
-# Use ____________________ snakemake -s gevarli.smk --use-conda 
+# Latest modifications ___ 2025.01.08 (Prepare for Snakedeploy)
+# Use ____________________ snakemake -s Snakefile --use-conda -j
 
 ###############################################################################
 ### CONFIGURATION ###
 #####################
 
+from snakemake.utils import min_version
+min_version("8.27.0")
+
 configfile: "configuration/config.yaml"
 
 ###############################################################################
@@ -46,7 +49,7 @@ def get_trimming_input(wildcards):
     return trimming_list
 
 def stash_or_trash(path):
-    if "trimming" in MODULES:
+    if "keeptrim" in MODULES:
         return path
     else:
         return temp(path)
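+# NB: temp() flags a file for deletion once all consuming jobs have finished,
+# so with "keeptrim" absent from MODULES the trimmed FASTQ files are removed
+# after mapping; listing "keeptrim" keeps them as regular outputs. A sketch,
+# assuming MODULES comes from config.yaml:
+#   MODULES = ["quality", "keeptrim", "consensus"]  # trimmed reads are kept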
@@ -157,8 +160,8 @@ def get_gisaid_input(wildcards):
 ### WILDCARDS ###
 #################
 
-FASTQ, = glob_wildcards("resources/reads/{fastq}.fastq.gz")
-SAMPLE, = glob_wildcards("resources/reads/{sample}_R1.fastq.gz")
+FASTQ, = glob_wildcards("resources/symlinks/{fastq}.fastq.gz")
+SAMPLE, = glob_wildcards("resources/symlinks/{sample}_R1.fastq.gz")
 
 ###############################################################################
 ### RESOURCES ###
@@ -173,23 +176,23 @@ TMP_DIR = config["resources"]["tmp_dir"] # Temporary directory
 ### ENVIRONMENTS ###
 ####################
 
-MULTIQC = config["conda"]["yaml"]["multiqc"]           # Multi-QC conda env
-FASTQ_SCREEN = config["conda"]["yaml"]["fastq_screen"] # Fastq-Screen conda env
-FASTQC= config["conda"]["yaml"]["fastqc"]              # FastQC conda env
-CUTADAPT = config["conda"]["yaml"]["cutadapt"]         # Cutadapt conda environment
-SICKLE_TRIM = config["conda"]["yaml"]["sickle_trim"]   # Sickle-Trim conda environment
-MINIMAP2 = config["conda"]["yaml"]["minimap2"]         # BWA conda environment
-BWA = config["conda"]["yaml"]["bwa"]                   # BWA conda environment
-BOWTIE2 = config["conda"]["yaml"]["bowtie2"]           # Bowtie2 conda environment
-SAMTOOLS = config["conda"]["yaml"]["samtools"]         # SamTools conda environment
-BEDTOOLS = config["conda"]["yaml"]["bedtools"]         # BedTools conda environment
-BAMCLIPPER = config["conda"]["yaml"]["bamclipper"]     # BAMClipper
-GAWK = config["conda"]["yaml"]["gawk"]                 # Awk (GNU) conda environment
-IVAR = config["conda"]["yaml"]["ivar"]                 # iVar conda environment
-TSV2VCF = config["conda"]["yaml"]["tsv2vcf"]           # tsv2vcf conda environment
-BCFTOOLS = config["conda"]["yaml"]["bcftools"]         # BcfTools conda environment
-PANGOLIN = config["conda"]["yaml"]["pangolin"]         # Pangolin conda environment
-NEXTCLADE = config["conda"]["yaml"]["nextclade"]       # Nextclade conda environment
+MULTIQC = config["envs"]["multiqc"]           # Multi-QC conda env
+FASTQ_SCREEN = config["envs"]["fastq_screen"] # Fastq-Screen conda env
+FASTQC = config["envs"]["fastqc"]             # FastQC conda env
+CUTADAPT = config["envs"]["cutadapt"]         # Cutadapt conda environment
+SICKLE_TRIM = config["envs"]["sickle_trim"]   # Sickle-Trim conda environment
+MINIMAP2 = config["envs"]["minimap2"]         # BWA conda environment
+BWA = config["envs"]["bwa"]                   # BWA conda environment
+BOWTIE2 = config["envs"]["bowtie2"]           # Bowtie2 conda environment
+SAMTOOLS = config["envs"]["samtools"]         # SamTools conda environment
+BEDTOOLS = config["envs"]["bedtools"]         # BedTools conda environment
+BAMCLIPPER = config["envs"]["bamclipper"]     # BAMClipper
+GAWK = config["envs"]["gawk"]                 # Awk (GNU) conda environment
+IVAR = config["envs"]["ivar"]                 # iVar conda environment
+TSV2VCF = config["envs"]["tsv2vcf"]           # tsv2vcf conda environment
+BCFTOOLS = config["envs"]["bcftools"]         # BcfTools conda environment
+PANGOLIN = config["envs"]["pangolin"]         # Pangolin conda environment
+NEXTCLADE = config["envs"]["nextclade"]       # Nextclade conda environment
 
 ###############################################################################
 ### PARAMETERS ###
@@ -207,11 +210,11 @@ KMER_SIZE = config["minimap2"]["algorithm"]["k-mer_size"]          # MM2 k-mer s
 MINIMIZER_SIZE = config["minimap2"]["algorithm"]["minimizer_size"] # MM2 minimizer window size
 SPLIT_SIZE = config["minimap2"]["algorithm"]["split_size"]         # MM2 split index
 #HOMOPOLYMER = config["minimap2"]["algorithm"]["homopolymer"]       # MM2 if PacBio
+
 BWA_ALGO = config["bwa"]["algorithm"]     # BWA indexing algorithm
 BT2_ALGO = config["bowtie2"]["algorithm"] # BT2 indexing algorithm
 
 REFERENCE = config["consensus"]["reference"] # Genome reference sequence, in fasta format
-REF_PATH = config["consensus"]["path"]       # Path to genomes references
 MIN_COV = config["consensus"]["min_cov"]     # Minimum coverage, mask lower regions with 'N'
 IUPAC = config["consensus"]["iupac"]         # Output variants in the form of IUPAC ambiguity codes
 ALIGNER = config["consensus"]["aligner"]     # Aligner ('minimap2', 'bwa' or 'bowtie2')
@@ -228,7 +231,6 @@ SIC_ENCODING = config["sickle_trim"]["encoding"] # Sickle-trim --qual-type
 SIC_QUALITY = config["sickle_trim"]["quality"]   # Sickle-trim --qual-threshold
 SIC_LENGTH = config["sickle_trim"]["length"]     # Sickle-trim --length-threshold
 
-MM2_PATH = config["minimap2"]["path"]     # Minimap2 path to indexes
 MM2_PRESET = config["minimap2"]["preset"] # Minimap2 preset
 #MM2_LENGTH = config["minimap2"]["length"]                              # Minimap2 length
 #MM2_KMER_SIZE = config["minimap2"]["algorithm"]["k-mer_size"]          # Minimap2 k-mer size
@@ -236,10 +238,8 @@ MM2_PRESET = config["minimap2"]["preset"] # Minimap2 preset
 #MM2_SPLIT_SIZE = config["minimap2"]["algorithm"]["split_size"]         # Minimap2 split index
 #MM2_HOMOPOLYMER = config["minimap2"]["algorithm"]["homopolymer"]       # Minimap2 for PacBio
 
-BWA_PATH = config["bwa"]["path"]      # BWA path to indexes
 BWA_ALGO = config["bwa"]["algorithm"] # BWA indexing algorithm
 
-BT2_PATH = config["bowtie2"]["path"]               # Bowtie2 path to indexes
 BT2_ALGO = config["bowtie2"]["algorithm"]          # Bowtie2 indexing algorithm
 BT2_SENSITIVITY = config["bowtie2"]["sensitivity"] # Bowtie2 sensitivity preset
 
@@ -456,6 +456,7 @@ rule ivar_consensus:
         "--min-BQ {params.min_bq} "       # -Q: skip bases with baseQ/BAQ smaller than [INT] (default: 13)
         #"--reference {input.masked_ref} " # Reference sequence FASTA FILE
         "{input.mark_dup} "               # Markdup BAM input
+        "2>&1 | grep -v '[mpileup] 1 samples in 1 input files' " # Remove this stdout
         "| "                               ### PIPE to iVar
         "ivar consensus "                # iVar, with command 'consensus': Call consensus from aligned BAM file
         "-p {output.prefix} "             # -p: prefix
@@ -512,6 +513,7 @@ rule ivar_variant_calling:
         "--min-BQ {params.min_bq} "       # -Q: skip bases with baseQ/BAQ smaller than (default: 13) [INT]
         "--reference {input.masked_ref} " # Reference sequence FASTA FILE
         "{input.mark_dup} "               # Markdup BAM input
+        "2>&1 | grep -v '[mpileup] 1 samples in 1 input files' " # Remove this stdout
         "| "                               ### pipe to iVar
         "ivar variants "                 # iVar, with command 'variants': Call variants from aligned BAM file
         "-p {output.variant_call} "       # -p: prefix
@@ -541,20 +543,19 @@ rule bedtools_masking:
         """
     conda:
         BEDTOOLS
-    params:
-        path = REF_PATH
     input:
+        reference = "resources/genomes/{reference}.fasta",
         low_cov_mask = "results/03_Coverage/{reference}/bed/{sample}_{aligner}_{min_cov}X_low-cov-mask.bed"
     output:
         masked_ref = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_masked-ref.fasta"
     log:
         "results/10_Reports/tools-log/bedtools/{reference}/{sample}_{aligner}_{min_cov}X_masking.log"
     shell:
-        "bedtools maskfasta "                          # Bedtools maskfasta, mask a fasta file based on feature coordinates
-        "-fi {params.path}{wildcards.reference}.fasta " # Input FASTA file 
-        "-bed {input.low_cov_mask} "                    # BED/GFF/VCF file of ranges to mask in -fi
-        "-fo {output.masked_ref} "                      # Output masked FASTA file
-        "&> {log}"                                      # Log redirection 
+        "bedtools maskfasta "       # Bedtools maskfasta, mask a fasta file based on feature coordinates
+        "-fi {input.reference} "     # Input FASTA file 
+        "-bed {input.low_cov_mask} " # BED/GFF/VCF file of ranges to mask in -fi
+        "-fo {output.masked_ref} "   # Output masked FASTA file
+        "&> {log}"                   # Log redirection 
 
 ###############################################################################
 rule bedtools_merged_mask:
@@ -874,7 +875,7 @@ rule bamclipper_amplicon_primers:
         "&& mv {wildcards.sample}_{wildcards.aligner}_mark-dup.primerclipped.bam.bai {output.baiclip}" # because BamClipper v.1 default output system...
 
 ###############################################################################
-############################## REMOVE DUPLICATS ###############################
+############################# REMOVE DUPLICATES ###############################
 ###############################################################################
 
 ###############################################################################
@@ -1056,7 +1057,7 @@ rule minimap2_mapping:
         preset = MM2_PRESET
         #length = LENGTH
     input:
-        mm2_indexes = "resources/indexes/minimap2/{reference}.mmi"
+        mm2_indexes = "resources/indexes/minimap2/{reference}.mmi",
         fwd_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R1.fastq.gz",
         rev_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R2.fastq.gz"
     output:
@@ -1097,7 +1098,7 @@ rule bwa_mapping:
     resources:
         cpus = CPUS
     input:
-        bwa_indexes = "resources/indexes/bwa/{reference}"
+        bwa_indexes = "resources/indexes/bwa/{reference}",
         fwd_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R1.fastq.gz",
         rev_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R2.fastq.gz"
     output:
@@ -1143,7 +1144,7 @@ rule bowtie2_mapping:
         "bowtie2 "                   # Bowtie2, an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences
         "--threads {resources.cpus} " # -p: Number of alignment threads to launch (default: 1)
         "--reorder "                  # Keep the original read order (if multi-processor option -p is used)
-        "-x {input.indexes} "         # -x: Reference index filename prefix (minus trailing .X.bt2)
+        "-x {input.bt2_indexes} "     # -x: Reference index filename prefix (minus trailing .X.bt2)
         "{params.sensitivity} "       # Preset (default: "--sensitive")
                                        # sensitive: same as [-D 15 -R 2 -N 0 -L 22 -i S,1,1.15]
         "-q "                         # -q: Query input files are FASTQ .fq/.fastq (default)
@@ -1176,7 +1177,7 @@ rule minimap2_genome_indexing:
         fasta = "resources/genomes/{reference}.fasta"
     output:
         mm2_indexes = multiext("resources/indexes/minimap2/{reference}",
-                           ".mmi")
+                               ".mmi")
     log:
         "results/10_Reports/tools-log/minimap2-indexes/{reference}.log"
     shell:
@@ -1185,7 +1186,7 @@ rule minimap2_genome_indexing:
         "-w {params.minimizer_size} " # -w: minimizer window size (default: "11") [INT]
         "-I {params.split_size} "     # -I: split index for every {NUM} input bases (default: "8G") [INT]
         #"{params.homopolymer} "       # use homopolymer-compressed k-mer (preferrable for PacBio)
-        "-d {output.indexes} "        # -d: dump index to FILE []
+        "-d {output.mm2_indexes} "    # -d: dump index to FILE []
         "{input.fasta} "              # Reference sequences in the FASTA format
         "&> {log}"                    # Log redirection
 
@@ -1205,14 +1206,14 @@ rule bwa_genome_indexing:
     input:
         fasta = "resources/genomes/{reference}.fasta"
     output:
-        prefix = temp("resources/indexes/bwa/{reference}"),
-        indexes = multiext("resources/indexes/bwa/{ref_seq}",
-                           ".amb", ".ann", ".bwt", ".pac", ".sa")
+        prefix = "resources/indexes/bwa/{reference}",
+        bwa_indexes = multiext("resources/indexes/bwa/{reference}",
+                               ".amb", ".ann", ".bwt", ".pac", ".sa")
     log:
         "results/10_Reports/tools-log/bwa-indexes/{reference}.log"
     shell:
         "bwa index "              # BWA-SW algorithm, index sequences
-        "{params.algorithm} "      # -a: Algorithm for constructing BWT index (default: auto)                                  
+        "{params.algorithm} "      # -a: Algorithm for constructing BWT index (default: auto)
         "-p {output.prefix} "      # -p: Prefix of the output database
         "{input.fasta} "           # Reference sequences in the FASTA format
         "&> {log} "                # Log redirection
@@ -1236,10 +1237,10 @@ rule bowtie2_genome_indexing:
     input:
         fasta = "resources/genomes/{reference}.fasta"
     output:
-        prefix = temp("resources/indexes/bowtie2/{reference}"),
-        indexes = multiext("resources/indexes/bowtie2/{ref_seq}",
-                           ".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2",
-                           ".rev.1.bt2", ".rev.2.bt2")
+        prefix = "resources/indexes/bowtie2/{reference}",
+        bt2_indexes = multiext("resources/indexes/bowtie2/{reference}",
+                               ".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2",
+                               ".rev.1.bt2", ".rev.2.bt2")
     log:
         "results/10_Reports/tools-log/bowtie2-indexes/{reference}.log"
     shell:
@@ -1319,10 +1320,8 @@ rule cutadapt_adapters_removing:
         small = CUT_SMALL,
         cut = CUT_CLIPPING
     input:
-        fwd_reads = "resources/reads/{sample}_R1.fastq.gz",
-        rev_reads = "resources/reads/{sample}_R2.fastq.gz"
-        #fwd_reads = "/users/illumina/local/data/run_1/FATSQ/{sample}_R1.fastq.gz",
-        #rev_reads = "/users/illumina/local/data/run_1/FATSQ/{sample}_R2.fastq.gz"
+        fwd_reads = "resources/symlinks/{sample}_R1.fastq.gz",
+        rev_reads = "resources/symlinks/{sample}_R2.fastq.gz"
     output:
         fwd_reads = temp("results/01_Trimming/cutadapt/{sample}_cutadapt-removed_R1.fastq.gz"),
         rev_reads = temp("results/01_Trimming/cutadapt/{sample}_cutadapt-removed_R2.fastq.gz")
@@ -1404,7 +1403,7 @@ rule fastqscreen_contamination_checking:
         config = FQC_CONFIG,
         subset = SUBSET
     input:
-        fastq = "resources/reads/{fastq}.fastq.gz"
+        fastq = "resources/symlinks/{fastq}.fastq.gz"
     output:
         fastq_screen = directory("results/00_Quality_Control/fastq-screen/{fastq}/")
     log:
@@ -1434,7 +1433,7 @@ rule fastqc_quality_control:
     resources:
         cpus = CPUS
     input:
-        fastq = "resources/reads/{fastq}.fastq.gz"
+        fastq = "resources/symlinks/{fastq}.fastq.gz"
     output:
         fastqc = directory("results/00_Quality_Control/fastqc/{fastq}/")
     log:
diff --git a/workflow/report/workflow.rst b/workflow/report/workflow.rst
new file mode 100644
index 0000000..e69de29
diff --git a/workflow/rules/consensus_calling.smk b/workflow/rules/consensus_calling.smk
new file mode 100644
index 0000000..e5c22c7
--- /dev/null
+++ b/workflow/rules/consensus_calling.smk
@@ -0,0 +1,33 @@
+###############################################################################
+################################## CONSENSUS ##################################
+###############################################################################
+
+###############################################################################
+rule sed_rename_headers:
+    # Aim: rename all FASTA headers with the sample name
+    # Use: sed 's/[OLD]/[NEW]/' [IN] > [OUT]
+    message:
+        """
+        ~ Sed ∞ Rename Fasta Header ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        Variant caller: __ {wildcards.caller}
+        """
+    conda:
+        GAWK
+    input:
+        cons_tmp = "results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_consensus.fasta.tmp"
+    output:
+        consensus = "results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_consensus.fasta"
+    log:
+        "results/10_Reports/tools-log/sed/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_fasta-header.log"
+    shell:
+        "sed " # Sed, a Stream EDitor used to perform basic text transformations on an input stream
+        "'s/^>.*$/>{wildcards.sample}_{wildcards.aligner}_{wildcards.min_cov}X_{wildcards.caller}/' "
+        "{input.cons_tmp} "      # Input file
+        "1> {output.consensus} " # Output file
+        "2> {log}"               # Log redirection
+
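+# NB (illustrative example, not from a real run): a temporary header such as
+#   >NC_045512.2 Severe acute respiratory syndrome coronavirus 2, complete genome
+# is rewritten to
+#   >SampleA_bwa_30X_ivar
+# so each consensus record carries its sample, aligner, coverage threshold and
+# variant caller.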
+###############################################################################
diff --git a/workflow/rules/coverage_stats.smk b/workflow/rules/coverage_stats.smk
new file mode 100644
index 0000000..74bee1b
--- /dev/null
+++ b/workflow/rules/coverage_stats.smk
@@ -0,0 +1,221 @@
+###############################################################################
+################################## STATISTICS #################################
+###############################################################################
+
+###############################################################################
+rule awk_coverage_statistics:
+    # Aim: computing genome coverage stats
+    # Use: awk '{FORMULA} END {print ...}' [BEDGRAPH.bed] 1> [RESULTS.tsv]
+    message:
+        """
+        ~ Awk ∞ Compute Genome Coverage Statistics from BED file ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        GAWK
+    input:
+        cutadapt = "results/10_Reports/tools-log/cutadapt/{sample}.log",
+        sickle = "results/10_Reports/tools-log/sickle-trim/{sample}.log",
+        samtools = "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_mark-dup.log",
+        flagstat = "results/03_Coverage/{reference}/flagstat/{sample}_{aligner}_flagstat.json",
+        histogram = "results/03_Coverage/{reference}/histogram/{sample}_{aligner}_coverage-histogram.txt",
+        genome_cov = "results/02_Mapping/{reference}/{sample}_{aligner}_genome-cov.bed"
+    output:
+        cov_stats = "results/03_Coverage/{reference}/{sample}_{aligner}_{min_cov}X_coverage-stats.tsv"
+    log:
+        "results/10_Reports/tools-log/awk/{reference}/{sample}_{aligner}_{min_cov}X_coverage-stats.log"
+    shell:
+        r""" rawReads=$(grep -o -E  """                                  # Get raw reads 
+        r""" 'Total read pairs processed:.+' {input.cutadapt}  """       #
+        r""" | sed -E 's/Total read pairs processed:\ +//'  """          #
+        r""" | sed 's/,//g') ; """                                       #
+        #
+        r""" cutadaptPF=$(grep -o -E """                                 # Get cutadapt Passing Filtes reads
+        r""" 'Pairs written \(passing filters\):.+' {input.cutadapt} """ #
+        r""" | sed -E 's/Pairs written \(passing filters\):\ +//' """    #
+        r""" | sed 's/,//g') ; """                                       #
+        #
+        r""" sicklePF=$(grep -o -E """                                   # Get sickle Passing Filtes reads
+        r""" 'FastQ paired records kept:.+' {input.sickle} """           #
+        r""" | sed -E 's/FastQ paired records kept:\ +//') ; """         #
+        #
+        r""" totalDuplicate=$(grep -o -E """                             # Get total duplicated reads
+        r""" 'DUPLICATE TOTAL:.+' {input.samtools} """                   #
+        r""" | sed -E 's/DUPLICATE TOTAL:\ +//') ; """                   #
+        #
+        r""" estimatedLibrarySize=$(grep -o -E """                       # Get estimated library size
+        r""" 'ESTIMATED_LIBRARY_SIZE:.+' {input.samtools} """            #
+        r""" | sed -E 's/ESTIMATED_LIBRARY_SIZE:\ +//') ; """            #
+        #
+        r""" samtoolsPF=$(grep -o -E """                                 # Get samtool Passing Filter reads
+        r""" 'WRITTEN: .+' {input.samtools} """                          #
+        r""" | sed -E 's/WRITTEN:\ +//') ; """                           #
+        #
+        r""" mappedReads=$(grep -o -E -m 1 """                           # Get mapped reads
+        r""" '"mapped": .+' {input.flagstat} """                         #
+        r""" | sed -E 's/"mapped":\ +//' """                             #
+        r""" | sed 's/,//g') ; """                                       #
+        #
+        r""" mappedPercentReads=$(grep -o -E -m 1 """                    # Get mapped precent reads
+        r""" '"mapped %": .+' {input.flagstat} """                       #
+        r""" | sed -E 's/"mapped %":\ +//' """                           #
+        r""" | sed 's/,//g') ; """                                       #
+        #
+        r""" covPercentAt1X=$(grep -o -E """                             # Get coverage percent @1X
+        r""" 'Percent covered:.+' {input.histogram} """                  #
+        r""" | sed -E 's/Percent covered:\ +//') ; """                   #
+        #
+        r""" awk """                                                   # Awk, a program to select particular records in a file and perform operations upon them
+        r""" -v rawReads="${{rawReads}}" """                             # Define external variable
+        r""" -v cutadaptPF="${{cutadaptPF}}" """                         # Define external variable
+        r""" -v sicklePF="${{sicklePF}}" """                             # Define external variable
+        r""" -v totalDuplicate="${{totalDuplicate}}" """                 # Define external variable
+        r""" -v estimatedLibrarySize="${{estimatedLibrarySize}}" """     # Define external variable
+        r""" -v samtoolsPF="${{samtoolsPF}}" """                         # Define external variable
+        r""" -v mappedReads="${{mappedReads}}" """                       # Define external variable
+        r""" -v mappedPercentReads="${{mappedPercentReads}}" """         # Define external variable
+        r""" -v covPercentAt1X="${{covPercentAt1X}}" """                 # Define external variable
+        r""" '$4 >= {wildcards.min_cov} {{supMin_Cov+=$3-$2}} ; """      # Genome size (>= min_cov @X)
+        r""" {{genomeSize+=$3-$2}} ; """                                 # Genome size (total)
+        r""" {{totalBases+=($3-$2)*$4}} ; """                            # Total bases @1X
+        r""" {{totalBasesSq+=(($3-$2)*$4)**2}} """                       # Total bases square @1X
+        r""" END """                                                    # END
+        r""" {{print """                                                # Print
+        r""" "sample_id", "\t", """                                      # header: Sample ID
+        r""" "raw_paired_reads", "\t", """                               # header: Raw paired reads
+        r""" "cutadapt_pairs_PF", "\t", """                              # header: Cutadapt Passing Filters
+        r""" "sickle_reads_PF", "\t", """                                # header: Sickle-trim Passing Filters
+        r""" "duplicated_reads", "\t", """                               # header:
+        r""" "duplicated_percent_%","\t", """                            # header:
+        r""" "estimated_library_size*", "\t", """                        # header:
+        r""" "samtools_pairs_PF", "\t", """                              # header:
+        r""" "mapped_with", "\t",  """                                   # header: aligner
+        r""" "mapped_on", "\t",  """                                     # header: reference
+        r""" "mapped_reads", "\t", """                                   # header:
+        r""" "mapped_percent_%", "\t", """                               # header:
+        r""" "mean_depth", "\t", """                                     # header: mean depth
+        r""" "standard_deviation", "\t", """                             # header: standard deviation
+        r""" "cov_percent_%_@1X", "\t", """                              # header: coverage percentage @1X
+        r""" "cov_percent_%" "\t", """                                   # header: coverage percentage
+        r""" "@_min_cov" """                                             # header: @_[min_cov]_X
+        r""" ORS """                                                      # \n newline
+        r""" "{wildcards.sample}", "\t", """                             # value: Sample ID
+        r""" rawReads, "\t", """                                         # value: Raw sequences
+        r""" cutadaptPF, "\t", """                                       # value: Cutadapt Passing Filter
+        r""" sicklePF, "\t", """                                         # value: Sickle Passing Filter
+        r""" totalDuplicate, "\t", """                                   # value:
+        r""" int(((totalDuplicate)/(rawReads*2))*100), "%", "\t", """    # value: (divided by 2 to estimated pairs)
+        r""" estimatedLibrarySize, "\t", """                             # value:
+        r""" samtoolsPF, "\t", """                                       # value:
+        r""" "{wildcards.aligner}", "\t",  """                           # value: aligner
+        r""" "{wildcards.reference}", "\t",  """                         # value: reference
+        r""" mappedReads, "\t", """                                      # value:
+        r""" mappedPercentReads, "%", "\t", """                          # value:
+        r""" int(totalBases/genomeSize), "\t", """                       # value: mean depth
+        r""" int(sqrt((totalBasesSq/genomeSize)-(totalBases/genomeSize)**2)), "\t", """ # Standard deviation value
+        r""" covPercentAt1X, "\t", """                                   # value
+        r""" supMin_Cov/genomeSize*100, "%", "\t", """                   # Coverage percent (@ min_cov X) value
+        r""" "@{wildcards.min_cov}X" """                                 # @ min_cov X value
+        r""" }}' """                                                     # Close print
+        r""" {input.genome_cov} """                                      # BedGraph coverage input
+        r""" 1> {output.cov_stats} """                                   # Mean depth output
+        r""" 2> {log}"""                                                 # Log redirection
+        
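+# NB: with BedGraph rows (chrom, start, end, depth) and interval lengths
+# \ell_i = end_i - start_i over genome size G = \sum_i \ell_i, the accumulators
+# above give, in LaTeX notation:
+#   mean depth:     \bar{d} = \frac{\sum_i \ell_i d_i}{G}
+#   std. deviation: \sigma = \sqrt{\frac{\sum_i \ell_i d_i^2}{G} - \bar{d}^2}
+# i.e. the shortcut formula \sigma^2 = E[d^2] - E[d]^2 over per-base depths.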
+###############################################################################
+rule bedtools_genome_coverage:
+    # Aim: computing genome-wide sequencing coverage
+    # Use: bedtools genomecov [OPTIONS] -ibam [MARK-DUP.bam] 1> [BEDGRAPH.bed]
+    message:
+        """
+        ~ BedTools ∞ Compute Genome Coverage ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        BEDTOOLS
+    input:
+        mark_dup = get_bam_input,
+        index = get_bai_input
+    output:
+        genome_cov = "results/02_Mapping/{reference}/{sample}_{aligner}_genome-cov.bed"
+    log:
+        "results/10_Reports/tools-log/bedtools/{reference}/{sample}_{aligner}_genome-cov.log"
+    shell:
+        "bedtools genomecov "    # Bedtools genomecov, compute the coverage of a feature file among a genome
+        "-bga "                   # Report depth in BedGraph format, regions with zero coverage are also reported
+        "-ibam {input.mark_dup} " # The input file is in BAM format, must be sorted by position
+        "1> {output.genome_cov} " # BedGraph output
+        "2> {log} "               # Log redirection
+
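+# NB: with -bga each BedGraph row is "chrom<TAB>start<TAB>end<TAB>depth" and
+# zero-coverage stretches are reported too; awk_coverage_statistics above reads
+# these fields as $2 (start), $3 (end) and $4 (depth). Illustrative row only:
+#   NC_045512.2    0    54    0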
+###############################################################################
+rule samtools_coverage_histogram:
+    # Aim: alignment depth and percent coverage histogram
+    # Use: samtools coverage --histogram [INPUT.bam]
+    message:
+        """
+        ~ SamTools ∞ Compute Depth and Coverage from BAM file ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        SAMTOOLS
+    resources:
+       cpus = CPUS
+       #bins = BINS,
+       #depth = DEPTH
+    input:
+        mark_dup = get_bam_input
+    output:
+        histogram = "results/03_Coverage/{reference}/histogram/{sample}_{aligner}_coverage-histogram.txt"
+    log:
+        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_coverage-histogram.log"
+    shell:
+        "samtools coverage "          # Samtools coverage, tools for alignments in the SAM format with command to alignment depth and percent coverage
+        "--histogram "                 # -m: show histogram instead of tabular output
+        "--verbosity 4 "               # Set level of verbosity [INT] (default: 3)
+        "--n-bins 149 "                # -w: number of bins in histogram (default: terminal width - 40) (todo: {params.bins}) 
+        "--depth 0 "                   # -d maximum allowed coverage depth [INT] (default: 1000000 ; 0 removing any depth limit) (todo: {params.depth}) 
+        "--output {output.histogram} " # write output to FILE (default: stdout)
+        "{input.mark_dup} ; "          # Mark_dup bam input
+        "echo >> {output.histogram} ; " # Newline
+        "samtools coverage "    # Samtools coverage, tools for alignments in the SAM format with command to alignment depth and percent coverage
+        "--verbosity 4 "         # Set level of verbosity [INT] (default: 3)
+        "{input.mark_dup} "      # Mark_dup bam input
+        ">> {output.histogram} " # write output to FILE (default: stdout)
+        "2> {log}"               # Log redirection
+
+###############################################################################
+rule samtools_flagstat_ext:
+    # Aim: simple stats
+    # Use: samtools flagstat -@ [THREADS] [INPUT.bam]
+    message:
+        """
+        ~ SamTools ∞ Calculate Simple Stats from BAM file ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        SAMTOOLS
+    resources:
+       cpus = CPUS
+    input:
+        mark_dup = get_bam_input
+    output:
+        flagstat = "results/03_Coverage/{reference}/flagstat/{sample}_{aligner}_flagstat.{ext}"
+    log:
+        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_flagstat-{ext}.log"
+    shell:
+        "samtools flagstat "           # Samtools flagstat, tools for alignments in the SAM format with command to simple stat
+        "--threads {resources.cpus} "   # -@: Number of additional threads to use (default: 1)
+        "--verbosity 4 "                # Set level of verbosity [INT] (default: 3)
+        "--output-fmt {wildcards.ext} " # -O Specify output format (none, tsv and json)
+        "{input.mark_dup} "             # Mark_dup bam input
+        "1> {output.flagstat} "         # Mark_dup index output
+        "2> {log}"                      # Log redirection
+
+###############################################################################
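Note that the coverage rules above take their BAM through the input functions get_bam_input and get_bai_input, defined in workflow/scripts/functions.py (whose body is not shown in this hunk). A plausible sketch of such a function, assuming a config flag that toggles primer clipping; both the key name and the logic are illustrative, not the actual implementation:

    def get_bam_input(wildcards):
        # Hypothetical: return the primer-clipped BAM when clipping is
        # enabled in config.yaml, otherwise the plain mark-dup BAM.
        base = ("results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup"
                .format(**wildcards))
        if config["primers"]["clipping"]:   # assumed config key
            return base + ".primerclipped.bam"
        return base + ".bam"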
diff --git a/workflow/rules/indexing_genomes.smk b/workflow/rules/indexing_genomes.smk
old mode 100755
new mode 100644
index b25bf1a..7d238bb
--- a/workflow/rules/indexing_genomes.smk
+++ b/workflow/rules/indexing_genomes.smk
@@ -1,81 +1,36 @@
-###I###R###D######U###2###3###3#######T###R###A###N###S###V###I###H###M###I####
-###                                                                         ###
-###    /\  ______      ___ ____ _  _ __   ____ __   ____     ______  /\     ###
-###    ||  \ \ \ \    / __( ___( \/ )__\ (  _ (  ) (_  _)   / / / /  ||     ###
-###    ||   > > > >  ( (_-.)__) \  /(__)\ )   /)(__ _)(_   < < < <   ||     ###
-###    ||  /_/_/_/    \___(____) \(__)(__(_)\_(____(____)   \_\_\_\  ||     ###
-###    \/                                                            \/     ###
-###                                                                         ###
-###I###R###D######U###2###3###3#######T###R###A###N###S###V###I###H###M###I####
-# Name ___________________ indexing_genomes.smk
-# Version ________________ v.2023.08
-# Author _________________ Nicolas Fernandez
-# Affiliation ____________ IRD_U233_TransVIHMI
-# Aim ____________________ Snakefile with indexing genomes rules
-# Date ___________________ 2022.09.28
-# Latest modifications ___ 2024.08.05 ('Noarch' conda environment yaml files)
-# Use ____________________ snakemake -s indexing_genomes.smk --use-conda 
-
-###############################################################################
-### CONFIGURATION ###
-#####################
-
-configfile: "configuration/config.yaml"
-
 ###############################################################################
-### FUNCTIONS ###
-#################
-
-
-###############################################################################
-### WILDCARDS ###
-#################
-
-REF_SEQ, = glob_wildcards("resources/genomes/{ref_seq}.fasta")
-
-###############################################################################
-### RESOURCES ###
-#################
-
-CPUS = config["resources"]["cpus"] # Threads (maximum)
-
-###############################################################################
-### ENVIRONMENTS ###
-####################
-
-MINIMAP2 = config["conda"]["yaml"]["minimap2"] # MM2 conda env
-BWA = config["conda"]["yaml"]["bwa"]           # BWA conda env
-BOWTIE2 = config["conda"]["yaml"]["bowtie2"]   # BT2 conda env
-
+################################## INDEXING ###################################
 ###############################################################################
-### PARAMETERS ###
-##################
-
-KMER_SIZE = config["minimap2"]["algorithm"]["k-mer_size"]          # MM2 k-mer size
-MINIMIZER_SIZE = config["minimap2"]["algorithm"]["minimizer_size"] # MM2 minimizer window size
-SPLIT_SIZE = config["minimap2"]["algorithm"]["split_size"]         # MM2 split index
-#HOMOPOLYMER = config["minimap2"]["algorithm"]["homopolymer"]       # MM2 if PacBio
-
-BWA_ALGO = config["bwa"]["algorithm"]     # BWA indexing algorithm
-BT2_ALGO = config["bowtie2"]["algorithm"] # BT2 indexing algorithm
 
 ###############################################################################
-### RULES ###
-#############
-
-rule all:
+rule bwa_genome_indexing:
+    # Aim: index sequences in the FASTA format
+    # Use: bwa index -a [ALGO] -p [PREFIX] <genome.fasta>
+    message:
+        """
+        ~ BWA-SW ∞ Index Genome ~
+        Reference: _______ {wildcards.reference}
+        """
+    conda:
+        BWA
+    params:
+        algorithm = BWA_ALGO
     input:
-        mm2_indexes = expand("resources/indexes/minimap2/{ref_seq}.{ext}",
-                            ref_seq = REF_SEQ,
-                            ext = ["mmi"]),
-        bwa_indexes = expand("resources/indexes/bwa/{ref_seq}.{ext}",
-                            ref_seq = REF_SEQ,
-                            ext = ["amb", "ann", "bwt", "pac", "sa"]),
-        bt2_indexes = expand("resources/indexes/bowtie2/{ref_seq}.{ext}",
-                            ref_seq = REF_SEQ,
-                            ext = ["1.bt2", "2.bt2", "3.bt2", "4.bt2",
-                                   "rev.1.bt2", "rev.2.bt2"])
-        
+        fasta = "resources/genomes/{reference}.fasta"
+    output:
+        prefix = "resources/indexes/bwa/{reference}",
+        bwa_indexes = multiext("resources/indexes/bwa/{reference}",
+                               ".amb", ".ann", ".bwt", ".pac", ".sa")
+    log:
+        "results/10_Reports/tools-log/bwa-indexes/{reference}.log"
+    shell:
+        "bwa index "              # BWA-SW algorithm, index sequences
+        "{params.algorithm} "      # -a: Algorithm for constructing BWT index (default: auto)
+        "-p {output.prefix} "      # -p: Prefix of the output database
+        "{input.fasta} "           # Reference sequences in the FASTA format
+        "&> {log} "                # Log redirection
+        "&& touch {output.prefix}" # Touch done
+
 ###############################################################################
 rule minimap2_genome_indexing:
     # Aim: index sequences in the FASTA format
@@ -83,61 +38,32 @@ rule minimap2_genome_indexing:
     message:
         """
         ~ Minimap2 ∞ Index Genome ~
-        Reference: _______ {wildcards.ref_seq}
+        Reference: _______ {wildcards.reference}
         """
     conda:
         MINIMAP2
     params:
         kmer_size = KMER_SIZE,
         minimizer_size = MINIMIZER_SIZE,
-        split_size = SPLIT_SIZE,
+        split_size = SPLIT_SIZE
         #homopolymer = HOMOPOLYMER
     input:
-        fasta = "resources/genomes/{ref_seq}.fasta"
+        fasta = "resources/genomes/{reference}.fasta"
     output:
-        indexes = multiext("resources/indexes/minimap2/{ref_seq}",
-                           ".mmi")
+        mm2_indexes = multiext("resources/indexes/minimap2/{reference}",
+                               ".mmi")
     log:
-        "results/10_Reports/tools-log/minimap2-indexes/{ref_seq}.log"
+        "results/10_Reports/tools-log/minimap2-indexes/{reference}.log"
     shell:
         "minimap2 "                  # Minimap2, index sequences
         "-k {params.kmer_size} "      # -k: k-mer size (default: "21", no larger than "28") [INT]
         "-w {params.minimizer_size} " # -w: minimizer window size (default: "11") [INT]
         "-I {params.split_size} "     # -I: split index for every {NUM} input bases (default: "8G") [INT]
         #"{params.homopolymer} "       # use homopolymer-compressed k-mer (preferrable for PacBio)
-        "-d {output.indexes} "        # -d: dump index to FILE []
+        "-d {output.mm2_indexes} "    # -d: dump index to FILE []
         "{input.fasta} "              # Reference sequences in the FASTA format
         "&> {log}"                    # Log redirection
 
-###############################################################################
-rule bwa_genome_indexing:
-    # Aim: index sequences in the FASTA format
-    # Use: bwa index -a [ALGO] -p [PREFIX] <genome.fasta>
-    message:
-        """
-        ~ BWA-SW ∞ Index Genome ~
-        Reference: _______ {wildcards.ref_seq}
-        """
-    conda:
-        BWA
-    params:
-        algorithm = BWA_ALGO
-    input:
-        fasta = "resources/genomes/{ref_seq}.fasta"
-    output:
-        prefix = temp("resources/indexes/bwa/{ref_seq}"),
-        indexes = multiext("resources/indexes/bwa/{ref_seq}",
-                           ".amb", ".ann", ".bwt", ".pac", ".sa")
-    log:
-        "results/10_Reports/tools-log/bwa-indexes/{ref_seq}.log"
-    shell:
-        "bwa index "              # BWA-SW algorithm, index sequences
-        "{params.algorithm} "      # -a: Algorithm for constructing BWT index (default: auto)                                  
-        "-p {output.prefix} "      # -p: Prefix of the output database
-        "{input.fasta} "           # Reference sequences in the FASTA format
-        "&> {log} "                # Log redirection
-        "&& touch {output.prefix}" # Touch done
-
 ###############################################################################
 rule bowtie2_genome_indexing:
     # Aim: index sequences in the FASTA format
@@ -145,7 +71,7 @@ rule bowtie2_genome_indexing:
     message:
         """
         ~ Bowtie2-build ∞ Index Genome ~
-        Reference: _______ {wildcards.ref_seq}
+        Reference: _______ {wildcards.reference}
         """
     conda:
         BOWTIE2
@@ -154,19 +80,20 @@ rule bowtie2_genome_indexing:
     params:
         algorithm = BT2_ALGO
     input:
-        fasta = "resources/genomes/{ref_seq}.fasta"
+        fasta = "resources/genomes/{reference}.fasta"
     output:
-        prefix = temp("resources/indexes/bowtie2/{ref_seq}"),
-        indexes = multiext("resources/indexes/bowtie2/{ref_seq}",
-                           ".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2",
-                           ".rev.1.bt2", ".rev.2.bt2")
+        prefix = "resources/indexes/bowtie2/{reference}",
+        bt2_indexes = multiext("resources/indexes/bowtie2/{reference}",
+                               ".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2",
+                               ".rev.1.bt2", ".rev.2.bt2")
     log:
-        "results/10_Reports/tools-log/bowtie2-indexes/{ref_seq}.log"
+        "results/10_Reports/tools-log/bowtie2-indexes/{reference}.log"
     shell:
         "bowtie2-build "             # Bowtie2-build, index sequences
         "--quiet "                    # -q: quiet
         "--threads {resources.cpus} " # Number of threads
-        "{params.algorithm} "         # Force (or no by default) generated index to be 'large', even if ref has fewer than 4 billion nucleotides
+        "{params.algorithm} "         # Force (or no by default) generated index to be 'large',
+                                       # even if ref has fewer than 4 billion nucleotides
         "-f "                         # Reference files are FASTA (default)
         "{input.fasta} "              # Reference sequences files (comma-separated list) in the FASTA format
         "{output.prefix} "            # Write bt2 data to files with this dir/basename
diff --git a/workflow/rules/lineages_calling.smk b/workflow/rules/lineages_calling.smk
new file mode 100644
index 0000000..86b0be2
--- /dev/null
+++ b/workflow/rules/lineages_calling.smk
@@ -0,0 +1,74 @@
+###############################################################################
+################################## LINEAGES ###################################
+###############################################################################
+
+###############################################################################
+rule nextclade_lineage:
+    # Aim: nextclade lineage assignment
+    # Use: nextclade run --input-dataset [DATASET/] --output-tsv [REPORT.tsv] [QUERY.fasta]
+    message:
+        """
+        ~ Nextclade ∞ Assign Lineage ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        Variant caller: __ {wildcards.caller}
+        """
+    conda:
+        NEXTCLADE
+    resources:
+        cpus = CPUS
+    params:
+        path = NEXT_PATH,
+        dataset = NEXT_DATASET
+    input:
+        consensus = "results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_consensus.fasta"
+    output:
+        lineage = "results/06_Lineages/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_nextclade-report.tsv",
+        alignment = directory("results/06_Lineages/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_nextclade-all/")
+    log:
+        "results/10_Reports/tools-log/nextclade/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_lineage.log"
+    shell:
+        "nextclade "                                    # Nextclade, assign queries sequences to clades and reports potential quality issues
+        "run "                                           # Run analyzis
+        "--jobs {resources.cpus} "                       # -j: Number of CPU threads used by the algorithm (default: all available threads)
+        "--input-dataset {params.path}{params.dataset} " # -raq: Path to a directory containing a dataset (root-seq, tree and qc-config required)
+        "--output-tsv {output.lineage} "                 # -t: Path to output TSV results file
+        "--output-all {output.alignment} "               # -O: Produce all of the output files into this directory, using default basename
+        "{input.consensus} "                             # Path to a .fasta file with input sequences
+        "&> {log}"                                       # Log redirection
+
+###############################################################################
+rule pangolin_lineage:
+    # Aim: lineage assignment
+    # Use: pangolin [QUERY.fasta] -t [THREADS] --outfile [NAME.csv]
+    message:
+        """
+        ~ Pangolin ∞ Assign Lineage ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        Variant caller: __ {wildcards.caller}
+        """
+    conda:
+        PANGOLIN
+    resources:
+        cpus = CPUS,
+        tmp_dir = TMP_DIR
+    input:
+        consensus = "results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_consensus.fasta"
+    output:
+        lineage = "results/06_Lineages/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_pangolin-report.csv"
+    log:
+        "results/10_Reports/tools-log/pangolin/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_lineage.log"
+    shell:
+        "pangolin "                     # Pangolinn, Phylogenetic Assignment of Named Global Outbreak LINeages
+        "{input.consensus} "             # Query fasta file of sequences to analyse
+        "--threads {resources.cpus} "    # -t: Number of threads
+        "--tempdir {resources.tmp_dir} " # Specify where you want the temp stuff to go (default: $TMPDIR)
+        "--outfile {output.lineage} "    # Optional output file name (default: lineage_report.csv)
+        "&> {log}"                       # Log redirection
+
+###############################################################################
diff --git a/workflow/rules/mask_lowcov.smk b/workflow/rules/mask_lowcov.smk
new file mode 100644
index 0000000..90be218
--- /dev/null
+++ b/workflow/rules/mask_lowcov.smk
@@ -0,0 +1,86 @@
+###############################################################################
+############################ MASKING LOW COVERAGE #############################
+###############################################################################
+
+###############################################################################
+rule bedtools_masking:
+    # Aim: masking low coverage regions
+    # Use: bedtools maskfasta [OPTIONS] -fi [REFERENCE.fasta] -bed [RANGE.bed] -fo [MASKEDREF.fasta]
+    message:
+        """
+        ~ BedTools ∞ Mask Low Coverage Regions ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        """
+    conda:
+        BEDTOOLS
+    input:
+        reference = "resources/genomes/{reference}.fasta",
+        low_cov_mask = "results/03_Coverage/{reference}/bed/{sample}_{aligner}_{min_cov}X_low-cov-mask.bed"
+    output:
+        masked_ref = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_masked-ref.fasta"
+    log:
+        "results/10_Reports/tools-log/bedtools/{reference}/{sample}_{aligner}_{min_cov}X_masking.log"
+    shell:
+        "bedtools maskfasta "       # Bedtools maskfasta, mask a fasta file based on feature coordinates
+        "-fi {input.reference} "     # Input FASTA file 
+        "-bed {input.low_cov_mask} " # BED/GFF/VCF file of ranges to mask in -fi
+        "-fo {output.masked_ref} "   # Output masked FASTA file
+        "&> {log}"                   # Log redirection 
+
+###############################################################################
+rule bedtools_merged_mask:
+    # Aim: merging overlaps
+    # Use: bedtools merge [OPTIONS] -i [FILTERED.bed] -g [GENOME.fasta] 
+    message:
+        """
+        ~ BedTools ∞ Merge Overlaps ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        """
+    conda:
+        BEDTOOLS
+    input:
+        min_cov_filt = "results/03_Coverage/{reference}/bed/{sample}_{aligner}_{min_cov}X_min-cov-filt.bed"
+    output:
+        low_cov_mask = temp("results/03_Coverage/{reference}/bed/{sample}_{aligner}_{min_cov}X_low-cov-mask.bed")
+    log:
+        "results/10_Reports/tools-log/bedtools/{reference}/{sample}_{aligner}_{min_cov}X_merging.log"
+    shell:
+        "bedtools merge "          # Bedtools merge, merges overlapping BED/GFF/VCF entries into a single interval
+        "-i {input.min_cov_filt} "  # -i: BED/GFF/VCF input to merge 
+        "1> {output.low_cov_mask} " # merged output
+        "2> {log}"                  # Log redirection
+
+###############################################################################
+rule awk_min_covfilt:
+    # Aim: minimum coverage filtration
+    # Use: awk '$4 < [MIN_COV]' [BEDGRAPH.bed] 1> [FILTERED.bed]
+    message:
+        """
+        ~ Awk ∞ Minimum Coverage Filtration ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        """
+    conda:
+        GAWK
+    input:
+        genome_cov = "results/02_Mapping/{reference}/{sample}_{aligner}_genome-cov.bed"
+    output:
+        min_cov_filt = temp("results/03_Coverage/{reference}/bed/{sample}_{aligner}_{min_cov}X_min-cov-filt.bed")
+    log:
+        "results/10_Reports/tools-log/awk/{reference}/{sample}_{aligner}_{min_cov}X_min-cov-filt.log"
+    shell:
+        "awk "                      # Awk, a program that you can use to select particular records in a file and perform operations upon them
+        "'$4 < {wildcards.min_cov}' " # Minimum coverage for masking regions in consensus sequence
+        "{input.genome_cov} "         # BedGraph coverage input
+        "1> {output.min_cov_filt} "   # Minimum coverage filtered bed output
+        "2> {log} "                   # Log redirection
+
+###############################################################################
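The chain in this file runs bottom-up: awk keeps every BedGraph interval whose depth column ($4) falls below min_cov, bedtools merge collapses adjacent intervals, and bedtools maskfasta replaces those ranges with N in the reference. A rough Python equivalent of the awk filter, for illustration only (the field layout is the 4-column BedGraph emitted by genomecov -bga):

    def low_coverage_intervals(bedgraph_path, min_cov):
        # Yield (chrom, start, end) for intervals with depth < min_cov.
        with open(bedgraph_path) as bedgraph:
            for line in bedgraph:
                chrom, start, end, depth = line.split()[:4]
                if int(depth) < int(min_cov):
                    yield chrom, int(start), int(end)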
diff --git a/workflow/rules/primer_cleaping.smk b/workflow/rules/primer_cleaping.smk
new file mode 100644
index 0000000..676cacd
--- /dev/null
+++ b/workflow/rules/primer_cleaping.smk
@@ -0,0 +1,45 @@
+###############################################################################
+############################## PRIMER CLIPPING ################################
+###############################################################################
+
+###############################################################################
+rule bamclipper_amplicon_primers:
+    # Aim: soft-clip amplicon PCR primers from BAM alignments
+    # Use: bamclipper.sh -n [THREADS] -b [MARKDUP.bam] -p [PRIMER.bed] -u [UPSTREAM] -d [DOWNSTREAM]
+    message:
+        """
+        ~ BAMClipper ∞ soft-clipping amplicon PCR primers from BAM alignments ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        BAMCLIPPER
+    resources:
+       cpus = CPUS
+    params:
+        path = CLIPPATH,
+        primers = PRIMERS,
+        upstream = UPSTREAM,
+        downstream = DOWNSTREAM
+    input:
+        markdup = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam",
+        index = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam.bai"
+    output:
+        bamclip = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.primerclipped.bam",
+        baiclip = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.primerclipped.bam.bai"
+    log:
+        "results/10_Reports/tools-log/bamclipper/{reference}/{sample}_{aligner}_primers-clip.log"
+    shell:
+        "bamclipper.sh "                         # BAMClipper, remove primer sequences from BAM alignments of PCR amplicons by soft-clipping
+        "-b {input.markdup} "                     # Indexed BAM alignment file
+        "-p {params.path}{params.primers}.bedpe " # BEDPE of primer pair locations
+        "-n {resources.cpus} "                    # Number of threads (default: 1)
+        "-u {params.upstream} "                   # Number of nuc. upstream for assigning alignments to primers (default: 5)
+        "-d {params.downstream} "                 # Number of nuc. downstream for assigning alignments to primers (default: 5)
+        #"-o results/02_Mapping/ "                 # Path to write output (if BamClipper v.1.1.2) (todo)
+        "&> {log} "                               # Log redirection
+        "&& mv {wildcards.sample}_{wildcards.aligner}_mark-dup.primerclipped.bam {output.bamclip} "    # because BamClipper v.1 default output system...
+        "&& mv {wildcards.sample}_{wildcards.aligner}_mark-dup.primerclipped.bam.bai {output.baiclip}" # because BamClipper v.1 default output system...
+
+###############################################################################
diff --git a/workflow/rules/quality_control.smk b/workflow/rules/quality_controls.smk
old mode 100755
new mode 100644
similarity index 63%
rename from workflow/rules/quality_control.smk
rename to workflow/rules/quality_controls.smk
index fea98c1..1abfda8
--- a/workflow/rules/quality_control.smk
+++ b/workflow/rules/quality_controls.smk
@@ -1,73 +1,12 @@
-###I###RA###D######U###2###3###3#######T###R###A###N###S###V###I###H###M###I####
-###                                                                         ###
-###    /\  ______      ___ ____ _  _ __   ____ __   ____     ______  /\     ###
-###    ||  \ \ \ \    / __( ___( \/ )__\ (  _ (  ) (_  _)   / / / /  ||     ###
-###    ||   > > > >  ( (_-.)__) \  /(__)\ )   /)(__ _)(_   < < < <   ||     ###
-###    ||  /_/_/_/    \___(____) \(__)(__(_)\_(____(____)   \_\_\_\  ||     ###
-###    \/                                                            \/     ###
-###                                                                         ###
-###I###R###D######U###2###3###3#######T###R###A###N###S###V###I###H###M###I####
-# Name ___________________ quality_control.smk
-# Version ________________ v.2023.06
-# Author _________________ Nicolas Fernandez
-# Affiliation ____________ IRD_U233_TransVIHMI
-# Aim ____________________ Snakefile with quality control rules
-# Date ___________________ 2021.09.28
-# Latest modifications ___ 2024.02.08 (add multiqc plot graphs export)
-# Run ____________________ snakemake -s quality_control.smk --use-conda 
-
-###############################################################################
-### CONFIGURATION ###
-#####################
-
-configfile: "configuration/config.yaml"
-
-###############################################################################
-### FUNCTIONS ###
-#################
-
 ###############################################################################
-### WILDCARDS ###
-#################
-
-FASTQ, = glob_wildcards("resources/reads/{fastq}.fastq.gz")
-
+############################### QUALITY CONTROL ###############################
 ###############################################################################
-### RESOURCES ###
-#################
-
-OS = config["os"]                  # Operating system
-CPUS = config["resources"]["cpus"] # Threads (maximum)
-
-###############################################################################
-### ENVIRONMENTS ###
-####################
-
-MULTIQC = config["conda"][OS]["multiqc"]           # Multi-QC conda env
-FASTQ_SCREEN = config["conda"][OS]["fastq_screen"] # Fastq-Screen conda env
-FASTQC= config["conda"][OS]["fastqc"]              # FastQC conda env
-
-###############################################################################
-### PARAMETERS ###
-##################
-
-SUBSET = config["fastq_screen"]["subset"]     # Fastq-Screen --subset
-FQC_CONFIG = config["fastq_screen"]["config"] # Fastq-Screen --conf
-#MQC_CONFIG = config["multiqc"]["config"]      # MultiQC --conf
-#TAG = config["multiqc"]["tag"]                # MultiQC --tag
-
-###############################################################################
-### RULES ###
-#############
-
-rule all:
-    input:
-        multiqc = "results/00_Quality_Control/multiqc/"
 
 ###############################################################################
 rule multiqc_reports_aggregation:
     # Aim: aggregates bioinformatics analyses results into a single report
     # Use: multiqc [OPTIONS] --output [MULTIQC/] [FASTQC/] [MULTIQC/]
+    priority: 999 # Explicit high priority
     message:
         """
         ~ MultiQC ∞ Aggregate HTML Quality Reports ~
@@ -116,7 +55,7 @@ rule fastqscreen_contamination_checking:
         config = FQC_CONFIG,
         subset = SUBSET
     input:
-        fastq = "resources/reads/{fastq}.fastq.gz"
+        fastq = "resources/symlinks/{fastq}.fastq.gz"
     output:
         fastq_screen = directory("results/00_Quality_Control/fastq-screen/{fastq}/")
     log:
@@ -146,7 +85,7 @@ rule fastqc_quality_control:
     resources:
         cpus = CPUS
     input:
-        fastq = "resources/reads/{fastq}.fastq.gz"
+        fastq = "resources/symlinks/{fastq}.fastq.gz"
     output:
         fastqc = directory("results/00_Quality_Control/fastqc/{fastq}/")
     log:
diff --git a/workflow/rules/reads_cleaning.smk b/workflow/rules/reads_cleaning.smk
new file mode 100644
index 0000000..40cf7ac
--- /dev/null
+++ b/workflow/rules/reads_cleaning.smk
@@ -0,0 +1,91 @@
+###############################################################################
+################################## TRIMMING ###################################
+###############################################################################
+
+###############################################################################
+rule sickle_trim_quality:
+    # Aim: windowed adaptive trimming tool for FASTQ files using quality
+    # Use: sickle [COMMAND] [OPTIONS]
+    message:
+        """
+        ~ Sickle-trim ∞ Trim Low Quality Sequences ~
+        Sample: __________ {wildcards.sample}
+        """
+    conda:
+        SICKLE_TRIM
+    params:
+        command = SIC_COMMAND,
+        encoding = SIC_ENCODING,
+        quality = SIC_QUALITY,
+        length = SIC_LENGTH
+    input:
+        fwd_reads = "results/01_Trimming/cutadapt/{sample}_cutadapt-removed_R1.fastq.gz",
+        rev_reads = "results/01_Trimming/cutadapt/{sample}_cutadapt-removed_R2.fastq.gz"
+    output:
+        fwd_reads = stash_or_trash("results/01_Trimming/sickle/{sample}_sickle-trimmed_R1.fastq.gz"),
+        rev_reads = stash_or_trash("results/01_Trimming/sickle/{sample}_sickle-trimmed_R2.fastq.gz"),
+        single = stash_or_trash("results/01_Trimming/sickle/{sample}_sickle-trimmed_SE.fastq.gz")
+    log:
+        "results/10_Reports/tools-log/sickle-trim/{sample}.log"
+    shell:
+        "sickle "               # Sickle, a windowed adaptive trimming tool for FASTQ files using quality
+        "{params.command} "      # Paired-end or single-end sequence trimming
+        "-t {params.encoding} "  # --qual-type: Type of quality values, solexa ; illumina ; sanger ; CASAVA, < 1.3 ; 1.3 to 1.7 ; >= 1.8
+        "-q {params.quality} "   # --qual-threshold: Threshold for trimming based on average quality in a window (default: 20)
+        "-l {params.length} "    # --length-threshold: Threshold to keep a read based on length after trimming (default: 20)
+        "-f {input.fwd_reads} "  # --pe-file1: Input paired-end forward fastq file
+        "-r {input.rev_reads} "  # --pe-file2: Input paired-end reverse fastq file
+        "-g "                    # --gzip-output: Output gzipped files
+        "-o {output.fwd_reads} " # --output-pe1: Output trimmed forward fastq file
+        "-p {output.rev_reads} " # --output-pe2: Output trimmed reverse fastq file (must use -s option)
+        "-s {output.single} "    # --output-single: Output trimmed singles fastq file
+        "&> {log} "              # Log redirection
+
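The sickle outputs above are wrapped in stash_or_trash(), a helper from workflow/scripts/functions.py whose body is not shown in this patch. Judging by the name, it likely decides whether trimmed FASTQs are kept or marked temp(); a guessed sketch, with the config key invented for illustration:

    def stash_or_trash(path):
        # Hypothetical: keep trimmed reads if requested, else mark them
        # temp() so Snakemake deletes them once downstream rules finish.
        if config["trimming"]["keep_trimmed_reads"]:  # assumed config key
            return path
        return temp(path)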
+###############################################################################
+rule cutadapt_adapters_removing:
+    # Aim: finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence from your high-throughput sequencing reads
+    # Use: cutadapt [OPTIONS] -a/-A [ADAPTER] -o [OUT_FWD.fastq.gz] -p [OUT_REV.fastq.gz] [IN_FWD.fastq.gz] [IN_REV.fastq.gz]
+    # Note: multiple adapter sequences can be given using further -a options, but only the best-matching adapter will be removed
+    message:
+        """
+        ~ Cutadapt ∞ Remove Adapters ~
+        Sample: __________ {wildcards.sample}
+        """
+    conda:
+        CUTADAPT
+    resources:
+        cpus = CPUS
+    params:
+        length = CUT_LENGTH,
+        truseq = CUT_TRUSEQ,
+        nextera = CUT_NEXTERA,
+        small = CUT_SMALL,
+        cut = CUT_CLIPPING
+    input:
+        fwd_reads = "resources/symlinks/{sample}_R1.fastq.gz",
+        rev_reads = "resources/symlinks/{sample}_R2.fastq.gz"
+    output:
+        fwd_reads = temp("results/01_Trimming/cutadapt/{sample}_cutadapt-removed_R1.fastq.gz"),
+        rev_reads = temp("results/01_Trimming/cutadapt/{sample}_cutadapt-removed_R2.fastq.gz")
+    log:
+        "results/10_Reports/tools-log/cutadapt/{sample}.log"
+    shell:
+        "cutadapt "                          # Cutadapt, finds and removes unwanted sequence from your HT-seq reads
+        "--cores {resources.cpus} "           # -j: Number of CPU cores to use. Use 0 to auto-detect (default: 1)
+        "--cut {params.cut} "                 # -u: Remove 'n' first bases (5') from forward R1 (hard-clipping, default: 0)
+        "-U {params.cut} "                    # -U: Remove 'n' first bases (5') from reverse R2 (hard-clipping, default: 0)
+        "--trim-n "                           # --trim-n: Trim N's on ends (3') of reads
+        "--minimum-length {params.length} "   # -m: Discard reads shorter than length
+        "--adapter {params.truseq} "          # -a: Sequence of an adapter ligated to the 3' end of the first read
+        "-A {params.truseq} "                 # -A: 3' adapter to be removed from second read in a pair
+        "--adapter {params.nextera} "         # -a: Sequence of an adapter ligated to the 3' end of the first read
+        "-A {params.nextera} "                # -A: 3' adapter to be removed from second read in a pair
+        "--adapter {params.small} "           # -a: Sequence of an adapter ligated to the 3' end of the first read
+        "-A {params.small} "                  # -A: 3' adapter to be removed from second read in a pair
+        "--output {output.fwd_reads} "        # -o: Write trimmed reads to FILE
+        "--paired-output {output.rev_reads} " # -p: Write second read in a pair to FILE
+        "{input.fwd_reads} "                  # Input forward reads R1.fastq
+        "{input.rev_reads} "                  # Input reverse reads R2.fastq
+        "&> {log} "                           # Log redirection
+
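The adapter params above (CUT_TRUSEQ, CUT_NEXTERA, CUT_SMALL) come from config.yaml. For orientation, plausible values would be the standard published adapter prefixes; these literals are illustrative, not necessarily what the config ships:

    CUT_TRUSEQ = "AGATCGGAAGAGC"            # TruSeq / Illumina universal adapter prefix
    CUT_NEXTERA = "CTGTCTCTTATACACATCT"     # Nextera transposase sequence
    CUT_SMALL = "TGGAATTCTCGGGTGCCAAGG"     # TruSeq Small RNA 3' adapter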
+###############################################################################
diff --git a/workflow/rules/reads_mapping.smk b/workflow/rules/reads_mapping.smk
new file mode 100644
index 0000000..85f1b71
--- /dev/null
+++ b/workflow/rules/reads_mapping.smk
@@ -0,0 +1,120 @@
+###############################################################################
+################################### MAPPING ###################################
+###############################################################################
+
+###############################################################################
+rule minimap2_mapping:
+    # Aim: reads mapping against reference sequence
+    # Use: minimap2 -x [PRESET] -t [THREADS] -a [INDEX.mmi] [FWD_R1.fq] [REV_R2.fq] 1> [MAPPED.sam]
+    message:
+        """
+        ~ Minimap2 ∞ Map Reads ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ MiniMap2
+        """
+    conda:
+        MINIMAP2
+    resources:
+        cpus = CPUS
+    params:
+        preset = MM2_PRESET
+        #length = LENGTH
+    input:
+        mm2_indexes = "resources/indexes/minimap2/{reference}.mmi",
+        fwd_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R1.fastq.gz",
+        rev_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R2.fastq.gz"
+    output:
+        mapped = temp("results/02_Mapping/{reference}/{sample}_minimap2-mapped.sam")
+    log:
+        "results/10_Reports/tools-log/minimap2/{sample}_{reference}.log"
+    shell:
+        "minimap2 "           # Minimap2, a versatile sequence alignment program
+        "-x {params.preset} "  # -x: presets (always applied before other options)
+        "-t {resources.cpus} " # -t: Number of threads (default: 3)
+        "-a "                  # -a: output in the SAM format (PAF by default)
+        #"-F {params.length} "  # -F: max fragment length, effective with -x sr mode (default: 800)
+        "{input.mm2_indexes} " # Reference index filename prefix.mmi
+                               # (-k, -w, -I and -H can't be changed during mapping)
+        #"resources/genomes/{wildcards.reference}.fasta " # Reference genome fasta format (for custom -kwIH)
+        #"-k {params.kmer_size} "      # -k: k-mer size (default: "21", no larger than "28") [INT]
+        #"-w {params.minimizer_size} " # -w: minimizer window size (default: "11") [INT]
+        #"-I {params.split_size} "     # -I: split index for every {NUM} input bases (default: "8G") [INT]
+        #"{params.homopolymer} "       # -H: use homopolymer-compressed k-mer (preferrable for PacBio)
+        "{input.fwd_reads} "   # Forward input reads
+        "{input.rev_reads} "   # Reverse input reads
+        "1> {output.mapped} "  # SAM output
+        "2> {log}"             # Log redirection 
+
+###############################################################################
+rule bwa_mapping:
+    # Aim: reads mapping against reference sequence
+    # Use: bwa mem -t [THREADS] -x [REFERENCE] [FWD_R1.fq] [REV_R2.fq] 1> [MAPPED.sam]
+    message:
+        """
+        ~ BWA-MEM ∞ Map Reads ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ BWA
+        """
+    conda:
+        BWA
+    resources:
+        cpus = CPUS
+    input:
+        bwa_indexes = "resources/indexes/bwa/{reference}",
+        fwd_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R1.fastq.gz",
+        rev_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R2.fastq.gz"
+    output:
+        mapped = temp("results/02_Mapping/{reference}/{sample}_bwa-mapped.sam")
+    log:
+        "results/10_Reports/tools-log/bwa/{sample}_{reference}.log"
+    shell:
+        "bwa mem "            # BWA-MEM algorithm, performs local alignment
+        "-t {resources.cpus} " # -t: Number of threads (default: 12)
+        "-v 1 "                # -v: Verbosity level: 1=error, 2=warning, 3=message, 4+=debugging
+        "{input.bwa_indexes} " # Reference index filename prefix
+        "{input.fwd_reads} "   # Forward input reads
+        "{input.rev_reads} "   # Reverse input reads
+        "1> {output.mapped} "  # SAM output
+        "2> {log}"             # Log redirection 
+
+###############################################################################
+rule bowtie2_mapping:
+    # Aim: reads mapping against reference sequence
+    # Use: bowtie2 -p [THREADS] -x [REFERENCE] -1 [FWD_R1.fq] -2 [REV_R2.fq] -S [MAPPED.sam]
+    message:
+        """
+        ~ Bowtie2 ∞ Map Reads ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ Bowtie2
+        """
+    conda:
+        BOWTIE2
+    resources:
+        cpus = CPUS
+    params:
+        sensitivity = BT2_SENSITIVITY
+    input:
+        bt2_indexes = "resources/indexes/bowtie2/{reference}",
+        fwd_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R1.fastq.gz",
+        rev_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R2.fastq.gz"
+    output:
+        mapped = temp("results/02_Mapping/{reference}/{sample}_bowtie2-mapped.sam")
+    log:
+        "results/10_Reports/tools-log/bowtie2/{sample}_{reference}.log"
+    shell:
+        "bowtie2 "                   # Bowtie2, an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences
+        "--threads {resources.cpus} " # -p: Number of alignment threads to launch (default: 1)
+        "--reorder "                  # Keep the original read order (if multi-processor option -p is used)
+        "-x {input.bt2_indexes} "     # -x: Reference index filename prefix (minus trailing .X.bt2)
+        "{params.sensitivity} "       # Preset (default: "--sensitive")
+                                       # sensitive: same as [-D 15 -R 2 -N 0 -L 22 -i S,1,1.15]
+        "-q "                         # -q: Query input files are FASTQ .fq/.fastq (default)
+        "-1 {input.fwd_reads} "       # Forward input reads
+        "-2 {input.rev_reads} "       # Reverse input reads
+        "1> {output.mapped} "         # -S: File for SAM output (default: stdout) 
+        "2> {log}"                    # Log redirection 
+
+###############################################################################
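All three mappers write to the same pattern, results/02_Mapping/{reference}/{sample}_{aligner}-mapped.sam, so the {aligner} wildcard alone decides which rule fires. A quick illustration of how requested targets resolve (sample and reference names are made up):

    # Requesting these targets runs bwa_mapping, bowtie2_mapping and
    # minimap2_mapping respectively, purely by filename pattern matching:
    print(expand("results/02_Mapping/{reference}/{sample}_{aligner}-mapped.sam",
                 reference="SARS-CoV-2", sample="sampleA",
                 aligner=["bwa", "bowtie2", "minimap2"]))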
diff --git a/workflow/rules/remove_duplicates.smk b/workflow/rules/remove_duplicates.smk
new file mode 100644
index 0000000..6f55da9
--- /dev/null
+++ b/workflow/rules/remove_duplicates.smk
@@ -0,0 +1,160 @@
+###############################################################################
+############################# REMOVE DUPLICATES ###############################
+###############################################################################
+
+###############################################################################
+rule samtools_index_markdup:
+    # Aim: index the duplicate-marked BAM file
+    # Use: samtools index -@ [THREADS] -b [MARK-DUP.bam] [INDEX.bai]
+    message:
+        """
+        ~ SamTools ∞ Index 'Marked as Duplicate' BAM file ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        SAMTOOLS
+    resources:
+       cpus = CPUS
+    input:
+        mark_dup = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam"
+    output:
+        index = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam.bai"
+    log:
+        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_mark-dup-index.log"
+    shell:
+        "samtools index "     # Samtools index, tools for alignments in the SAM format with command to index alignment
+        "-@ {resources.cpus} " # --threads: Number of additional threads to use (default: 1)(NB, --threads form dose'nt work)
+        "-b "                  # -b: Generate BAI-format index for BAM files (default)
+        "{input.mark_dup} "    # Mark_dup bam input
+        "{output.index} "      # Mark_dup index output
+        "&> {log}"             # Log redirection
+
+###############################################################################
+rule samtools_markdup:
+    # Aim: marking duplicate alignments
+    # Use: samtools markdup -@ [THREADS] -r -s -O BAM [SORTED.bam] [MARK-DUP.bam] 
+    message:
+        """
+        ~ SamTools ∞ Mark Duplicate Alignments ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        SAMTOOLS
+    resources:
+       cpus = CPUS
+    input:
+        sorted = "results/02_Mapping/{reference}/{sample}_{aligner}_sorted.bam"
+    output:
+        mark_dup = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam"
+    log:
+        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_mark-dup.log"
+    shell:
+        "samtools markdup "          # Samtools markdup, tools for alignments in the SAM format with command mark duplicates
+        "--threads {resources.cpus} " # -@: Number of additional threads to use (default: 1)
+        "-r "                         # -r: Remove duplicate reads
+        "-s "                         # -s: Report stats
+        "--output-fmt BAM "           # -O: Specify output format: SAM, BAM, CRAM (here, BAM format)
+        "{input.sorted} "             # Sorted bam input
+        "{output.mark_dup} "          # Mark_dup bam output
+        "&> {log}"                    # Log redirection 
+
+###############################################################################
+rule samtools_sorting:
+    # Aim: sorting
+    # Use: samtools sort -@ [THREADS] -m [MEM_GB] -T [TMP_DIR] -O BAM -o [SORTED.bam] [FIX-MATE.bam] 
+    message:
+        """
+        ~ SamTools ∞ Sort BAM file ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        SAMTOOLS
+    resources:
+       cpus = CPUS,
+       mem_gb = MEM_GB,
+       tmp_dir = TMP_DIR
+    input:
+        fix_mate = "results/02_Mapping/{reference}/{sample}_{aligner}_fix-mate.bam"
+    output:
+        sorted = temp("results/02_Mapping/{reference}/{sample}_{aligner}_sorted.bam")
+    log:
+        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_sorted.log"
+    shell:
+        "samtools sort "             # Samtools sort, tools for alignments in the SAM format with command to sort alignment file
+        "--threads {resources.cpus} " # -@: Number of additional threads to use (default: 1)
+        "-m {resources.mem_gb}G "     # -m: Set maximum memory per thread, suffix K/M/G recognized (default: 768M)
+        "-T {resources.tmp_dir} "     # -T: Write temporary files to PREFIX.nnnn.bam
+        "--output-fmt BAM "           # -O: Specify output format: SAM, BAM, CRAM (here, BAM format)
+        "-o {output.sorted} "         # Sorted bam output
+        "{input.fix_mate} "           # Fixmate bam input
+        "&> {log}"                    # Log redirection 
+
+###############################################################################
+rule samtools_fixmate:
+    # Aim: filling in mate coordinates
+    # Use: samtools fixmate -@ [THREADS] -m -O BAM [SORT-BY-NAMES.bam] [FIX-MATE.bam] 
+    message:
+        """
+        ~ SamTools ∞ Fill Mate Coordinates ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        SAMTOOLS
+    resources:
+       cpus = CPUS
+    input:
+        sort_by_names = "results/02_Mapping/{reference}/{sample}_{aligner}_sort-by-names.bam"
+    output:
+        fix_mate = temp("results/02_Mapping/{reference}/{sample}_{aligner}_fix-mate.bam")
+    log:
+        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_fix-mate.log"
+    shell:
+        "samtools fixmate "          # Samtools fixmate, tools for alignments in the SAM format with command to fix mate information
+        "--threads {resources.cpus} " # -@: Number of additional threads to use (default: 1)
+        "-m "                         # -m: Add mate score tag 
+        "--output-fmt BAM "           # -O: Specify output format: SAM, BAM, CRAM (here, BAM format)
+        "{input.sort_by_names} "      # Sort_by_names bam input
+        "{output.fix_mate} "          # Fix_mate bam output 
+        "&> {log}"                    # Log redirection 
+
+###############################################################################
+rule samtools_sortbynames:
+    # Aim: sorting by names
+    # Use: samtools sort -@ [THREADS] -m [MEM_GB] -n -O BAM -o [SORT-BY-NAMES.bam] [MAPPED.sam]
+    message:
+        """
+        ~ SamTools ∞ Sort by Names BAM file ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        SAMTOOLS
+    resources:
+       cpus = CPUS,
+       mem_gb = MEM_GB
+    input:
+        mapped = "results/02_Mapping/{reference}/{sample}_{aligner}-mapped.sam"
+    output:
+        sort_by_names = temp("results/02_Mapping/{reference}/{sample}_{aligner}_sort-by-names.bam")
+    log:
+        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_sort-by-names.log"
+    shell:
+        "samtools sort "             # Samtools sort, tools for alignments in the SAM format with command to sort alignment file
+        "--threads {resources.cpus} " # -@: Number of additional threads to use (default: 1)
+        "-m {resources.mem_gb}G "     # -m: Set maximum memory per thread, suffix K/M/G recognized (default: 768M)
+        "-n "                         # -n: Sort by read name (not compatible with samtools index command) 
+        "--output-fmt BAM "           # -O: Specify output format: SAM, BAM, CRAM (here, BAM format)
+        "-o {output.sort_by_names} "  # -o: Write final output to FILE rather than standard output
+        "{input.mapped} "             # Mapped reads input
+        "&> {log}"                    # Log redirection 
+
+###############################################################################
diff --git a/workflow/rules/variant_calling.smk b/workflow/rules/variant_calling.smk
new file mode 100644
index 0000000..54f2766
--- /dev/null
+++ b/workflow/rules/variant_calling.smk
@@ -0,0 +1,142 @@
+###############################################################################
+########################### VARIANTS CALLING - IVAR ###########################
+###############################################################################
+
+###############################################################################
+rule convert_tsv2vcf:
+    message:
+        """
+        ~ iVar ∞ Convert TSV to VCF file ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        Variant caller: __ iVar
+        """
+    conda:
+        TSV2VCF
+    input:
+        tsv = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_ivar_variant-call.tsv"
+    output:
+        vcf = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_ivar_variant-filt.vcf"
+    log:
+        "results/10_Reports/tools-log/tsv2vcf/{reference}/{sample}_{aligner}_{min_cov}X_ivar_tsv2vcf.log"
+    shell:
+        "python3 "                                  # Python 3
+        "workflow/scripts/ivar_variants_to_vcf.py "  # Script (from viralrecon)
+        "{input.tsv} "                               # TSV input
+        "{output.vcf} "                              # VCF output
+        "&> {log}"                                   # Log redirection
+
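ivar_variants_to_vcf.py is vendored from viralrecon and is not part of this diff. Conceptually it maps each row of iVar's variant TSV (columns REGION, POS, REF, ALT, ALT_FREQ, TOTAL_DP, among others) onto a VCF record; a stripped-down sketch of that mapping, omitting the header and the real script's filtering logic:

    import csv

    def tsv_to_vcf_lines(tsv_path):
        # One VCF body line per iVar TSV row (illustrative, not the script).
        with open(tsv_path) as handle:
            for row in csv.DictReader(handle, delimiter="\t"):
                info = "AF={};DP={}".format(row["ALT_FREQ"], row["TOTAL_DP"])
                yield "\t".join([row["REGION"], row["POS"], ".",
                                 row["REF"], row["ALT"], ".", "PASS", info])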
+###############################################################################
+rule ivar_consensus:
+    # Aim: consensus calling from an aligned BAM file
+    # Use: samtools mpileup -aa -A -d 0 -B -Q 0 [MARK-DUP.bam] | ivar consensus -p [PREFIX] [OPTIONS]
+    message:
+        """
+        ~ iVar ∞ Call Consensus ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        Variant caller: __ iVar
+        """
+    conda:
+        IVAR
+    params:
+        min_depth = IVAR_MIN_DEPTH,
+        min_freq = IVAR_MIN_FREQ,
+        min_insert = IVAR_MIN_INSERT,
+        max_depth = IVAR_MAX_DEPTH,
+        min_bq = IVAR_MIN_BQ,
+        min_qual = IVAR_MIN_QUAL,
+        baq = IVAR_MAP_QUAL
+    input:
+        mark_dup = get_bam_input,
+        variant_call = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_ivar_variant-call.tsv"
+    output:
+        prefix = temp("results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_ivar_consensus"),
+        cons_tmp = temp("results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_ivar_consensus.fasta.tmp"),
+        qual_txt = "results/05_Consensus/{reference}/ivar_consensus-quality/{sample}_{aligner}_{min_cov}X_ivar_consensus.qual.txt",
+    log:
+        "results/10_Reports/tools-log/ivar/{reference}/{sample}_{aligner}_{min_cov}X_ivar_consensus.log"
+    shell:
+        "samtools mpileup "              # Samtools mpileup, tools for alignments in the SAM format with command multi-way pileup
+        "--verbosity 0 "                  # Set level of verbosity [INT]
+        "-a "                             # -a: output all positions (including zero depth)
+        "-a "                             # -a -a / -aa: output absolutely all positions, including unused ref. sequences
+        "--count-orphans "                # -A: do not discard anomalous read pairs
+        "--max-depth {params.max_depth} " # -d: max per-file depth; avoids excessive memory usage [INT] (default: 8000)
+        "{params.baq} "                   # --no-BAQ / -B: disable BAQ (per-Base Alignment Quality)
+        "--min-BQ {params.min_bq} "       # -Q: skip bases with baseQ/BAQ smaller than [INT] (default: 13)
+        #"--reference {input.masked_ref} " # Reference sequence FASTA FILE
+        "{input.mark_dup} "               # Markdup BAM input
+        "2>&1 | grep -v '[mpileup] 1 samples in 1 input files' " # Remove this stdout
+        "| "                               ### PIPE to iVar
+        "ivar consensus "                # iVar, with command 'consensus': Call consensus from aligned BAM file
+        "-p {output.prefix} "             # -p: prefix
+        "-q {params.min_qual} "           # -q: Minimum quality score threshold to count base [INT] (Default: 20)
+        "-t {params.min_freq} "           # -t: Minimum frequency threshold to call variants [FLOAT] (Default: 0.03)
+        "-c {params.min_insert} "         # -c: Minimum insertion frequency threshold to call consensus [FLOAT] (Default: 0.8)    
+        "-m {params.min_depth} "          # -m: Minimum read depth to call variants [INT] (Default: 0)
+        "-n N "                           # -n: Character to print in regions with less than minimum coverage (Default: N)
+        #"-i {wildcards.sample} "          # -i: Name of fasta header (default: Consensus_<prefix>_threshold_<min_freq>_quality_<min_qual>_<min_insert>
+        "&> {log} "                       # Log redirection
+        "&& mv {output.prefix}.fa {output.cons_tmp} "       # copy consensus.fa (temp) to consensus.fasta.tmp (tmp)
+        "&& mv {output.prefix}.qual.txt {output.qual_txt} " # cppty consensus.qual.txt (tmp) to ivar_consensus-quality/ directory
+        "&& touch {output.prefix}"                          # Touch done
+
+###############################################################################
+rule ivar_variant_calling:
+    # Aim: SNVs and Indels calling 
+    # Use: samtools mpileup -aa -A -d 0 -B -Q 0 --reference [REFERENCE.fasta] [INPUT.bam] | ivar variants -p <prefix> [-q <min-quality>] [-t <min-frequency-threshold>] [-m <minimum-depth>] [-r <reference-fasta>] [-g <GFF file>]
+    # Note: samtools mpileup output must be piped into ivar variants
+    message:
+        """
+        ~ iVar ∞ Call Variants ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        Variant caller: __ iVar
+        """
+    conda:
+        IVAR
+    params:
+        min_depth = IVAR_MIN_DEPTH,
+        min_freq = IVAR_MIN_FREQ,
+        min_insert = IVAR_MIN_INSERT,
+        max_depth = IVAR_MAX_DEPTH,
+        min_bq = IVAR_MIN_BQ,
+        min_qual = IVAR_MIN_QUAL,
+        baq = IVAR_MAP_QUAL
+    input:
+        masked_ref = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_masked-ref.fasta",
+        mark_dup = get_bam_input
+    output:
+        variant_call = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_ivar_variant-call.tsv"
+    log:
+        "results/10_Reports/tools-log/ivar/{reference}/{sample}_{aligner}_{min_cov}X_ivar_variant-call.log"
+    shell:
+        "samtools mpileup "              # Samtools mpileup, tools for alignments in the SAM format with command multi-way pileup
+        "--verbosity 0 "                  # Set level of verbosity [INT]
+        "-a "                             # -a: output all positions (including zero depth)
+        "-a "                             # -a -a / -aa: output absolutely all positions, including unused ref. sequences
+        "--count-orphans "                # -A: do not discard anomalous read pairs
+        "--max-depth {params.max_depth} " # -d: max per-file depth; avoids excessive memory usage (default: 8000) [INT]
+        "{params.baq} "                   # --no-BAQ / -B: disable BAQ (per-Base Alignment Quality)
+        "--min-BQ {params.min_bq} "       # -Q: skip bases with baseQ/BAQ smaller than (default: 13) [INT]
+        "--reference {input.masked_ref} " # Reference sequence FASTA FILE
+        "{input.mark_dup} "               # Markdup BAM input
+        "2>&1 | grep -v '[mpileup] 1 samples in 1 input files' " # Remove this stdout
+        "| "                               ### pipe to iVar
+        "ivar variants "                 # iVar, with command 'variants': Call variants from aligned BAM file
+        "-p {output.variant_call} "       # -p: prefix
+        "-q {params.min_qual} "           # -q: Minimum quality score threshold to count base (Default: 20) [INT]
+        "-t {params.min_freq} "           # -t: Minimum frequency threshold to call variants (Default: 0.03) [FLOAT]
+        "-m {params.min_depth} "          # -m: Minimum read depth to call variants (Default: 0) [INT]
+        "-r {input.masked_ref} "          # -r: Reference file used for alignment (translate the nuc. sequences and identify intra host single nuc. variants) 
+        #"-g "                            # -g: A GFF file in the GFF3 format can be supplied to specify coordinates of open reading frames (ORFs)
+        "&> {log}"                        # Log redirection 
+
+###############################################################################
diff --git a/workflow/schemas/config.schema.yaml b/workflow/schemas/config.schema.yaml
new file mode 100644
index 0000000..e69de29
diff --git a/workflow/scripts/__pycache__/__init__.cpython-312.pyc b/workflow/scripts/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..80e6722fd8b0751239f5fedd373affaa5933c5a7
GIT binary patch
literal 154
zcmX@j%ge<81gzq%=^*+sh(HIQS%4zb87dhx8U0o=6fpsLpFwJVS?Y%trxq3K8yOfG
z=A{`J80x2Imgu{uh9wsHWa^ja7iFjA<d^FgCl_TFload7$7kkcmc+;F6;$5hu*uC&
bDa}c>D`Ewj#0bR2AjU^#Mn=XWW*`dyT5%%y

literal 0
HcmV?d00001

diff --git a/workflow/scripts/__pycache__/functions.cpython-312.pyc b/workflow/scripts/__pycache__/functions.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..769a4fad5066a794e73ad02064280c41b8dd9c96
GIT binary patch
literal 4247
zcmd5<%}*Og6yLSiUK<;1L(-B25}GzePKphLq;1-!p^i(5j6<N1iX5E9JD4r&^=5V<
zV1mS<l_K?$15zrrr?wZA{3pFYPu7Y`mD)>g2+~VWeQ$QXHn9`sql!AxJdbDJd-LY^
z-n@D9Yinx@2Q4!2ZDF{B<Nm}y)#h;q`x*@Hb22CMDp%xZcpk?d7<*<sJXhdmyt4Nj
zZbpy=pg!3L)Gzyiin0haAP0a3<si@&xdmuQ4gn3zVW6#YE6_H%4QND;0FACh<FQA$
zc$~KclN5FM^^;{?(&Rkv7==FqbT9P%2_W}5)BR>vP3Imsb9LWPinZ#uGH|c)cX*j!
zfD!e`JpP;H;-2gy6pwo?ucDcjcVc+V5>n$+lW~s~ysfBmUZS#L#fqd@qANL_(409>
zi6k4ieoxOo7m`y3p+<7Be{evX>+c^(78El%LZ&5}QIfYybYV^{-A)>Ls_3SXoLkiL
zrc%<3guY@OE)X-<pqaLU<}T<)9LPGi(|PR2)DMAQQV)fnN4GoswuHkKvGe<Z-^8v;
zSNB%nNF~&@C3NirGj{+xw2)!f2?KC0Zz2n8OC04_JOvIA@MbOX)}o{;=1Tc(N{mGn
z%<S*aU9;zMsgh>WlA0{S2jx~ixyf7N*wxEZne^l)PcgI%M6id3fm$)#lA|uCX!@c_
z55fFNWNQbq&Q-+d#>lpKtP&Zjgj&}}X&X!*u-o<d2k@oA?wE;tVomm#uBRV*AK-qW
zO+vWBzJ#n5FsV{3Dq5j@(cRcU?n`GfX(;&xl_XaUsi>=DEocAlV?spEO{Nobl40IT
z6v`N!HdowBaT^rZwfvLAW8;}Lg;R9^gmwg>y0(MD5#Z@XK7;~Mh;3Zo7Q1#LhqvCC
z-Z^pVv6qkb?s9yv_mAeN;I5fB--AH|DxN#A=NdA1+*83~+zKoZqN7D|k8M9{!lS24
z=6t>X+6+@N=5r+oa|#364Sapj4Ya>zb|unrH}qGe<KE@($`FB#K6GR6ILM*3Ydm-h
z#&(1F^T=N33<}vg{|x5FBwYueC08k#Ba~=)vR3oaDg<glBXliSl;}dAyr?H;rJ@zg
ztHc&7XIxjmxGEhGA~dfmx=v(NOk-d#V1{9CiMS-J=p21J;9Jqx!0EX8Enqwgy^2>`
zc73EGwr>=+#qLV`&AVbH)V`j5WxPIng-wF7ur-CA05i+uub`*_EvB{}F`uHzWVsn$
zii6iXV8n-Yc!kH;(=J}o*}J|<D7v0$#47+Y>@?W-!V|gec<m(A=LpjeQmmOM&su?#
zy+82}g$F+Szmq+DkUjFHB{&VHQNH4WbQaR#5}{H7(nei)&Z$xXS)0Xa^;=?<ESqb1
z(U{9{(U``PC46m^w76geT~+i=u)-3ZAh*0A68F_*oZv+I^YlbIn@Uqm>Xta186C-{
zCz_|A>e3F@Ns#yyx`79}c9vNRn%y~h2C~pjPp{QA_&6v;hjuw3IJDcsh1;6tXc1H}
z{HI~ikd1ntAAge-C!mCb41UlDLYBS@^K~)$6!_f?Uds-vkoI%nz_qWCGLQ8!XC?sn
zxIj;XVg@!n10y)vHhbJ(1DhgD3}%{OJY%ETT<Yp{bF8bu;$V$A;0x%@u%36Z2DlOH
zIAR@tD%NhS-vosXSf6?stWCVESp(FaQ&GD&gLx=Wh-NGr<?C+fpTkPPo;?uz&jx=r
zFR3b~YgZ+^RHaSd6E2_wV2<Ut**C?fx`7TdzSM9g(<JOWFdMr|Ilj0664#*b_XEMW
zx6?6;`%=9x@EQ*J@EZ07dw0WJ^U#0Y?x5$u>^esb<)F4yBn`^6`W*C}eps?)`{oqY
zOf2Q+=y}jivC?XYYj(8(iv!~)z;grnK0?w^wY_dwte^(jF|SH8Dc^9T^n5O(Xaw@}
zOM&BR?4v|4Q4^EJGXP@I=E^uQT>^9nbfZbM8UO!ivjXr2E0k14E6@B#s8$tLLCrrC
zR8~A!MI201fM=;5)4JlBzBs0Mak0y*X(=75PpH)gq>~)svE(l(hNQ?AmWA$9pyBL9
zF%j9E&K^c!(G}bSR>YWvA{*~-i$@^~-J9L#_-H2<|DCnsKO2u(VMCJ^ND+EMDN!_;
zJv{7n@tMdTR}^b=R;g1gKPlE!6fb*<w-CkjNAd8|OGpq)ipFf(huAJ-10NSl@}f#E
z(kt)@A9ThPkX<j&^M9Y__~XBGCwBcDJWa$szrc6zg}r>+UW))EEbvE>bRdcCg+-Wa
RXCwflLkGzIA&_9X<6nD_@n8S|

literal 0
HcmV?d00001

diff --git a/workflow/scripts/functions.py b/workflow/scripts/functions.py
new file mode 100644
index 0000000..4231b28
--- /dev/null
+++ b/workflow/scripts/functions.py
@@ -0,0 +1,127 @@
+###I###R###D######U###2###3###3#######T###R###A###N###S###V###I###H###M###I####
+###                                                                         ###
+###    /\  ______      ___ ____ _  _ __   ____ __   ____     ______  /\     ###
+###    ||  \ \ \ \    / __( ___( \/ )__\ (  _ (  ) (_  _)   / / / /  ||     ###
+###    ||   > > > >  ( (_-.)__) \  /(__)\ )   /)(__ _)(_   < < < <   ||     ###
+###    ||  /_/_/_/    \___(____) \(__)(__(_)\_(____(____)   \_\_\_\  ||     ###
+###    \/                                                            \/     ###
+###                                                                         ###
+###I###R###D######U###2###3###3#######T###R###A###N###S###V###I###H###M###I####
+# Name ___________________ functions.py
+# Version ________________ v.2025.01
+# Author _________________ Nicolas Fernandez
+# Affiliation ____________ IRD_U233_TransVIHMI
+# Aim ____________________ Snakefile functions
+# Date ___________________ 2021.10.12
+# Latest modifications ___ 2025.01.13
+# Use ____________________ import scripts.functions as functions
+###############################################################################
+
+# imports
+from snakemake.io import temp, expand
+
+# global variable
+# NB: the Snakefile is expected to set MODULES (and the RAM, CPUS, SAMPLE,
+# REFERENCE, ALIGNER, MIN_COV and CALLER globals used below) after importing
+# this module; an empty list disables every optional module by default.
+MODULES = []
+
+# functions
+def get_memory_per_thread(wildcards):
+    # Split the total RAM evenly across the available CPUS, with a floor of 1 per thread
+    return max(1, int(RAM) // int(CPUS))
+
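+# Resources sketch (hypothetical rule, assuming RAM and CPUS are set by the Snakefile):
+#     rule example:
+#         resources:
+#             mem_gb = get_memory_per_thread
+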
+def get_quality_input(wildcards):
+    if "quality" in MODULES:
+        return "results/00_Quality_Control/multiqc/"
+    return []
+
+def get_trimming_input(wildcards):
+    if "trimming" in MODULES:
+        return expand("results/01_Trimming/sickle/{sample}_sickle-trimmed_SE.fastq.gz",
+                      sample = SAMPLE)
+    return []
+
+def stash_or_trash(path):
+    # Keep trimmed reads on disk when "keeptrim" is enabled, otherwise flag them temp()
+    if "keeptrim" in MODULES:
+        return path
+    return temp(path)
+
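+# Example (hypothetical, for illustration):
+#     output: stash_or_trash("results/01_Trimming/sickle/{sample}_sickle-trimmed_SE.fastq.gz")
+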
+def get_bam_input(wildcards):
+    markdup = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam"
+    if "cleapping" in MODULES:
+        markdup = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.primerclipped.bam"
+    return markdup
+
+def get_bai_input(wildcards):
+    index = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam.bai"
+    if "cleapping" in MODULES:
+        index = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.primerclipped.bam.bai"
+    return index
+
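+# Note: the {reference}, {sample} and {aligner} placeholders left in the two
+# templates above are resolved by Snakemake when these functions are used as
+# rule inputs.
+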
+def get_flagstat_input(wildcards):
+    if "covstats" in MODULES:
+        return expand("results/03_Coverage/{reference}/flagstat/{sample}_{aligner}_flagstat.{ext}",
+                      reference = REFERENCE,
+                      sample = SAMPLE,
+                      aligner = ALIGNER,
+                      ext = ["txt", "tsv", "json"])
+    return []
+
+def get_covstats_input(wildcards):
+    if "covstats" in MODULES:
+        return expand("results/03_Coverage/{reference}/{sample}_{aligner}_{min_cov}X_coverage-stats.tsv",
+                      reference = REFERENCE,
+                      sample = SAMPLE,
+                      aligner = ALIGNER,
+                      min_cov = MIN_COV)
+    return []
+
+def get_consensus_input(wildcards):
+    if "consensus" in MODULES:
+        return expand("results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_consensus.fasta",
+                      reference = REFERENCE,
+                      sample = SAMPLE,
+                      aligner = ALIGNER,
+                      min_cov = MIN_COV,
+                      caller = CALLER)
+    return []
+
+def get_vcf_input(wildcards):
+    if "consensus" in MODULES:
+        return expand("results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_variant-filt.vcf",
+                      reference = REFERENCE,
+                      sample = SAMPLE,
+                      aligner = ALIGNER,
+                      min_cov = MIN_COV,
+                      caller = CALLER)
+    return []
+
+def get_nextclade_input(wildcards):
+    if "nextclade" in MODULES:
+        return expand("results/06_Lineages/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_nextclade-report.tsv",
+                      reference = REFERENCE,
+                      sample = SAMPLE,
+                      aligner = ALIGNER,
+                      min_cov = MIN_COV,
+                      caller = CALLER)
+    return []
+
+def get_pangolin_input(wildcards):
+    if "pangolin" in MODULES:
+        return expand("results/06_Lineages/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_pangolin-report.csv",
+                      reference = REFERENCE,
+                      sample = SAMPLE,
+                      aligner = ALIGNER,
+                      min_cov = MIN_COV,
+                      caller = CALLER)
+    return []
+
+def get_gisaid_input(wildcards):
+    if "gisaid" in MODULES:
+        # TODO: GISAID export targets are not implemented yet; expand("")
+        # would yield an empty path, so return nothing for now
+        return []
+    return []
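+
+# Usage sketch (hypothetical wiring, names assumed): in workflow/Snakefile,
+# inject the globals this module expects right after importing it, e.g.:
+#
+#     import scripts.functions as functions
+#     functions.MODULES = config["modules"]
+#     functions.SAMPLE, functions.REFERENCE = SAMPLE, REFERENCE
+#     functions.ALIGNER, functions.MIN_COV, functions.CALLER = ALIGNER, MIN_COV, CALLER
+#     functions.RAM, functions.CPUS = RAM, CPUS
+#
+#     rule all:
+#         input:
+#             functions.get_quality_input,
+#             functions.get_trimming_input,
+#             functions.get_consensus_input,
+#             functions.get_nextclade_input,
+#             functions.get_pangolin_input,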
+
+###############################################################################
-- 
GitLab