Feat: return to the modular rules.smk

a13f7f5d · nicolas.fernandez_ird.fr · 40167f50 · a13f7f5d · a13f7f5d · a13f7f5d
Commit a13f7f5d authored 2 months ago by nicolas.fernandez_ird.fr
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
--- a/.snakemake-workflow-catalog.yml
+++ b/.snakemake-workflow-catalog.yml
+usage:
+  software-stack-deployment:
+    conda: true 
+  report: true
--- a/resources/data_test/.gitkeep
+++ b/resources/data_test/.gitkeep
--- a/resources/data_test/SARS-CoV-2_Omicron-BA1_Covid-Seq-Lib-on-MiSeq_250000-reads_R1.fastq.gz
+++ b/resources/data_test/SARS-CoV-2_Omicron-BA1_Covid-Seq-Lib-on-MiSeq_250000-reads_R1.fastq.gz
--- a/resources/data_test/SARS-CoV-2_Omicron-BA1_Covid-Seq-Lib-on-MiSeq_250000-reads_R2.fastq.gz
+++ b/resources/data_test/SARS-CoV-2_Omicron-BA1_Covid-Seq-Lib-on-MiSeq_250000-reads_R2.fastq.gz
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
--- a/Run_GeVarLi.sh
+++ b/Run_GeVarLi.sh
@@ -216,8 +216,8 @@ fastq=$(expr $(ls -l ${workdir}/resources/reads/*.fastq.gz 2> /dev/null | wc -l)
 if [[ "${fastq}" == "0" ]]                                                         # If no sample,
 then                                                                                # start GeVarLi with at least 1 sample
    echo -e "${red}¡${nc} No FASTQ files detected in ${ylo}resources/reads/${nc} ${red}!${nc}
-${red}${sample_test}${nc} in ${ylo}resources/data_test/${nc} FASTQ files were be used as sample example"
-    cp ${workdir}/resources/data_test/${sample_test}_R*.fastq.gz ${workdir}/resources/reads/ # use data_test fastq
+${red}${sample_test}${nc} FASTQ files were be used as sample test"
+    cp ${workdir}/.test/${sample_test}_R*.fastq.gz ${workdir}/resources/reads/ # use .test/fastq files
    fastq="2"
 fi
 samples=$(expr ${fastq} \/ 2) # {fastq.gz count} / 2 = samples count (paired-end)

--- a/config/README.md
+++ b/config/README.md
--- a/configuration/config.yaml
+++ b/configuration/config.yaml
@@ -249,20 +249,20 @@ resources:
    # -''         # Custom (set your own)

 envs: # Conda environement yaml files:
-  bamclipper:    'envs/bamclipper_v.1.0.0.yaml'      # Bamclipper    ver. 1.0.0
-  bcftools:      'envs/bcftools_v.1.20.yaml'         # Bcftools      ver. 1.20
-  bedtools:      'envs/bedtools_v.2.31.1.yaml'       # Bedtools      ver. 2.31.1
-  bowtie2:       'envs/bowtie2_v.2.5.4.yaml'         # Bowtie2       ver. 2.5.4
-  bwa:           'envs/bwa_v.0.7.18.yaml'            # BWA           ver. 0.7.18
-  cutadapt:      'envs/cutadapt_v.4.9.yaml'          # Cutadapt      ver. 4.9
-  fastq_screen:  'envs/fastq-screen_v.0.15.3.yaml'   # Fastq-Screen  ver. 0.15.3 (with BWA ver. 0.7.18)
-  fastqc:        'envs/fastqc_v.0.12.1.yaml'         # FastQC        ver. 0.12.1
-  gawk:          'envs/gawk_v.5.3.0.yaml'            # Awk (GNU)     ver. 5.3.0
-  ivar:          'envs/ivar_v.1.4.3.yaml'            # iVar          ver. 1.4.3 (with Samtools ver. 1.20)
-  minimap2:      'envs/minimap2_v.2.28.yaml'         # Minimap2      ver. 2.28
-  multiqc:       'envs/multiqc_v.1.23.yaml'          # MultiQC       ver. 1.23 (with Pandoc ver. 3.3)
-  nextclade:     'envs/nextclade_v.3.9.1.yaml'       # Nextclade     ver. 3.9.1
-  pangolin:      'envs/pangolin_v.4.3.yaml'          # Pangolin      ver. 4.3
-  samtools:      'envs/samtools_v.1.20.yaml'         # Samtools      ver. 1.20
-  sickle_trim:   'envs/sickle-trim_v.1.33.yaml'      # Sickle-trim   ver. 1.33
-  tsv2vcf:       'envs/tsv2vcf_v.1.1.yaml'           # tsv2vcf       ver. 1.1 (bcftools biopython numpy)
+  bamclipper:    '../envs/bamclipper_v.1.0.0.yaml'      # Bamclipper    ver. 1.0.0
+  bcftools:      '../envs/bcftools_v.1.20.yaml'         # Bcftools      ver. 1.20
+  bedtools:      '../envs/bedtools_v.2.31.1.yaml'       # Bedtools      ver. 2.31.1
+  bowtie2:       '../envs/bowtie2_v.2.5.4.yaml'         # Bowtie2       ver. 2.5.4
+  bwa:           '../envs/bwa_v.0.7.18.yaml'            # BWA           ver. 0.7.18
+  cutadapt:      '../envs/cutadapt_v.4.9.yaml'          # Cutadapt      ver. 4.9
+  fastq_screen:  '../envs/fastq-screen_v.0.15.3.yaml'   # Fastq-Screen  ver. 0.15.3 (with BWA ver. 0.7.18)
+  fastqc:        '../envs/fastqc_v.0.12.1.yaml'         # FastQC        ver. 0.12.1
+  gawk:          '../envs/gawk_v.5.3.0.yaml'            # Awk (GNU)     ver. 5.3.0
+  ivar:          '../envs/ivar_v.1.4.3.yaml'            # iVar          ver. 1.4.3 (with Samtools ver. 1.20)
+  minimap2:      '../envs/minimap2_v.2.28.yaml'         # Minimap2      ver. 2.28
+  multiqc:       '../envs/multiqc_v.1.23.yaml'          # MultiQC       ver. 1.23 (with Pandoc ver. 3.3)
+  nextclade:     '../envs/nextclade_v.3.9.1.yaml'       # Nextclade     ver. 3.9.1
+  pangolin:      '../envs/pangolin_v.4.3.yaml'          # Pangolin      ver. 4.3
+  samtools:      '../envs/samtools_v.1.20.yaml'         # Samtools      ver. 1.20
+  sickle_trim:   '../envs/sickle-trim_v.1.33.yaml'      # Sickle-trim   ver. 1.33
+  tsv2vcf:       '../envs/tsv2vcf_v.1.1.yaml'           # tsv2vcf       ver. 1.1 (bcftools biopython numpy)
--- a/configuration/fastq-screen.conf
+++ b/configuration/fastq-screen.conf
--- a/configuration/multiqc/default.yaml
+++ b/configuration/multiqc/default.yaml
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
--- a/workflow/rules/gevarli.smk
+++ b/workflow/rules/gevarli.smk
@@ -8,18 +8,21 @@
 ###                                                                         ###
 ###I###R###D######U###2###3###3#######T###R###A###N###S###V###I###H###M###I####
 # Name ___________________ gevarli.smk
-# Version ________________ v.2024.08
+# Version ________________ v.2025.01
 # Author _________________ Nicolas Fernandez
 # Affiliation ____________ IRD_U233_TransVIHMI
 # Aim ____________________ Snakefile with GeVarLi rules
 # Date ___________________ 2021.10.12
-# Latest modifications ___ 2024.08.05 ('Noarch' conda environment yaml files)
-# Use ____________________ snakemake -s gevarli.smk --use-conda 
+# Latest modifications ___ 2025.01.08 (Prepare for Snakedeploy)
+# Use ____________________ snakemake -s Snakemake --use-conda -j

 ###############################################################################
 ### CONFIGURATION ###
 #####################

+from snakemake.utils import min_version
+min_version("8.27.0")
+
 configfile: "configuration/config.yaml"

 ###############################################################################
@@ -46,7 +49,7 @@ def get_trimming_input(wildcards):
    return trimming_list

 def stash_or_trash(path):
-    if "trimming" in MODULES:
+    if "keeptrim" in MODULES:
        return path
    else:
        return temp(path)
@@ -157,8 +160,8 @@ def get_gisaid_input(wildcards):
 ### WILDCARDS ###
 #################

-FASTQ, = glob_wildcards("resources/reads/{fastq}.fastq.gz")
-SAMPLE, = glob_wildcards("resources/reads/{sample}_R1.fastq.gz")
+FASTQ, = glob_wildcards("resources/symlinks/{fastq}.fastq.gz")
+SAMPLE, = glob_wildcards("resources/symlinks/{sample}_R1.fastq.gz")

 ###############################################################################
 ### RESOURCES ###
@@ -173,23 +176,23 @@ TMP_DIR = config["resources"]["tmp_dir"] # Temporary directory
 ### ENVIRONMENTS ###
 ####################

-MULTIQC = config["conda"]["yaml"]["multiqc"]           # Multi-QC conda env
-FASTQ_SCREEN = config["conda"]["yaml"]["fastq_screen"] # Fastq-Screen conda env
-FASTQC= config["conda"]["yaml"]["fastqc"]              # FastQC conda env
-CUTADAPT = config["conda"]["yaml"]["cutadapt"]         # Cutadapt conda environment
-SICKLE_TRIM = config["conda"]["yaml"]["sickle_trim"]   # Sickle-Trim conda environment
-MINIMAP2 = config["conda"]["yaml"]["minimap2"]         # BWA conda environment
-BWA = config["conda"]["yaml"]["bwa"]                   # BWA conda environment
-BOWTIE2 = config["conda"]["yaml"]["bowtie2"]           # Bowtie2 conda environment
-SAMTOOLS = config["conda"]["yaml"]["samtools"]         # SamTools conda environment
-BEDTOOLS = config["conda"]["yaml"]["bedtools"]         # BedTools conda environment
-BAMCLIPPER = config["conda"]["yaml"]["bamclipper"]     # BAMClipper
-GAWK = config["conda"]["yaml"]["gawk"]                 # Awk (GNU) conda environment
-IVAR = config["conda"]["yaml"]["ivar"]                 # iVar conda environment
-TSV2VCF = config["conda"]["yaml"]["tsv2vcf"]           # tsv2vcf conda environment
-BCFTOOLS = config["conda"]["yaml"]["bcftools"]         # BcfTools conda environment
-PANGOLIN = config["conda"]["yaml"]["pangolin"]         # Pangolin conda environment
-NEXTCLADE = config["conda"]["yaml"]["nextclade"]       # Nextclade conda environment
+MULTIQC = config["envs"]["multiqc"]           # Multi-QC conda env
+FASTQ_SCREEN = config["envs"]["fastq_screen"] # Fastq-Screen conda env
+FASTQC= config["envs"]["fastqc"]              # FastQC conda env
+CUTADAPT = config["envs"]["cutadapt"]         # Cutadapt conda environment
+SICKLE_TRIM = config["envs"]["sickle_trim"]   # Sickle-Trim conda environment
+MINIMAP2 = config["envs"]["minimap2"]         # BWA conda environment
+BWA = config["envs"]["bwa"]                   # BWA conda environment
+BOWTIE2 = config["envs"]["bowtie2"]           # Bowtie2 conda environment
+SAMTOOLS = config["envs"]["samtools"]         # SamTools conda environment
+BEDTOOLS = config["envs"]["bedtools"]         # BedTools conda environment
+BAMCLIPPER = config["envs"]["bamclipper"]     # BAMClipper
+GAWK = config["envs"]["gawk"]                 # Awk (GNU) conda environment
+IVAR = config["envs"]["ivar"]                 # iVar conda environment
+TSV2VCF = config["envs"]["tsv2vcf"]           # tsv2vcf conda environment
+BCFTOOLS = config["envs"]["bcftools"]         # BcfTools conda environment
+PANGOLIN = config["envs"]["pangolin"]         # Pangolin conda environment
+NEXTCLADE = config["envs"]["nextclade"]       # Nextclade conda environment

 ###############################################################################
 ### PARAMETERS ###
@@ -207,11 +210,11 @@ KMER_SIZE = config["minimap2"]["algorithm"]["k-mer_size"]          # MM2 k-mer s
 MINIMIZER_SIZE = config["minimap2"]["algorithm"]["minimizer_size"] # MM2 minimizer window size
 SPLIT_SIZE = config["minimap2"]["algorithm"]["split_size"]         # MM2 split index
 #HOMOPOLYMER = config["minimap2"]["algorithm"]["homopolymer"]       # MM2 if PacBio
+
 BWA_ALGO = config["bwa"]["algorithm"]     # BWA indexing algorithm
 BT2_ALGO = config["bowtie2"]["algorithm"] # BT2 indexing algorithm

 REFERENCE = config["consensus"]["reference"] # Genome reference sequence, in fasta format
-REF_PATH = config["consensus"]["path"]       # Path to genomes references
 MIN_COV = config["consensus"]["min_cov"]     # Minimum coverage, mask lower regions with 'N'
 IUPAC = config["consensus"]["iupac"]         # Output variants in the form of IUPAC ambiguity codes
 ALIGNER = config["consensus"]["aligner"]     # Aligner ('minimap2', 'bwa' or 'bowtie2')
@@ -228,7 +231,6 @@ SIC_ENCODING = config["sickle_trim"]["encoding"] # Sickle-trim --qual-type
 SIC_QUALITY = config["sickle_trim"]["quality"]   # Sickle-trim --qual-threshold
 SIC_LENGTH = config["sickle_trim"]["length"]     # Sickle-trim --length-treshold

-MM2_PATH = config["minimap2"]["path"]     # Minimpa2 path to indexes
 MM2_PRESET = config["minimap2"]["preset"] # Minimap2 preset
 #MM2_LENGTH = config["minimap2"]["length"]                              # Minimap2 length
 #MM2_KMER_SIZE = config["minimap2"]["algorithm"]["k-mer_size"]          # Minimap2 k-mer size
@@ -236,10 +238,8 @@ MM2_PRESET = config["minimap2"]["preset"] # Minimap2 preset
 #MM2_SPLIT_SIZE = config["minimap2"]["algorithm"]["split_size"]         # Minimap2 split index
 #MM2_HOMOPOLYMER = config["minimap2"]["algorithm"]["homopolymer"]       # Minimap2 for PacBio

-BWA_PATH = config["bwa"]["path"]      # BWA path to indexes
 BWA_ALGO = config["bwa"]["algorithm"] # BWA indexing algorithm

-BT2_PATH = config["bowtie2"]["path"]               # Bowtie2 path to indexes
 BT2_ALGO = config["bowtie2"]["algorithm"]          # Bowtie2 indexing algorithm
 BT2_SENSITIVITY = config["bowtie2"]["sensitivity"] # Bowtie2 sensitivity preset

@@ -456,6 +456,7 @@ rule ivar_consensus:
        "--min-BQ {params.min_bq} "       # -Q: skip bases with baseQ/BAQ smaller than [INT] (default: 13)
        #"--reference {input.masked_ref} " # Reference sequence FASTA FILE
        "{input.mark_dup} "               # Markdup BAM input
+        "2>&1 | grep -v '[mpileup] 1 samples in 1 input files' " # Remove this stdout
        "| "                               ### PIPE to iVar
        "ivar consensus "                # iVar, with command 'consensus': Call consensus from aligned BAM file
        "-p {output.prefix} "             # -p: prefix
@@ -512,6 +513,7 @@ rule ivar_variant_calling:
        "--min-BQ {params.min_bq} "       # -Q: skip bases with baseQ/BAQ smaller than (default: 13) [INT]
        "--reference {input.masked_ref} " # Reference sequence FASTA FILE
        "{input.mark_dup} "               # Markdup BAM input
+        "2>&1 | grep -v '[mpileup] 1 samples in 1 input files' " # Remove this stdout
        "| "                               ### pipe to iVar
        "ivar variants "                 # iVar, with command 'variants': Call variants from aligned BAM file
        "-p {output.variant_call} "       # -p: prefix
@@ -541,20 +543,19 @@ rule bedtools_masking:
        """
    conda:
        BEDTOOLS
-    params:
-        path = REF_PATH
    input:
+        reference = "resources/genomes/{reference}.fasta",
        low_cov_mask = "results/03_Coverage/{reference}/bed/{sample}_{aligner}_{min_cov}X_low-cov-mask.bed"
    output:
        masked_ref = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_masked-ref.fasta"
    log:
        "results/10_Reports/tools-log/bedtools/{reference}/{sample}_{aligner}_{min_cov}X_masking.log"
    shell:
-        "bedtools maskfasta "                          # Bedtools maskfasta, mask a fasta file based on feature coordinates
-        "-fi {params.path}{wildcards.reference}.fasta " # Input FASTA file 
-        "-bed {input.low_cov_mask} "                    # BED/GFF/VCF file of ranges to mask in -fi
-        "-fo {output.masked_ref} "                      # Output masked FASTA file
-        "&> {log}"                                      # Log redirection 
+        "bedtools maskfasta "       # Bedtools maskfasta, mask a fasta file based on feature coordinates
+        "-fi {input.reference} "     # Input FASTA file 
+        "-bed {input.low_cov_mask} " # BED/GFF/VCF file of ranges to mask in -fi
+        "-fo {output.masked_ref} "   # Output masked FASTA file
+        "&> {log}"                   # Log redirection 

 ###############################################################################
 rule bedtools_merged_mask:
@@ -874,7 +875,7 @@ rule bamclipper_amplicon_primers:
        "&& mv {wildcards.sample}_{wildcards.aligner}_mark-dup.primerclipped.bam.bai {output.baiclip}" # because BamClipper v.1 default output system...

 ###############################################################################
-############################## REMOVE DUPLICATS ###############################
+############################# REMOVE DUPLICATES ###############################
 ###############################################################################

 ###############################################################################
@@ -1056,7 +1057,7 @@ rule minimap2_mapping:
        preset = MM2_PRESET
        #length = LENGTH
    input:
-        mm2_indexes = "resources/indexes/minimap2/{reference}.mmi"
+        mm2_indexes = "resources/indexes/minimap2/{reference}.mmi",
        fwd_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R1.fastq.gz",
        rev_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R2.fastq.gz"
    output:
@@ -1097,7 +1098,7 @@ rule bwa_mapping:
    resources:
        cpus = CPUS
    input:
-        bwa_indexes = "resources/indexes/bwa/{reference}"
+        bwa_indexes = "resources/indexes/bwa/{reference}",
        fwd_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R1.fastq.gz",
        rev_reads = "results/01_Trimming/sickle/{sample}_sickle-trimmed_R2.fastq.gz"
    output:
@@ -1143,7 +1144,7 @@ rule bowtie2_mapping:
        "bowtie2 "                   # Bowtie2, an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences
        "--threads {resources.cpus} " # -p: Number of alignment threads to launch (default: 1)
        "--reorder "                  # Keep the original read order (if multi-processor option -p is used)
-        "-x {input.indexes} "         # -x: Reference index filename prefix (minus trailing .X.bt2)
+        "-x {input.bt2_indexes} "     # -x: Reference index filename prefix (minus trailing .X.bt2)
        "{params.sensitivity} "       # Preset (default: "--sensitive")
                                       # sensitive: same as [-D 15 -R 2 -N 0 -L 22 -i S,1,1.15]
        "-q "                         # -q: Query input files are FASTQ .fq/.fastq (default)
@@ -1176,7 +1177,7 @@ rule minimap2_genome_indexing:
        fasta = "resources/genomes/{reference}.fasta"
    output:
        mm2_indexes = multiext("resources/indexes/minimap2/{reference}",
-                           ".mmi")
+                               ".mmi")
    log:
        "results/10_Reports/tools-log/minimap2-indexes/{reference}.log"
    shell:
@@ -1185,7 +1186,7 @@ rule minimap2_genome_indexing:
        "-w {params.minimizer_size} " # -w: minimizer window size (default: "11") [INT]
        "-I {params.split_size} "     # -I: split index for every {NUM} input bases (default: "8G") [INT]
        #"{params.homopolymer} "       # use homopolymer-compressed k-mer (preferrable for PacBio)
-        "-d {output.indexes} "        # -d: dump index to FILE []
+        "-d {output.mm2_indexes} "    # -d: dump index to FILE []
        "{input.fasta} "              # Reference sequences in the FASTA format
        "&> {log}"                    # Log redirection

@@ -1205,14 +1206,14 @@ rule bwa_genome_indexing:
    input:
        fasta = "resources/genomes/{reference}.fasta"
    output:
-        prefix = temp("resources/indexes/bwa/{reference}"),
-        indexes = multiext("resources/indexes/bwa/{ref_seq}",
-                           ".amb", ".ann", ".bwt", ".pac", ".sa")
+        prefix = "resources/indexes/bwa/{reference}",
+        bwa_indexes = multiext("resources/indexes/bwa/{reference}",
+                               ".amb", ".ann", ".bwt", ".pac", ".sa")
    log:
        "results/10_Reports/tools-log/bwa-indexes/{reference}.log"
    shell:
        "bwa index "              # BWA-SW algorithm, index sequences
-        "{params.algorithm} "      # -a: Algorithm for constructing BWT index (default: auto)                                  
+        "{params.algorithm} "      # -a: Algorithm for constructing BWT index (default: auto)
        "-p {output.prefix} "      # -p: Prefix of the output database
        "{input.fasta} "           # Reference sequences in the FASTA format
        "&> {log} "                # Log redirection
@@ -1236,10 +1237,10 @@ rule bowtie2_genome_indexing:
    input:
        fasta = "resources/genomes/{reference}.fasta"
    output:
-        prefix = temp("resources/indexes/bowtie2/{reference}"),
-        indexes = multiext("resources/indexes/bowtie2/{ref_seq}",
-                           ".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2",
-                           ".rev.1.bt2", ".rev.2.bt2")
+        prefix = "resources/indexes/bowtie2/{reference}",
+        bt2_indexes = multiext("resources/indexes/bowtie2/{reference}",
+                               ".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2",
+                               ".rev.1.bt2", ".rev.2.bt2")
    log:
        "results/10_Reports/tools-log/bowtie2-indexes/{reference}.log"
    shell:
@@ -1319,10 +1320,8 @@ rule cutadapt_adapters_removing:
        small = CUT_SMALL,
        cut = CUT_CLIPPING
    input:
-        fwd_reads = "resources/reads/{sample}_R1.fastq.gz",
-        rev_reads = "resources/reads/{sample}_R2.fastq.gz"
-        #fwd_reads = "/users/illumina/local/data/run_1/FATSQ/{sample}_R1.fastq.gz",
-        #rev_reads = "/users/illumina/local/data/run_1/FATSQ/{sample}_R2.fastq.gz"
+        fwd_reads = "resources/symlinks/{sample}_R1.fastq.gz",
+        rev_reads = "resources/symlinks/{sample}_R2.fastq.gz"
    output:
        fwd_reads = temp("results/01_Trimming/cutadapt/{sample}_cutadapt-removed_R1.fastq.gz"),
        rev_reads = temp("results/01_Trimming/cutadapt/{sample}_cutadapt-removed_R2.fastq.gz")
@@ -1404,7 +1403,7 @@ rule fastqscreen_contamination_checking:
        config = FQC_CONFIG,
        subset = SUBSET
    input:
-        fastq = "resources/reads/{fastq}.fastq.gz"
+        fastq = "resources/symlinks/{fastq}.fastq.gz"
    output:
        fastq_screen = directory("results/00_Quality_Control/fastq-screen/{fastq}/")
    log:
@@ -1434,7 +1433,7 @@ rule fastqc_quality_control:
    resources:
        cpus = CPUS
    input:
-        fastq = "resources/reads/{fastq}.fastq.gz"
+        fastq = "resources/symlinks/{fastq}.fastq.gz"
    output:
        fastqc = directory("results/00_Quality_Control/fastqc/{fastq}/")
    log:

--- a/workflow/report/workflow.rst
+++ b/workflow/report/workflow.rst
--- a/workflow/rules/consensus_calling.smk
+++ b/workflow/rules/consensus_calling.smk
+###############################################################################
+################################## CONSENSUS ##################################
+###############################################################################
+
+###############################################################################
+rule sed_rename_headers:
+    # Aim: rename all fasta header with sample name
+    # Use: sed 's/[OLD]/[NEW]/' [IN] > [OUT]
+    message:
+        """
+        ~ Sed ∞ Rename Fasta Header ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        Variant caller: __ {wildcards.caller}
+        """
+    conda:
+        GAWK
+    input:
+        cons_tmp = "results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_consensus.fasta.tmp"
+    output:
+        consensus = "results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_consensus.fasta"
+    log:
+        "results/10_Reports/tools-log/sed/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_fasta-header.log"
+    shell:
+        "sed " # Sed, a Stream EDitor used to perform basic text transformations on an input stream
+        "'s/^>.*$/>{wildcards.sample}_{wildcards.aligner}_{wildcards.min_cov}X_{wildcards.caller}/' "
+        "{input.cons_tmp} "      # Input file
+        "1> {output.consensus} " # Output file
+        "2> {log}"               # Log redirection
+
+###############################################################################
--- a/workflow/rules/coverage_stats.smk
+++ b/workflow/rules/coverage_stats.smk
+###############################################################################
+################################## STATISTICS #################################
+###############################################################################
+
+###############################################################################
+rule awk_coverage_statistics:
+    # Aim: computing genomme coverage stats
+    # Use: awk {FORMULA} END {{print [RESULTS.tsv] [BEDGRAPH.bed]
+    message:
+        """
+        ~ Awk ∞ Compute Genome Coverage Statistics from BED file ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        GAWK
+    input:
+        cutadapt = "results/10_Reports/tools-log/cutadapt/{sample}.log",
+        sickle = "results/10_Reports/tools-log/sickle-trim/{sample}.log",
+        samtools = "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_mark-dup.log",
+        flagstat = "results/03_Coverage/{reference}/flagstat/{sample}_{aligner}_flagstat.json",
+        histogram = "results/03_Coverage/{reference}/histogram/{sample}_{aligner}_coverage-histogram.txt",
+        genome_cov = "results/02_Mapping/{reference}/{sample}_{aligner}_genome-cov.bed"
+    output:
+        cov_stats = "results/03_Coverage/{reference}/{sample}_{aligner}_{min_cov}X_coverage-stats.tsv"
+    log:
+        "results/10_Reports/tools-log/awk/{reference}/{sample}_{aligner}_{min_cov}X_coverage-stats.log"
+    shell:
+        r""" rawReads=$(grep -o -E  """                                  # Get raw reads 
+        r""" 'Total read pairs processed:.+' {input.cutadapt}  """       #
+        r""" | sed -E 's/Total read pairs processed:\ +//'  """          #
+        r""" | sed 's/,//g') ; """                                       #
+        #
+        r""" cutadaptPF=$(grep -o -E """                                 # Get cutadapt Passing Filtes reads
+        r""" 'Pairs written \(passing filters\):.+' {input.cutadapt} """ #
+        r""" | sed -E 's/Pairs written \(passing filters\):\ +//' """    #
+        r""" | sed 's/,//g') ; """                                       #
+        #
+        r""" sicklePF=$(grep -o -E """                                   # Get sickle Passing Filtes reads
+        r""" 'FastQ paired records kept:.+' {input.sickle} """           #
+        r""" | sed -E 's/FastQ paired records kept:\ +//') ; """         #
+        #
+        r""" totalDuplicate=$(grep -o -E """                             # Get total duplicated reads
+        r""" 'DUPLICATE TOTAL:.+' {input.samtools} """                   #
+        r""" | sed -E 's/DUPLICATE TOTAL:\ +//') ; """                   #
+        #
+        r""" estimatedLibrarySize=$(grep -o -E """                       # Get estimated library size
+        r""" 'ESTIMATED_LIBRARY_SIZE:.+' {input.samtools} """            #
+        r""" | sed -E 's/ESTIMATED_LIBRARY_SIZE:\ +//') ; """            #
+        #
+        r""" samtoolsPF=$(grep -o -E """                                 # Get samtool Passing Filter reads
+        r""" 'WRITTEN: .+' {input.samtools} """                          #
+        r""" | sed -E 's/WRITTEN:\ +//') ; """                           #
+        #
+        r""" mappedReads=$(grep -o -E -m 1 """                           # Get mapped reads
+        r""" '"mapped": .+' {input.flagstat} """                         #
+        r""" | sed -E 's/"mapped":\ +//' """                             #
+        r""" | sed 's/,//g') ; """                                       #
+        #
+        r""" mappedPercentReads=$(grep -o -E -m 1 """                    # Get mapped precent reads
+        r""" '"mapped %": .+' {input.flagstat} """                       #
+        r""" | sed -E 's/"mapped %":\ +//' """                           #
+        r""" | sed 's/,//g') ; """                                       #
+        #
+        r""" covPercentAt1X=$(grep -o -E """                             # Get coverage percent @1X
+        r""" 'Percent covered:.+' {input.histogram} """                  #
+        r""" | sed -E 's/Percent covered:\ +//') ; """                   #
+        #
+        r""" awk """                                                   # Awk, a program to select particular records in a file and perform operations upon them
+        r""" -v rawReads="${{rawReads}}" """                             # Define external variable
+        r""" -v cutadaptPF="${{cutadaptPF}}" """                         # Define external variable
+        r""" -v sicklePF="${{sicklePF}}" """                             # Define external variable
+        r""" -v totalDuplicate="${{totalDuplicate}}" """                 # Define external variable
+        r""" -v estimatedLibrarySize="${{estimatedLibrarySize}}" """     # Define external variable
+        r""" -v samtoolsPF="${{samtoolsPF}}" """                         # Define external variable
+        r""" -v mappedReads="${{mappedReads}}" """                       # Define external variable
+        r""" -v mappedPercentReads="${{mappedPercentReads}}" """         # Define external variable
+        r""" -v covPercentAt1X="${{covPercentAt1X}}" """                 # Define external variable
+        r""" '$4 >= {wildcards.min_cov} {{supMin_Cov+=$3-$2}} ; """      # Genome size (>= min_cov @X)
+        r""" {{genomeSize+=$3-$2}} ; """                                 # Genome size (total)
+        r""" {{totalBases+=($3-$2)*$4}} ; """                            # Total bases @1X
+        r""" {{totalBasesSq+=(($3-$2)*$4)**2}} """                       # Total bases square @1X
+        r""" END """                                                    # END
+        r""" {{print """                                                # Print
+        r""" "sample_id", "\t", """                                      # header: Sample ID
+        r""" "raw_paired_reads", "\t", """                               # header: Raw paired reads
+        r""" "cutadapt_pairs_PF", "\t", """                              # header: Cutadapt Passing Filters
+        r""" "sickle_reads_PF", "\t", """                                # header: Sickle-trim Passing Filters
+        r""" "duplicated_reads", "\t", """                               # header:
+        r""" "duplicated_percent_%","\t", """                            # header:
+        r""" "estimated_library_size*", "\t", """                        # header:
+        r""" "samtools_pairs_PF", "\t", """                              # header:
+        r""" "mapped_with", "\t",  """                                   # header: aligner
+        r""" "mapped_on", "\t",  """                                     # header: reference
+        r""" "mapped_reads", "\t", """                                   # header:
+        r""" "mapped_percent_%", "\t", """                               # header:
+        r""" "mean_depth", "\t", """                                     # header: mean depth
+        r""" "standard_deviation", "\t", """                             # header: standard deviation
+        r""" "cov_percent_%_@1X", "\t", """                              # header: coverage percentage @1X
+        r""" "cov_percent_%" "\t", """                                   # header: coverage percentage
+        r""" "@_min_cov" """                                             # header: @_[min_cov]_X
+        r""" ORS """                                                      # \n newline
+        r""" "{wildcards.sample}", "\t", """                             # value: Sample ID
+        r""" rawReads, "\t", """                                         # value: Raw sequences
+        r""" cutadaptPF, "\t", """                                       # value: Cutadapt Passing Filter
+        r""" sicklePF, "\t", """                                         # value: Sickle Passing Filter
+        r""" totalDuplicate, "\t", """                                   # value:
+        r""" int(((totalDuplicate)/(rawReads*2))*100), "%", "\t", """    # value: (divided by 2 to estimated pairs)
+        r""" estimatedLibrarySize, "\t", """                             # value:
+        r""" samtoolsPF, "\t", """                                       # value:
+        r""" "{wildcards.aligner}", "\t",  """                           # value: aligner
+        r""" "{wildcards.reference}", "\t",  """                         # value: reference
+        r""" mappedReads, "\t", """                                      # value:
+        r""" mappedPercentReads, "%", "\t", """                          # value:
+        r""" int(totalBases/genomeSize), "\t", """                       # value: mean depth
+        r""" int(sqrt((totalBasesSq/genomeSize)-(totalBases/genomeSize)**2)), "\t", """ # Standard deviation value
+        r""" covPercentAt1X, "\t", """                                   # value
+        r""" supMin_Cov/genomeSize*100, "%", "\t", """                   # Coverage percent (@ min_cov X) value
+        r""" "@{wildcards.min_cov}X" """                                 # @ min_cov X value
+        r""" }}' """                                                     # Close print
+        r""" {input.genome_cov} """                                      # BedGraph coverage input
+        r""" 1> {output.cov_stats} """                                   # Mean depth output
+        r""" 2> {log}"""                                                 # Log redirection
+        
+###############################################################################
+rule bedtools_genome_coverage:
+    # Aim: computing genome coverage sequencing
+    # Use: bedtools genomecov [OPTIONS] -ibam [MARK-DUP.bam] 1> [BEDGRAPH.bed]
+    message:
+        """
+        ~ BedTools ∞ Compute Genome Coverage ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        BEDTOOLS
+    input:
+        mark_dup = get_bam_input,
+        index = get_bai_input
+    output:
+        genome_cov = "results/02_Mapping/{reference}/{sample}_{aligner}_genome-cov.bed"
+    log:
+        "results/10_Reports/tools-log/bedtools/{reference}/{sample}_{aligner}_genome-cov.log"
+    shell:
+        "bedtools genomecov "    # Bedtools genomecov, compute the coverage of a feature file among a genome
+        "-bga "                   # Report depth in BedGraph format, regions with zero coverage are also reported
+        "-ibam {input.mark_dup} " # The input file is in BAM format, must be sorted by position
+        "1> {output.genome_cov} " # BedGraph output
+        "2> {log} "               # Log redirection
+
+###############################################################################
+rule samtools_coverage_histogram:
+    # Aim: alignment depth and percent coverage histogram
+    # Use: samtools coverage --histogram [INPUT.bam]
+    message:
+        """
+        ~ SamTools ∞ Calcul Depth and Coverage from BAM file ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        SAMTOOLS
+    resources:
+       cpus = CPUS
+       #bins = BINS,
+       #depth = DEPTH
+    input:
+        mark_dup = get_bam_input
+    output:
+        histogram = "results/03_Coverage/{reference}/histogram/{sample}_{aligner}_coverage-histogram.txt"
+    log:
+        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_coverage-histogram.log"
+    shell:
+        "samtools coverage "          # Samtools coverage, tools for alignments in the SAM format with command to alignment depth and percent coverage
+        "--histogram "                 # -m: show histogram instead of tabular output
+        "--verbosity 4 "               # Set level of verbosity [INT] (default: 3)
+        "--n-bins 149 "                # -w: number of bins in histogram (default: terminal width - 40) (todo: {params.bins}) 
+        "--depth 0 "                   # -d maximum allowed coverage depth [INT] (default: 1000000 ; 0 removing any depth limit) (todo: {params.depth}) 
+        "--output {output.histogram} " # write output to FILE (default: stdout)
+        "{input.mark_dup} ; "          # Mark_dup bam input
+        "echo >> {output.histogram} ; " # Newline
+        "samtools coverage "    # Samtools coverage, tools for alignments in the SAM format with command to alignment depth and percent coverage
+        "--verbosity 4 "         # Set level of verbosity [INT] (default: 3)
+        "{input.mark_dup} "      # Mark_dup bam input
+        ">> {output.histogram} " # write output to FILE (default: stdout)
+        "2> {log}"               # Log redirection
+
+###############################################################################
+rule samtools_flagstat_ext:
+    # Aim: simple stats
+    # Use: samtools flagstat -@ [THREADS] [INPUT.bam]
+    message:
+        """
+        ~ SamTools ∞ Calcul simple Stats from BAM file ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        SAMTOOLS
+    resources:
+       cpus = CPUS
+    input:
+        mark_dup = get_bam_input
+    output:
+        flagstat = "results/03_Coverage/{reference}/flagstat/{sample}_{aligner}_flagstat.{ext}"
+    log:
+        "results/10_Reports/tools-log/samtools/{reference}/{sample}_{aligner}_flagstat-{ext}.log"
+    shell:
+        "samtools flagstat "           # Samtools flagstat, tools for alignments in the SAM format with command to simple stat
+        "--threads {resources.cpus} "   # -@: Number of additional threads to use (default: 1)
+        "--verbosity 4 "                # Set level of verbosity [INT] (default: 3)
+        "--output-fmt {wildcards.ext} " # -O Specify output format (none, tsv and json)
+        "{input.mark_dup} "             # Mark_dup bam input
+        "1> {output.flagstat} "         # Mark_dup index output
+        "2> {log}"                      # Log redirection
+
+###############################################################################
--- a/workflow/rules/indexing_genomes.smk
+++ b/workflow/rules/indexing_genomes.smk
-###I###R###D######U###2###3###3#######T###R###A###N###S###V###I###H###M###I####
-###                                                                         ###
-###    /\  ______      ___ ____ _  _ __   ____ __   ____     ______  /\     ###
-###    ||  \ \ \ \    / __( ___( \/ )__\ (  _ (  ) (_  _)   / / / /  ||     ###
-###    ||   > > > >  ( (_-.)__) \  /(__)\ )   /)(__ _)(_   < < < <   ||     ###
-###    ||  /_/_/_/    \___(____) \(__)(__(_)\_(____(____)   \_\_\_\  ||     ###
-###    \/                                                            \/     ###
-###                                                                         ###
-###I###R###D######U###2###3###3#######T###R###A###N###S###V###I###H###M###I####
-# Name ___________________ indexing_genomes.smk
-# Version ________________ v.2023.08
-# Author _________________ Nicolas Fernandez
-# Affiliation ____________ IRD_U233_TransVIHMI
-# Aim ____________________ Snakefile with indexing genomes rules
-# Date ___________________ 2022.09.28
-# Latest modifications ___ 2024.08.05 ('Noarch' conda environment yaml files)
-# Use ____________________ snakemake -s indexing_genomes.smk --use-conda 
-
-###############################################################################
-### CONFIGURATION ###
-#####################
-
-configfile: "configuration/config.yaml"
-
 ###############################################################################
-### FUNCTIONS ###
-#################
-
-
-###############################################################################
-### WILDCARDS ###
-#################
-
-REF_SEQ, = glob_wildcards("resources/genomes/{ref_seq}.fasta")
-
-###############################################################################
-### RESOURCES ###
-#################
-
-CPUS = config["resources"]["cpus"] # Threads (maximum)
-
-###############################################################################
-### ENVIRONMENTS ###
-####################
-
-MINIMAP2 = config["conda"]["yaml"]["minimap2"] # MM2 conda env
-BWA = config["conda"]["yaml"]["bwa"]           # BWA conda env
-BOWTIE2 = config["conda"]["yaml"]["bowtie2"]   # BT2 conda env
-
+################################## INDEXING ###################################
 ###############################################################################
-### PARAMETERS ###
-##################
-
-KMER_SIZE = config["minimap2"]["algorithm"]["k-mer_size"]          # MM2 k-mer size
-MINIMIZER_SIZE = config["minimap2"]["algorithm"]["minimizer_size"] # MM2 minimizer window size
-SPLIT_SIZE = config["minimap2"]["algorithm"]["split_size"]         # MM2 split index
-#HOMOPOLYMER = config["minimap2"]["algorithm"]["homopolymer"]       # MM2 if PacBio
-
-BWA_ALGO = config["bwa"]["algorithm"]     # BWA indexing algorithm
-BT2_ALGO = config["bowtie2"]["algorithm"] # BT2 indexing algorithm

 ###############################################################################
-### RULES ###
-#############
-
-rule all:
+rule bwa_genome_indexing:
+    # Aim: index sequences in the FASTA format
+    # Use: bwa index -a [ALGO] -p [PREFIX] <genome.fasta>
+    message:
+        """
+        ~ BWA-SW ∞ Index Genome ~
+        Reference: _______ {wildcards.reference}
+        """
+    conda:
+        BWA
+    params:
+        algorithm = BWA_ALGO
    input:
-        mm2_indexes = expand("resources/indexes/minimap2/{ref_seq}.{ext}",
-                            ref_seq = REF_SEQ,
-                            ext = ["mmi"]),
-        bwa_indexes = expand("resources/indexes/bwa/{ref_seq}.{ext}",
-                            ref_seq = REF_SEQ,
-                            ext = ["amb", "ann", "bwt", "pac", "sa"]),
-        bt2_indexes = expand("resources/indexes/bowtie2/{ref_seq}.{ext}",
-                            ref_seq = REF_SEQ,
-                            ext = ["1.bt2", "2.bt2", "3.bt2", "4.bt2",
-                                   "rev.1.bt2", "rev.2.bt2"])
-        
+        fasta = "resources/genomes/{reference}.fasta"
+    output:
+        prefix = "resources/indexes/bwa/{reference}",
+        bwa_indexes = multiext("resources/indexes/bwa/{reference}",
+                               ".amb", ".ann", ".bwt", ".pac", ".sa")
+    log:
+        "results/10_Reports/tools-log/bwa-indexes/{reference}.log"
+    shell:
+        "bwa index "              # BWA-SW algorithm, index sequences
+        "{params.algorithm} "      # -a: Algorithm for constructing BWT index (default: auto)
+        "-p {output.prefix} "      # -p: Prefix of the output database
+        "{input.fasta} "           # Reference sequences in the FASTA format
+        "&> {log} "                # Log redirection
+        "&& touch {output.prefix}" # Touch done
+
 ###############################################################################
 rule minimap2_genome_indexing:
    # Aim: index sequences in the FASTA format
@@ -83,61 +38,32 @@ rule minimap2_genome_indexing:
    message:
        """
        ~ Minimap2 ∞ Index Genome ~
-        Reference: _______ {wildcards.ref_seq}
+        Reference: _______ {wildcards.reference}
        """
    conda:
        MINIMAP2
    params:
        kmer_size = KMER_SIZE,
        minimizer_size = MINIMIZER_SIZE,
-        split_size = SPLIT_SIZE,
+        split_size = SPLIT_SIZE
        #homopolymer = HOMOPOLYMER
    input:
-        fasta = "resources/genomes/{ref_seq}.fasta"
+        fasta = "resources/genomes/{reference}.fasta"
    output:
-        indexes = multiext("resources/indexes/minimap2/{ref_seq}",
-                           ".mmi")
+        mm2_indexes = multiext("resources/indexes/minimap2/{reference}",
+                               ".mmi")
    log:
-        "results/10_Reports/tools-log/minimap2-indexes/{ref_seq}.log"
+        "results/10_Reports/tools-log/minimap2-indexes/{reference}.log"
    shell:
        "minimap2 "                  # Minimap2, index sequences
        "-k {params.kmer_size} "      # -k: k-mer size (default: "21", no larger than "28") [INT]
        "-w {params.minimizer_size} " # -w: minimizer window size (default: "11") [INT]
        "-I {params.split_size} "     # -I: split index for every {NUM} input bases (default: "8G") [INT]
        #"{params.homopolymer} "       # use homopolymer-compressed k-mer (preferrable for PacBio)
-        "-d {output.indexes} "        # -d: dump index to FILE []
+        "-d {output.mm2_indexes} "    # -d: dump index to FILE []
        "{input.fasta} "              # Reference sequences in the FASTA format
        "&> {log}"                    # Log redirection

-###############################################################################
-rule bwa_genome_indexing:
-    # Aim: index sequences in the FASTA format
-    # Use: bwa index -a [ALGO] -p [PREFIX] <genome.fasta>
-    message:
-        """
-        ~ BWA-SW ∞ Index Genome ~
-        Reference: _______ {wildcards.ref_seq}
-        """
-    conda:
-        BWA
-    params:
-        algorithm = BWA_ALGO
-    input:
-        fasta = "resources/genomes/{ref_seq}.fasta"
-    output:
-        prefix = temp("resources/indexes/bwa/{ref_seq}"),
-        indexes = multiext("resources/indexes/bwa/{ref_seq}",
-                           ".amb", ".ann", ".bwt", ".pac", ".sa")
-    log:
-        "results/10_Reports/tools-log/bwa-indexes/{ref_seq}.log"
-    shell:
-        "bwa index "              # BWA-SW algorithm, index sequences
-        "{params.algorithm} "      # -a: Algorithm for constructing BWT index (default: auto)                                  
-        "-p {output.prefix} "      # -p: Prefix of the output database
-        "{input.fasta} "           # Reference sequences in the FASTA format
-        "&> {log} "                # Log redirection
-        "&& touch {output.prefix}" # Touch done
-
 ###############################################################################
 rule bowtie2_genome_indexing:
    # Aim: index sequences in the FASTA format
@@ -145,7 +71,7 @@ rule bowtie2_genome_indexing:
    message:
        """
        ~ Bowtie2-build ∞ Index Genome ~
-        Reference: _______ {wildcards.ref_seq}
+        Reference: _______ {wildcards.reference}
        """
    conda:
        BOWTIE2
@@ -154,19 +80,20 @@ rule bowtie2_genome_indexing:
    params:
        algorithm = BT2_ALGO
    input:
-        fasta = "resources/genomes/{ref_seq}.fasta"
+        fasta = "resources/genomes/{reference}.fasta"
    output:
-        prefix = temp("resources/indexes/bowtie2/{ref_seq}"),
-        indexes = multiext("resources/indexes/bowtie2/{ref_seq}",
-                           ".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2",
-                           ".rev.1.bt2", ".rev.2.bt2")
+        prefix = "resources/indexes/bowtie2/{reference}",
+        bt2_indexes = multiext("resources/indexes/bowtie2/{reference}",
+                               ".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2",
+                               ".rev.1.bt2", ".rev.2.bt2")
    log:
-        "results/10_Reports/tools-log/bowtie2-indexes/{ref_seq}.log"
+        "results/10_Reports/tools-log/bowtie2-indexes/{reference}.log"
    shell:
        "bowtie2-build "             # Bowtie2-build, index sequences
        "--quiet "                    # -q: quiet
        "--threads {resources.cpus} " # Number of threads
-        "{params.algorithm} "         # Force (or no by default) generated index to be 'large', even if ref has fewer than 4 billion nucleotides
+        "{params.algorithm} "         # Force (or no by default) generated index to be 'large',
+                                       # even if ref has fewer than 4 billion nucleotides
        "-f "                         # Reference files are FASTA (default)
        "{input.fasta} "              # Reference sequences files (comma-separated list) in the FASTA format
        "{output.prefix} "            # Write bt2 data to files with this dir/basename

--- a/workflow/rules/lineages_calling.smk
+++ b/workflow/rules/lineages_calling.smk
+###############################################################################
+################################## LINEAGES ###################################
+###############################################################################
+
+###############################################################################
+rule nextclade_lineage:
+    # Aim: nextclade lineage assignation
+    # Use: nextclade [QUERY.fasta] -t [THREADS] --outfile [NAME.csv]
+    message:
+        """
+        ~ Nextclade ∞ Assign Lineage ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        Variant caller: __ {wildcards.caller}
+        """
+    conda:
+        NEXTCLADE
+    resources:
+        cpus = CPUS
+    params:
+        path = NEXT_PATH,
+        dataset = NEXT_DATASET
+    input:
+        consensus = "results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_consensus.fasta"
+    output:
+        lineage = "results/06_Lineages/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_nextclade-report.tsv",
+        alignment = directory("results/06_Lineages/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_nextclade-all/")
+    log:
+        "results/10_Reports/tools-log/nextclade/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_lineage.log"
+    shell:
+        "nextclade "                                    # Nextclade, assign queries sequences to clades and reports potential quality issues
+        "run "                                           # Run analyzis
+        "--jobs {resources.cpus} "                       # -j: Number of CPU threads used by the algorithm (default: all available threads)
+        "--input-dataset {params.path}{params.dataset} " # -raq: Path to a directory containing a dataset (root-seq, tree and qc-config required)
+        "--output-tsv {output.lineage} "                 # -t: Path to output TSV results file
+        "--output-all {output.alignment} "               # -O: Produce all of the output files into this directory, using default basename
+        "{input.consensus} "                             # Path to a .fasta file with input sequences
+        "&> {log}"                                       # Log redirection
+
+###############################################################################
+rule pangolin_lineage:
+    # Aim: lineage mapping
+    # Use: pangolin [QUERY.fasta] -t [THREADS] --outfile [NAME.csv]
+    message:
+        """
+        ~ Pangolin ∞ Assign Lineage ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        Variant caller: __ {wildcards.caller}
+        """
+    conda:
+        PANGOLIN
+    resources:
+        cpus = CPUS,
+        tmp_dir = TMP_DIR
+    input:
+        consensus = "results/05_Consensus/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_consensus.fasta"
+    output:
+        lineage = "results/06_Lineages/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_pangolin-report.csv"
+    log:
+        "results/10_Reports/tools-log/pangolin/{reference}/{sample}_{aligner}_{min_cov}X_{caller}_lineage.log"
+    shell:
+        "pangolin "                     # Pangolinn, Phylogenetic Assignment of Named Global Outbreak LINeages
+        "{input.consensus} "             # Query fasta file of sequences to analyse
+        "--threads {resources.cpus} "    # -t: Number of threads
+        "--tempdir {resources.tmp_dir} " # Specify where you want the temp stuff to go (default: $TMPDIR)
+        "--outfile {output.lineage} "    # Optional output file name (default: lineage_report.csv)
+        "&> {log}"                       # Log redirection
+
+###############################################################################
--- a/workflow/rules/mask_lowcov.smk
+++ b/workflow/rules/mask_lowcov.smk
+###############################################################################
+############################ MASKING LOW COVERAGE #############################
+###############################################################################
+
+###############################################################################
+rule bedtools_masking:
+    # Aim: masking low coverage regions
+    # Use: bedtools maskfasta [OPTIONS] -fi [REFERENCE.fasta] -bed [RANGE.bed] -fo [MASKEDREF.fasta]
+    message:
+        """
+        ~ BedTools ∞ Mask Low Coverage Regions ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        """
+    conda:
+        BEDTOOLS
+    input:
+        reference = "resources/genomes/{reference}.fasta",
+        low_cov_mask = "results/03_Coverage/{reference}/bed/{sample}_{aligner}_{min_cov}X_low-cov-mask.bed"
+    output:
+        masked_ref = "results/04_Variants/{reference}/{sample}_{aligner}_{min_cov}X_masked-ref.fasta"
+    log:
+        "results/10_Reports/tools-log/bedtools/{reference}/{sample}_{aligner}_{min_cov}X_masking.log"
+    shell:
+        "bedtools maskfasta "       # Bedtools maskfasta, mask a fasta file based on feature coordinates
+        "-fi {input.reference} "     # Input FASTA file 
+        "-bed {input.low_cov_mask} " # BED/GFF/VCF file of ranges to mask in -fi
+        "-fo {output.masked_ref} "   # Output masked FASTA file
+        "&> {log}"                   # Log redirection 
+
+###############################################################################
+rule bedtools_merged_mask:
+    # Aim: merging overlaps
+    # Use: bedtools merge [OPTIONS] -i [FILTERED.bed] -g [GENOME.fasta] 
+    message:
+        """
+        ~ BedTools ∞ Merge Overlaps ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        """
+    conda:
+        BEDTOOLS
+    input:
+        min_cov_filt = "results/03_Coverage/{reference}/bed/{sample}_{aligner}_{min_cov}X_min-cov-filt.bed"
+    output:
+        low_cov_mask = temp("results/03_Coverage/{reference}/bed/{sample}_{aligner}_{min_cov}X_low-cov-mask.bed")
+    log:
+        "results/10_Reports/tools-log/bedtools/{reference}/{sample}_{aligner}_{min_cov}X_merging.log"
+    shell:
+        "bedtools merge "          # Bedtools merge, merges overlapping BED/GFF/VCF entries into a single interval
+        "-i {input.min_cov_filt} "  # -i: BED/GFF/VCF input to merge 
+        "1> {output.low_cov_mask} " # merged output
+        "2> {log}"                  # Log redirection
+
+###############################################################################
+rule awk_min_covfilt:
+    # Aim: minimum coverage filtration
+    # Use: awk '$4 < [MIN_COV]' [BEDGRAPH.bed] 1> [FILTERED.bed]
+    message:
+        """
+        ~ Awk ∞ Minimum Coverage Filtration ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        Min. cov.: _______ {wildcards.min_cov}X
+        """
+    conda:
+        GAWK
+    input:
+        genome_cov = "results/02_Mapping/{reference}/{sample}_{aligner}_genome-cov.bed"
+    output:
+        min_cov_filt = temp("results/03_Coverage/{reference}/bed/{sample}_{aligner}_{min_cov}X_min-cov-filt.bed")
+    log:
+        "results/10_Reports/tools-log/awk/{reference}/{sample}_{aligner}_{min_cov}X_min-cov-filt.log"
+    shell:
+        "awk "                      # Awk, a program that you can use to select particular records in a file and perform operations upon them
+        "'$4 < {wildcards.min_cov}' " # Minimum coverage for masking regions in consensus sequence
+        "{input.genome_cov} "         # BedGraph coverage input
+        "1> {output.min_cov_filt} "   # Minimum coverage filtered bed output
+        "2> {log} "                   # Log redirection
+
+###############################################################################
--- a/workflow/rules/primer_cleaping.smk
+++ b/workflow/rules/primer_cleaping.smk
+###############################################################################
+############################## PRIMER CLEAPING  ###############################
+###############################################################################
+
+###############################################################################
+rule bamclipper_amplicon_primers:
+    # Aim: soft-clip amplicon PCR primers from BAM alignments
+    # Use: bamclipper.sh -n [THREADS] -b [MARKDUP.bam] -p [PRIMER.bed] -u [UPSTREAM] -d [DOWNSTREAM]
+    message:
+        """
+        ~ BAMClipper ∞ soft-clipping amplicon PCR primers from BAM alignments ~
+        Sample: __________ {wildcards.sample}
+        Reference: _______ {wildcards.reference}
+        Aligner: _________ {wildcards.aligner}
+        """
+    conda:
+        BAMCLIPPER
+    resources:
+       cpus = CPUS
+    params:
+        path = CLIPPATH,
+        primers = PRIMERS,
+        upstream = UPSTREAM,
+        downstream = DOWNSTREAM
+    input:
+        markdup = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam",
+        index = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.bam.bai"
+    output:
+        bamclip = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.primerclipped.bam",
+        baiclip = "results/02_Mapping/{reference}/{sample}_{aligner}_mark-dup.primerclipped.bam.bai"
+    log:
+        "results/10_Reports/tools-log/bamclipper/{reference}/{sample}_{aligner}_primers-clip.log"
+    shell:
+        "bamclipper.sh "                         # BAMClipper, remove primer sequences from BAM alignments of PCR amplicons by soft-clipping
+        "-b {input.markdup} "                     # Indexed BAM alignment file
+        "-p {params.path}{params.primers}.bedpe " # BEDPE of primer pair locations
+        "-n {resources.cpus} "                    # Number of threads (default: 1)
+        "-u {params.upstream} "                   # Number of nuc. upstream for assigning alignments to primers (default: 5)
+        "-d {params.downstream} "                 # Number of nuc. downstream for assigning alignments to primers (default: 5)
+        #"-o results/02_Mapping/ "                 # Path to write output (if BamClipper v.1.1.2) (todo)
+        "&> {log} "                               # Log redirection
+        "&& mv {wildcards.sample}_{wildcards.aligner}_mark-dup.primerclipped.bam {output.bamclip} "    # because BamClipper v.1 default output system...
+        "&& mv {wildcards.sample}_{wildcards.aligner}_mark-dup.primerclipped.bam.bai {output.baiclip}" # because BamClipper v.1 default output system...
+
+###############################################################################