From 6c2b7b7a3a3361001f7d8a1abd857a5000eba682 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20FERNANDEZ=20NU=C3=91EZ?= <nicolas.fernandez@ird.fr> Date: Mon, 17 Apr 2023 16:10:20 +0200 Subject: [PATCH] 2023-04 release --- README.md | 83 +++++++++++++++++++-------------------- Start_GeVarLi.sh | 4 +- configuration/config.yaml | 6 +-- 3 files changed, 46 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 70a96b6..172b2bf 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # GeVarLi: GEnome assembly, VARiant calling and LIneage assignation #  - | Catalina (10.15.7) | Big Sure (11.6.3) | Monterey (12.6.0) | Ventura (13.2.1)/E6055C?icon=apple&label&list=|&scale=0.9>) + | Catalina (10.15.7) | Big Sure (11.6.3) | Monterey (12.6.0) | Ventura (13.3.1)/E6055C?icon=apple&label&list=|&scale=0.9>)  | Focal Fossa (20.04) | Jammy Jellyfish (22.04)/772953?icon=https://www.svgrepo.com/show/25424/ubuntu-logo.svg&label&list=|&scale=0.9>)  | Focal Fossa (20.04) | Jammy Jellyfish (22.04)/00BCF2?icon=windows&label&list=|&scale=0.9>)  @@ -12,9 +12,9 @@    - - - + + + ## ~ ABOUT ~ ## @@ -41,20 +41,19 @@ The Covid-19 epidemic has highlighted the disparities that remain between contin - Sickle-trim (_quality trimming_) - Reads mapping - (_bam files_) - - - - -- Variants calling - - (_vcf files_) + - (_bed files_) + - Visualization (IGV) +- Variants calling and filtering (_vcf files_) - Genome coverage (_statistics reports_) - Consensus sequences (_fasta file_) - Genomes classification - - Nextclade - - Pangolin + - Nextclade (_consensus quality and lineages reports_) + - Pangolin (_lineages reports_) ### Version ### -*V.2023.03* +*V.2023.04* ### Rulegraph ### @@ -189,8 +188,8 @@ _Option-2: Edit **fastq-screen.conf** file in **./configuration/** directory_ First run will auto-created _(only once)_: - Snakemake-Base conda environment _(Snakemake, Mamba, Rename, GraphViz)_ - - GeVarLi-conda environments _(tools used by GeVarLi)_ - - Indexes for BWA and BOWTIE2 
aligners _(for each fasta genomes in resources)_ + - GeVarLi-Tools conda environments _(tools used by GeVarLi)_ + - Indexes for BWA and BOWTIE2 aligners _(for each fasta genomes in resources/ directory)_ _This may take some time, depending on your internet connection and your computer_ @@ -205,10 +204,10 @@ _Some [temp] tagged files are removed by default, to save disk usage_ ├── 📂 archives/ │ └── 📦 Results_{YYYY-MM-DD_HHhMM}_{REFERENCE}_{ALIGNER}_{MINCOV}_{SAMPLES}_archive.tar.gz └── 📂 results/ - ├── 🧬 All_consensus_sequences.fasta - ├── 📊 All_genome_coverages.tsv - ├── 📊 All_nextclade_lineages.tsv - ├── 📊 All_pangolin_lineages.tsv + ├── 🧬 All_{REFERENCE}_consensus_sequences.fasta + ├── 📊 All_{REFERENCE}_genome_coverages.tsv + ├── 📊 All_{REFERENCE}_nextclade_lineages.tsv + ├── 📊 All_{REFERENCE}_pangolin_lineages.tsv ├── 🌐 All_readsQC_reports.html ├── 📂 00_Quality_Control/ │ ├── 📂 fastq-screen/ @@ -229,40 +228,40 @@ _Some [temp] tagged files are removed by default, to save disk usage_ │     ├── 📄 multiqc_general_stats.txt |     └── 📄 multiqc_sources.txt ├── 📂 01_Trimmidapt - │ ├── 📂 cutad{SAMPLE}_cutadapt-removed_R{1/2}.fastq.gz # [temp] - │ │ └── 📦 {S + │ ├── 📂 cutadapt/ + │ │ └── 📦 {SAMPLE}_cutadapt-removed_R{1/2}.fastq.gz # [temp] │ └── 📂 sickle/ │ ├── 📦 {SAMPLE}_sickle-trimmed_R{1/2}.fastq.gz # [temp] │ └── 📦 {SAMPLE}_sickle-trimmed_SE.fastq.gz # [temp] ├── 📂 02_Mapping/ - │ ├── 🧭 {SAMPLE}_{ALIGNER}_mark-dup.bam - │ ├── 🗂️ {SAMPLE}_{ALIGNER}_mark-dup.bam.bai - │ ├── 🧭 {SAMPLE}_{ALIGNER}_mark-dup.primerclipped.bam - │ ├── 🗂️ {SAMPLE}_{ALIGNER}_mark-dup.primerclipped.bam.bai + │ ├── 🧭 {SAMPLE}_{REFERENCE}_{ALIGNER}_mark-dup.bam + │ ├── 🗂️ {SAMPLE}_{REFERENCE}_{ALIGNER}_mark-dup.bam.bai + │ ├── 🧭 {SAMPLE}_{REFERENCE}_{ALIGNER}_mark-dup.primerclipped.bam + │ ├── 🗂️ {SAMPLE}_{REFERENCE}_{ALIGNER}_mark-dup.primerclipped.bam.bai │ ├── 🧭 {SAMPLE}_{ALIGNER}-mapped.sam # [temp] - │ ├── 🧭 {SAMPLE}_{ALIGNER}_sorted-by-names.bam # [temp] - │ ├── 
🧠{SAMPLE}_{ALIGNER}_fixed-mate.bam # [temp] - │ └── 🧠{SAMPLE}_{ALIGNER}_sorted.bam # [temp] + │ ├── 🧠{SAMPLE}_{REFERENCE}_{ALIGNER}_sorted-by-names.bam # [temp] + │ ├── 🧠{SAMPLE}_{REFERENCE}_{ALIGNER}_fixed-mate.bam # [temp] + │ └── 🧠{SAMPLE}_{REFERENCE}_{ALIGNER}_sorted.bam # [temp] ├── 📂 03_Coverage/ - │ ├── 📊 {SAMPLE}_{ALIGNER}_{MINCOV}_coverage-stats.tsv - │ ├── ðŸ›ï¸ {SAMPLE}_{ALIGNER}_genome-cov.bed # [temp] - │ ├── ðŸ›ï¸ {SAMPLE}_{ALIGNER}_{MINCOV}_min-cov-filt.bed # [temp] - │ └── ðŸ›ï¸ {SAMPLE}_{ALIGNER}_{MINCOV}_low-cov-mask.bed # [temp] + │ ├── 📊 {SAMPLE}_{REFERENCE}_{ALIGNER}_{MINCOV}_coverage-stats.tsv + │ ├── ðŸ›ï¸ {SAMPLE}_{REFERENCE}_{ALIGNER}_genome-cov.bed # [temp] + │ ├── ðŸ›ï¸ {SAMPLE}_{REFERENCE}_{ALIGNER}_{MINCOV}_min-cov-filt.bed # [temp] + │ └── ðŸ›ï¸ {SAMPLE}_{REFERENCE}_{ALIGNER}_{MINCOV}_low-cov-mask.bed # [temp] ├── 📂 04_Variants/ - │ ├── 🧬 {SAMPLE}_{ALIGNER}_{MINCOV}_masked-ref.fasta - │ ├── ðŸ—‚ï¸ {SAMPLE}_{ALIGNER}_{MINCOV}_masked-ref.fasta.fai - │ ├── 🧠{SAMPLE}_{ALIGNER}_{MINCOV}_indel-qual.bam - │ ├── ðŸ—‚ï¸ {SAMPLE}_{ALIGNER}_{MINCOV}_indel-qual.bai - │ ├── ðŸ§®ï¸ {SAMPLE}_{ALIGNER}_{MINCOV}_variant-call.vcf - │ ├── ðŸ§®ï¸ {SAMPLE}_{ALIGNER}_{MINCOV}_variant-filt.vcf - │ ├── 📦 {SAMPLE}_{ALIGNER}_{MINCOV}_variant-filt.vcf.bgz # [temp] - │ └── ðŸ—‚ï¸ {SAMPLE}_{ALIGNER}_{MINCOV}_variant-filt.vcf.bgz.tbi # [temp] + │ ├── 🧬 {SAMPLE}_{REFERENCE}_{ALIGNER}_{MINCOV}_masked-ref.fasta + │ ├── ðŸ—‚ï¸ {SAMPLE}_{REFERENCE}_{ALIGNER}_{MINCOV}_masked-ref.fasta.fai + │ ├── 🧠{SAMPLE}_{REFERENCE}_{ALIGNER}_{MINCOV}_indel-qual.bam + │ ├── ðŸ—‚ï¸ {SAMPLE}_{REFERENCE}_{ALIGNER}_{MINCOV}_indel-qual.bai + │ ├── ðŸ§®ï¸ {SAMPLE}_{REFERENCE}_{ALIGNER}_{MINCOV}_variant-call.vcf + │ ├── ðŸ§®ï¸ {SAMPLE}_{REFERENCE}_{ALIGNER}_{MINCOV}_variant-filt.vcf + │ ├── 📦 {SAMPLE}_{REFERENCE}_{ALIGNER}_{MINCOV}_variant-filt.vcf.bgz # [temp] + │ └── ðŸ—‚ï¸ {SAMPLE}_{REFERENCE}_{ALIGNER}_{MINCOV}_variant-filt.vcf.bgz.tbi # [temp] ├── 📂 05_Consensus/ - │ └── 🧬 
{SAMPLE}_{ALIGNER}_{MINCOV}_consensus.fasta + │ └── 🧬 {SAMPLE}_{REFERENCE}_{ALIGNER}_{MINCOV}_consensus.fasta ├── 📂 06_Lineages/ - │ ├── 📊 {SAMPLE}_{ALIGNER}_{MINCOV}_nextclade-report.tsv - │ ├── 📊 {SAMPLE}_{ALIGNER}_{MINCOV}_pangolin-report.csv - │ └── 📂 {SAMPLE}_{ALIGNER}_{MINCOV}_nextclade-all/ + │ ├── 📊 {SAMPLE}_{REFERENCE}_{ALIGNER}_{MINCOV}_nextclade-report.tsv + │ ├── 📊 {SAMPLE}_{REFERENCE}_{ALIGNER}_{MINCOV}_pangolin-report.csv + │ └── 📂 {SAMPLE}_{REFERENCE}_{ALIGNER}_{MINCOV}_nextclade-all/  │ ├── 🧬 nextclade.aligned.fasta  │ ├── 📊 nextclade.csv  │ ├── 📊 nextclade.errors.csv diff --git a/Start_GeVarLi.sh b/Start_GeVarLi.sh index 287d5e1..e7fd319 100755 --- a/Start_GeVarLi.sh +++ b/Start_GeVarLi.sh @@ -124,7 +124,7 @@ Conda environment ${red}snakemake-base_v.${snakemake_base_version}${nc} will be # ${red}Rename${nc}: Rename fastq files (ver. 1.601) # ${red}Graphviz${nc}: Dot snakemake DAG (ver. 7.1.0) " - conda env create -f ${workdir}/workflow/environments/${os}/snakemake-base_v.${snakemake_base_version}.yaml + conda env create -f ${workdir}/workflow/environments/${os}/snakemake-base_v.${snakemake_base_version}.yaml fi # Remove old 'gevarli' and 'snakemake' environments @@ -437,7 +437,7 @@ for directory in ${workdir}/results/02_Mapping/*/ ; do awk "NR==1 || NR%2==0" ${workdir}/results/All_${reference}_genome_coverages.tsv \ 2> /dev/null \ 1> ${workdir}/results/GENCOV.tmp \ - && mv ${workdir}/results/GENCOV.tmp ${workdir}/results/All_genome_coverages.tsv \ + && mv ${workdir}/results/GENCOV.tmp ${workdir}/results/All_${reference}_genome_coverages.tsv \ 2> /dev/null ; # Concatenate PANGOLIN cat ${workdir}/results/06_Lineages/${reference}/*_pangolin-report.csv \ diff --git a/configuration/config.yaml b/configuration/config.yaml index 8679e8d..9d4d9df 100755 --- a/configuration/config.yaml +++ b/configuration/config.yaml @@ -30,8 +30,8 @@ consensus: reference: # Your reference, in fasta format (default: SARS-CoV-2_Wuhan_MN-908947-3) # Available options (not 
exhaustive), choose one: - 'SARS-CoV-2_Wuhan_MN-908947-3' # SARS-CoV-2 (Nextclade and Pangolin) - #- 'Monkeypox-virus_Zaire_AF-380138-1' # Monkeypox (Nextclade) - #- 'Monkeypox-virus_UK_MT-903345-1' # Monkeypox (Nextclade) + #- 'Monkeypox-virus_Zaire_AF-380138-1' # Monkeypox (Nextclade and Pangolin) + #- 'Monkeypox-virus_UK_MT-903345-1' # Monkeypox (Nextclade and Pangolin) #- 'Swinepox-virus_India_MW-036632-1' # Swinepox (Nextclade) #- 'Ebola-virus_Zaire_AF-272001-1' # Ebola (na) #- 'Ebola-virus_Sudan_MH-121162-1' # Ebola (Nextclade) @@ -142,7 +142,7 @@ conda: frontend: # Conda frontend (default: mamba) # Available options, choose one: - 'mamba' # mamba (faster) - #- 'conda' # conda + #- 'conda' # conda (iTrop) osx: # Conda OSX environement yaml files: snakemake_base: '../environments/osx/snakemake-base_v.2023.04.yaml' # Snakemake-Base ver. 2023.04 gevarli_tools: '../environments/osx/gevarli-tools_v.2023.04.yaml' # GeVarLi-Tools ver. 2023.04 -- GitLab