diff --git a/Start_RQC.sh b/Start_RQC.sh index 32d3fa6aa29af0e42d9696c05de51b67d3e50ee7..0682d5d4c36c6644c34782f1adaba01028d25caa 100755 --- a/Start_RQC.sh +++ b/Start_RQC.sh @@ -1,14 +1,15 @@ #!/bin/bash -rqc_version="v.2022.11" +snakemake_base_version="v.2023.02" +rqc_version="v.2023.03" ###I###R###D######U###2###3###3#######T###R###A###N###S###V###I###H###M###I#### # Name ___________________ Start_RQC.sh -# Version ________________ v.2022.11 +# Version ________________ v.2023.03 # Author _________________ Nicolas Fernandez # Affiliation ____________ IRD_U233_TransVIHMI # Aim ____________________ Bash script running quality_control.smk snakefile # Date ___________________ 2021.10.12 -# Latest modifications ___ 2022.11.18 +# Latest modifications ___ 2023.03.01 # Use ____________________ bash Start_RQC.sh ############################################################################### @@ -32,7 +33,7 @@ ${blue}Author${nc} _________________ Nicolas Fernandez ${blue}Affiliation${nc} ____________ IRD_U233_TransVIHMI ${blue}Aim${nc} ____________________ Bash script for ${red}R${nc}eads ${red}Q${nc}quality ${red}C${nc}ontrol ${blue}Date${nc} ___________________ 2021.10.12 -${blue}Latest modifications${nc} ___ 2022.11.18 +${blue}Latest modifications${nc} ___ 2023.03.01 ${blue}Run${nc} ____________________ bash Start_RQC.sh " @@ -106,22 +107,29 @@ ${green}#####${nc} ${red}SETTINGS${nc} ${green}#####${nc} ${green}--------------------${nc} " -workdir=$(cd "$(dirname "${BASH_SOURCE[0]}" )" && pwd) # Get working directory -fastq=$(expr $(ls -l ${workdir}/resources/reads/*.fastq.gz | wc -l)) # Get fastq.gz files count -samples=$(expr ${fastq} \/ 2) # {fastq.gz count} / 2 = samples count (paired-end) -conda_version=$(conda --version | sed "s/conda //") # Get conda version -snakemake_version=$(grep -o -E "snakemake_version: '.+'" ${workdir}/config/config.yaml | \ - sed "s/snakemake_version: //" | sed "s/'//g") # Get snakemake version -conda_frontend=$(grep -o -E "conda_frontend: '.+'" ${workdir}/config/config.yaml | \ - sed "s/conda_frontend: //" | sed "s/'//g") # Get conda frontend -max_threads=$(grep -o -E "cpus: [0-9]+" ${workdir}/config/config.yaml | sed "s/cpus: //") # Get user config for max threads -max_memory=$(grep -o -E "ram: [0-9]+" ${workdir}/config/config.yaml | sed "s/ram: //") # Get user config for max memory -memory_per_job=$(expr ${max_memory} \/ ${max_threads}) # Calcul maximum memory usage per job -mapper=$(grep -o -E "mapper: '.+'" ${workdir}/config/config.yaml | \ - sed "s/mapper: //" | sed "s/'//g") # Get user config mapper -subset=$(grep -o -E "subset: [0-9]+" ${workdir}/config/config.yaml | sed "s/subset: //") # Get user config subset -time_stamp_start=$(date +"%Y-%m-%d %H:%M") # Get analyzes starting time -SECONDS=0 # Initialize SECONDS counter +workdir=$(cd "$(dirname "${BASH_SOURCE[0]}" )" && pwd) # Get working directory +config_file="${workdir}/configuration/config.yaml" # Get configuration file +fastq=$(expr $(ls -l ${workdir}/resources/reads/*.fastq.gz | wc -l)) # Get fastq.gz files count + +if [[ "${fastq}" == "0" ]] # Start GeVarLi with at least 1 sample +then + echo -e "${red}¡${nc} No fastq file detected in ${ylo}resources/reads/${nc} ${red}!${nc} +${red}SARS-CoV-2${nc} ${ylo}resources/data_test/${nc} fastq will be used as sample example +" + cp ${workdir}/resources/data_test/*.fastq.gz ${workdir}/resources/reads/ # use data_test/*.fastq.gz +fi + +samples=$(expr ${fastq} \/ 2) # {fastq.gz count} / 2 = samples count (paired-end) +conda_version=$(conda --version | sed "s/conda //") # Get conda version +snakemake_version=$(grep -o -E "snakemake_version: '.+'" ${config_file} | sed "s/snakemake_version: //" | sed "s/'//g") # Get snakemake version +conda_frontend=$(grep -o -E "frontend: '.+'" ${config_file} | sed "s/frontend: //" | sed "s/'//g") # Get conda frontend +max_threads=$(grep -o -E "cpus: [0-9]+" ${config_file} | sed "s/cpus: //") # Get user config for max threads +max_memory=$(grep -o -E "ram: [0-9]+" ${config_file} | sed "s/ram: //") # Get user config for max memory +memory_per_job=$(expr ${max_memory} \/ ${max_threads}) # Calcul maximum memory usage per job +aligner=$(grep -o -E "aligner: '.+'" ${config_file} | sed "s/aligner: //" | sed "s/'//g") # Get user config aligner +time_stamp_start=$(date +"%Y-%m-%d %H:%M") # Get analyzes starting time +time_stamp_archive=$(date +"%Y-%m-%d_%Hh%M") # Get analyzes time to archive (wo space) +SECONDS=0 # Initialize SECONDS counter # Print some analyzes settings echo -e " @@ -136,38 +144,41 @@ ${blue}max Threads${nc} ____________ ${red}${max_threads}${nc} of ${ylo}${logica ${blue}max Memory${nc} _____________ ${red}${max_memory}${nc} of ${ylo}${ram_gb}${nc} Gb available ${blue}job Memory${nc} _____________ ${red}${memory_per_job}${nc} Gb per job -${blue}Mapper${nc} _________________ ${ylo}${mapper}${nc} -${blue}Subet${nc} __________________ ${ylo}${subset}${nc} reads +${blue}Aligner${nc} ________________ ${ylo}${aligner}${nc} ${blue}Start time${nc} _____________ ${time_stamp_start} " ############################################################################### -###### RQC Base Conda Environment Installation ###### +###### Snakemake Installation ###### echo -e " ${green}------------------------------------------------------------------------${nc} -${green}#####${nc} ${red}RQC-BASE CONDA ENVIRONMENT INSTALLATION${nc} ${green}#####${nc} -${green}---------------------------------------------------${nc} +${green}#####${nc} ${red}SNAKEMAKE-BASE INSTALLATION${nc} ${green}#####${nc} +${green}---------------------------------------${nc} " -# Test if 'rqc-base' environment exist -if [[ $(conda info --envs | grep -o -E "^rqc-base_${rqc_version}") ]] +# Test if latest 'snakemake-base' environment exist +if [[ $(conda info --envs | grep -o -E "^snakemake-base_${snakemake_base_version}") ]] then - echo -e " -Conda environment ${ylo}rqc-base_${rqc_version}${nc} it's already created + echo -e " +Conda environment ${ylo}snakemake-base_${snakemake_base_version}${nc} it's already created! " else - echo -e " -Conda environment ${ylo}rqc-base_${rqc_version}${nc} will be now created + echo -e " +Conda environment ${red}snakemake-base${nc} ${ylo}${snakemake_base_version}${nc} will be now created, with: + +# ${red}Mamba${nc} ver. ${ylo}1.0.0${nc} (to create snakemake-conda's environments faster) +# ${red}Snakemake${nc} ver. ${ylo}7.18.1${nc} (to run GeVarLi) +# ${red}Rename${nc} ver. ${ylo}1.601${nc} (to rename fastq files) +# ${red}Graphviz${nc} ver. ${ylo}6.0.1${nc} (to dot snakemake DAG) " - # Create a 'rqc-base' environment, with : - # Mamba ver. 1.0.0 (to create snakemake-conda's environments faster) - # Snakemake ver. 7.18.1 (to run RQC) - # Rename ver. 1.601 (to rename fastq files) - # Graphviz ver. 6.0.1 (to dot snakemake DAG) - conda env create -f ${workdir}/workflow/envs/${os}/rqc-base_${rqc_version}.yaml + conda env create -f ${workdir}/workflow/environments/${os}/snakemake-base_${snakemake_base_version}.yaml fi +# Remove old 'gevarli-base' environment +conda env remove --name rqc-base_v.2022.11 + + ############################################################################### ###### Conda Env. Activation ###### echo -e " @@ -176,10 +187,12 @@ ${green}#####${nc} ${red}CONDA ACTIVATION${nc} ${green}#####${nc} ${green}----------------------------${nc} " +echo -e "conda activate ${red}snakemake-base${nc} ${ylo}${snakemake_base_version}${nc}" + # intern shell source conda source ~/miniconda3/etc/profile.d/conda.sh 2> /dev/null # local user source /usr/local/miniconda3/etc/profile.d/conda.sh 2> /dev/null # HPC server -conda activate rqc-base_${rqc_version} +conda activate snakemake-base_${snakemake_base_version} ############################################################################### ###### Rename samples ###### @@ -189,12 +202,18 @@ ${green}#####${nc} ${red}RENAME FASTQ FILES${nc} ${green}#####${nc} ${green}------------------------------${nc} " -# Rename fastq files to remove "_001" Illumina pattern. -## De/comment (#) if you want keep Illumina barcode-ID and/or Illumina line-ID -#rename "s/_S\d+_/_/" ${workdir}/resources/reads/*.fastq.gz 2> /dev/null # Remove barcode-ID like {_S001_} -#rename "s/_L\d+_/_/" ${workdir}/resources/reads/*.fastq.gz 2> /dev/null # Remove line-ID ID like {_L001_} +# Rename fastq files to remove "_001" Illumina pattern (mandatory) +## De/comment line (#) if you want keep Illumina barcode-ID and/or Illumina line-ID +echo -e "Removing ${red}'_S'${nc} index tag ID" +rename "s/_S\d+_/_/" ${workdir}/resources/reads/*.fastq.gz 2> /dev/null # Remove barcode-ID like {_S001_} +echo -e "Removing ${red}'_L'${nc} line tag ID" +rename "s/_L\d+_/_/" ${workdir}/resources/reads/*.fastq.gz 2> /dev/null # Remove line-ID ID like {_L001_} +echo -e "Removing ${red}'_001'${nc} illumina tag ID" rename "s/_001.fastq.gz/.fastq.gz/" ${workdir}/resources/reads/*.fastq.gz 2> /dev/null # Remove end-name ID like {_001}.fastq.gz +echo -e " +If you want to keep Illumina ${blue}barcode-ID${nc} and/or Illumina ${blue}line-ID${nc}, please edit ${ylo}Start_GeVarLi.sh${nc} script (l.199). +" ############################################################################### ###### Call snakemake pipelines ###### @@ -220,7 +239,7 @@ for snakefile in ${snakefile_list} ; do echo -e "${blue}-- ${snakefile} --${nc}" ; snakemake \ --directory ${workdir}/ \ - --snakefile ${workdir}/workflow/rules/${snakefile}.smk \ + --snakefile ${workdir}/workflow/snakefiles/${snakefile}.smk \ --config os=${os} \ --rerun-incomplete \ --unlock ; @@ -241,7 +260,7 @@ for snakefile in ${snakefile_list} ; do echo -e "${blue}-- ${snakefile} --${nc}" ; snakemake \ --directory ${workdir}/ \ - --snakefile ${workdir}/workflow/rules/${snakefile}.smk \ + --snakefile ${workdir}/workflow/snakefiles/${snakefile}.smk \ --cores ${max_threads} \ --config os=${os} \ --rerun-incomplete \ @@ -266,7 +285,7 @@ for snakefile in ${snakefile_list} ; do echo -e "${blue}-- ${snakefile} --${nc}" ; snakemake \ --directory ${workdir}/ \ - --snakefile ${workdir}/workflow/rules/${snakefile}.smk \ + --snakefile ${workdir}/workflow/snakefiles/${snakefile}.smk \ --cores ${max_threads} \ --config os=${os} \ --rerun-incomplete \ @@ -295,7 +314,7 @@ for snakefile in ${snakefile_list} ; do echo -e "${blue}-- ${snakefile} --${nc}" ; snakemake \ --directory ${workdir}/ \ - --snakefile ${workdir}/workflow/rules/${snakefile}.smk \ + --snakefile ${workdir}/workflow/snakefiles/${snakefile}.smk \ --cores ${max_threads}\ --config os=${os} \ --rerun-incomplete \ @@ -326,7 +345,7 @@ for snakefile in ${snakefile_list} ; do echo -e "${blue}-- ${snakefile} --${nc}" ; snakemake \ --directory ${workdir}/ \ - --snakefile ${workdir}/workflow/rules/${snakefile}.smk \ + --snakefile ${workdir}/workflow/snakefiles/${snakefile}.smk \ --cores ${max_threads} \ --max-threads ${max_threads} \ --config os=${os} \ @@ -362,7 +381,7 @@ for snakefile in ${snakefile_list} ; do for extention in ${extention_list} ; do snakemake \ --directory ${workdir}/ \ - --snakefile ${workdir}/workflow/rules/${snakefile}.smk \ + --snakefile ${workdir}/workflow/snakefiles/${snakefile}.smk \ --${graph} \ | dot -T${extention} \ 2> /dev/null \ @@ -374,7 +393,7 @@ done for snakefile in ${snakefile_list} ; do snakemake \ --directory ${workdir} \ - --snakefile ${workdir}/workflow/rules/${snakefile}.smk \ + --snakefile ${workdir}/workflow/snakefiles/${snakefile}.smk \ --summary > ${workdir}/results/10_Reports/files-summaries/${snakefile}_files-summary.txt \ 2> /dev/null ; done @@ -420,7 +439,7 @@ Author _________________ Nicolas Fernandez Affiliation ____________ IRD_U233_TransVIHMI Aim ____________________ Bash script for RQC Date ___________________ 2021.10.12 -Latest modifications ___ 2022.11.18 +Latest modifications ___ 2023.03.01 Run ____________________ bash Start_RQC.sh Operating System _______ ${os} @@ -456,8 +475,9 @@ cd ${workdir}/results/ tar -zcf 10_Reports_archive.tar.gz 10_Reports # Gzip results directory +mkdir -p ${workdir}/archives/ 2> /dev/null cd ${workdir}/ -tar -zcf Results_${time_stamp_archive}_${reference}_${aligner}-${min_cov}X_${samples}sp_archive.tar.gz results +tar -zcf archives/Results_${time_stamp_archive}_${samples}sp_archive.tar.gz results/ echo -e " ${green}------------------------------------------------------------------------${nc} diff --git a/config/config.yaml b/configuration/config.yaml similarity index 59% rename from config/config.yaml rename to configuration/config.yaml index 3b57aeb3e5d9648c0221ec08c721d791f4e0305f..706fa49849760dab4281bf92dccca83641fca03d 100644 --- a/config/config.yaml +++ b/configuration/config.yaml @@ -1,12 +1,12 @@ --- ###I###R###D######U###2###3###3#######T###R###A###N###S###V###I###H###M###I#### # Name ___________________ config.yaml -# Version ________________ v.2022.11 +# Version ________________ v.2023.03 # Author _________________ Nicolas Fernandez # Affiliation ____________ IRD_U233_TransVIHMI # Aim ____________________ Configuration yaml file for quality_control.smk snakefile # Date ___________________ 2021.10.12 -# Latest modifications ___ 2022.11.18 +# Latest modifications ___ 2023.03.01 # Use ____________________ Edit default settings ############################################################################### @@ -21,41 +21,15 @@ resources: ## '.' # Local (i.e. GeVarLi root directory) ## '/scratch' # HPC (set it to match your HPC usage) -conda_frontend: 'mamba' # Conda frontend (conda or mamba) - -# conda_version: '22.9.0' # Conda version (upgrade from "4.12.0") -# mamba_version: '1.0.0' # Mamba version (faster than conda) -# snakemake_version: '7.18.1' # Snakemake version (upgrade from "6.12.3") -# rename_version: '1.601' # Rename version (to rename fastq files) -# graphviz_version: '6.0.1' # GraphViz version (to do dot snakemake DAG) - - -### ENVIRONNEMENTS ----------------------------------------------------------------------------------- -conda: - osx: # Conda OSX environement yaml files: - bowtie2: '../envs/osx/bowtie2_v.2.4.5.yaml' # Bowtie2 ver. 2.4.5 - bwa: '../envs/osx/bwa_v.0.7.17.yaml' # BWA ver. 0.7.17 - fastq-screen: '../envs/osx/fastq-screen_v.0.15.2.yaml' # Fastq-Screen ver. 0.15.2 - fastqc: '../envs/osx/fastqc_v.0.11.9.yaml' # FastQC ver. 0.11.9 - rqc-base: '../envs/osx/rqc-base_v.2022.11' # RQC-Base ver. 2022.11 - multiqc: '../envs/osx/multiqc_v.1.12.yaml' # MultiQC ver. 1.12 - linux: # Conda Linux environement yaml files: - bowtie2: '../envs/linux/bowtie2_v.2.4.5.yaml' # Bowtie2 ver. 2.4.5 - bwa: '../envs/linux/bwa_v.0.7.17.yaml' # BWA ver. 0.7.17 - fastq-screen: '../envs/linux/fastq-screen_v.0.15.2.yaml' # Fastq-Screen ver. 0.15.2 - fastqc: '../envs/linux/fastqc_v.0.11.9.yaml' # FastQC ver. 0.11.9 - rqc-base: '../envs/linux/rqc-base_v.2022.11' # RQC-Base ver. 2022.11 - multiqc: '../envs/linux/multiqc_v.1.12.yaml' # MultiQC ver. 1.12 - ### FASTQSCREEN -------------------------------------------------------------------------------------- fastq-screen: - config: 'config/fastq-screen_bwa.conf' # Path to the fastq-screen configuration file + config: 'configuration/fastq-screen_bwa.conf' # Path to the fastq-screen configuration file # Available options: - #- 'config/fastq-screen_bwa.conf' # Default fastq-screen configuration file for bwa users - #- 'config/fastq-screen_bowtie2.conf' # Default fastq-screen configuration file for bowtie2 users - #- 'config/fastq-screen_custom.conf' # Your cutom fastq-screen configuration file + #- 'configuration/fastq-screen_bwa.conf' # Default fastq-screen configuration file for bwa users + #- 'configuration/fastq-screen_bowtie2.conf' # Default fastq-screen configuration file for bowtie2 users + #- 'configuration/fastq-screen_custom.conf' # Your cutom fastq-screen configuration file subset: 10000 # Don't use whole sequences, but a subset reads (default: "1000") [INT] (0=all) - mapper: 'bwa' # Mapper used by fastq-screen for alignments (default "bwa") + aligner: 'bwa' # Mapper used by fastq-screen for alignments (default "bwa") # Available options: #- 'bwa' # Burrows-Wheeler Aligner (default, somme small genomes indexes provided, see "fastq-screen_bwa.conf") #- 'bowtie2' # Bowtie2 (somme small genomes indexes provided, see "fastq-screen_bowtie2.conf") @@ -84,6 +58,36 @@ bowtie2: #- '' # (default) #- '--large-index' # Force generated index to be "large", even if reference has fewer than 4 billion nucleotides +### ENVIRONNEMENTS ----------------------------------------------------------------------------------- + +# conda_version: '22.9.0' # Conda version (upgrade from "4.12.0") +# mamba_version: '1.0.0' # Mamba version (faster than conda) +# snakemake_version: '7.18.1' # Snakemake version (upgrade from "6.12.3") +# rename_version: '1.601' # Rename version (to rename fastq files) +# graphviz_version: '6.0.1' # GraphViz version (to do dot snakemake DAG) + +conda: + frontend: 'mamba' # Conda frontend (conda or mamba) + # Available options: + #- 'mamba' # mamba (default) + #- 'conda' # conda + osx: # Conda OSX environement yaml files: + snakemake-base: '../environments/osx/snakemake-base_v.2023.02.yaml' # Snakemake-Base ver. 2023.02 + #rqc-tools: '../environments/osx/rqc-tools_v.2023.03' # RQC-Tools ver. 2023.03 + bowtie2: '../environments/osx/bowtie2_v.2.4.5.yaml' # Bowtie2 ver. 2.4.5 + bwa: '../environments/osx/bwa_v.0.7.17.yaml' # BWA ver. 0.7.17 + fastq-screen: '../environments/osx/fastq-screen_v.0.15.2.yaml' # Fastq-Screen ver. 0.15.2 + fastqc: '../environments/osx/fastqc_v.0.11.9.yaml' # FastQC ver. 0.11.9 + multiqc: '../environments/osx/multiqc_v.1.12.yaml' # MultiQC ver. 1.12 + linux: # Conda Linux environement yaml files: + snakemake-base: '../environments/linux/snakemake-base_v.2023.02.yaml' # Snakemake-Base ver. 2023.02 + #rqc-tools: '../environments/linux/rqc-tools_v.2023.03' # RQC-Tools ver. 2023.03 + bowtie2: '../environments/linux/bowtie2_v.2.4.5.yaml' # Bowtie2 ver. 2.4.5 + bwa: '../environments/linux/bwa_v.0.7.17.yaml' # BWA ver. 0.7.17 + fastq-screen: '../environments/linux/fastq-screen_v.0.15.2.yaml' # Fastq-Screen ver. 0.15.2 + fastqc: '../environments/linux/fastqc_v.0.11.9.yaml' # FastQC ver. 0.11.9 + multiqc: '../environments/linux/multiqc_v.1.12.yaml' # MultiQC ver. 1.12 + ### OPERATING-SYSTEM --------------------------------------------------------------------------------- os: 'osx' # Operating System (default: "osx") # Available options: diff --git a/config/fastq-screen_bowtie2.conf b/configuration/fastq-screen_bowtie2.conf similarity index 100% rename from config/fastq-screen_bowtie2.conf rename to configuration/fastq-screen_bowtie2.conf diff --git a/config/fastq-screen_bwa.conf b/configuration/fastq-screen_bwa.conf similarity index 78% rename from config/fastq-screen_bwa.conf rename to configuration/fastq-screen_bwa.conf index ecdbcac5b97dc4738a3e954023ef5da275b1eaa1..05134f52c6a478d31176331b8f7038ecc3b84009 100644 --- a/config/fastq-screen_bwa.conf +++ b/configuration/fastq-screen_bwa.conf @@ -26,13 +26,13 @@ DATABASE Vectors resources/indexes/bwa/UniVec_wo_phiX_and_kanamycin #### Putative Hosts (Large genomes) ----- ### Gorilla g4 -#DATABASE Gorilla resources/indexes/bwa/Gorilla-gorilla_g4 +DATABASE Gorilla resources/indexes/bwa/Gorilla-gorilla_g4 ### Chimpanzee t3 -#DATABASE Chimpanzee resources/indexes/bwa/Pan_troglodytes_t3 +DATABASE Chimpanzee resources/indexes/bwa/Pan_troglodytes_t3 ### Human - Homo-sapiens_h38_NC-000001.11 -#DATABASE Human resources/indexes/bwa/Homo-sapiens_h38_NC000001-11 +DATABASE Human resources/indexes/bwa/Homo-sapiens_h38_NC000001-11 ### Bat v1 -#DATABASE Bat resources/indexes/bwa/Pteropus-vampyrus_v1 +DATABASE Bat resources/indexes/bwa/Pteropus-vampyrus_v1 #### Main Viruses (Small genomes) ----- @@ -52,18 +52,18 @@ DATABASE HIV resources/indexes/bwa/HIV-1_HXB2_K03455-1 ### E. coli (Bacteria) - QC_Echerichia-coli_CP060121.1 DATABASE Ecoli resources/indexes/bwa/Echerichia-coli_CP060121-1 ### S. cerevisiae (Yeast) - sequence -#DATABASE Scerevisiae resources/indexes/bwa/Saccharomyces-cerevisiae +DATABASE Scerevisiae resources/indexes/bwa/Saccharomyces-cerevisiae ### C. elegans (Nematode) - sequence -#DATABASE Celegans resources/indexes/bwa/Caenorhabditis-elegans +DATABASE Celegans resources/indexes/bwa/Caenorhabditis-elegans ### D. melanogaster (Fruit fly) - sequence -#DATABASE Dmelanogaster resources/indexes/bwa/Drosophila-melanogaster +DATABASE Dmelanogaster resources/indexes/bwa/Drosophila-melanogaster ### D. rerio (Zebrafish) - sequence -#DATABASE Drerio resources/indexes/bwa/Danio-rerio +DATABASE Drerio resources/indexes/bwa/Danio-rerio ### X. laevis (Xenope) - sequence -#DATABASE Xlaevis resources/indexes/bwa/Xenopus-laevis +DATABASE Xlaevis resources/indexes/bwa/Xenopus-laevis ### G. gallus - sequence -#DATABASE Ggallus ressources/indexes/bwa/Gallus-gallus +DATABASE Ggallus ressources/indexes/bwa/Gallus-gallus ### Mouse m39 -#DATABASE Mouse resources/indexes/bwa/Mus-musculus_m39 +DATABASE Mouse resources/indexes/bwa/Mus-musculus_m39 ### Arabidopsis t10 (Plant) - sequence from NCBI (ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/735/GCF_000001735.3_TAIR10/GCF_000001735.3_TAIR10_genomic.fna.gz) -#DATABASE Arabidopsis resources/indexes/bwa/Arabidopsis-thaliana_t10_NC003070-9 +DATABASE Arabidopsis resources/indexes/bwa/Arabidopsis-thaliana_t10_NC003070-9 diff --git a/resources/data_test/.gitkeep b/resources/data_test/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..f13c79cb973f7529714edbaff5ce25450f3cf7ff --- /dev/null +++ b/resources/data_test/.gitkeep @@ -0,0 +1 @@ +Git: keep this non empty file! \ No newline at end of file diff --git a/resources/data_test/SARS-CoV-2_Omicron-BA-1-1_Covid-Seq-Lib-on-MiSeq_250000-reads_R1.fastq.gz b/resources/data_test/SARS-CoV-2_Omicron-BA-1-1_Covid-Seq-Lib-on-MiSeq_250000-reads_R1.fastq.gz new file mode 100644 index 0000000000000000000000000000000000000000..efda6523124e705811d70e9ee78e11f542bdd582 Binary files /dev/null and b/resources/data_test/SARS-CoV-2_Omicron-BA-1-1_Covid-Seq-Lib-on-MiSeq_250000-reads_R1.fastq.gz differ diff --git a/resources/data_test/SARS-CoV-2_Omicron-BA-1-1_Covid-Seq-Lib-on-MiSeq_250000-reads_R2.fastq.gz b/resources/data_test/SARS-CoV-2_Omicron-BA-1-1_Covid-Seq-Lib-on-MiSeq_250000-reads_R2.fastq.gz new file mode 100644 index 0000000000000000000000000000000000000000..852cc3f5735a9b9c2628b78f12b8e1f2719d7d6f Binary files /dev/null and b/resources/data_test/SARS-CoV-2_Omicron-BA-1-1_Covid-Seq-Lib-on-MiSeq_250000-reads_R2.fastq.gz differ diff --git a/workflow/envs/linux/bowtie2_v.2.4.5.yaml b/workflow/environments/linux/bowtie2_v.2.4.5.yaml similarity index 100% rename from workflow/envs/linux/bowtie2_v.2.4.5.yaml rename to workflow/environments/linux/bowtie2_v.2.4.5.yaml diff --git a/workflow/envs/linux/bwa_v.0.7.17.yaml b/workflow/environments/linux/bwa_v.0.7.17.yaml similarity index 100% rename from workflow/envs/linux/bwa_v.0.7.17.yaml rename to workflow/environments/linux/bwa_v.0.7.17.yaml diff --git a/workflow/envs/linux/fastq-screen_v.0.15.2.yaml b/workflow/environments/linux/fastq-screen_v.0.15.2.yaml similarity index 100% rename from workflow/envs/linux/fastq-screen_v.0.15.2.yaml rename to workflow/environments/linux/fastq-screen_v.0.15.2.yaml diff --git a/workflow/envs/linux/fastqc_v.0.11.9.yaml b/workflow/environments/linux/fastqc_v.0.11.9.yaml similarity index 100% rename from workflow/envs/linux/fastqc_v.0.11.9.yaml rename to workflow/environments/linux/fastqc_v.0.11.9.yaml diff --git a/workflow/envs/linux/multiqc_v.1.12.yaml b/workflow/environments/linux/multiqc_v.1.12.yaml similarity index 100% rename from workflow/envs/linux/multiqc_v.1.12.yaml rename to workflow/environments/linux/multiqc_v.1.12.yaml diff --git a/workflow/envs/linux/rqc-base_v.2022.11.yaml b/workflow/environments/linux/snakemake-base_v.2023.02.yaml similarity index 99% rename from workflow/envs/linux/rqc-base_v.2022.11.yaml rename to workflow/environments/linux/snakemake-base_v.2023.02.yaml index 4fa951d8536984a62a3dfc887099b1b0de18d22b..1fdbaa7f5f6bef1bc50ec3873c97ff980f8bcc96 100644 --- a/workflow/envs/linux/rqc-base_v.2022.11.yaml +++ b/workflow/environments/linux/snakemake-base_v.2023.02.yaml @@ -1,4 +1,4 @@ -name: rqc-base_v.2022.11 +name: snakemake-base_v.2023.02 channels: - bioconda - conda-forge diff --git a/workflow/envs/osx/bowtie2_v.2.4.5.yaml b/workflow/environments/osx/bowtie2_v.2.4.5.yaml similarity index 100% rename from workflow/envs/osx/bowtie2_v.2.4.5.yaml rename to workflow/environments/osx/bowtie2_v.2.4.5.yaml diff --git a/workflow/envs/osx/bwa_v.0.7.17.yaml b/workflow/environments/osx/bwa_v.0.7.17.yaml similarity index 100% rename from workflow/envs/osx/bwa_v.0.7.17.yaml rename to workflow/environments/osx/bwa_v.0.7.17.yaml diff --git a/workflow/envs/osx/fastq-screen_v.0.15.2.yaml b/workflow/environments/osx/fastq-screen_v.0.15.2.yaml similarity index 100% rename from workflow/envs/osx/fastq-screen_v.0.15.2.yaml rename to workflow/environments/osx/fastq-screen_v.0.15.2.yaml diff --git a/workflow/envs/osx/fastqc_v.0.11.9.yaml b/workflow/environments/osx/fastqc_v.0.11.9.yaml similarity index 100% rename from workflow/envs/osx/fastqc_v.0.11.9.yaml rename to workflow/environments/osx/fastqc_v.0.11.9.yaml diff --git a/workflow/envs/osx/multiqc_v.1.12.yaml b/workflow/environments/osx/multiqc_v.1.12.yaml similarity index 100% rename from workflow/envs/osx/multiqc_v.1.12.yaml rename to workflow/environments/osx/multiqc_v.1.12.yaml diff --git a/workflow/envs/osx/rqc-base_v.2022.11.yaml b/workflow/environments/osx/snakemake-base_v.2023.02.yaml similarity index 99% rename from workflow/envs/osx/rqc-base_v.2022.11.yaml rename to workflow/environments/osx/snakemake-base_v.2023.02.yaml index 876cff05f607863c51bf12cf4eef6f9695f84cae..fdb030f2764c252707b1b8a35c2dec6e2af21926 100644 --- a/workflow/envs/osx/rqc-base_v.2022.11.yaml +++ b/workflow/environments/osx/snakemake-base_v.2023.02.yaml @@ -1,4 +1,4 @@ -name: rqc-base_v.2022.11 +name: snakemake-base_v.2023.02 channels: - bioconda - conda-forge diff --git a/workflow/rules/indexing_genomes.smk b/workflow/snakefiles/indexing_genomes.smk similarity index 98% rename from workflow/rules/indexing_genomes.smk rename to workflow/snakefiles/indexing_genomes.smk index d496a05d5725846a2918c78e905a08337f34a263..33e428bdeb4dabd7fa733d1bb7fb60680b88a2e3 100755 --- a/workflow/rules/indexing_genomes.smk +++ b/workflow/snakefiles/indexing_genomes.smk @@ -4,13 +4,13 @@ # Affiliation ____________ IRD_U233_TransVIHMI # Aim ____________________ Snakefile with indexing genomes rules # Date ___________________ 2022.09.28 -# Latest modifications ___ 2022.11.18 +# Latest modifications ___ 2023.03.01 # Use ____________________ snakemake -s indexing_genomes.smk --use-conda ############################################################################### ###### CONFIGURATION ###### -configfile: "config/config.yaml" +configfile: "configuration/config.yaml" ############################################################################### ###### FUNCTIONS ###### diff --git a/workflow/rules/quality_control.smk b/workflow/snakefiles/quality_control.smk similarity index 93% rename from workflow/rules/quality_control.smk rename to workflow/snakefiles/quality_control.smk index f65e0cc26d9ee81d16868b880d2b586b95fdd294..91d283e2905037938b667d3b5d0091436c5eea11 100755 --- a/workflow/rules/quality_control.smk +++ b/workflow/snakefiles/quality_control.smk @@ -4,13 +4,13 @@ # Affiliation ____________ IRD_U233_TransVIHMI # Aim ____________________ Snakefile with quality control rules # Date ___________________ 2021.09.28 -# Latest modifications ___ 2022.11.18 +# Latest modifications ___ 2023.03.01 # Run ____________________ snakemake -s quality_control.smk --use-conda ############################################################################### ###### CONFIGURATION ###### -configfile: "config/config.yaml" +configfile: "configuration/config.yaml" ############################################################################### ###### FUNCTIONS ###### @@ -38,9 +38,9 @@ MULTIQC = config["conda"][OS]["multiqc"] # MultiQC ############################################################################### ###### PARAMETERS ###### -CONFIG = config["fastq-screen"]["config"] # Fastq-screen --conf -MAPPER = config["fastq-screen"]["mapper"] # Fastq-screen --aligner -SUBSET = config["fastq-screen"]["subset"] # Fastq-screen --subset +CONFIG = config["fastq-screen"]["config"] # Fastq-screen --conf +ALIGNER = config["fastq-screen"]["aligner"] # Fastq-screen --aligner +SUBSET = config["fastq-screen"]["subset"] # Fastq-screen --subset ############################################################################### @@ -86,7 +86,7 @@ rule fastqscreen_contamination_checking: cpus = CPUS params: config = CONFIG, - mapper = MAPPER, + aligner = ALIGNER, subset = SUBSET input: fastq = "resources/reads/" @@ -99,7 +99,7 @@ rule fastqscreen_contamination_checking: "-q " # --quiet: Only show log warning "--threads {resources.cpus} " # --threads: Specifies across how many threads bowtie will be allowed to run "--conf {params.config} " # path to configuration file - "--aligner {params.mapper} " # -a: choose aligner 'bowtie', 'bowtie2', 'bwa' + "--aligner {params.aligner} " # -a: choose aligner 'bowtie', 'bowtie2', 'bwa' "--subset {params.subset} " # Don't use the whole sequence file, but create a subset of specified size "--outdir {output.fastqscreen} " # Output directory "{input.fastq}/*.fastq.gz " # Input file.fastq