From 5d2f61d94aabdedbf92131d7883c3cf4b419db81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20FERNANDEZ=20NU=C3=91EZ?= <nicolas.fernandez@ird.fr> Date: Fri, 21 Mar 2025 11:02:32 +0100 Subject: [PATCH] v.2025.03 --- Run_GeVarLi.sh | 426 ++++-------------- Snakemake: | 0 version.txt => VERSION_temp.txt | 0 config/config.yaml | 15 +- workflow/Snakefile | 18 +- workflow/envs/workflow-core.yaml | 2 +- workflow/envs/yq_v.3.4.3.yaml | 7 + workflow/rules/symlinks_renamming.smk | 22 +- workflow/rules/workflow_reporting.smk | 155 ++++++- workflow/schemas/config.schema.yaml | 378 ++++++++++++++++ .../__pycache__/functions.cpython-312.pyc | Bin 6219 -> 6453 bytes workflow/scripts/functions.py | 6 +- .../scripts/results/10_Reports/settings.log | 1 + workflow/scripts/settings.sh | 196 ++++++++ 14 files changed, 857 insertions(+), 369 deletions(-) create mode 100644 Snakemake: rename version.txt => VERSION_temp.txt (100%) create mode 100644 workflow/envs/yq_v.3.4.3.yaml create mode 100644 workflow/scripts/results/10_Reports/settings.log create mode 100644 workflow/scripts/settings.sh diff --git a/Run_GeVarLi.sh b/Run_GeVarLi.sh index 2be7fdc..350b527 100755 --- a/Run_GeVarLi.sh +++ b/Run_GeVarLi.sh @@ -13,7 +13,7 @@ # Version ________________ v.2025.03 # Author _________________ Nicolas Fernandez # Affiliation ____________ IRD_U233_TransVIHMI -# Aim ____________________ Bash script running gevarli.smk snakefile +# Aim ____________________ Bash script running GeVarLi snakefile # Date ___________________ 2021.10.12 # Latest modifications ___ 2025.03.12 # Use ____________________ '. 
Run_GeVarLi.sh' @@ -24,55 +24,19 @@ ############# workdir=$(cd "$(dirname "${BASH_SOURCE[0]}" )" && pwd) # Get working directory -version=$(<${workdir}/version.txt) # Get version -test_dir=$(<${workdir}/.test/) # Get test directory - -############################################################################### -### OPERATING SYSTEM ### -######################## - -shell=$SHELL # Get shell - -# Get operating system -case "$OSTYPE" in - darwin*) os="osx" ;; - linux*) os="linux" ;; - bsd*) os="bsd" ;; - solaris*) os="solaris" ;; - msys*) os="windows" ;; - cygwin*) os="windows" ;; - *) os="unknown (${OSTYPE})" ;; -esac - -############################################################################### -### HARDWARE ### -################ - -if [[ ${os} == "osx" ]] -then - model_name=$(sysctl -n machdep.cpu.brand_string) # Get chip model name - physical_cpu=$(sysctl -n hw.physicalcpu) # Get physical cpu - logical_cpu=$(sysctl -n hw.logicalcpu) # Get logical cpu - mem_size=$(sysctl -n hw.memsize) # Get memory size (bit) - ram_gb=$(expr ${mem_size} \/ $((1024**3))) # mem_size / 1024**3 = Gb -elif [[ ${os} == "linux" || ${os} == "bsd" || ${os} == "solaris" ]] -then - model_name=$(lscpu | grep -o -E "Model name: +.+" | sed -E "s/Model name: +//") # Get chip model name - physical_cpu=$(lscpu | grep -o -E "^CPU\(s\): +[0-9]+" | sed -E "s/CPU\(s\): +//") # Get physical cpu - threads_cpu=$(lscpu | grep -o -E "^Thread\(s\) per core: +[0-9]+" | sed -E "s/Thread\(s\) per core: +//") # Get thread(s) per core - logical_cpu=$(expr ${physical_cpu} \* ${threads_cpu}) # Calcul logical cpu - mem_size=$(grep -o -E "MemTotal: +[0-9]+" /proc/meminfo | sed -E "s/MemTotal: +//") # Get memory size (Kb) - ram_gb=$(expr ${mem_size} \/ $((1024**2))) # mem_size / 1024**2 = Gb -else - echo -e "Please, use an '${ylo}UNIX${nc}' operating system, like: '${red}linux${nc}', '${red}osx${nc}' or '${red}WSL${nc}'." 
- return 0 -fi +version=$(<${workdir}/VERSION_temp.txt) # Get version +blue="\033[1;34m" # blue +green="\033[1;32m" # green +red="\033[1;31m" # red +ylo="\033[1;33m" # yellow +nc="\033[0m" # no color ############################################################################### -### Network ### +### NETWORK ### ############### +# Test if network is online if ping -c 1 -W 5 google.com > /dev/null 2>&1 || \ ping -c 1 -W 5 cloudflare.com > /dev/null 2>&1 then @@ -81,7 +45,6 @@ else network="Offline" fi - ############################################################################### ### CONDA ### ############# @@ -89,29 +52,46 @@ fi # Test if a conda distribution already exist if [[ ! $(command -v conda) ]] then # If no, invitation message to install it - message_conda=" + echo -e " ${red}No Conda distribution found.${nc} ${blue}GeVarLi${nc} use the free and open-source package manager ${ylo}Conda${nc}. Read documentation at: ${green}https://transvihmi.pages.ird.fr/nfernandez/GeVarLi/en/pages/installations/${nc}" -else # If yes, informations message about it - message_conda=" -$(conda --version) -$(which conda) -$(conda config --show channels)" -fi - -# Intern shell source conda -source ~/miniforge3/etc/profile.d/conda.sh 2> /dev/null # local user with miniforge3 -source ~/mambaforge/etc/profile.d/conda.sh 2> /dev/null # local user with mambaforge ¡ Deprecated ! -source ~/miniconda3/etc/profile.d/conda.sh 2> /dev/null # local user with miniconda3 ¡ Deprecated ! -source /usr/local/bioinfo/miniconda3-23.10.0-1/etc/profile.d/conda.sh 2> /dev/null # iTROP HPC server (conda 23.11.0) - + return 0 +else # If yes, intern shell source conda + echo -e "\n ${green}Conda${nc} distribution found and sourced." + source ~/miniforge3/etc/profile.d/conda.sh 2> /dev/null # local user with miniforge3 + source ~/mambaforge/etc/profile.d/conda.sh 2> /dev/null # local user with mambaforge ¡ Deprecated ! 
+ source ~/miniconda3/etc/profile.d/conda.sh 2> /dev/null # local user with miniconda3 ¡ Deprecated ! + source /usr/local/bioinfo/miniconda3-23.10.0-1/etc/profile.d/conda.sh 2> /dev/null # iTROP HPC server (conda 23.11.0) ############################################################################### ### SPINNER ### ############### -bash ${workdir}/workflow/scripts/spinner2.sh > /dev/null 2>&1 +# Function to run a command with a spinner +run_with_spinner() { + ("$@" > /dev/null 2>&1) & + local pid=$! + disown $pid 2>/dev/null + + local spinner=( "â ‹" "â ™" "â ¹" "â ¸" "â ¼" "â ´" "â ¦" "â §" "â ‡" "â " ) + local i=0 + while kill -0 $pid 2>/dev/null; do + # Clear the line + printf "\r\033[K%s Please wait" "${spinner[$i]}" + i=$(( (i+1) % ${#spinner[@]} )) + sleep 0.1 + done + wait $pid + local exit_code=$? + # Clear the spinner line + printf "\r\033[K" + if [ $exit_code -eq 0 ]; then + echo "✔ Job done!" + else + echo "✖ Job failed with exit code $exit_code." + fi +} ############################################################################### ### WORKFLOW-CORE ### @@ -120,278 +100,91 @@ bash ${workdir}/workflow/scripts/spinner2.sh > /dev/null 2>&1 # Test if 'workflow-core' environment exist. if conda env list | grep -q "^workflow-core" then # If 'exist' - echo -e " -${ylo}Workflow-Core${nc} conda environment already created. -" - if [[ $network == "Online" ]] - then # If 'online' - echo -e "\r -Updating ${ylo}Workflow-Core${nc} environment. -" - #run_with_spinner \ - conda env update \ - --prune \ - --name workflow-core \ - --file ${workdir}/workflow/envs/workflow-core.yaml - #> /dev/null 2>&1 + echo -e "\n ${ylo}Workflow-Core${nc} conda environment already created." + # Test if 'workflow-core' environment is up-to-date. 
+ ENV_YAML="${workdir}/workflow/envs/workflow-core.yaml" + CURRENT_ENV=$(conda env export --no-builds --name workflow-core | grep -v '^prefix:') + EXPECTED_ENV=$(grep -v '^prefix:' "$ENV_YAML") + if false #diff <(echo "$CURRENT_ENV") <(echo "$EXPECTED_ENV") > /dev/null + then # If 'up-to-date' + echo -e "\n ${ylo}Workflow-Core${nc} environment is already up-to-date." + else # If 'not' up-to-date + if [[ $network == "Offline" ]] + then # If 'offline' + echo -e "\n Cannot update ${ylo}Workflow-Core${nc} environment. + ${green}Network${nc}: ${red}${network}${nc}." + else # If 'online' + echo -e "\n Updating ${ylo}Workflow-Core${nc} environment. \n" + run_with_spinner \ + conda env update \ + --prune \ + --name workflow-core \ + --file $ENV_YAML + fi fi else # If 'not' exist - echo -e " -${ylo}Workflow-Core${nc} conda environment not found. -" + echo -e "\n ${ylo}Workflow-Core${nc} conda environment not found." if [[ $network == "Online" ]] then # If 'online' - echo -e " -${ylo}Workflow-Core${nc} conda environment will be create, with: - + echo -e "\n ${ylo}Workflow-Core${nc} conda environment will be create, with: > ${red}Snakemake${nc} > ${red}Snakedeploy${nc} - > ${red}Snakemake Slurm plugin${nc} -" - #run_with_spinner \ - conda env create \ - --file ${workdir}/workflow/envs/workflow-core.yaml \ - --quiet \ - > /dev/null 2>&1 + > ${red}Snakemake Slurm plugin${nc} \n" + run_with_spinner \ + conda env create \ + --file ${workdir}/workflow/envs/workflow-core.yaml \ + --quiet + else # If 'offline' + echo -e "\n Cannot install ${ylo}Workflow-Core${nc} environment. + ${green}Network${nc}: ${red}${network}${nc}". fi fi +############################################################################### +### ACTIVATE WORKFLOW-CORE ### +############################### + # Active workflow-core conda environment. if conda env list | grep -q "^workflow-core" then + echo -e "\n Activate ${ylo}Workflow-Core${nc} conda environment." 
conda activate workflow-core else - echo -e " -${ylo}Workflow-Core${nc} conda environment not installed. -" - return 0 -fi - -############################################################################### -### CHECK CONFIGURATION ### -########################### - -conda_version=$(conda --version | sed 's/conda //') # Conda version -mamba_version=$(mamba --version | head -n 1 | sed 's/mamba //') # Mamba version -snakemake_version=$(snakemake --version) # Snakemake version - -config_file="${workdir}/config/config.yaml" # Get configuration file - -fastq_dir=$(yq -Mr '.fastq_dir' ${config_file}) # Get path to fastq files directory -fastq_files=$(/bin/ls -l ${fastq_dir}/*.fastq.gz 2> /dev/null | wc -l | sed 's/ //') # Get fastq.gz files count -fastq_R1=$(/bin/ls -l ${fastq_dir}/*R1*.fastq.gz 2> /dev/null | wc -l | sed 's/ //') # Get R1 fastq files count -fastq_R2=$(/bin/ls -l ${fastq_dir}/*R2*.fastq.gz 2> /dev/null | wc -l | sed 's/ //') # Get R2 fastq files count - -max_threads=$(yq -Mr '.resources.cpus' ${config_file}) # Get user config: max threads -max_memory=$(yq -Mr '.resources.ram' ${config_file}) # Get user config: max memory (Gb) - -qualities=$(yq -Mr '.modules.qualities' ${config_file}) # Reads QC -keeptrim=$(yq -Mr '.modules.keeptrim' ${config_file}) # Keep trimmed reads -cleapping=$(yq -Mr '.modules.cleapping' ${config_file}) # Reads cleapping -covstats=$(yq -Mr '.modules.covstats' ${config_file}) # Mapping coverage stats -consensus=$(yq -Mr '.modules.consensus' ${config_file}) # Consensus -lineages=$(yq -Mr '.modules.lineages' ${config_file}) # Lineages assignation -gisaid=$(yq -Mr '.modules.gisaid' ${config_file}) # Gisaid submission file - -# Get user config: genome reference -reference=$(yq -Mc '.consensus.reference' ${config_file} | sed 's/\[\"//' | sed 's/\"\]//' | sed 's/\"\,\"/ ; /g') -# Get user config: mapper -mapper=$(yq -Mc '.consensus.mapper' ${config_file} | sed 's/\[\"//' | sed 's/\"\]//' | sed 's/\"\,\"/ ; /g') -# Get user config: minimum 
coverage -min_cov=$(yq -Mc '.consensus.min_cov' ${config_file} | sed 's/\[\"//' | sed 's/\"\]//' | sed 's/\"\,\"/ ; /g') -# Get user config: minimum allele frequency -min_freq=$(yq -Mr '.consensus.min_freq' ${config_file}) -# Get user config: assigner tool -assigner=$(yq -Mc '.consensus.assigner' ${config_file} | sed 's/\[\"//' | sed 's/\"\]//' | sed 's/\"\,\"/ ; /g') -# Get user config: hard clipping option -hard_clipping=$(yq -Mr '.cutadapt.clipping' ${config_file}) -# Get user config: dataset for nextclade -nextclade_dataset=$(yq -Mc '.nextclade.dataset' ${config_file} | sed 's/\[\"//' | sed 's/\"\]//') -# Get user config: fastq_screen subsetption -fastqscreen_subset=$(yq -Mr '.fastq_screen.subset' ${config_file}) -# Get user config: cutadapt clipping -cutadapt_clipping=$(yq -Mr '.cutadapt.clipping' ${config_file}) - - -############################################################################### -### MESSAGE ### -############### - -# Timer -time_stamp_start=$(date +"%Y-%m-%d %H:%M") # Get system: analyzes starting time -time_stamp_archive=$(date +"%Y-%m-%d_%Hh%M") # Convert time for archive (wo space) -SECONDS=0 # Initialize SECONDS counter - -# Colors -blue="\033[1;34m" # blue -green="\033[1;32m" # green -red="\033[1;31m" # red -ylo="\033[1;33m" # yellow -nc="\033[0m" # no color - -# Message -message=" -${blue}------------------------------------------------------------------------${nc} -${blue}#####${nc} ${red}ABOUT${nc} ${blue}#####${nc} -${blue}-----------------${nc} - -${green}Name${nc} ___________________ Run_GeVarLi.sh -${green}Version${nc} ________________ ${ylo}${version}${nc} -${green}Author${nc} _________________ Nicolas Fernandez -${green}Affiliation${nc} ____________ IRD_U233_TransVIHMI -${green}Aim${nc} ____________________ Bash script running GeVarLi Snakefile -${green}Date${nc} ___________________ 2021.10.12 -${green}Latest modifications${nc} ___ 2025.03.12 -${green}Use${nc} ____________________ '${ylo}. 
Run_GeVarLi.sh${nc}' - -${blue}------------------------------------------------------------------------${nc} -${blue}#####${nc} ${red}Operating System${nc} ${blue}#####${nc} -${blue}----------------------------${nc} - -${green}Shell${nc} __________________ ${ylo}${shell}${nc} -${green}Operating system${nc} _______ ${red}${os}${nc} - -${blue}------------------------------------------------------------------------${nc} -${blue}#####${nc} ${red}Hardware${nc} ${blue}#####${nc} -${blue}--------------------${nc} - -${ylo}Brand(R)${nc} | ${ylo}Type(R)${nc} | ${ylo}Model${nc} | ${ylo}@ Speed GHz${nc} -${green}Chip Model Name${nc} ________ ${model_name} -${green}Physical CPUs${nc} __________ ${red}${physical_cpu}${nc} -${green}Logical CPUs${nc} ___________ ${red}${logical_cpu}${nc} threads -${green}System Memory${nc} __________ ${red}${ram_gb}${nc} Gb of RAM - -${blue}------------------------------------------------------------------------${nc} -${blue}#####${nc} ${red}Network${nc} ${blue}#####${nc} -${blue}-------------------${nc} - -${green}Network${nc} ________________ ${red}${network}${nc} - -${blue}------------------------------------------------------------------------${nc} -${blue}#####${nc} ${red}Conda${nc} ${blue}#####${nc} -${blue}-----------------${nc} - -${message_conda} - -${blue}------------------------------------------------------------------------${nc} -${blue}#####${nc} ${red}Workflow-Core${nc} ${blue}#####${nc} -${blue}-------------------------${nc} - -${green}GeVarLi${nc} use ${green}Snakemake${nc}, a workflow manager. -${green}Snakemake${nc} is provided into ${red}Workflow-Core${nc}, a ${green}Conda${nc} environment. - -You can remove old depreciated environements such as: 'gevarli-base', 'snakemake-base' or 'workflow-base'. -To list all your conda environments, you can run: '${ylo}conda env list${nc}'. -To remove old conda environments, you can run: '${ylo}conda remove --all --yes --name${nc} ${red}<ENV_NAME>${nc}'. 
- -${blue}------------------------------------------------------------------------${nc} -${blue}#####${nc} ${red}Configuration${nc} ${blue}#####${nc} -${blue}-------------------------${nc} - -${green}Starting time${nc} ______________ ${time_stamp_start} - -${green}Conda version${nc} ______________ ${ylo}${conda_version}${nc} -${green}Mamba version${nc} ______________ ${ylo}${mamba_version}${nc} -${green}Snakemake version${nc} __________ ${ylo}${snakemake_version}${nc} - -${green}Max threads${nc} ________________ ${red}${max_threads}${nc} of ${ylo}${logical_cpu}${nc} threads available -${green}Max memory${nc} _________________ ${red}${max_memory}${nc} of ${ylo}${ram_gb}${nc} Gb available -${green}Jobs memory${nc} ________________ ${red}${memory_per_job}${nc} Gb per job - -${green}Network${nc} ____________________ ${red}${network}${nc} - -${green}Working directory${nc} _________ '${ylo}${workdir}/${nc}' - -${green}Fastq directory${nc} ___________ '${ylo}${fastq_dir}${nc}' -${green} > Fastq processed${nc} ________ ${red}${fastq_files}${nc} fastq files -${green} > Forward reads${nc} __________ ${red}${fastq_R1}${nc} R1 -${green} > Reverse reads${nc} __________ ${red}${fastq_R2}${nc} R2 - -${blue}Modules:${nc} -${green} > Quality Control${nc} ________ ${red}${qualities}${nc} -${green} > Keep Trim${nc} ______________ ${red}${keeptrim}${nc} -${green} > Soft Clipping${nc} __________ ${red}${cleapping}${nc} -${green} > Cov Stats${nc} ______________ ${red}${covstats}${nc} -${green} > Consensus${nc} ______________ ${red}${consensus}${nc} -${green} > Lineages${nc} _______________ ${red}${lineages}${nc} -${green} > Gisaid${nc} _________________ ${red}${gisaid}${nc} - -${blue}Params:${nc} -${green} > Reference genome${nc} _______ ${ylo}${reference}${nc} -${green} > Mapper${nc} ________________ ${ylo}${mapper}${nc} -${green} > Min coverage${nc} ___________ ${red}${min_cov}${nc} X -${green} > Min allele frequency${nc} ___ ${red}${min_freq}${nc} -${green} > Assigner${nc} _ 
${red}${assigner}${nc} -${green} - Nextclade dataset${nc} _____ ${red}${nextclade_dataset}${nc} -${green} > Fastq-Screen subset${nc} ____ ${red}${fastqscreen_subset}${nc} -${green} > Cutadapt clipping${nc} ______ ${red}${cutadapt_clipping} nt${nc} -" - -# Print settings message -echo -e "${message}" - -# Log settings message -mkdir -p ${workdir}/results/10_Reports/ 2> /dev/null - -echo -e "${message}" \ - | sed "s/\x1B\[[0-9;]*[mK]//g" \ - > ${workdir}/results/10_Reports/settings.log - -mkdir -p ${workdir}/results/10_Reports/envs/ 2> /dev/null -cp ${workdir}/workflow/envs/*.yaml ${workdir}/results/10_Reports/envs/ -cp ${config_file} ${workdir}/results/10_Reports/config.log 2> /dev/null - - -# If errors: -if [[ ! $(command -v conda) ]] # If no conda -then - return 0 -elif [[ "${fastq}" == "0" ]] # If no FASTQ -then + echo -e "\n Cannot activate ${ylo}Workflow-Core${nc} conda environment." return 0 fi - ############################################################################### ### RUN SNAKEMAKE ### ##################### -# Print settings message echo -e " ${blue}------------------------------------------------------------------------${nc} ${blue}#####${nc} ${red}Run Snakemake${nc} ${blue}#####${nc} ${blue}-------------------------${nc} " -echo -e " -${green} > Snakemake: unlock working directory${nc} -" +echo -e "\n ${green} > Snakemake: unlock working directory${nc} \n" snakemake \ --directory ${workdir}/ \ --snakefile ${workdir}/workflow/Snakefile \ --rerun-incomplete \ --unlock -echo -e " -${green} > Snakemake: list conda environments${nc} -" - snakemake \ +echo -e "\n ${green} > Snakemake: list conda environments${nc} \n" +snakemake \ --directory ${workdir}/ \ --snakefile ${workdir}/workflow/Snakefile \ --list-conda-envs - echo -e " -${green} > Snakemake: create conda environments${nc} -" +echo -e "\n ${green} > Snakemake: create conda environments${nc} \n" snakemake \ --directory ${workdir}/ \ --snakefile ${workdir}/workflow/Snakefile \ 
--conda-create-envs-only \ --use-conda -echo -e " -${green} > Snakemake: dry run${nc} -" +echo -e "\n ${green} > Snakemake: dry run${nc} \n" snakemake \ --directory ${workdir}/ \ --snakefile ${workdir}/workflow/Snakefile \ @@ -399,9 +192,7 @@ snakemake \ --dry-run \ --quiet host rules -echo -e " -${green} > Snakemake: run${nc} -" +echo -e "\n ${green} > Snakemake: run${nc} \n" snakemake \ --directory ${workdir}/ \ --snakefile ${workdir}/workflow/Snakefile\ @@ -412,49 +203,12 @@ snakemake \ --use-conda \ --quiet host progress -# Deactive workflow-core conda environment. -echo -e " -Deactivate ${ylo}Workflow-Core${nc} conda environment." -conda deactivate - -############################################################################### -### Gzip ### -############ - -# Gzip reports directory -#cd ${workdir}/results/ -#tar -zcf 10_Reports_archive.tar.gz 10_Reports/ -#cd ${workdir} - -# Gzip results directory -#mkdir -p ${workdir}/archives/ 2> /dev/null -#tar -zcf archives/Results_${time_stamp_archive}_${reference}_${aligner}-${min_cov}X_${samples}sp_archive.tar.gz results/ - ############################################################################### -### Timer ### -############# +### DEACTIVATE WORKFLOW-CORE ### +################################# -# Timer -time_stamp_end=$(date +"%Y-%m-%d %H:%M") # Get date / hour ending analyzes -elapsed_time=${SECONDS} # Get SECONDS counter -hours=$((${elapsed_time}/3600)) # /3600 = hours -minutes=$(((${elapsed_time}%3600)/60)) # %3600 /60 = minutes -seconds=$((${elapsed_time}%60)) # %60 = seconds -formatted_time=$(printf "%02d:%02d:%02d" ${hours} ${minutes} ${seconds}) # Format - -# Time message -message_time=" -${green}Start time${nc} _____________ ${time_stamp_start} -${green}End time${nc} _______________ ${time_stamp_end} -${green}Processing time${nc} ________ ${ylo}${formatted_time}${nc} -" - -# Print time message -echo -e "${message_time}" - -# Log time message -echo -e "${message_time}" \ - | sed "s/\x1B\[[0-9;]*[mK]//g" 
\ - >> ${workdir}/results/10_Reports/settings.log +# Deactive workflow-core conda environment. +echo -e "\n Deactivate ${ylo}Workflow-Core${nc} conda environment." +conda deactivate -############################################################################### +############################################################################### \ No newline at end of file diff --git a/Snakemake: b/Snakemake: new file mode 100644 index 0000000..e69de29 diff --git a/version.txt b/VERSION_temp.txt similarity index 100% rename from version.txt rename to VERSION_temp.txt diff --git a/config/config.yaml b/config/config.yaml index 87e9489..77c3741 100755 --- a/config/config.yaml +++ b/config/config.yaml @@ -25,13 +25,13 @@ fastq_dir: 'resources/reads/' # Path to reads fastq.gz files (default: 'resources/reads/') [PATH] modules: # GeVarLi analysis modules, set 'true' or 'false' [BOOL] - qualities: true # Perform reads quality controls (FastQC, Fastq-Screen) (default: true) - keeptrim: false # Keep trimmed reads files (Cutadapt / Sickle-trim) (default: false) - clipping: false # Perform reads clipping (Bamclipper) (default: false) - covstats: true # Perform coverage statistics (Fagstat, Covstats) (default: true) - consensus: true # Perform reads mapping to reference (BWA, Minimap2, Bowtie2) (default: true) - lineages: true # Perform clade and lineage assignations (Nexclade, Pangolin) (default: true) - gisaid: false # Perform gisaid submission template (GisAid : TODO) (default: false) + qualities: true # Perform reads quality controls (FastQC, Fastq-Screen) (default: true) + keeptrim: false # Keep trimmed reads files (Cutadapt / Sickle-trim) (default: false) + clipping: false # Perform reads clipping (Bamclipper) (default: false) + covstats: true # Perform coverage statistics (Fagstat, Covstats) (default: true) + consensus: true # Perform reads mapping to reference (BWA, Minimap2, Bowtie2) (default: true) + lineages: true # Perform clade and lineage assignations (Nexclade, 
Pangolin) (default: true) + gisaid: false # Perform gisaid submission template (GisAid : TODO) (default: false) consensus: reference: 'SARS-CoV-2_Wuhan_MN908947.3' # Your reference, in fasta format (default: 'SARS-CoV-2_Wuhan_MN-908947-3') [STR] @@ -336,6 +336,7 @@ envs: # Conda environement yaml files: sickle_trim: '../envs/sickle-trim_v.1.33.yaml' # Sickle-trim ver. 1.33 tsv2vcf: '../envs/tsv2vcf_v.2025.01.yaml' # tsv2vcf ver. 2025.01 (biopython=1.85 numpy=2.2.2 scipy=1.15.1) workflow: '../envs/workflow-core.yaml' # workflow ver. 2025.01 (snakemake=8.27.1 graphviz=12.0.0) + yq: '../envs/yq_v.3.4.3.yaml' # yq ver. 3.4.3 report: # Snakemake reports formats stat: diff --git a/workflow/Snakefile b/workflow/Snakefile index 10c88c7..db6dfd0 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -28,6 +28,8 @@ configfile: "config/config.yaml" ################# # Imports +import time, re + from snakemake.utils import min_version from snakemake.utils import validate @@ -41,6 +43,7 @@ FASTQ_DIR = config["fastq_dir"] VALID_FASTQ = get_valid_fastq(FASTQ_DIR) SAMPLE = list(VALID_FASTQ.keys()) MATE = ["1", "2"] +START_TIME = time.time() # Exports import scripts.functions as functions @@ -60,7 +63,7 @@ functions.STAT_EXT = config["report"]["stat"]["extention"] # Stat reports extent min_version("8.27.0") # Schemas -#validate(config, schema="schemas/config.schema.yaml") +validate(config, schema="schemas/config.schema.yaml") ############################################################################### ### WILDCARDS ### @@ -81,9 +84,9 @@ TMP_DIR = config["resources"]["tmp_dir"] # Temporary directory ### ENVIRONMENTS ### #################### -MULTIQC = config["envs"]["multiqc"] # Multi-QC conda env -FASTQ_SCREEN = config["envs"]["fastq_screen"] # Fastq-Screen conda env -FASTQC= config["envs"]["fastqc"] # FastQC conda env +YQ = config["envs"]["yq"] # YQ conda environment +FASTQ_SCREEN = config["envs"]["fastq_screen"] # Fastq-Screen conda environment +FASTQC= 
config["envs"]["fastqc"] # FastQC conda environment CUTADAPT = config["envs"]["cutadapt"] # Cutadapt conda environment SICKLE_TRIM = config["envs"]["sickle_trim"] # Sickle-Trim conda environment MINIMAP2 = config["envs"]["minimap2"] # BWA conda environment @@ -91,14 +94,15 @@ BWA = config["envs"]["bwa"] # BWA conda environment BOWTIE2 = config["envs"]["bowtie2"] # Bowtie2 conda environment SAMTOOLS = config["envs"]["samtools"] # SamTools conda environment BEDTOOLS = config["envs"]["bedtools"] # BedTools conda environment -BAMCLIPPER = config["envs"]["bamclipper"] # BAMClipper +BAMCLIPPER = config["envs"]["bamclipper"] # BAMClipper onda environment GAWK = config["envs"]["gawk"] # Awk (GNU) conda environment IVAR = config["envs"]["ivar"] # iVar conda environment TSV2VCF = config["envs"]["tsv2vcf"] # tsv2vcf conda environment BCFTOOLS = config["envs"]["bcftools"] # BcfTools conda environment PANGOLIN = config["envs"]["pangolin"] # Pangolin conda environment NEXTCLADE = config["envs"]["nextclade"] # Nextclade conda environment -WORKFLOW = config["envs"]["workflow"] # Workflow conda environment +MULTIQC = config["envs"]["multiqc"] # Multi-QC conda environment +WORKFLOW = config["envs"]["workflow"] # Workflow conda environment ############################################################################### ### PARAMETERS ### @@ -196,6 +200,6 @@ include: "rules/workflow_reporting.smk" rule all: input: - report = "results/10_Reports/snakemake/workflow-report.html" + tarball = "results/Reports_archive.tar.gz" ############################################################################### diff --git a/workflow/envs/workflow-core.yaml b/workflow/envs/workflow-core.yaml index dd3c0c1..f1d7366 100644 --- a/workflow/envs/workflow-core.yaml +++ b/workflow/envs/workflow-core.yaml @@ -10,4 +10,4 @@ dependencies: - snakemake-interface-report-plugins - snakemake-interface-storage-plugins - graphviz=12.0.0 - - yq=3.4.3 + - yq=3.4.3 \ No newline at end of file diff --git 
a/workflow/envs/yq_v.3.4.3.yaml b/workflow/envs/yq_v.3.4.3.yaml new file mode 100644 index 0000000..03f2662 --- /dev/null +++ b/workflow/envs/yq_v.3.4.3.yaml @@ -0,0 +1,7 @@ +name: yq +channels: + - conda-forge + - bioconda + - nodefaults +dependencies: + - yq=3.4.3 \ No newline at end of file diff --git a/workflow/rules/symlinks_renamming.smk b/workflow/rules/symlinks_renamming.smk index 116843e..eb72ba6 100644 --- a/workflow/rules/symlinks_renamming.smk +++ b/workflow/rules/symlinks_renamming.smk @@ -17,6 +17,25 @@ # Use ____________________ snakemake -s Snakefile --use-conda -j ############################################################################### +############################################################################### +rule config: + # Aim: Load configuration file + # Use: yq -r . <CONFIG_FILE> + message: + """ + ~ Configuration ∞ Show analyses settings ~ + """ + conda: + WORKFLOW + input: + config_file = "config/config.yaml" + output: + done = temp("done.temp") + shell: + "bash workflow/scripts/settings.sh " + "&& " + "touch {output.done}" + ############################################################################### rule symlinks: # Aim: Create fastq files symlinks @@ -28,7 +47,8 @@ rule symlinks: Reads: ___________ R{wildcards.mate} """ input: - valid_fastq = lambda wildcards: os.path.abspath(VALID_FASTQ[wildcards.sample][wildcards.mate]) + valid_fastq = lambda wildcards: os.path.abspath(VALID_FASTQ[wildcards.sample][wildcards.mate]), + done = "done.temp" output: symlink = temp("results/symlinks/{sample}_R{mate}.fastq.gz") log: diff --git a/workflow/rules/workflow_reporting.smk b/workflow/rules/workflow_reporting.smk index 23d9c52..ee6152f 100644 --- a/workflow/rules/workflow_reporting.smk +++ b/workflow/rules/workflow_reporting.smk @@ -17,6 +17,31 @@ # Use ____________________ snakemake -s Snakefile --use-conda -j ############################################################################### + + 
+############################################################################### + +############################################################################### +rule tar_reports: + # Aim: compresses reports into a tarball + # Use: tar -zcf 10_Reports_archive.tar.gz 10_Reports/ + message: + """ + ~ Archive ∞ Compress reports ~ + """ + input: + html_report = "results/10_Reports/workflow-report.html" + output: + tarball = "results/Reports_archive.tar.gz" + shell: + "tar " # Tar command + "-z " # Gzip compression + "-c " # Create a new archive + "-f " # Use archive file or device ARCHIVE + "{output.tarball} " # Output archive + "-C results/ " # Change to directory 'results/' + "10_Reports/" # Input directory to compress + ############################################################################### rule snakemake_report: # Aim: generates a workflow report in HTML format @@ -30,20 +55,22 @@ rule snakemake_report: params: #style_sheet = STYLE_SHEET input: - final_outputs = get_final_outputs(), - summary = "results/10_Reports/snakemake/files-summary.txt", - #graph = "results/10_Reports/snakemake/{graph_type}.{ext}", multiqc = "results/10_Reports/multiqc/", + time = "results/10_Reports/time.log", + summary = "results/10_Reports/files-summary.txt", + graph = expand("results/10_Reports/graphs/{graph_type}.{ext}", + graph_type = GRAPH_TYPE, + ext = GRAPH_EXT), output: - report = "results/10_Reports/snakemake/workflow-report.html" + html_report = "results/10_Reports/workflow-report.html" log: - "results/10_Reports/tools-log/snakemake/workflow-report.log" + "results/10_Reports/tools-log/workflow-report.log" shell: - "snakemake " # Snakemake - "--report " # Create an HTML report with results and statistics + "snakemake " # Snakemake + "--report " # Create an HTML report with results and statistics #"--report-stylesheet {params.style_sheet} " # Custom stylesheet to use for report - " {output.report} " # Output report - "2> {log}" # Log redirection + " {output.html_report} " 
# Output report + "2> {log}" # Log redirection ############################################################################### rule snakemake_summary: @@ -58,9 +85,9 @@ rule snakemake_summary: input: final_outputs = get_final_outputs() output: - summary = "results/10_Reports/snakemake/files-summary.txt" + summary = "results/10_Reports/files-summary.txt" log: - "results/10_Reports/tools-log/snakemake/files-summary.log" + "results/10_Reports/tools-log/files-summary.log" shell: "snakemake " # Snakemake "--summary " # Print a summary of all files created by the workflow @@ -81,9 +108,9 @@ rule snakemake_graph: input: final_outputs = get_final_outputs() output: - graph = "results/10_Reports/snakemake/{graph_type}.{ext}" + graph = "results/10_Reports/graphs/{graph_type}.{ext}" log: - "results/10_Reports/tools-log/snakemake/{graph_type}-{ext}.log" + "results/10_Reports/tools-log/graphs/{graph_type}-{ext}.log" shell: "snakemake " # Snakemake "--{wildcards.graph_type} " # Graph types: 'dag', 'rulegraph', 'filegraph' @@ -107,7 +134,9 @@ rule multiqc_aggregation: input: final_outputs = get_final_outputs() output: - multiqc = directory("results/10_Reports/multiqc/") + multiqc = directory("results/10_Reports/multiqc/"), + html_report = "results/10_Reports/multiqc/multiqc_report.html", + copy_report = "results/All_QC_reports.html" log: "results/10_Reports/tools-log/multiqc.log" shell: @@ -120,6 +149,100 @@ rule multiqc_aggregation: "--export " # Export plots as static images in addition to the report "--outdir {output.multiqc} " # -o: Create report in the specified output directory "{input.final_outputs} " # Input final outputs - "> {log} 2>&1" # Log redirection + "> {log} 2>&1 " # Log redirection + "&& cp {output.html_report} {output.copy_report} " # Copy report to results directory + "2> /dev/null" # Suppress error messages + +############################################################################### +rule log_time: + # Aim: log workflow start and end time + # Use: date 
+"%Y-%m-%d %H:%M" + message: + """ + ~ Log ∞ Workflow time ~ + """ + input: + final_outputs = get_final_outputs() + output: + time_log = "results/10_Reports/time.log" + run: + time_stamp_start = time.strftime("%Y-%m-%d %H:%M", time.localtime(start_time)) # Get system: analyzes starting time + time_stamp_end = time.strftime("%Y-%m-%d %H:%M", time.localtime()) # Get date / hour ending analyzes + elapsed_time = int(time.time() - start_time) # Get SECONDS counter + hours = elapsed_time // 3600 # /3600 = hours + minutes = (elapsed_time % 3600) // 60 # %3600 /60 = minutes + seconds = elapsed_time % 60 # %60 = seconds + formatted_time = f"{hours:02d}:{minutes:02d}:{seconds:02d}" # Format + green = "\033[32m" # ANSI green color code + ylo = "\033[33m" # ANSI yellow color code + nc = "\033[0m" # ANSI no-color code + message_time = f""" +{green}Start time{nc} _____________ {time_stamp_start} +{green}End time{nc} _______________ {time_stamp_end} +{green}Processing time{nc} ________ {ylo}{formatted_time}{nc} +""" + print(message_time) # Print time message + ansi_escape = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]') # ANSI escape code + message_clean = ansi_escape.sub('', message_time) # Clean ANSI escape code + with open(output.time_log, "w") as f: # Log time message + f.write(message_clean) + +############################################################################### +rule log_setup: + # Aim: log user setup + # Use: + message: + """ + ~ Log ∞ User setup ~ + """ + input: + setup = "config/config.yaml", + output: + setup_log = "results/10_Reports/setup.log" + run: + import subprocess + uname = subprocess.check_output(["uname", "-a"]).decode().strip() + fastq_dir = subprocess.check_output(["yq", "-Mr", ".fastq_dir", input.setup]).decode().strip() + try: + fastq_files = subprocess.check_output(["bash", "-c", "ls -1 {}/*.fastq.gz 2>/dev/null | wc -l".format(fastq_dir)]).decode().strip() + except Exception: + fastq_files = "0" + with open(output.setup_log, "w") as f: + f.write("OS 
info: " + uname + "\n") + f.write("Fastq directory: " + fastq_dir + "\n") + f.write("Number of fastq files: " + fastq_files + "\n") + +############################################################################### +rule log_environments: + # Aim: copy conda environments files to results + # Use: cp workflow/envs/*.yaml results/10_Reports/envs/ + message: + """ + ~ Log ∞ Workflow environments ~ + """ + input: + envs = "workflow/envs" + output: + envs_log = directory("results/10_Reports/envs/") + shell: + "mkdir -p {output.envs_log} && " # Create directory + "cp {input.envs}/*.yaml {output.envs_log} " # Copy envs files + "2> /dev/null" # Suppress error messages -############################################################################### \ No newline at end of file +############################################################################### +rule log_config: + # Aim: copy settings file to results + # Use: cp config/config.yaml results/10_Reports/config.log + message: + """ + ~ Log ∞ Workflow configuration ~ + """ + input: + config = "config/config.yaml" + output: + config_log = "results/10_Reports/config.log" + shell: + "cp {input.config} {output.config_log} " # Copy config file + "2> /dev/null" # Suppress error messages + +############################################################################### diff --git a/workflow/schemas/config.schema.yaml b/workflow/schemas/config.schema.yaml index e69de29..a399a7c 100644 --- a/workflow/schemas/config.schema.yaml +++ b/workflow/schemas/config.schema.yaml @@ -0,0 +1,378 @@ +$schema: "http://json-schema.org/draft-07/schema#" +title: "GeVarLi Configuration Schema" +type: object +required: + - fastq_dir + - modules + - consensus + - gisaid + - nextclade + - ivar + - bamclipper + - bwa + - bowtie2 + - minimap2 + - sickle_trim + - cutadapt + - multiqc + - fastq_screen + - rename + - resources + - envs + - report +properties: + fastq_dir: + type: string + + modules: + type: object + required: + - qualities + - keeptrim + 
- clipping + - covstats + - consensus + - lineages + - gisaid + properties: + qualities: + type: boolean + keeptrim: + type: boolean + clipping: + type: boolean + covstats: + type: boolean + consensus: + type: boolean + lineages: + type: boolean + gisaid: + type: boolean + + consensus: + type: object + required: + - reference + - min_cov + - min_freq + - iupac + - mapper + - caller + - assigner + properties: + reference: + type: string + min_cov: + type: string + min_freq: + type: string + iupac: + type: string + mapper: + type: string + caller: + type: string + assigner: + type: string + + gisaid: + type: object + required: + - username + - threshold + - name + - country + - identifier + - year + properties: + username: + type: string + threshold: + type: string + name: + type: string + country: + type: string + identifier: + type: string + year: + type: string + + nextclade: + type: object + required: + - path + - dataset + properties: + path: + type: string + dataset: + type: string + + ivar: + type: object + required: + - max_depth + - min_bq + - min_qual + - map_qual + properties: + max_depth: + type: string + min_bq: + type: string + min_qual: + type: string + map_qual: + type: string + + bamclipper: + type: object + required: + - path + - primers + - upstream + - downstream + properties: + path: + type: string + primers: + type: string + upstream: + type: string + downstream: + type: string + + bwa: + type: object + required: + - algorithm + properties: + algorithm: + type: string + + bowtie2: + type: object + required: + - algorithm + - sensitivity + properties: + algorithm: + type: string + sensitivity: + type: string + + minimap2: + type: object + required: + - preset + - algorithm + properties: + preset: + type: string + algorithm: + type: object + required: + - "k-mer_size" + - minimizer_size + - split_size + - homopolymer + properties: + "k-mer_size": + type: string + minimizer_size: + type: string + split_size: + type: string + homopolymer: + type: 
string + + sickle_trim: + type: object + required: + - quality + - length + - command + - encoding + properties: + quality: + type: string + length: + type: string + command: + type: string + encoding: + type: string + + cutadapt: + type: object + required: + - clipping + - length + - kits + properties: + clipping: + type: string + length: + type: string + kits: + type: object + required: + - truseq + - nextera + - small + properties: + truseq: + type: string + nextera: + type: string + small: + type: string + + multiqc: + type: object + required: + - path + - config + - tag + properties: + path: + type: string + config: + type: string + tag: + type: string + + fastq_screen: + type: object + required: + - config + - subset + - reference + properties: + config: + type: string + subset: + type: string + reference: + type: array + items: + type: string + + rename: + type: object + required: + - barcode_id + - line_id + - end_id + properties: + barcode_id: + type: boolean + line_id: + type: boolean + end_id: + type: boolean + + resources: + type: object + required: + - cpus + - ram + - tmp_dir + properties: + cpus: + type: string + ram: + type: string + tmp_dir: + type: string + + envs: + type: object + required: + - bamclipper + - bcftools + - bedtools + - bowtie2 + - bwa + - cutadapt + - fastq_screen + - fastqc + - gawk + - ivar + - minimap2 + - multiqc + - nextclade + - pangolin + - samtools + - sickle_trim + - tsv2vcf + - workflow + properties: + bamclipper: + type: string + bcftools: + type: string + bedtools: + type: string + bowtie2: + type: string + bwa: + type: string + cutadapt: + type: string + fastq_screen: + type: string + fastqc: + type: string + gawk: + type: string + ivar: + type: string + minimap2: + type: string + multiqc: + type: string + nextclade: + type: string + pangolin: + type: string + samtools: + type: string + sickle_trim: + type: string + tsv2vcf: + type: string + workflow: + type: string + + report: + type: object + required: + - stat + - 
graph + properties: + stat: + type: object + required: + - extention + properties: + extention: + type: array + items: + type: string + graph: + type: object + required: + - type + - extention + properties: + type: + type: array + items: + type: string + extention: + type: array + items: + type: string \ No newline at end of file diff --git a/workflow/scripts/__pycache__/functions.cpython-312.pyc b/workflow/scripts/__pycache__/functions.cpython-312.pyc index a2b197c5146556f8a4983e5b3e05db163057bdbe..09825b982dd622ec6f2390258070bd704c9da8c8 100644 GIT binary patch delta 233 zcmX?Yu+@m~G%qg~0}!k{dn-LiX(Qi$4#t+v=Q!lqCiC-%Fr_F@UeB$-l%fKngi=&j zGlJAIFfcOIFa$Gbs%`$r*Tx!NB~z4IT$)o-tZ!%#ACy{<Uj$?pr<Rl!=;h?6SIJ_L zP0r6t%S;DLOJI>s%_}R`pIk07gYnR20ny1!`m93NStKs9NL-LKzrtd1gGCU=wYb7! ciNrO&!ea7(TVnE3@wuGwjEs^+%0Slw0EnbYG5`Po delta 68 zcmdmLblQOLG%qg~0}z-wTuGmyxRGx^2cz8Pa~$$)EWr$#%A3Ra+gK-?h|XY~y7{c= WWF{sa$;tf^b2+0K86}I9f!Y9^M-nOk diff --git a/workflow/scripts/functions.py b/workflow/scripts/functions.py index c4e4ae1..fc28f23 100644 --- a/workflow/scripts/functions.py +++ b/workflow/scripts/functions.py @@ -171,7 +171,11 @@ def get_final_outputs(): min_cov = MIN_COV, caller = CALLER, assigner = ASSIGNER)) - # final_outpus + # logs + final_outputs.append("results/10_Reports/setup.log") + final_outputs.append("results/10_Reports/config.log") + final_outputs.append("results/10_Reports/envs/") + # return final_outpus return final_outputs ############################################################################### diff --git a/workflow/scripts/results/10_Reports/settings.log b/workflow/scripts/results/10_Reports/settings.log new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/workflow/scripts/results/10_Reports/settings.log @@ -0,0 +1 @@ + diff --git a/workflow/scripts/settings.sh b/workflow/scripts/settings.sh new file mode 100644 index 0000000..bd675f7 --- /dev/null +++ b/workflow/scripts/settings.sh @@ -0,0 +1,196 @@ +#!/bin/bash + 
+###I###R###D######U###2###3###3#######T###R###A###N###S###V###I###H###M###I#### +### ### +### /\ ______ ___ ____ _ _ __ ____ __ ____ ______ /\ ### +### || \ \ \ \ / __( ___( \/ )__\ ( _ ( ) (_ _) / / / / || ### +### || > > > > ( (_-.)__) \ /(__)\ ) /)(__ _)(_ < < < < || ### +### || /_/_/_/ \___(____) \(__)(__(_)\_(____(____) \_\_\_\ || ### +### \/ \/ ### +### ### +###I###R###D######U###2###3###3#######T###R###A###N###S###V###I###H###M###I#### +# Name ___________________ Run_GeVarLi.sh +# Version ________________ v.2025.03 +# Author _________________ Nicolas Fernandez +# Affiliation ____________ IRD_U233_TransVIHMI +# Aim ____________________ Bash script running GeVarLi snakefile +# Date ___________________ 2021.10.12 +# Latest modifications ___ 2025.03.12 +# Use ____________________ '. Run_GeVarLi.sh' +############################################################################### + +############################################################################### +### ABOUT ### +############# + +workdir=$(cd "$(dirname "${BASH_SOURCE[0]}" )"/../../ && pwd) # Get working directory +version=$(<${workdir}/VERSION_temp.txt) # Get version + +blue="\033[1;34m" # blue +green="\033[1;32m" # green +red="\033[1;31m" # red +ylo="\033[1;33m" # yellow +nc="\033[0m" # no color + +############################################################################### +### OPERATING SYSTEM ### +######################## + +shell=$SHELL # Get shell + +# Get operating system +case "$OSTYPE" in + darwin*) os="osx" ;; + linux*) os="linux" ;; + bsd*) os="bsd" ;; + solaris*) os="solaris" ;; + msys*) os="windows" ;; + cygwin*) os="windows" ;; + *) os="unknown (${OSTYPE})" ;; +esac + +############################################################################### +### HARDWARE ### +################ + +if [[ ${os} == "osx" ]] +then + model_name=$(sysctl -n machdep.cpu.brand_string) # Get chip model name + physical_cpu=$(sysctl -n hw.physicalcpu) # Get physical cpu + logical_cpu=$(sysctl -n 
hw.logicalcpu) # Get logical cpu + mem_size=$(sysctl -n hw.memsize) # Get memory size (bit) + ram_gb=$(expr ${mem_size} \/ $((1024**3))) # mem_size / 1024**3 = Gb +elif [[ ${os} == "linux" || ${os} == "bsd" || ${os} == "solaris" ]] +then + model_name=$(lscpu | grep -o -E "Model name: +.+" | sed -E "s/Model name: +//") # Get chip model name + physical_cpu=$(lscpu | grep -o -E "^CPU\(s\): +[0-9]+" | sed -E "s/CPU\(s\): +//") # Get physical cpu + threads_cpu=$(lscpu | grep -o -E "^Thread\(s\) per core: +[0-9]+" | sed -E "s/Thread\(s\) per core: +//") # Get thread(s) per core + logical_cpu=$(expr ${physical_cpu} \* ${threads_cpu}) # Calcul logical cpu + mem_size=$(grep -o -E "MemTotal: +[0-9]+" /proc/meminfo | sed -E "s/MemTotal: +//") # Get memory size (Kb) + ram_gb=$(expr ${mem_size} \/ $((1024**2))) # mem_size / 1024**2 = Gb +else + echo -e "\n Please, use an '${ylo}UNIX${nc}' operating system, like: + > '${green}linux${nc}' + > '${green}osx${nc}' + > '${green}WSL${nc}' \n" + return 0 +fi + +############################################################################### +### NETWORK ### +############### + +if ping -c 1 -W 5 google.com > /dev/null 2>&1 || \ + ping -c 1 -W 5 cloudflare.com > /dev/null 2>&1 +then + network="Online" +else + network="Offline" +fi + +############################################################################### +### CONDA ### +############# + +message_conda=" +$(conda --version) +Path: $(which conda) +$(conda config --show channels) +" + +############################################################################### +### CONFIGURATION ### +##################### + +conda_version=$(conda --version | sed 's/conda //') # Conda version +mamba_version=$(mamba --version | head -n 1 | sed 's/mamba //') # Mamba version +snakemake_version=$(snakemake --version) # Snakemake version + +fastq_dir=$(yq -Mr '.fastq_dir' ${config_file}) # Get path to fastq files directory +fastq_files=$(/bin/ls -l ${fastq_dir}/*.fastq.gz 2> /dev/null | wc -l | sed 's/ 
//') # Get fastq.gz files count
+fastq_R1=$(/bin/ls -l ${fastq_dir}/*R1*.fastq.gz 2> /dev/null | wc -l | sed 's/ //') # Get R1 fastq files count
+fastq_R2=$(/bin/ls -l ${fastq_dir}/*R2*.fastq.gz 2> /dev/null | wc -l | sed 's/ //') # Get R2 fastq files count
+
+max_threads=$(yq -Mr '.resources.cpus' ${config_file}) # Get user config: max threads
+max_memory=$(yq -Mr '.resources.ram' ${config_file}) # Get user config: max memory (Gb)
+
+qualities=$(yq -Mr '.modules.qualities' ${config_file}) # Reads QC
+keeptrim=$(yq -Mr '.modules.keeptrim' ${config_file}) # Keep trimmed reads
+cleapping=$(yq -Mr '.modules.clipping' ${config_file}) # Reads clipping
+covstats=$(yq -Mr '.modules.covstats' ${config_file}) # Mapping coverage stats
+consensus=$(yq -Mr '.modules.consensus' ${config_file}) # Consensus
+lineages=$(yq -Mr '.modules.lineages' ${config_file}) # Lineages assignation
+gisaid=$(yq -Mr '.modules.gisaid' ${config_file}) # Gisaid submission file
+
+# Get user config: genome reference
+reference=$(yq -Mc '.consensus.reference' ${config_file} | sed 's/\[\"//' | sed 's/\"\]//' | sed 's/\"\,\"/ ; /g')
+# Get user config: mapper
+mapper=$(yq -Mc '.consensus.mapper' ${config_file} | sed 's/\[\"//' | sed 's/\"\]//' | sed 's/\"\,\"/ ; /g')
+# Get user config: minimum coverage
+min_cov=$(yq -Mc '.consensus.min_cov' ${config_file} | sed 's/\[\"//' | sed 's/\"\]//' | sed 's/\"\,\"/ ; /g')
+# Get user config: minimum allele frequency
+min_freq=$(yq -Mr '.consensus.min_freq' ${config_file})
+# Get user config: assigner tool
+assigner=$(yq -Mc '.consensus.assigner' ${config_file} | sed 's/\[\"//' | sed 's/\"\]//' | sed 's/\"\,\"/ ; /g')
+# Get user config: hard clipping option
+hard_clipping=$(yq -Mr '.cutadapt.clipping' ${config_file})
+# Get user config: dataset for nextclade
+nextclade_dataset=$(yq -Mc '.nextclade.dataset' ${config_file} | sed 's/\[\"//' | sed 's/\"\]//')
+# Get user config: fastq_screen subset option
+fastqscreen_subset=$(yq -Mr '.fastq_screen.subset' 
${config_file}) +# Get user config: cutadapt clipping +cutadapt_clipping=$(yq -Mr '.cutadapt.clipping' ${config_file}) + + +message_settings=""" +${blue}------------------------------------------------------------------------${nc} +${blue}#####${nc} ${red}Configuration${nc} ${blue}#####${nc} +${blue}-------------------------${nc} + +${green}Starting time${nc} ______________ ${time_stamp_start} + +${green}Conda version${nc} ______________ ${ylo}${conda_version}${nc} +${green}Mamba version${nc} ______________ ${ylo}${mamba_version}${nc} +${green}Snakemake version${nc} __________ ${ylo}${snakemake_version}${nc} + +${green}Max threads${nc} ________________ ${red}${max_threads}${nc} of ${ylo}${logical_cpu}${nc} threads available +${green}Max memory${nc} _________________ ${red}${max_memory}${nc} of ${ylo}${ram_gb}${nc} Gb available +${green}Jobs memory${nc} ________________ ${red}${memory_per_job}${nc} Gb per job + +${green}Network${nc} ____________________ ${red}${network}${nc} + +${green}Working directory${nc} _________ '${ylo}${workdir}/${nc}' + +${green}Fastq directory${nc} ___________ '${ylo}${fastq_dir}${nc}' +${green} > Fastq processed${nc} ________ ${red}${fastq_files}${nc} fastq files +${green} > Forward reads${nc} __________ ${red}${fastq_R1}${nc} R1 +${green} > Reverse reads${nc} __________ ${red}${fastq_R2}${nc} R2 + +${blue}Modules:${nc} +${green} > Quality Control${nc} ________ ${red}${qualities}${nc} +${green} > Keep Trim${nc} ______________ ${red}${keeptrim}${nc} +${green} > Soft Clipping${nc} __________ ${red}${cleapping}${nc} +${green} > Cov Stats${nc} ______________ ${red}${covstats}${nc} +${green} > Consensus${nc} ______________ ${red}${consensus}${nc} +${green} > Lineages${nc} _______________ ${red}${lineages}${nc} +${green} > Gisaid${nc} _________________ ${red}${gisaid}${nc} + +${blue}Params:${nc} +${green} > Reference genome${nc} _______ ${ylo}${reference}${nc} +${green} > Mapper${nc} ________________ ${ylo}${mapper}${nc} +${green} > Min 
coverage${nc} ___________ ${red}${min_cov}${nc} X
+${green} > Min allele frequency${nc} ___ ${red}${min_freq}${nc}
+${green} > Assigner${nc} _______________ ${red}${assigner}${nc}
+${green}  - Nextclade dataset${nc} _____ ${red}${nextclade_dataset}${nc}
+${green} > Fastq-Screen subset${nc} ____ ${red}${fastqscreen_subset}${nc}
+${green} > Cutadapt clipping${nc} ______ ${red}${cutadapt_clipping} nt${nc}
+"""
+
+# Print settings message
+echo -e "${message_settings}"
+
+# Log settings message
+mkdir -p ${workdir}/results/10_Reports/ 2> /dev/null
+echo -e "${message_settings}" \
+    | sed "s/\x1B\[[0-9;]*[mK]//g" \
+    > ${workdir}/results/10_Reports/settings.log
\ No newline at end of file
-- 
GitLab