From d8a0e760c6f4d26fc30d79800214f359d41dcecb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20FERNANDEZ=20NU=C3=91EZ?= <fernandez.nunez.nicolas@gmail.com>
Date: Tue, 12 Oct 2021 12:25:02 +0200
Subject: [PATCH] Add rule merging all reads passing filters

---
Review notes (free-form text placed between the '---' line and the first
'diff --git' line; it is not part of the commit message):

  * Purpose: adds a Snakemake rule 'merge' that concatenates the single,
    unique forward, unique reverse and joined read files of a sample into
    one 'merged' fastq.gz, makes 'rule all' request that file, registers
    the 'merged' dataset in config.yaml and creates its output directory
    in RQCP.sh.
  * rule merge: the unique-read inputs are named 'forward_unique' and
    'reverse_unique' rather than 'forward' and 'reverse'. Snakemake keeps
    rule inputs in a list subclass and, in recent releases, refuses names
    that shadow list methods such as 'reverse' ("reserved for internal
    use"). TODO confirm against the Snakemake version pinned for this
    pipeline; the new names are valid on every version.
  * RQCP.sh: 'mkdir' gained '-p' so that already existing directories are
    not an error and missing parent directories are created.
  * This text was rebuilt from a copy whose line breaks had been lost. The
    indentation of context lines and the whitespace-only line removed in
    the last hunk are best guesses; verify them against the target tree
    (or try 'git apply --ignore-whitespace') before applying.
  * The 'index' blob ids below come from the original commit and do not
    describe the amended '+' lines.

 RQCP.sh                                       |  2 +-
 config/config.yaml                            |  1 +
 resources/reads/.gitkeep                      |  0
 .../rules/reads_quality_control_pipeline.smk  | 27 ++++++++++++++++---
 4 files changed, 26 insertions(+), 4 deletions(-)
 delete mode 100644 resources/reads/.gitkeep

diff --git a/RQCP.sh b/RQCP.sh
index ff3d888..2beb9f0 100755
--- a/RQCP.sh
+++ b/RQCP.sh
@@ -56,7 +56,7 @@ echo "Quiet."
 echo "________________________________________________________________________"
 
 ###### Create dir for QC tools #####
-mkdir ${WORKDIR}/results/reads/cutadapt/ ${WORKDIR}/results/reads/sickle-trim/ ${WORKDIR}/results/reads/fastq-join/ 2> /dev/null
+mkdir -p ${WORKDIR}/results/reads/cutadapt/ ${WORKDIR}/results/reads/sickle-trim/ ${WORKDIR}/results/reads/fastq-join/ ${WORKDIR}/results/reads/merged/ 2> /dev/null
 
 ###### Extract bwa indexes for small genomes #####
 echo ""
diff --git a/config/config.yaml b/config/config.yaml
index cb2ec92..db1a547 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -35,6 +35,7 @@ datasets:
     #- 'sickle-single'
     - 'fastq-join'
     #- 'fastq-join-unique'
+    - 'merged'
   quality-tool:
     - 'fastqc'
     - 'fastq-screen'
diff --git a/resources/reads/.gitkeep b/resources/reads/.gitkeep
deleted file mode 100644
index e69de29..0000000
diff --git a/workflow/rules/reads_quality_control_pipeline.smk b/workflow/rules/reads_quality_control_pipeline.smk
index 559ff1a..77afcc4 100644
--- a/workflow/rules/reads_quality_control_pipeline.smk
+++ b/workflow/rules/reads_quality_control_pipeline.smk
@@ -52,7 +52,7 @@ SUBSET = config["fastq-screen"]["subset"] # Fastq-screen --subset
 ###############################################################################
 rule all:
     input:
-        joined = expand("results/reads/fastq-join/{sample}_cutadapt_sickle-trim_fastq-join_Mate.fastq.gz",
+        merged = expand("results/reads/merged/{sample}_cutadapt_sickle-trim_fastq-join_merged.fastq.gz",
                         sample = SAMPLE),
         multiqc = expand("results/quality/{qcdir}/multiqc/",
                          qcdir = QCDIR)
@@ -137,12 +137,33 @@ rule fastqc:
         "--outdir {output.fastqc} "     # -o: Create all output files in the specified output directory
         "{input.fastq}/*.fastq.gz "     # Input file.fastq
         "&> {log}"                      # Add redirection for log
-    
+
+###############################################################################
+rule merge:
+    # Aim: Merge all passing filters reads
+    # Use: cat {input} > {output}
+    priority: 1
+    message:
+        "Take all passing filter {wildcards.sample} reads"
+    input:
+        single = "results/reads/sickle-single/{sample}_cutadapt_sickle-trim_Single.fastq.gz",
+        forward_unique = "results/reads/fastq-join-unique/{sample}_cutadapt_sickle-trim_fastq-join_Unique_R1.fastq.gz",
+        reverse_unique = "results/reads/fastq-join-unique/{sample}_cutadapt_sickle-trim_fastq-join_Unique_R2.fastq.gz",
+        joined = "results/reads/fastq-join/{sample}_cutadapt_sickle-trim_fastq-join_Mate.fastq.gz",
+    output:
+        merged = "results/reads/merged/{sample}_cutadapt_sickle-trim_fastq-join_merged.fastq.gz"
+    shell:
+        "cat "                       # Cat, concatenate the inputs in the order listed
+        "{input.single} "            # Input trimmed singles fastq file from Sickle-trim
+        "{input.forward_unique} "    # Input unique forward file from Fastq-join (name avoids list.reverse/forward clash)
+        "{input.reverse_unique} "    # Input unique reverse file from Fastq-join
+        "{input.joined} "            # Input joined file from Fastq-join
+        "> {output.merged}"          # Output merged fastq.gz
+
 ###############################################################################
 rule fastqjoin:
     # Aim: joins two paired-end reads on the overlapping ends
     # Use: fastq-join [OPTIONS] <read1.fastq> <read2.fastq> [mate.fastq] -o <read.fastq> -o <read.fastq> -o <read.fastq>
-    priority: 1
     message:
         "Fastq-join assemble R1 and R2 from {wildcards.sample} reads"
     conda:
-- 
GitLab