From d8a0e760c6f4d26fc30d79800214f359d41dcecb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20FERNANDEZ=20NU=C3=91EZ?=
 <fernandez.nunez.nicolas@gmail.com>
Date: Tue, 12 Oct 2021 12:25:02 +0200
Subject: [PATCH] Add rule merging all reads passing filters

---
 RQCP.sh                                       |  2 +-
 config/config.yaml                            |  1 +
 resources/reads/.gitkeep                      |  0
 .../rules/reads_quality_control_pipeline.smk  | 27 ++++++++++++++++---
 4 files changed, 26 insertions(+), 4 deletions(-)
 delete mode 100644 resources/reads/.gitkeep

diff --git a/RQCP.sh b/RQCP.sh
index ff3d888..2beb9f0 100755
--- a/RQCP.sh
+++ b/RQCP.sh
@@ -56,7 +56,7 @@ echo "Quiet."
 echo "________________________________________________________________________"
 
 ###### Create dir for QC tools #####
-mkdir ${WORKDIR}/results/reads/cutadapt/ ${WORKDIR}/results/reads/sickle-trim/ ${WORKDIR}/results/reads/fastq-join/ 2> /dev/null
+mkdir ${WORKDIR}/results/reads/cutadapt/ ${WORKDIR}/results/reads/sickle-trim/ ${WORKDIR}/results/reads/fastq-join/ ${WORKDIR}/results/reads/merged/ 2> /dev/null
 
 ###### Extract bwa indexes for small genomes #####
 echo ""
diff --git a/config/config.yaml b/config/config.yaml
index cb2ec92..db1a547 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -35,6 +35,7 @@ datasets:
     #- 'sickle-single'
     - 'fastq-join'
     #- 'fastq-join-unique'
+    - 'merged'
   quality-tool:
     - 'fastqc'
     - 'fastq-screen'
diff --git a/resources/reads/.gitkeep b/resources/reads/.gitkeep
deleted file mode 100644
index e69de29..0000000
diff --git a/workflow/rules/reads_quality_control_pipeline.smk b/workflow/rules/reads_quality_control_pipeline.smk
index 559ff1a..77afcc4 100644
--- a/workflow/rules/reads_quality_control_pipeline.smk
+++ b/workflow/rules/reads_quality_control_pipeline.smk
@@ -52,7 +52,7 @@ SUBSET = config["fastq-screen"]["subset"]   # Fastq-screen --subset
 ###############################################################################
 rule all:
     input:
-        joined = expand("results/reads/fastq-join/{sample}_cutadapt_sickle-trim_fastq-join_Mate.fastq.gz",
+        merged = expand("results/reads/merged/{sample}_cutadapt_sickle-trim_fastq-join_merged.fastq.gz",
                          sample = SAMPLE),
         multiqc = expand("results/quality/{qcdir}/multiqc/",
                          qcdir = QCDIR)
@@ -137,12 +137,33 @@ rule fastqc:
         "--outdir {output.fastqc} "   # -o: Create all output files in the specified output directory
         "{input.fastq}/*.fastq.gz "   # Input file.fastq
         "&> {log}"                    # Add redirection for log
-        
+
+###############################################################################
+rule merge:
+    # Aim: Merge all reads passing filters into one fastq.gz file per {sample}
+    # Use: cat {input} > {output}   (the four inputs are concatenated in the order listed in `shell`)
+    priority: 1
+    message:
+        "Take all passing filter {wildcards.sample} reads"
+    input:
+        single = "results/reads/sickle-single/{sample}_cutadapt_sickle-trim_Single.fastq.gz",
+        forward= "results/reads/fastq-join-unique/{sample}_cutadapt_sickle-trim_fastq-join_Unique_R1.fastq.gz",
+        reverse = "results/reads/fastq-join-unique/{sample}_cutadapt_sickle-trim_fastq-join_Unique_R2.fastq.gz",
+        joined = "results/reads/fastq-join/{sample}_cutadapt_sickle-trim_fastq-join_Mate.fastq.gz",
+    output:
+        merged = "results/reads/merged/{sample}_cutadapt_sickle-trim_fastq-join_merged.fastq.gz"
+    shell:
+        "cat "              # Concatenate the files below byte-wise (no decompression)
+        "{input.single} "   # 1st: trimmed single reads (Single.fastq.gz, sickle-single dir)
+        "{input.forward} "  # 2nd: unique forward reads (Unique_R1.fastq.gz, fastq-join-unique dir)
+        "{input.reverse} "  # 3rd: unique reverse reads (Unique_R2.fastq.gz, fastq-join-unique dir)
+        "{input.joined} "   # 4th: joined reads (Mate.fastq.gz, fastq-join dir)
+        "> {output.merged}" # Redirect the concatenated stream into the merged output file
+     
 ###############################################################################
 rule fastqjoin:
     # Aim: joins two paired-end reads on the overlapping ends
     # Use: fastq-join [OPTIONS] <read1.fastq> <read2.fastq> [mate.fastq] -o <read.fastq> -o <read.fastq> -o <read.fastq>
-    priority: 1
     message:
         "Fastq-join assemble R1 and R2 from {wildcards.sample} reads"
     conda:
-- 
GitLab