Skip to content
Snippets Groups Projects
Commit 979ae7f6 authored by aurore.comte_ird.fr's avatar aurore.comte_ird.fr
Browse files

update

parent 7ba58080
No related branches found
No related tags found
No related merge requests found
# Importation des librairies glob, sys, et re # Importation des librairies glob, sys, et re
import glob,sys,re import glob,sys,re
from pathlib import Path
## appel du fichier de config ## appel du fichier de config
configfile: "configPT.yaml" configfile: "configPT.yaml"
## Definition des variables "constantes" à partir du fichier config (attention aux arborescences dans le yaml) ## Definition des variables "constantes" à partir du fichier config (attention aux arborescences dans le yaml)
REFERENCE_DIR = config['REFERENCE']
FASTA_REF= config['FASTA'] FASTA_REF= config['FASTA']
GFF_REF= config['GFF'] GFF_REF= config['GFF']
GTF_REF= config['GTF'] GTF_REF= config['GTF']
READS_DIR = config['READS'] READS_DIR = config['READS']
FASTQC_DIR = config['FASTQC'] OUTDIR = Path(config["OUTDIR"]).resolve().as_posix()
FASTQSCREEN_DIR = config['FASTQSCREEN']
FASTP_DIR = config['FASTP']
FASTQC_AF_DIR = config['FASTQC_AF']
MULTIQC_DIR = config['MULTIQC']
INDEX_DIR = config['INDEX']
INDEX_STAR_DIR = config['INDEX_STAR']
MAPPINGH_DIR = config['MAPPINGH']
MAPPINGS_DIR = config['MAPPING_STAR']
BAM_DIR = config['BAMFILES']
STATS_DIR = config['STATS']
FILTER_DIR = config['FILTER']
DUPL_DIR = config['DUPL']
HTSEQ_DIR = config['HTSEQ']
STRINGTIE_DIR = config['STRINGTIE']
PREPDE = config['PREPDE']
name_hisat_index = config['name_hisat_index']
FASTQC_DIR = f"{OUTDIR}/01_FastQC/"
FASTQSCREEN_DIR = f"{OUTDIR}/02_FastQScreen/"
FASTP_DIR = f"{OUTDIR}/03_FastP/"
FASTQC_AF_DIR = f"{OUTDIR}/04_FastQC_afterFT/"
MULTIQC_DIR = f"{OUTDIR}/05_MULTIQC/"
INDEX_DIR = f"{OUTDIR}/06_Index_HISAT/"
INDEX_STAR_DIR = f"{OUTDIR}/07_index_HISAT/"
MAPPINGH_DIR = f"{OUTDIR}/08_mapping_HISAT/"
MAPPINGS_DIR = f"{OUTDIR}/09_mapping_STAR/"
BAM_DIR = f"{OUTDIR}/12_BAMfiles/"
STATS_DIR = f"{OUTDIR}/14_Stats_BAM/"
FILTER_DIR = f"{OUTDIR}/13_Filtered_BAM/"
DUPL_DIR = f"{OUTDIR}/15_MarkedDupli_BAM/"
HTSEQ_DIR = f"{OUTDIR}/16_HTseq-count/"
STRINGTIE_DIR = f"{OUTDIR}/17_Stringtie/"
name_hisat_index = config['name_hisat_index']
## Software path ## Software path
fastQC = config['FastQC']
fastQScreen = config['FastQScreen']
fastQScreen_conf = config['FastQScreen_conf'] fastQScreen_conf = config['FastQScreen_conf']
fastP = config['FastP']
multiQC = config['MultiQC']
hisat_build = config['hisat-build']
hisat = config['hisat']
gffread = config['gffread']
star = config['star']
samtools = config['samtools']
picardtools = config['picardtools']
htseq = config['htseq']
stringtie = config['stringtie']
#Wild cards #Wild cards
REFERENCE, = glob_wildcards(f"{REFERENCE_DIR}{{reference}}.fasta") READS, = glob_wildcards(f"{READS_DIR}{{reads}}.fastq.gz")
ANNOTATION, = glob_wildcards(f"{REFERENCE_DIR}{{annotation}}.gff") MAPPING = ["STAR", "HISAT2"]
READS, = glob_wildcards(f"{READS_DIR}{{reads}}_r1.fastq.gz") COUNT = ["HTSEQ", "STINGTIE"]
# Regle finale pour vérifier la présence des outputs et si ils sont présents ils ne lancent pas le job # Regle finale pour vérifier la présence des outputs et si ils sont présents ils ne lancent pas le job
rule final: rule final:
input: input:
#out_fastqc = expand(f"{FASTQC_DIR}{{reads}}_r1_fastqc.html", reads = READS), out_fastqc = expand(f"{FASTQC_DIR}{{reads}}_fastqc.html", reads = READS),
#out_fastqs = expand(f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.html", reads = READS), out_fastqs = expand(f"{FASTQSCREEN_DIR}{{reads}}_screen.html", reads = READS),
#out_fastp = expand(f"{FASTP_DIR}{{reads}}_report_fastp.html", reads = READS), out_fastp = expand(f"{FASTP_DIR}{{reads}}_report_fastp.html", reads = READS),
#out_fastqc_af = expand(f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.html", reads = READS), #out_fastqc_af = expand(f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.html", reads = READS),
out_multiqc = expand(f"{MULTIQC_DIR}multiqc_report.html"), out_multiqc = expand(f"{MULTIQC_DIR}multiqc_report.html"),
#out_hisat_idx = expand(f"{INDEX_DIR}P.Falci_3D7"), out_hisat_map = expand(f"{MAPPINGH_DIR}{{reads}}_HISAT.bam" , reads = READS),
#out_hisat_map = expand(f"{MAPPINGH_DIR}{{reads}}_HISAT.bam" , reads = READS), out_star_map = expand(f"{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam" , reads = READS),
#out_star_map = expand(f"{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam" , reads = READS), out_bam_sort = expand(f"{BAM_DIR}{{reads}}_STAR_sort.bam", reads = READS),
#out_bam_sort = expand(f"{BAM_DIR}{{reads}}_STAR_sort.bam", reads = READS), out_bam_filt = expand(f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam", reads = READS),
#out_bam_filt = expand(f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam", reads = READS), out_bam_index = expand(f"{BAM_DIR}{{reads}}_STAR_sort.bam.bai", reads = READS),
#out_bam_index = expand(f"{BAM_DIR}{{reads}}_STAR_sort.bam.bai", reads = READS),
out_bam_stats = expand(f"{STATS_DIR}{{reads}}_STAR_sort_flagstat.txt", reads = READS), out_bam_stats = expand(f"{STATS_DIR}{{reads}}_STAR_sort_flagstat.txt", reads = READS),
#out_bam_md = expand(f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup_metric.txt", reads = READS), out_bam_md = expand(f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup_metric.txt", reads = READS),
out_htseq = expand(f"{HTSEQ_DIR}HISAT/{{reads}}.txt", reads = READS), out_htseq = expand(f"{HTSEQ_DIR}HISAT/{{reads}}.txt", reads = READS),
#out_gtf_list = expand(f'{STRINGTIE_DIR}STRINGTIE_MERGE/STAR_gtf_list.txt'), #out_gtf_list = expand(f'{STRINGTIE_DIR}STRINGTIE_MERGE/STAR_gtf_list.txt'),
#out_stringtie = expand(f'{STRINGTIE_DIR}STRINGTIE_MERGE/STAR_stringtie_merged.gtf'), #out_stringtie = expand(f'{STRINGTIE_DIR}STRINGTIE_MERGE/STAR_stringtie_merged.gtf'),
#out_stringtiecc = expand(f'{STRINGTIE_DIR}STAR/{{reads}}/{{reads}}.gtf', reads=READS), out_stringtiecc = expand(f'{STRINGTIE_DIR}STAR/{{reads}}/{{reads}}.gtf', reads=READS),
#out_gtf_list = expand(f'{STRINGTIE_DIR}HISAT_gtf_list.txt'), #out_gtf_list = expand(f'{STRINGTIE_DIR}HISAT_gtf_list.txt'),
#out_strg_list = expand(f'{STRINGTIE_DIR}HISAT_Stringtie_list.txt'), out_strg_list = expand(f'{STRINGTIE_DIR}HISAT_Stringtie_list.txt'),
out_csv = expand(f'{STRINGTIE_DIR}HISAT_transcript_count_matrix.csv') out_csv = expand(f'{STRINGTIE_DIR}HISAT_transcript_count_matrix.csv')
# ----------------------------------- QUALITY -------------------------------------------------------------------
# FastQC before filtering # FastQC before filtering
rule fastqc: rule fastqc:
""" """
...@@ -83,25 +71,22 @@ rule fastqc: ...@@ -83,25 +71,22 @@ rule fastqc:
threads:4 threads:4
params: params:
out_fastqc=directory(f"{FASTQC_DIR}"), out_fastqc=directory(f"{FASTQC_DIR}"),
fastqc= f"{fastQC}"
input: input:
r1 = f"{READS_DIR}{{reads}}_r1.fastq.gz", r1 = f"{READS_DIR}{{reads}}.fastq.gz"
r2 = f"{READS_DIR}{{reads}}_r2.fastq.gz"
output: output:
out_fastqc_r1 = f"{FASTQC_DIR}{{reads}}_r1_fastqc.html", out_fastqc_r1 = f"{FASTQC_DIR}{{reads}}_fastqc.html",
out_fastqc_r2 = f"{FASTQC_DIR}{{reads}}_r2_fastqc.html", out_fastqc_r1_zip = f"{FASTQC_DIR}{{reads}}_fastqc.zip",
out_fastqc_r1_zip = f"{FASTQC_DIR}{{reads}}_r1_fastqc.zip",
out_fastqc_r2_zip = f"{FASTQC_DIR}{{reads}}_r2_fastqc.zip"
message: message:
""" """
input= input=
r1 = {input.r1} r1 = {input.r1}
r2 = {input.r2}
threads={threads} threads={threads}
""" """
conda:
"envs/quality.yaml"
shell: shell:
""" """
{params.fastqc} -o {params.out_fastqc} -t 4 {input.r1} {input.r2} fastqc -o {params.out_fastqc} -t 4 {input.r1}
""" """
# FastQScreen agaisnt 3D7 and human # FastQScreen agaisnt 3D7 and human
...@@ -112,28 +97,25 @@ rule fastqscreen: ...@@ -112,28 +97,25 @@ rule fastqscreen:
threads:4 threads:4
params: params:
out_fastqs=directory(f"{FASTQSCREEN_DIR}"), out_fastqs=directory(f"{FASTQSCREEN_DIR}"),
fastqs= f"{fastQScreen}",
fastqs_conf = f"{fastQScreen_conf}" fastqs_conf = f"{fastQScreen_conf}"
input: input:
fastqc= rules.fastqc.output.out_fastqc_r2_zip, fastqc= rules.fastqc.output.out_fastqc_r1_zip,
r1 = f"{READS_DIR}{{reads}}_r1.fastq.gz", r1 = f"{READS_DIR}{{reads}}.fastq.gz",
r2 = f"{READS_DIR}{{reads}}_r2.fastq.gz"
output: output:
html_r1= f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.html", html_r1= f"{FASTQSCREEN_DIR}{{reads}}_screen.html",
html_r2=f"{FASTQSCREEN_DIR}{{reads}}_r2_screen.html", txt_r1=f"{FASTQSCREEN_DIR}{{reads}}_screen.txt",
txt_r1=f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.txt",
txt_r2=f"{FASTQSCREEN_DIR}{{reads}}_r2_screen.txt",
message: message:
""" """
Execute {rule} Execute {rule}
input= input=
r1 = {input.r1} r1 = {input.r1}
r2 = {input.r2}
threads={threads} threads={threads}
""" """
conda:
"envs/quality.yaml"
shell: shell:
""" """
{params.fastqs} --conf {params.fastqs_conf} --aligner BOWTIE2 {input.r1} {input.r2} --outdir {params.out_fastqs} fastq_screen --conf {params.fastqs_conf} --aligner BOWTIE2 {input.r1} --outdir {params.out_fastqs}
""" """
# Filtering and trimming with FastP # Filtering and trimming with FastP
...@@ -143,16 +125,11 @@ rule fastp: ...@@ -143,16 +125,11 @@ rule fastp:
""" """
params: params:
out_fastp=directory(f"{FASTP_DIR}"), out_fastp=directory(f"{FASTP_DIR}"),
fastp=f"{fastP}"
input: input:
fastqc= rules.fastqc.output.out_fastqc_r2_zip, fastqc = rules.fastqc.output.out_fastqc_r1_zip,
r1 = f"{READS_DIR}{{reads}}_r1.fastq.gz", r1 = f"{READS_DIR}{{reads}}.fastq.gz",
r2 = f"{READS_DIR}{{reads}}_r2.fastq.gz"
output: output:
out_paired_r1 = f"{FASTP_DIR}{{reads}}_r1_paired.fastq.gz", out_r1 = f"{FASTP_DIR}{{reads}}_fastp.fastq.gz",
out_unpaired_r1 = f"{FASTP_DIR}{{reads}}_r1_unpaired.fastq.gz",
out_paired_r2 = f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz",
out_unpaired_r2 = f"{FASTP_DIR}{{reads}}_r2_unpaired.fastq.gz",
out_report= f"{FASTP_DIR}{{reads}}_report_fastp.html", out_report= f"{FASTP_DIR}{{reads}}_report_fastp.html",
out_json= f"{FASTP_DIR}{{reads}}_fastp.json" out_json= f"{FASTP_DIR}{{reads}}_fastp.json"
...@@ -161,11 +138,12 @@ rule fastp: ...@@ -161,11 +138,12 @@ rule fastp:
Execute {rule} Execute {rule}
input= input=
r1 = {input.r1} r1 = {input.r1}
r2 = {input.r2}
""" """
conda:
"envs/quality.yaml"
shell: shell:
""" """
{params.fastp} --in1 {input.r1} --in2 {input.r2} --out1 {output.out_paired_r1} --out2 {output.out_paired_r2} --unpaired1 {output.out_unpaired_r1} --unpaired2 {output.out_unpaired_r2} -w 1 -h {output.out_report} -j {output.out_json} fastp -i {input.r1} -o {output.out_r1} -h {output.out_report} -j {output.out_json}
""" """
# FastQC after filtering # FastQC after filtering
...@@ -175,26 +153,22 @@ rule fastqc_af: ...@@ -175,26 +153,22 @@ rule fastqc_af:
""" """
params: params:
out_fastqc_af = directory(f"{FASTQC_AF_DIR}"), out_fastqc_af = directory(f"{FASTQC_AF_DIR}"),
fastqc= f"{fastQC}"
input: input:
fastp=f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz", r1 = rules.fastp.output.out_r1,
r1 = f"{FASTP_DIR}{{reads}}_r1_paired.fastq.gz",
r2 = f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz"
output: output:
out_fastqc_af_r1 = f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.html", out_fastqc_af_r1 = f"{FASTQC_AF_DIR}{{reads}}_paired_fastqc.html",
out_fastqc_af_r2 = f"{FASTQC_AF_DIR}{{reads}}_r2_paired_fastqc.html", out_fastqc_af_r1_zip = f"{FASTQC_AF_DIR}{{reads}}_paired_fastqc.zip",
out_fastqc_af_r1_zip = f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.zip",
out_fastqc_af_r2_zip = f"{FASTQC_AF_DIR}{{reads}}_r2_paired_fastqc.zip"
message: message:
""" """
Execute {rule} Execute {rule}
input= input=
r1 = {input.r1} r1 = {input.r1}
r2 = {input.r2}
""" """
conda:
"envs/quality.yaml"
shell: shell:
""" """
{params.fastqc} -o {params.out_fastqc_af} -t 4 {input.r1} {input.r2} fastqc -o {params.out_fastqc_af} -t 4 {input.r1}
""" """
# FastQScreen agaisnt 3D7 and human # FastQScreen agaisnt 3D7 and human
...@@ -204,36 +178,30 @@ rule multi_qc: ...@@ -204,36 +178,30 @@ rule multi_qc:
""" """
params: params:
out_multi = directory(f"{MULTIQC_DIR}"), out_multi = directory(f"{MULTIQC_DIR}"),
multiqc= f"{multiQC}"
input: input:
expand(f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.html", reads = READS), expand(f"{FASTQSCREEN_DIR}{{reads}}_screen.html", reads = READS),
expand(f"{FASTQSCREEN_DIR}{{reads}}_r2_screen.html", reads = READS), expand(f"{FASTQSCREEN_DIR}{{reads}}_screen.txt", reads = READS),
expand(f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.txt", reads = READS), expand(f"{FASTQC_DIR}{{reads}}_fastqc.html", reads=READS),
expand(f"{FASTQSCREEN_DIR}{{reads}}_r2_screen.txt", reads = READS), expand(f"{FASTQC_DIR}{{reads}}_fastqc.zip", reads=READS),
expand(f"{FASTQC_DIR}{{reads}}_r1_fastqc.html", reads=READS), expand(f"{FASTQC_AF_DIR}{{reads}}_paired_fastqc.html", reads=READS),
expand(f"{FASTQC_DIR}{{reads}}_r2_fastqc.html", reads=READS), expand(f"{FASTQC_AF_DIR}{{reads}}_paired_fastqc.zip", reads=READS),
expand(f"{FASTQC_DIR}{{reads}}_r1_fastqc.zip", reads=READS),
expand(f"{FASTQC_DIR}{{reads}}_r2_fastqc.zip", reads=READS),
expand(f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.html", reads=READS),
expand(f"{FASTQC_AF_DIR}{{reads}}_r2_paired_fastqc.html", reads=READS),
expand(f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.zip", reads=READS),
expand(f"{FASTQC_AF_DIR}{{reads}}_r2_paired_fastqc.zip", reads=READS),
expand(f"{FASTP_DIR}{{reads}}_report_fastp.html", reads=READS), expand(f"{FASTP_DIR}{{reads}}_report_fastp.html", reads=READS),
expand(f"{FASTP_DIR}{{reads}}_fastp.json",reads=READS) expand(f"{FASTP_DIR}{{reads}}_fastp.json",reads=READS)
output: output:
f"{MULTIQC_DIR}multiqc_report.html", f"{MULTIQC_DIR}multiqc_report.html",
conda:
"envs/quality.yaml"
shell: shell:
""" """
{params.multiqc} {input} -o {params.out_multi} multiQC {input} -o {params.out_multi}
""" """
# ----------------------------------- MAPPING -------------------------------------------------------------------
# Indexing of 3D7 genome (environ 1 min) # Indexing of 3D7 genome (environ 1 min)
rule hisat2_index: rule hisat2_index:
""" """
Make index with HISAT2 for 3D7 Make index with HISAT2 for 3D7
""" """
params:
hisat_build= f"{hisat_build}"
input: input:
fasta = f"{FASTA_REF}" fasta = f"{FASTA_REF}"
output: output:
...@@ -249,10 +217,12 @@ rule hisat2_index: ...@@ -249,10 +217,12 @@ rule hisat2_index:
log: log:
output = f"{INDEX_DIR}LOG/{name_hisat_index}_HISAT-INDEX.o", output = f"{INDEX_DIR}LOG/{name_hisat_index}_HISAT-INDEX.o",
error = f"{INDEX_DIR}LOG/{name_hisat_index}_HISAT-INDEX.e", error = f"{INDEX_DIR}LOG/{name_hisat_index}_HISAT-INDEX.e",
conda:
"envs/hisat2.yaml"
shell: shell:
""" """
ln -s {input.fasta} {output.index} 1>{log.output} 2>{log.error} ln -s {input.fasta} {output.index} 1>{log.output} 2>{log.error}
{params.hisat_build} {input.fasta} {output.index} --quiet hisat2-build {input.fasta} {output.index} --quiet
""" """
# Mapping with HISAT2 on Pf 3D7 (max 2 min par échantillon, total 10 min) # Mapping with HISAT2 on Pf 3D7 (max 2 min par échantillon, total 10 min)
...@@ -261,12 +231,9 @@ rule hisat2_map: ...@@ -261,12 +231,9 @@ rule hisat2_map:
Map reads on the genome with HISAT2 on paired end Map reads on the genome with HISAT2 on paired end
""" """
threads: 4 threads: 4
params:
hisat= f"{hisat}"
input: input:
reference = rules.hisat2_index.output.index, reference = rules.hisat2_index.output.index,
r1 = f"{FASTP_DIR}{{reads}}_r1_paired.fastq.gz", r1 = rules.fastp.output.out_r1,
r2 = f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz"
output: output:
summary = f"{MAPPINGH_DIR}{{reads}}_HISAT_summary.txt", summary = f"{MAPPINGH_DIR}{{reads}}_HISAT_summary.txt",
bam = f"{MAPPINGH_DIR}{{reads}}_HISAT.bam", bam = f"{MAPPINGH_DIR}{{reads}}_HISAT.bam",
...@@ -275,49 +242,25 @@ rule hisat2_map: ...@@ -275,49 +242,25 @@ rule hisat2_map:
Execute {rule} Execute {rule}
input: input:
reference : {input.reference} reference : {input.reference}
R1 : {input.r1} R1 : {input.r1}
R2 : {input.r2}
output: output:
bam: {output.bam} bam: {output.bam}
""" """
conda:
"envs/hisat2.yaml"
shell: shell:
""" """
{params.hisat} -p {threads} -x {input.reference} -1 {input.r1} -2 {input.r2} --summary-file {output.summary} | samtools view -S -b >{output.bam} hisat2 -p {threads} -x {input.reference} -u {input.r1} --summary-file {output.summary} | samtools view -S -b >{output.bam}
""" """
#Convert GFF in GTF with gffread for RNA-STAR
rule gff_to_gtf:
"""
Convert GFF in GTF for RNA-STAR
"""
threads: 8
params:
ggfread= f"{gffread}"
input:
gff = f"{REFERENCE_DIR}{{annotation}}.gff"
output:
gtf = f"{REFERENCE_DIR}{{annotation}}.gtf"
message:
"""
Execute {rule}
input:
gff : {input.gff}
output:
gtf: {output.gtf}
"""
shell:
"""
{params.ggfreadfread} {input.gff} -T -o {output.gtf}
"""
#Indexing for RNA-STAR (environ 2 min) #Indexing for RNA-STAR (environ 2 min)
rule star_index: rule star_index:
""" """
Make index with STAR for 3D7 Make index with STAR
""" """
threads: 4 threads: 4
params: params:
star= f"{star}",
dir = directory(f"{INDEX_STAR_DIR}") dir = directory(f"{INDEX_STAR_DIR}")
input: input:
fasta = f"{FASTA_REF}", fasta = f"{FASTA_REF}",
...@@ -333,11 +276,12 @@ rule star_index: ...@@ -333,11 +276,12 @@ rule star_index:
output: output:
index: {output.index} index: {output.index}
""" """
conda:
"envs/Star.yaml"
shell: shell:
""" """
ln -s {input.fasta} {output.index} ln -s {input.fasta} {output.index}
{params.star} --runMode genomeGenerate --runThreadN {threads} --genomeDir {params.dir} --genomeFastaFiles {input.fasta} --sjdbGTFfile {input.gtf} STAR --runMode genomeGenerate --runThreadN {threads} --genomeDir {params.dir} --genomeFastaFiles {input.fasta} --sjdbGTFfile {input.gtf}
""" """
#Mapping with RNA-STAR on Pf 3D7 (environ 10 min par échantillon, total 1h) #Mapping with RNA-STAR on Pf 3D7 (environ 10 min par échantillon, total 1h)
...@@ -347,14 +291,12 @@ rule star_mapping: ...@@ -347,14 +291,12 @@ rule star_mapping:
""" """
threads: 4 threads: 4
params: params:
star= f"{star}",
idx = directory(f"{INDEX_STAR_DIR}"), idx = directory(f"{INDEX_STAR_DIR}"),
bam = f"{MAPPINGS_DIR}{{reads}}" bam = f"{MAPPINGS_DIR}{{reads}}"
input: input:
idx = rules.star_index.output.index, idx = rules.star_index.output.index,
gtf = f"{GTF_REF}", gtf = f"{GTF_REF}",
r1 = f"{FASTP_DIR}{{reads}}_r1_paired.fastq.gz", r1 = rules.fastp.output.out_r1
r2 = f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz"
output: output:
bam = f"{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam" bam = f"{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam"
message: message:
...@@ -365,13 +307,14 @@ rule star_mapping: ...@@ -365,13 +307,14 @@ rule star_mapping:
gtf : {input.gtf} gtf : {input.gtf}
reads : reads :
r1: {input.r1} r1: {input.r1}
r2: {input.r2}
output: output:
bam: {output.bam} bam: {output.bam}
""" """
conda:
"envs/Star.yaml"
shell: shell:
""" """
{params.star} --runThreadN 4 --genomeDir {params.idx} --readFilesIn {input.r1} {input.r2} --readFilesCommand gunzip -c --outFileNamePrefix {params.bam} --outSAMtype BAM SortedByCoordinate --sjdbGTFfile {input.gtf} STAR --runThreadN 4 --genomeDir {params.idx} --readFilesIn {input.r1} --readFilesCommand gunzip -c --outFileNamePrefix {params.bam} --outSAMtype BAM SortedByCoordinate --sjdbGTFfile {input.gtf}
""" """
#Sort the BAM files with samtools #Sort the BAM files with samtools
...@@ -380,8 +323,6 @@ rule bam_sort: ...@@ -380,8 +323,6 @@ rule bam_sort:
Sort the BAM files Sort the BAM files
""" """
threads: 4 threads: 4
params:
samtools= f"{samtools}"
input: input:
star =f'{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam', star =f'{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam',
hisat = f"{MAPPINGH_DIR}{{reads}}_HISAT.bam" hisat = f"{MAPPINGH_DIR}{{reads}}_HISAT.bam"
...@@ -397,10 +338,12 @@ rule bam_sort: ...@@ -397,10 +338,12 @@ rule bam_sort:
output: output:
bam: {output.out_star}, {output.out_hisat} bam: {output.out_star}, {output.out_hisat}
""" """
conda:
"envs/samtools.yaml"
shell: shell:
""" """
{params.samtools} sort {input.star} > {output.out_star}; samtools sort {input.star} > {output.out_star};
{params.samtools} sort {input.hisat} > {output.out_hisat} samtools sort {input.hisat} > {output.out_hisat}
""" """
#Filtering of the BAM files with samtools #Filtering of the BAM files with samtools
...@@ -409,8 +352,6 @@ rule bam_filter: ...@@ -409,8 +352,6 @@ rule bam_filter:
Filtering BAM files Filtering BAM files
""" """
threads: 4 threads: 4
params:
samtools= f"{samtools}"
input: input:
star_bam = f"{BAM_DIR}{{reads}}_STAR_sort.bam", star_bam = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
hisat_bam = f"{BAM_DIR}{{reads}}_HISAT_sort.bam" hisat_bam = f"{BAM_DIR}{{reads}}_HISAT_sort.bam"
...@@ -425,12 +366,14 @@ rule bam_filter: ...@@ -425,12 +366,14 @@ rule bam_filter:
input: {input} input: {input}
""" """
conda:
"envs/samtools.yaml"
shell: shell:
""" """
{params.samtools} view -b -F 4 {input.star_bam} > {output.mapped_star} samtools view -b -F 4 {input.star_bam} > {output.mapped_star}
{params.samtools} view -b -f 4 {input.star_bam} > {output.unmapped_star} samtools view -b -f 4 {input.star_bam} > {output.unmapped_star}
{params.samtools} view -b -F 4 {input.hisat_bam} > {output.mapped_hisat} samtools view -b -F 4 {input.hisat_bam} > {output.mapped_hisat}
{params.samtools} view -b -f 4 {input.hisat_bam} > {output.unmapped_hisat} samtools view -b -f 4 {input.hisat_bam} > {output.unmapped_hisat}
""" """
# Identify the duplicates in BAM with Picard tools # Identify the duplicates in BAM with Picard tools
...@@ -439,8 +382,6 @@ rule bam_markdup: ...@@ -439,8 +382,6 @@ rule bam_markdup:
Mark duplicates in BAM files Mark duplicates in BAM files
""" """
threads: 4 threads: 4
params:
picardtools= f"{picardtools}"
input: input:
bam_star=f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam", bam_star=f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam",
bam_hisat = f"{FILTER_DIR}{{reads}}_HISAT_sort_mapped.bam", bam_hisat = f"{FILTER_DIR}{{reads}}_HISAT_sort_mapped.bam",
...@@ -454,10 +395,12 @@ rule bam_markdup: ...@@ -454,10 +395,12 @@ rule bam_markdup:
Execute {rule} Execute {rule}
input: {input} input: {input}
""" """
conda:
"envs/picard.yaml"
shell: shell:
""" """
java -jar {params.picardtools} MarkDuplicates I={input.bam_star} O={output.star} M={output.star_txt} java -jar picard.jar MarkDuplicates I={input.bam_star} O={output.star} M={output.star_txt}
java -jar {params.picardtools} MarkDuplicates I={input.bam_hisat} O={output.hisat} M={output.hisat_txt} java -jar picard.jar MarkDuplicates I={input.bam_hisat} O={output.hisat} M={output.hisat_txt}
""" """
#Index the BAM files with samtools #Index the BAM files with samtools
...@@ -466,8 +409,6 @@ rule bam_index: ...@@ -466,8 +409,6 @@ rule bam_index:
Index BAM files Index BAM files
""" """
threads: 4 threads: 4
params:
samtools= f"{samtools}"
input: input:
bam_star = f"{BAM_DIR}{{reads}}_STAR_sort.bam", bam_star = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
bam_hisat = f"{BAM_DIR}{{reads}}_HISAT_sort.bam", bam_hisat = f"{BAM_DIR}{{reads}}_HISAT_sort.bam",
...@@ -482,14 +423,16 @@ rule bam_index: ...@@ -482,14 +423,16 @@ rule bam_index:
Execute {rule} Execute {rule}
input: {input} input: {input}
""" """
conda:
"envs/samtools.yaml"
shell: shell:
""" """
{params.samtools} index {input.bam_star} samtools index {input.bam_star}
{params.samtools} index {input.bam_hisat} samtools index {input.bam_hisat}
{params.samtools} index {input.bam_star_filt} samtools index {input.bam_star_filt}
{params.samtools} index {input.bam_hisat_filt} samtools index {input.bam_hisat_filt}
{params.samtools} index {input.bam_star_filt_md} samtools index {input.bam_star_filt_md}
{params.samtools} index {input.bam_hisat_filt_md} samtools index {input.bam_hisat_filt_md}
""" """
#Stats on the BAM files with samtools #Stats on the BAM files with samtools
...@@ -498,8 +441,6 @@ rule bam_stats: ...@@ -498,8 +441,6 @@ rule bam_stats:
Statistics data on the BAM files Statistics data on the BAM files
""" """
threads: 4 threads: 4
params:
samtools= f"{samtools}"
input: input:
bam_star = f"{BAM_DIR}{{reads}}_STAR_sort.bam", bam_star = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
bam_hisat = f"{BAM_DIR}{{reads}}_HISAT_sort.bam", bam_hisat = f"{BAM_DIR}{{reads}}_HISAT_sort.bam",
...@@ -519,24 +460,25 @@ rule bam_stats: ...@@ -519,24 +460,25 @@ rule bam_stats:
Execute {rule} Execute {rule}
input: {input} input: {input}
""" """
conda:
"envs/samtools.yaml"
shell: shell:
""" """
{params.samtools} flagstat {input.bam_star} > {output.flagstat_star} samtools flagstat {input.bam_star} > {output.flagstat_star}
{params.samtools} flagstat {input.bam_hisat} > {output.flagstat_hisat} samtools flagstat {input.bam_hisat} > {output.flagstat_hisat}
{params.samtools} flagstat {input.bam_star_filt} > {output.flagstat_star_filt} samtools flagstat {input.bam_star_filt} > {output.flagstat_star_filt}
{params.samtools} flagstat {input.bam_star_filt_md} > {output.flagstat_star_filt_md} samtools flagstat {input.bam_star_filt_md} > {output.flagstat_star_filt_md}
{params.samtools} flagstat {input.bam_hisat_filt} > {output.flagstat_hisat_filt} samtools flagstat {input.bam_hisat_filt} > {output.flagstat_hisat_filt}
{params.samtools} flagstat {input.bam_hisat_filt_md} > {output.flagstat_hisat_filt_md} samtools flagstat {input.bam_hisat_filt_md} > {output.flagstat_hisat_filt_md}
""" """
# ----------------------------------- COUNT -------------------------------------------------------------------
#Create the count table from the BAM files with HTseq count #Create the count table from the BAM files with HTseq count
rule htseq_counts : rule htseq_counts :
""" """
Create the count table with HTseq-count Create the count table with HTseq-count
""" """
threads: 4 threads: 4
params:
htseq= f"{htseq}"
input: input:
bam_index = rules.bam_index.output.bai, bam_index = rules.bam_index.output.bai,
bam_star = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam", bam_star = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
...@@ -553,10 +495,12 @@ rule htseq_counts : ...@@ -553,10 +495,12 @@ rule htseq_counts :
gtf: {input.gtf} gtf: {input.gtf}
output: {output.count_table_star} output: {output.count_table_star}
""" """
conda:
"envs/htseq.yaml"
shell: shell:
""" """
{params.htseq} -t exon -i gene_id -f bam {input.bam_star} {input.gtf} > {output.count_table_star} htseq-count -t exon -i gene_id -f bam {input.bam_star} {input.gtf} > {output.count_table_star}
{params.htseq} -t exon -i gene_id -f bam {input.bam_hisat} {input.gtf} > {output.count_table_hisat} htseq-count -t exon -i gene_id -f bam {input.bam_hisat} {input.gtf} > {output.count_table_hisat}
""" """
#Create the count table from the BAM files with stringtie #Create the count table from the BAM files with stringtie
...@@ -565,8 +509,6 @@ rule stringtie : ...@@ -565,8 +509,6 @@ rule stringtie :
Create the count table with HTseq-count Create the count table with HTseq-count
""" """
threads: 4 threads: 4
params:
stringtie= f"{stringtie}"
input: input:
bam_index = rules.bam_index.output.bai, bam_index = rules.bam_index.output.bai,
bam_star = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam", bam_star = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
...@@ -585,11 +527,12 @@ rule stringtie : ...@@ -585,11 +527,12 @@ rule stringtie :
gtf: {input.gtf} gtf: {input.gtf}
output: {output.star_tsv} output: {output.star_tsv}
""" """
conda:
"envs/stringtie.yaml"
shell: shell:
""" """
{params.stringtie} -p 8 -G {input.gtf} -e -B -o {output.star_gtf} -A {output.star_tsv} {input.bam_star} stringtie -p 8 -G {input.gtf} -e -B -o {output.star_gtf} -A {output.star_tsv} {input.bam_star}
{params.stringtie} -p 8 -G {input.gtf} -e -B -o {output.hisat_gtf} -A {output.hisat_tsv} {input.bam_hisat} stringtie -p 8 -G {input.gtf} -e -B -o {output.hisat_gtf} -A {output.hisat_tsv} {input.bam_hisat}
""" """
# Create a list of GTF created by Stringtie # Create a list of GTF created by Stringtie
...@@ -631,14 +574,23 @@ rule prepDE_stringtie_table: ...@@ -631,14 +574,23 @@ rule prepDE_stringtie_table:
input : input :
mergelist_star = rules.list_for_prepDE.output.star_list, mergelist_star = rules.list_for_prepDE.output.star_list,
mergelist_hisat = rules.list_for_prepDE.output.hisat_list, mergelist_hisat = rules.list_for_prepDE.output.hisat_list,
script = f"{PREPDE}"
output : output :
gcsv_star = f'{STRINGTIE_DIR}STAR_gene_count_matrix.csv', gcsv_star = f'{STRINGTIE_DIR}STAR_gene_count_matrix.csv',
tcsv_star = f'{STRINGTIE_DIR}STAR_transcript_count_matrix.csv', tcsv_star = f'{STRINGTIE_DIR}STAR_transcript_count_matrix.csv',
gcsv_hisat = f'{STRINGTIE_DIR}HISAT_gene_count_matrix.csv', gcsv_hisat = f'{STRINGTIE_DIR}HISAT_gene_count_matrix.csv',
tcsv_hisat = f'{STRINGTIE_DIR}HISAT_transcript_count_matrix.csv', tcsv_hisat = f'{STRINGTIE_DIR}HISAT_transcript_count_matrix.csv',
conda:
"envs/stringtie.yaml"
shell: shell:
""" """
python2 {input.script} -i {input.mergelist_star} -t {output.tcsv_star} -g {output.gcsv_star} python2 prepDE.py -i {input.mergelist_star} -t {output.tcsv_star} -g {output.gcsv_star}
python2 {input.script} -i {input.mergelist_hisat} -t {output.tcsv_hisat} -g {output.gcsv_hisat} python2 prepDE.py -i {input.mergelist_hisat} -t {output.tcsv_hisat} -g {output.gcsv_hisat}
""" """
\ No newline at end of file
# pseudo count:
rule Kalisto:
# Dossiers que l'on va utiliser # Dossiers que l'on va utiliser
'REFERENCE': "/Users/COHEN/Documents/BulKRNA/TestSnakeMake/01_Genome/" 'FASTA': "DATA/ref/riceNUCMSU7.fasta"
'FASTA': "/Users/COHEN/Documents/BulKRNA/TestSnakeMake/01_Genome/plasmodb_pfalciparum3d7_genome.fasta" 'GFF': "DATA/annotation/riceNUCMSU7.gff"
'GFF': "/Users/COHEN/Documents/BulKRNA/TestSnakeMake/01_Genome/plasmodb-46_pfalciparum3d7.gff" 'GTF': "DATA/annotation/riceNUCMSU7.gtf"
'GTF': "/Users/COHEN/Documents/BulKRNA/TestSnakeMake/01_Genome/plasmodb-46_pfalciparum3d7.gtf" 'READS': "DATA/fastq/"
'READS': "/Users/COHEN/Documents/BulKRNA/TestSnakeMake/02_RawData/" 'OUTDIR': "output"
'FASTQC': "/Users/COHEN/Documents/BulKRNA/TestSnakeMake/03_FastQC/"
'FASTQSCREEN': "/Users/COHEN/Documents/BulKRNA/TestSnakeMake/04_FastQScreen/"
'FASTP': "/Users/COHEN/Documents/BulKRNA/TestSnakeMake/05_FastP/"
'FASTQC_AF': "/Users/COHEN/Documents/BulKRNA/TestSnakeMake/06_FastQC_AfterFT/"
'MULTIQC': "/Users/COHEN/Documents/BulKRNA/TestSnakeMake/07_MultiQC/"
'INDEX': "/Users/COHEN/Documents/BulKRNA/TestSnakeMake/08_Index_HISAT_3D7/"
'name_hisat_index': "PFalci3D7"
'MAPPINGH': "/Users/COHEN/Documents/BulKRNA/TestSnakeMake/09_Mapping_HISAT_3D7/"
'INDEX_STAR': "/Users/COHEN/Documents/BulKRNA/TestSnakeMake/10_Index_STAR_3D7/"
'MAPPING_STAR': "/Users/COHEN/Documents/BulKRNA/TestSnakeMake/11_Mapping_STAR_3D7/"
'BAMFILES': "/Users/COHEN/Documents/BulKRNA/TestSnakeMake/12_BAMfiles/"
'STATS': "/Users/COHEN/Documents/BulKRNA/TestSnakeMake/14_Stats_BAM/"
'FILTER': "/Users/COHEN/Documents/BulKRNA/TestSnakeMake/13_Filtered_BAM/"
'DUPL': "/Users/COHEN/Documents/BulKRNA/TestSnakeMake/15_MarkedDupli_BAM/"
'HTSEQ': "/Users/COHEN/Documents/BulKRNA/TestSnakeMake/16_HTseq-count/"
'STRINGTIE': "/Users/COHEN/Documents/BulKRNA/TestSnakeMake/17_Stringtie/"
### logiciels ### logiciels
'FastQC': "fastqc" 'FastQScreen_conf': "ConfigFastqScreen.txt"
'FastQScreen': "fastq_screen" 'name_hisat_index': "rice"
'FastQScreen_conf': "/Users/COHEN/Documents/Logiciels/FastQ-Screen-0.14.1/fastq_screen.conf" \ No newline at end of file
'FastP': "fastp"
'MultiQC': "multiQC"
'hisat-build' : "/Users/COHEN/Documents/Logiciels/hisat2-2.2.0/hisat2-build"
'hisat': "/Users/COHEN/Documents/Logiciels/hisat2-2.2.0/hisat2"
'gffread': "/Users/COHEN/Documents/Logiciels/gffread-0.12.2.OSX_x86_64/gffread"
'star': "/Users/COHEN/Documents/Logiciels/STAR-2-7-5c/bin/MacOSX_x86_64/STAR"
'samtools': "samtools"
'picardtools': "/Users/COHEN/Documents/Logiciels/picard.jar"
'htseq': "htseq-count"
'stringtie': "/Users/COHEN/Documents/Logiciels/stringtie-2.1.4.OSX_x86_64/stringtie"
'PREPDE': "/Users/COHEN/Documents/Logiciels/stringtie-2.1.4.OSX_x86_64/prepDE.py"
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment