Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
RNAja-pipeline
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
PHIM
RNAja-pipeline
Commits
979ae7f6
Commit
979ae7f6
authored
4 years ago
by
aurore.comte_ird.fr
Browse files
Options
Downloads
Patches
Plain Diff
update
parent
7ba58080
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
Snakefile
+137
-185
137 additions, 185 deletions
Snakefile
configPT.yaml
+7
-35
7 additions, 35 deletions
configPT.yaml
with
144 additions
and
220 deletions
ProjetT-Pip-RNAseq
→
Snakefile
+
137
−
185
View file @
979ae7f6
# Importation des librairies glob, sys, et re
# Importation des librairies glob, sys, et re
import glob,sys,re
import glob,sys,re
from pathlib import Path
## appel du fichier de config
## appel du fichier de config
configfile: "configPT.yaml"
configfile: "configPT.yaml"
## Definition des variables "constantes" à partir du fichier config (attention aux arborescences dans le yaml)
## Definition des variables "constantes" à partir du fichier config (attention aux arborescences dans le yaml)
REFERENCE_DIR = config['REFERENCE']
FASTA_REF= config['FASTA']
FASTA_REF= config['FASTA']
GFF_REF= config['GFF']
GFF_REF= config['GFF']
GTF_REF= config['GTF']
GTF_REF= config['GTF']
READS_DIR = config['READS']
READS_DIR = config['READS']
FASTQC_DIR = config['FASTQC']
OUTDIR = Path(config["OUTDIR"]).resolve().as_posix()
FASTQSCREEN_DIR = config['FASTQSCREEN']
FASTP_DIR = config['FASTP']
FASTQC_AF_DIR = config['FASTQC_AF']
MULTIQC_DIR = config['MULTIQC']
INDEX_DIR = config['INDEX']
INDEX_STAR_DIR = config['INDEX_STAR']
MAPPINGH_DIR = config['MAPPINGH']
MAPPINGS_DIR = config['MAPPING_STAR']
BAM_DIR = config['BAMFILES']
STATS_DIR = config['STATS']
FILTER_DIR = config['FILTER']
DUPL_DIR = config['DUPL']
HTSEQ_DIR = config['HTSEQ']
STRINGTIE_DIR = config['STRINGTIE']
PREPDE = config['PREPDE']
name_hisat_index = config['name_hisat_index']
FASTQC_DIR = f"{OUTDIR}/01_FastQC/"
FASTQSCREEN_DIR = f"{OUTDIR}/02_FastQScreen/"
FASTP_DIR = f"{OUTDIR}/03_FastP/"
FASTQC_AF_DIR = f"{OUTDIR}/04_FastQC_afterFT/"
MULTIQC_DIR = f"{OUTDIR}/05_MULTIQC/"
INDEX_DIR = f"{OUTDIR}/06_Index_HISAT/"
INDEX_STAR_DIR = f"{OUTDIR}/07_index_HISAT/"
MAPPINGH_DIR = f"{OUTDIR}/08_mapping_HISAT/"
MAPPINGS_DIR = f"{OUTDIR}/09_mapping_STAR/"
BAM_DIR = f"{OUTDIR}/12_BAMfiles/"
STATS_DIR = f"{OUTDIR}/14_Stats_BAM/"
FILTER_DIR = f"{OUTDIR}/13_Filtered_BAM/"
DUPL_DIR = f"{OUTDIR}/15_MarkedDupli_BAM/"
HTSEQ_DIR = f"{OUTDIR}/16_HTseq-count/"
STRINGTIE_DIR = f"{OUTDIR}/17_Stringtie/"
name_hisat_index = config['name_hisat_index']
## Software path
## Software path
fastQC = config['FastQC']
fastQScreen = config['FastQScreen']
fastQScreen_conf = config['FastQScreen_conf']
fastQScreen_conf = config['FastQScreen_conf']
fastP = config['FastP']
multiQC = config['MultiQC']
hisat_build = config['hisat-build']
hisat = config['hisat']
gffread = config['gffread']
star = config['star']
samtools = config['samtools']
picardtools = config['picardtools']
htseq = config['htseq']
stringtie = config['stringtie']
#Wild cards
#Wild cards
RE
FERENCE
, = glob_wildcards(f"{RE
FERENCE
_DIR}{{re
ference
}}.fast
a
")
RE
ADS
, = glob_wildcards(f"{RE
ADS
_DIR}{{re
ads
}}.fast
q.gz
")
ANNOTATION, = glob_wildcards(f"{REFERENCE_DIR}{{annotation}}.gff")
MAPPING = ["STAR", "HISAT2"]
READS, = glob_wildcards(f"{READS_DIR}{{reads}}_r1.fastq.gz")
COUNT = ["HTSEQ", "STINGTIE"]
# Regle finale pour vérifier la présence des outputs et si ils sont présents ils ne lancent pas le job
# Regle finale pour vérifier la présence des outputs et si ils sont présents ils ne lancent pas le job
rule final:
rule final:
input:
input:
#
out_fastqc = expand(f"{FASTQC_DIR}{{reads}}_
r1_
fastqc.html", reads = READS),
out_fastqc = expand(f"{FASTQC_DIR}{{reads}}_fastqc.html", reads = READS),
#
out_fastqs = expand(f"{FASTQSCREEN_DIR}{{reads}}_
r1_
screen.html", reads = READS),
out_fastqs = expand(f"{FASTQSCREEN_DIR}{{reads}}_screen.html", reads = READS),
#
out_fastp = expand(f"{FASTP_DIR}{{reads}}_report_fastp.html", reads = READS),
out_fastp = expand(f"{FASTP_DIR}{{reads}}_report_fastp.html", reads = READS),
#out_fastqc_af = expand(f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.html", reads = READS),
#out_fastqc_af = expand(f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.html", reads = READS),
out_multiqc = expand(f"{MULTIQC_DIR}multiqc_report.html"),
out_multiqc = expand(f"{MULTIQC_DIR}multiqc_report.html"),
#out_hisat_idx = expand(f"{INDEX_DIR}P.Falci_3D7"),
out_hisat_map = expand(f"{MAPPINGH_DIR}{{reads}}_HISAT.bam" , reads = READS),
#out_hisat_map = expand(f"{MAPPINGH_DIR}{{reads}}_HISAT.bam" , reads = READS),
out_star_map = expand(f"{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam" , reads = READS),
#out_star_map = expand(f"{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam" , reads = READS),
out_bam_sort = expand(f"{BAM_DIR}{{reads}}_STAR_sort.bam", reads = READS),
#out_bam_sort = expand(f"{BAM_DIR}{{reads}}_STAR_sort.bam", reads = READS),
out_bam_filt = expand(f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam", reads = READS),
#out_bam_filt = expand(f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam", reads = READS),
out_bam_index = expand(f"{BAM_DIR}{{reads}}_STAR_sort.bam.bai", reads = READS),
#out_bam_index = expand(f"{BAM_DIR}{{reads}}_STAR_sort.bam.bai", reads = READS),
out_bam_stats = expand(f"{STATS_DIR}{{reads}}_STAR_sort_flagstat.txt", reads = READS),
out_bam_stats = expand(f"{STATS_DIR}{{reads}}_STAR_sort_flagstat.txt", reads = READS),
#
out_bam_md = expand(f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup_metric.txt", reads = READS),
out_bam_md = expand(f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup_metric.txt", reads = READS),
out_htseq = expand(f"{HTSEQ_DIR}HISAT/{{reads}}.txt", reads = READS),
out_htseq = expand(f"{HTSEQ_DIR}HISAT/{{reads}}.txt", reads = READS),
#out_gtf_list = expand(f'{STRINGTIE_DIR}STRINGTIE_MERGE/STAR_gtf_list.txt'),
#out_gtf_list = expand(f'{STRINGTIE_DIR}STRINGTIE_MERGE/STAR_gtf_list.txt'),
#out_stringtie = expand(f'{STRINGTIE_DIR}STRINGTIE_MERGE/STAR_stringtie_merged.gtf'),
#out_stringtie = expand(f'{STRINGTIE_DIR}STRINGTIE_MERGE/STAR_stringtie_merged.gtf'),
#
out_stringtiecc = expand(f'{STRINGTIE_DIR}STAR/{{reads}}/{{reads}}.gtf', reads=READS),
out_stringtiecc = expand(f'{STRINGTIE_DIR}STAR/{{reads}}/{{reads}}.gtf', reads=READS),
#out_gtf_list = expand(f'{STRINGTIE_DIR}HISAT_gtf_list.txt'),
#out_gtf_list = expand(f'{STRINGTIE_DIR}HISAT_gtf_list.txt'),
#
out_strg_list = expand(f'{STRINGTIE_DIR}HISAT_Stringtie_list.txt'),
out_strg_list = expand(f'{STRINGTIE_DIR}HISAT_Stringtie_list.txt'),
out_csv = expand(f'{STRINGTIE_DIR}HISAT_transcript_count_matrix.csv')
out_csv = expand(f'{STRINGTIE_DIR}HISAT_transcript_count_matrix.csv')
# ----------------------------------- QUALITY -------------------------------------------------------------------
# FastQC before filtering
# FastQC before filtering
rule fastqc:
rule fastqc:
"""
"""
...
@@ -83,25 +71,22 @@ rule fastqc:
...
@@ -83,25 +71,22 @@ rule fastqc:
threads:4
threads:4
params:
params:
out_fastqc=directory(f"{FASTQC_DIR}"),
out_fastqc=directory(f"{FASTQC_DIR}"),
fastqc= f"{fastQC}"
input:
input:
r1 = f"{READS_DIR}{{reads}}_r1.fastq.gz",
r1 = f"{READS_DIR}{{reads}}.fastq.gz"
r2 = f"{READS_DIR}{{reads}}_r2.fastq.gz"
output:
output:
out_fastqc_r1 = f"{FASTQC_DIR}{{reads}}_r1_fastqc.html",
out_fastqc_r1 = f"{FASTQC_DIR}{{reads}}_fastqc.html",
out_fastqc_r2 = f"{FASTQC_DIR}{{reads}}_r2_fastqc.html",
out_fastqc_r1_zip = f"{FASTQC_DIR}{{reads}}_fastqc.zip",
out_fastqc_r1_zip = f"{FASTQC_DIR}{{reads}}_r1_fastqc.zip",
out_fastqc_r2_zip = f"{FASTQC_DIR}{{reads}}_r2_fastqc.zip"
message:
message:
"""
"""
input=
input=
r1 = {input.r1}
r1 = {input.r1}
r2 = {input.r2}
threads={threads}
threads={threads}
"""
"""
conda:
"envs/quality.yaml"
shell:
shell:
"""
"""
{params.
fastqc
}
-o {params.out_fastqc} -t 4 {input.r1}
{input.r2}
fastqc -o {params.out_fastqc} -t 4 {input.r1}
"""
"""
# FastQScreen agaisnt 3D7 and human
# FastQScreen agaisnt 3D7 and human
...
@@ -112,28 +97,25 @@ rule fastqscreen:
...
@@ -112,28 +97,25 @@ rule fastqscreen:
threads:4
threads:4
params:
params:
out_fastqs=directory(f"{FASTQSCREEN_DIR}"),
out_fastqs=directory(f"{FASTQSCREEN_DIR}"),
fastqs= f"{fastQScreen}",
fastqs_conf = f"{fastQScreen_conf}"
fastqs_conf = f"{fastQScreen_conf}"
input:
input:
fastqc= rules.fastqc.output.out_fastqc_r2_zip,
fastqc= rules.fastqc.output.out_fastqc_r1_zip,
r1 = f"{READS_DIR}{{reads}}_r1.fastq.gz",
r1 = f"{READS_DIR}{{reads}}.fastq.gz",
r2 = f"{READS_DIR}{{reads}}_r2.fastq.gz"
output:
output:
html_r1= f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.html",
html_r1= f"{FASTQSCREEN_DIR}{{reads}}_screen.html",
html_r2=f"{FASTQSCREEN_DIR}{{reads}}_r2_screen.html",
txt_r1=f"{FASTQSCREEN_DIR}{{reads}}_screen.txt",
txt_r1=f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.txt",
txt_r2=f"{FASTQSCREEN_DIR}{{reads}}_r2_screen.txt",
message:
message:
"""
"""
Execute {rule}
Execute {rule}
input=
input=
r1 = {input.r1}
r1 = {input.r1}
r2 = {input.r2}
threads={threads}
threads={threads}
"""
"""
conda:
"envs/quality.yaml"
shell:
shell:
"""
"""
{params.fastqs}
--conf {params.fastqs_conf} --aligner BOWTIE2 {input.r1}
{input.r2}
--outdir {params.out_fastqs}
fastq_screen
--conf {params.fastqs_conf} --aligner BOWTIE2 {input.r1} --outdir {params.out_fastqs}
"""
"""
# Filtering and trimming with FastP
# Filtering and trimming with FastP
...
@@ -143,16 +125,11 @@ rule fastp:
...
@@ -143,16 +125,11 @@ rule fastp:
"""
"""
params:
params:
out_fastp=directory(f"{FASTP_DIR}"),
out_fastp=directory(f"{FASTP_DIR}"),
fastp=f"{fastP}"
input:
input:
fastqc= rules.fastqc.output.out_fastqc_r2_zip,
fastqc = rules.fastqc.output.out_fastqc_r1_zip,
r1 = f"{READS_DIR}{{reads}}_r1.fastq.gz",
r1 = f"{READS_DIR}{{reads}}.fastq.gz",
r2 = f"{READS_DIR}{{reads}}_r2.fastq.gz"
output:
output:
out_paired_r1 = f"{FASTP_DIR}{{reads}}_r1_paired.fastq.gz",
out_r1 = f"{FASTP_DIR}{{reads}}_fastp.fastq.gz",
out_unpaired_r1 = f"{FASTP_DIR}{{reads}}_r1_unpaired.fastq.gz",
out_paired_r2 = f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz",
out_unpaired_r2 = f"{FASTP_DIR}{{reads}}_r2_unpaired.fastq.gz",
out_report= f"{FASTP_DIR}{{reads}}_report_fastp.html",
out_report= f"{FASTP_DIR}{{reads}}_report_fastp.html",
out_json= f"{FASTP_DIR}{{reads}}_fastp.json"
out_json= f"{FASTP_DIR}{{reads}}_fastp.json"
...
@@ -161,11 +138,12 @@ rule fastp:
...
@@ -161,11 +138,12 @@ rule fastp:
Execute {rule}
Execute {rule}
input=
input=
r1 = {input.r1}
r1 = {input.r1}
r2 = {input.r2}
"""
"""
conda:
"envs/quality.yaml"
shell:
shell:
"""
"""
{params.fastp} --in1 {input.r1} --in2 {input.r2} --out1 {output.out_paired_r1} --out2 {output.out_paired_r2} --unpaired1 {output.out_unpaired_r1} --unpaired2 {output.out_unpaired_r2} -w 1
-h {output.out_report} -j {output.out_json}
fastp -i {input.r1} -o {output.out_r1}
-h {output.out_report} -j {output.out_json}
"""
"""
# FastQC after filtering
# FastQC after filtering
...
@@ -175,26 +153,22 @@ rule fastqc_af:
...
@@ -175,26 +153,22 @@ rule fastqc_af:
"""
"""
params:
params:
out_fastqc_af = directory(f"{FASTQC_AF_DIR}"),
out_fastqc_af = directory(f"{FASTQC_AF_DIR}"),
fastqc= f"{fastQC}"
input:
input:
fastp=f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz",
r1 = rules.fastp.output.out_r1,
r1 = f"{FASTP_DIR}{{reads}}_r1_paired.fastq.gz",
r2 = f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz"
output:
output:
out_fastqc_af_r1 = f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.html",
out_fastqc_af_r1 = f"{FASTQC_AF_DIR}{{reads}}_paired_fastqc.html",
out_fastqc_af_r2 = f"{FASTQC_AF_DIR}{{reads}}_r2_paired_fastqc.html",
out_fastqc_af_r1_zip = f"{FASTQC_AF_DIR}{{reads}}_paired_fastqc.zip",
out_fastqc_af_r1_zip = f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.zip",
out_fastqc_af_r2_zip = f"{FASTQC_AF_DIR}{{reads}}_r2_paired_fastqc.zip"
message:
message:
"""
"""
Execute {rule}
Execute {rule}
input=
input=
r1 = {input.r1}
r1 = {input.r1}
r2 = {input.r2}
"""
"""
conda:
"envs/quality.yaml"
shell:
shell:
"""
"""
{params.
fastqc
}
-o {params.out_fastqc_af} -t 4 {input.r1}
{input.r2}
fastqc -o {params.out_fastqc_af} -t 4 {input.r1}
"""
"""
# FastQScreen agaisnt 3D7 and human
# FastQScreen agaisnt 3D7 and human
...
@@ -204,36 +178,30 @@ rule multi_qc:
...
@@ -204,36 +178,30 @@ rule multi_qc:
"""
"""
params:
params:
out_multi = directory(f"{MULTIQC_DIR}"),
out_multi = directory(f"{MULTIQC_DIR}"),
multiqc= f"{multiQC}"
input:
input:
expand(f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.html", reads = READS),
expand(f"{FASTQSCREEN_DIR}{{reads}}_screen.html", reads = READS),
expand(f"{FASTQSCREEN_DIR}{{reads}}_r2_screen.html", reads = READS),
expand(f"{FASTQSCREEN_DIR}{{reads}}_screen.txt", reads = READS),
expand(f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.txt", reads = READS),
expand(f"{FASTQC_DIR}{{reads}}_fastqc.html", reads=READS),
expand(f"{FASTQSCREEN_DIR}{{reads}}_r2_screen.txt", reads = READS),
expand(f"{FASTQC_DIR}{{reads}}_fastqc.zip", reads=READS),
expand(f"{FASTQC_DIR}{{reads}}_r1_fastqc.html", reads=READS),
expand(f"{FASTQC_AF_DIR}{{reads}}_paired_fastqc.html", reads=READS),
expand(f"{FASTQC_DIR}{{reads}}_r2_fastqc.html", reads=READS),
expand(f"{FASTQC_AF_DIR}{{reads}}_paired_fastqc.zip", reads=READS),
expand(f"{FASTQC_DIR}{{reads}}_r1_fastqc.zip", reads=READS),
expand(f"{FASTQC_DIR}{{reads}}_r2_fastqc.zip", reads=READS),
expand(f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.html", reads=READS),
expand(f"{FASTQC_AF_DIR}{{reads}}_r2_paired_fastqc.html", reads=READS),
expand(f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.zip", reads=READS),
expand(f"{FASTQC_AF_DIR}{{reads}}_r2_paired_fastqc.zip", reads=READS),
expand(f"{FASTP_DIR}{{reads}}_report_fastp.html", reads=READS),
expand(f"{FASTP_DIR}{{reads}}_report_fastp.html", reads=READS),
expand(f"{FASTP_DIR}{{reads}}_fastp.json",reads=READS)
expand(f"{FASTP_DIR}{{reads}}_fastp.json",reads=READS)
output:
output:
f"{MULTIQC_DIR}multiqc_report.html",
f"{MULTIQC_DIR}multiqc_report.html",
conda:
"envs/quality.yaml"
shell:
shell:
"""
"""
{params.
multi
qc}
{input} -o {params.out_multi}
multi
QC
{input} -o {params.out_multi}
"""
"""
# ----------------------------------- MAPPING -------------------------------------------------------------------
# Indexing of 3D7 genome (environ 1 min)
# Indexing of 3D7 genome (environ 1 min)
rule hisat2_index:
rule hisat2_index:
"""
"""
Make index with HISAT2 for 3D7
Make index with HISAT2 for 3D7
"""
"""
params:
hisat_build= f"{hisat_build}"
input:
input:
fasta = f"{FASTA_REF}"
fasta = f"{FASTA_REF}"
output:
output:
...
@@ -249,10 +217,12 @@ rule hisat2_index:
...
@@ -249,10 +217,12 @@ rule hisat2_index:
log:
log:
output = f"{INDEX_DIR}LOG/{name_hisat_index}_HISAT-INDEX.o",
output = f"{INDEX_DIR}LOG/{name_hisat_index}_HISAT-INDEX.o",
error = f"{INDEX_DIR}LOG/{name_hisat_index}_HISAT-INDEX.e",
error = f"{INDEX_DIR}LOG/{name_hisat_index}_HISAT-INDEX.e",
conda:
"envs/hisat2.yaml"
shell:
shell:
"""
"""
ln -s {input.fasta} {output.index} 1>{log.output} 2>{log.error}
ln -s {input.fasta} {output.index} 1>{log.output} 2>{log.error}
{params.
hisat
_
build
}
{input.fasta} {output.index} --quiet
hisat
2-
build {input.fasta} {output.index} --quiet
"""
"""
# Mapping with HISAT2 on Pf 3D7 (max 2 min par échantillon, total 10 min)
# Mapping with HISAT2 on Pf 3D7 (max 2 min par échantillon, total 10 min)
...
@@ -261,12 +231,9 @@ rule hisat2_map:
...
@@ -261,12 +231,9 @@ rule hisat2_map:
Map reads on the genome with HISAT2 on paired end
Map reads on the genome with HISAT2 on paired end
"""
"""
threads: 4
threads: 4
params:
hisat= f"{hisat}"
input:
input:
reference = rules.hisat2_index.output.index,
reference = rules.hisat2_index.output.index,
r1 = f"{FASTP_DIR}{{reads}}_r1_paired.fastq.gz",
r1 = rules.fastp.output.out_r1,
r2 = f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz"
output:
output:
summary = f"{MAPPINGH_DIR}{{reads}}_HISAT_summary.txt",
summary = f"{MAPPINGH_DIR}{{reads}}_HISAT_summary.txt",
bam = f"{MAPPINGH_DIR}{{reads}}_HISAT.bam",
bam = f"{MAPPINGH_DIR}{{reads}}_HISAT.bam",
...
@@ -275,49 +242,25 @@ rule hisat2_map:
...
@@ -275,49 +242,25 @@ rule hisat2_map:
Execute {rule}
Execute {rule}
input:
input:
reference : {input.reference}
reference : {input.reference}
R1 : {input.r1}
R1 : {input.r1}
R2 : {input.r2}
output:
output:
bam: {output.bam}
bam: {output.bam}
"""
"""
conda:
"envs/hisat2.yaml"
shell:
shell:
"""
"""
{params.
hisat
}
-p {threads} -x {input.reference} -
1 {input.r1} -2
{input.r
2
} --summary-file {output.summary} | samtools view -S -b >{output.bam}
hisat
2
-p {threads} -x {input.reference} -
u
{input.r
1
} --summary-file {output.summary} | samtools view -S -b >{output.bam}
"""
"""
#Convert GFF in GTF with gffread for RNA-STAR
rule gff_to_gtf:
"""
Convert GFF in GTF for RNA-STAR
"""
threads: 8
params:
ggfread= f"{gffread}"
input:
gff = f"{REFERENCE_DIR}{{annotation}}.gff"
output:
gtf = f"{REFERENCE_DIR}{{annotation}}.gtf"
message:
"""
Execute {rule}
input:
gff : {input.gff}
output:
gtf: {output.gtf}
"""
shell:
"""
{params.ggfreadfread} {input.gff} -T -o {output.gtf}
"""
#Indexing for RNA-STAR (environ 2 min)
#Indexing for RNA-STAR (environ 2 min)
rule star_index:
rule star_index:
"""
"""
Make index with STAR
for 3D7
Make index with STAR
"""
"""
threads: 4
threads: 4
params:
params:
star= f"{star}",
dir = directory(f"{INDEX_STAR_DIR}")
dir = directory(f"{INDEX_STAR_DIR}")
input:
input:
fasta = f"{FASTA_REF}",
fasta = f"{FASTA_REF}",
...
@@ -333,11 +276,12 @@ rule star_index:
...
@@ -333,11 +276,12 @@ rule star_index:
output:
output:
index: {output.index}
index: {output.index}
"""
"""
conda:
"envs/Star.yaml"
shell:
shell:
"""
"""
ln -s {input.fasta} {output.index}
ln -s {input.fasta} {output.index}
{params.star} --runMode genomeGenerate --runThreadN {threads} --genomeDir {params.dir} --genomeFastaFiles {input.fasta} --sjdbGTFfile {input.gtf}
STAR --runMode genomeGenerate --runThreadN {threads} --genomeDir {params.dir} --genomeFastaFiles {input.fasta} --sjdbGTFfile {input.gtf}
"""
"""
#Mapping with RNA-STAR on Pf 3D7 (environ 10 min par échantillon, total 1h)
#Mapping with RNA-STAR on Pf 3D7 (environ 10 min par échantillon, total 1h)
...
@@ -347,14 +291,12 @@ rule star_mapping:
...
@@ -347,14 +291,12 @@ rule star_mapping:
"""
"""
threads: 4
threads: 4
params:
params:
star= f"{star}",
idx = directory(f"{INDEX_STAR_DIR}"),
idx = directory(f"{INDEX_STAR_DIR}"),
bam = f"{MAPPINGS_DIR}{{reads}}"
bam = f"{MAPPINGS_DIR}{{reads}}"
input:
input:
idx = rules.star_index.output.index,
idx = rules.star_index.output.index,
gtf = f"{GTF_REF}",
gtf = f"{GTF_REF}",
r1 = f"{FASTP_DIR}{{reads}}_r1_paired.fastq.gz",
r1 = rules.fastp.output.out_r1
r2 = f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz"
output:
output:
bam = f"{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam"
bam = f"{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam"
message:
message:
...
@@ -365,13 +307,14 @@ rule star_mapping:
...
@@ -365,13 +307,14 @@ rule star_mapping:
gtf : {input.gtf}
gtf : {input.gtf}
reads :
reads :
r1: {input.r1}
r1: {input.r1}
r2: {input.r2}
output:
output:
bam: {output.bam}
bam: {output.bam}
"""
"""
conda:
"envs/Star.yaml"
shell:
shell:
"""
"""
{params.star}
--runThreadN 4 --genomeDir {params.idx} --readFilesIn {input.r1}
{input.r2}
--readFilesCommand gunzip -c --outFileNamePrefix {params.bam} --outSAMtype BAM SortedByCoordinate --sjdbGTFfile {input.gtf}
STAR
--runThreadN 4 --genomeDir {params.idx} --readFilesIn {input.r1} --readFilesCommand gunzip -c --outFileNamePrefix {params.bam} --outSAMtype BAM SortedByCoordinate --sjdbGTFfile {input.gtf}
"""
"""
#Sort the BAM files with samtools
#Sort the BAM files with samtools
...
@@ -380,8 +323,6 @@ rule bam_sort:
...
@@ -380,8 +323,6 @@ rule bam_sort:
Sort the BAM files
Sort the BAM files
"""
"""
threads: 4
threads: 4
params:
samtools= f"{samtools}"
input:
input:
star =f'{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam',
star =f'{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam',
hisat = f"{MAPPINGH_DIR}{{reads}}_HISAT.bam"
hisat = f"{MAPPINGH_DIR}{{reads}}_HISAT.bam"
...
@@ -397,10 +338,12 @@ rule bam_sort:
...
@@ -397,10 +338,12 @@ rule bam_sort:
output:
output:
bam: {output.out_star}, {output.out_hisat}
bam: {output.out_star}, {output.out_hisat}
"""
"""
conda:
"envs/samtools.yaml"
shell:
shell:
"""
"""
{params.
samtools
}
sort {input.star} > {output.out_star};
samtools sort {input.star} > {output.out_star};
{params.
samtools
}
sort {input.hisat} > {output.out_hisat}
samtools sort {input.hisat} > {output.out_hisat}
"""
"""
#Filtering of the BAM files with samtools
#Filtering of the BAM files with samtools
...
@@ -409,8 +352,6 @@ rule bam_filter:
...
@@ -409,8 +352,6 @@ rule bam_filter:
Filtering BAM files
Filtering BAM files
"""
"""
threads: 4
threads: 4
params:
samtools= f"{samtools}"
input:
input:
star_bam = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
star_bam = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
hisat_bam = f"{BAM_DIR}{{reads}}_HISAT_sort.bam"
hisat_bam = f"{BAM_DIR}{{reads}}_HISAT_sort.bam"
...
@@ -425,12 +366,14 @@ rule bam_filter:
...
@@ -425,12 +366,14 @@ rule bam_filter:
input: {input}
input: {input}
"""
"""
conda:
"envs/samtools.yaml"
shell:
shell:
"""
"""
{params.
samtools
}
view -b -F 4 {input.star_bam} > {output.mapped_star}
samtools view -b -F 4 {input.star_bam} > {output.mapped_star}
{params.
samtools
}
view -b -f 4 {input.star_bam} > {output.unmapped_star}
samtools view -b -f 4 {input.star_bam} > {output.unmapped_star}
{params.
samtools
}
view -b -F 4 {input.hisat_bam} > {output.mapped_hisat}
samtools view -b -F 4 {input.hisat_bam} > {output.mapped_hisat}
{params.
samtools
}
view -b -f 4 {input.hisat_bam} > {output.unmapped_hisat}
samtools view -b -f 4 {input.hisat_bam} > {output.unmapped_hisat}
"""
"""
# Identify the duplicates in BAM with Picard tools
# Identify the duplicates in BAM with Picard tools
...
@@ -439,8 +382,6 @@ rule bam_markdup:
...
@@ -439,8 +382,6 @@ rule bam_markdup:
Mark duplicates in BAM files
Mark duplicates in BAM files
"""
"""
threads: 4
threads: 4
params:
picardtools= f"{picardtools}"
input:
input:
bam_star=f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam",
bam_star=f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam",
bam_hisat = f"{FILTER_DIR}{{reads}}_HISAT_sort_mapped.bam",
bam_hisat = f"{FILTER_DIR}{{reads}}_HISAT_sort_mapped.bam",
...
@@ -454,10 +395,12 @@ rule bam_markdup:
...
@@ -454,10 +395,12 @@ rule bam_markdup:
Execute {rule}
Execute {rule}
input: {input}
input: {input}
"""
"""
conda:
"envs/picard.yaml"
shell:
shell:
"""
"""
java -jar
{params.picardtools}
MarkDuplicates I={input.bam_star} O={output.star} M={output.star_txt}
java -jar
picard.jar
MarkDuplicates I={input.bam_star} O={output.star} M={output.star_txt}
java -jar
{params.picardtools}
MarkDuplicates I={input.bam_hisat} O={output.hisat} M={output.hisat_txt}
java -jar
picard.jar
MarkDuplicates I={input.bam_hisat} O={output.hisat} M={output.hisat_txt}
"""
"""
#Index the BAM files with samtools
#Index the BAM files with samtools
...
@@ -466,8 +409,6 @@ rule bam_index:
...
@@ -466,8 +409,6 @@ rule bam_index:
Index BAM files
Index BAM files
"""
"""
threads: 4
threads: 4
params:
samtools= f"{samtools}"
input:
input:
bam_star = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
bam_star = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
bam_hisat = f"{BAM_DIR}{{reads}}_HISAT_sort.bam",
bam_hisat = f"{BAM_DIR}{{reads}}_HISAT_sort.bam",
...
@@ -482,14 +423,16 @@ rule bam_index:
...
@@ -482,14 +423,16 @@ rule bam_index:
Execute {rule}
Execute {rule}
input: {input}
input: {input}
"""
"""
conda:
"envs/samtools.yaml"
shell:
shell:
"""
"""
{params.
samtools
}
index {input.bam_star}
samtools index {input.bam_star}
{params.
samtools
}
index {input.bam_hisat}
samtools index {input.bam_hisat}
{params.
samtools
}
index {input.bam_star_filt}
samtools index {input.bam_star_filt}
{params.
samtools
}
index {input.bam_hisat_filt}
samtools index {input.bam_hisat_filt}
{params.
samtools
}
index {input.bam_star_filt_md}
samtools index {input.bam_star_filt_md}
{params.
samtools
}
index {input.bam_hisat_filt_md}
samtools index {input.bam_hisat_filt_md}
"""
"""
#Stats on the BAM files with samtools
#Stats on the BAM files with samtools
...
@@ -498,8 +441,6 @@ rule bam_stats:
...
@@ -498,8 +441,6 @@ rule bam_stats:
Statistics data on the BAM files
Statistics data on the BAM files
"""
"""
threads: 4
threads: 4
params:
samtools= f"{samtools}"
input:
input:
bam_star = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
bam_star = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
bam_hisat = f"{BAM_DIR}{{reads}}_HISAT_sort.bam",
bam_hisat = f"{BAM_DIR}{{reads}}_HISAT_sort.bam",
...
@@ -519,24 +460,25 @@ rule bam_stats:
...
@@ -519,24 +460,25 @@ rule bam_stats:
Execute {rule}
Execute {rule}
input: {input}
input: {input}
"""
"""
conda:
"envs/samtools.yaml"
shell:
shell:
"""
"""
{params.
samtools
}
flagstat {input.bam_star} > {output.flagstat_star}
samtools flagstat {input.bam_star} > {output.flagstat_star}
{params.
samtools
}
flagstat {input.bam_hisat} > {output.flagstat_hisat}
samtools flagstat {input.bam_hisat} > {output.flagstat_hisat}
{params.
samtools
}
flagstat {input.bam_star_filt} > {output.flagstat_star_filt}
samtools flagstat {input.bam_star_filt} > {output.flagstat_star_filt}
{params.
samtools
}
flagstat {input.bam_star_filt_md} > {output.flagstat_star_filt_md}
samtools flagstat {input.bam_star_filt_md} > {output.flagstat_star_filt_md}
{params.
samtools
}
flagstat {input.bam_hisat_filt} > {output.flagstat_hisat_filt}
samtools flagstat {input.bam_hisat_filt} > {output.flagstat_hisat_filt}
{params.
samtools
}
flagstat {input.bam_hisat_filt_md} > {output.flagstat_hisat_filt_md}
samtools flagstat {input.bam_hisat_filt_md} > {output.flagstat_hisat_filt_md}
"""
"""
# ----------------------------------- COUNT -------------------------------------------------------------------
#Create the count table from the BAM files with HTseq count
#Create the count table from the BAM files with HTseq count
rule htseq_counts :
rule htseq_counts :
"""
"""
Create the count table with HTseq-count
Create the count table with HTseq-count
"""
"""
threads: 4
threads: 4
params:
htseq= f"{htseq}"
input:
input:
bam_index = rules.bam_index.output.bai,
bam_index = rules.bam_index.output.bai,
bam_star = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
bam_star = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
...
@@ -553,10 +495,12 @@ rule htseq_counts :
...
@@ -553,10 +495,12 @@ rule htseq_counts :
gtf: {input.gtf}
gtf: {input.gtf}
output: {output.count_table_star}
output: {output.count_table_star}
"""
"""
conda:
"envs/htseq.yaml"
shell:
shell:
"""
"""
{params.htseq}
-t exon -i gene_id -f bam {input.bam_star} {input.gtf} > {output.count_table_star}
htseq-count
-t exon -i gene_id -f bam {input.bam_star} {input.gtf} > {output.count_table_star}
{params.htseq}
-t exon -i gene_id -f bam {input.bam_hisat} {input.gtf} > {output.count_table_hisat}
htseq-count
-t exon -i gene_id -f bam {input.bam_hisat} {input.gtf} > {output.count_table_hisat}
"""
"""
#Create the count table from the BAM files with stringtie
#Create the count table from the BAM files with stringtie
...
@@ -565,8 +509,6 @@ rule stringtie :
...
@@ -565,8 +509,6 @@ rule stringtie :
Create the count table with HTseq-count
Create the count table with HTseq-count
"""
"""
threads: 4
threads: 4
params:
stringtie= f"{stringtie}"
input:
input:
bam_index = rules.bam_index.output.bai,
bam_index = rules.bam_index.output.bai,
bam_star = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
bam_star = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
...
@@ -585,11 +527,12 @@ rule stringtie :
...
@@ -585,11 +527,12 @@ rule stringtie :
gtf: {input.gtf}
gtf: {input.gtf}
output: {output.star_tsv}
output: {output.star_tsv}
"""
"""
conda:
"envs/stringtie.yaml"
shell:
shell:
"""
"""
{params.stringtie} -p 8 -G {input.gtf} -e -B -o {output.star_gtf} -A {output.star_tsv} {input.bam_star}
stringtie -p 8 -G {input.gtf} -e -B -o {output.star_gtf} -A {output.star_tsv} {input.bam_star}
{params.stringtie} -p 8 -G {input.gtf} -e -B -o {output.hisat_gtf} -A {output.hisat_tsv} {input.bam_hisat}
stringtie -p 8 -G {input.gtf} -e -B -o {output.hisat_gtf} -A {output.hisat_tsv} {input.bam_hisat}
"""
"""
# Create a list of GTF created by Stringtie
# Create a list of GTF created by Stringtie
...
@@ -631,14 +574,23 @@ rule prepDE_stringtie_table:
...
@@ -631,14 +574,23 @@ rule prepDE_stringtie_table:
input :
input :
mergelist_star = rules.list_for_prepDE.output.star_list,
mergelist_star = rules.list_for_prepDE.output.star_list,
mergelist_hisat = rules.list_for_prepDE.output.hisat_list,
mergelist_hisat = rules.list_for_prepDE.output.hisat_list,
script = f"{PREPDE}"
output :
output :
gcsv_star = f'{STRINGTIE_DIR}STAR_gene_count_matrix.csv',
gcsv_star = f'{STRINGTIE_DIR}STAR_gene_count_matrix.csv',
tcsv_star = f'{STRINGTIE_DIR}STAR_transcript_count_matrix.csv',
tcsv_star = f'{STRINGTIE_DIR}STAR_transcript_count_matrix.csv',
gcsv_hisat = f'{STRINGTIE_DIR}HISAT_gene_count_matrix.csv',
gcsv_hisat = f'{STRINGTIE_DIR}HISAT_gene_count_matrix.csv',
tcsv_hisat = f'{STRINGTIE_DIR}HISAT_transcript_count_matrix.csv',
tcsv_hisat = f'{STRINGTIE_DIR}HISAT_transcript_count_matrix.csv',
conda:
"envs/stringtie.yaml"
shell:
shell:
"""
"""
python2 {input.script} -i {input.mergelist_star} -t {output.tcsv_star} -g {output.gcsv_star}
python2 prepDE.py -i {input.mergelist_star} -t {output.tcsv_star} -g {output.gcsv_star}
python2 {input.script} -i {input.mergelist_hisat} -t {output.tcsv_hisat} -g {output.gcsv_hisat}
python2 prepDE.py -i {input.mergelist_hisat} -t {output.tcsv_hisat} -g {output.gcsv_hisat}
"""
"""
\ No newline at end of file
# pseudo count:
rule Kalisto:
This diff is collapsed.
Click to expand it.
configPT.yaml
+
7
−
35
View file @
979ae7f6
# Dossiers que l'on va utiliser
# Dossiers que l'on va utiliser
'
REFERENCE'
:
"
/Users/COHEN/Documents/BulKRNA/TestSnakeMake/01_Genome/"
'
FASTA'
:
"
DATA/ref/riceNUCMSU7.fasta"
'
FASTA'
:
"
/Users/COHEN/Documents/BulKRNA/TestSnakeMake/01_Genome/plasmodb_pfalciparum3d7_genome.fasta"
'
GFF'
:
"
DATA/annotation/riceNUCMSU7.gff"
'
GFF'
:
"
/Users/COHEN/Documents/BulKRNA/TestSnakeMake/01_Genome/plasmodb-46_pfalciparum3d7.gff"
'
GTF'
:
"
DATA/annotation/riceNUCMSU7.gtf"
'
GTF'
:
"
/Users/COHEN/Documents/BulKRNA/TestSnakeMake/01_Genome/plasmodb-46_pfalciparum3d7.gtf"
'
READS'
:
"
DATA/fastq/"
'
READS'
:
"
/Users/COHEN/Documents/BulKRNA/TestSnakeMake/02_RawData/"
'
OUTDIR'
:
"
output"
'
FASTQC'
:
"
/Users/COHEN/Documents/BulKRNA/TestSnakeMake/03_FastQC/"
'
FASTQSCREEN'
:
"
/Users/COHEN/Documents/BulKRNA/TestSnakeMake/04_FastQScreen/"
'
FASTP'
:
"
/Users/COHEN/Documents/BulKRNA/TestSnakeMake/05_FastP/"
'
FASTQC_AF'
:
"
/Users/COHEN/Documents/BulKRNA/TestSnakeMake/06_FastQC_AfterFT/"
'
MULTIQC'
:
"
/Users/COHEN/Documents/BulKRNA/TestSnakeMake/07_MultiQC/"
'
INDEX'
:
"
/Users/COHEN/Documents/BulKRNA/TestSnakeMake/08_Index_HISAT_3D7/"
'
name_hisat_index'
:
"
PFalci3D7"
'
MAPPINGH'
:
"
/Users/COHEN/Documents/BulKRNA/TestSnakeMake/09_Mapping_HISAT_3D7/"
'
INDEX_STAR'
:
"
/Users/COHEN/Documents/BulKRNA/TestSnakeMake/10_Index_STAR_3D7/"
'
MAPPING_STAR'
:
"
/Users/COHEN/Documents/BulKRNA/TestSnakeMake/11_Mapping_STAR_3D7/"
'
BAMFILES'
:
"
/Users/COHEN/Documents/BulKRNA/TestSnakeMake/12_BAMfiles/"
'
STATS'
:
"
/Users/COHEN/Documents/BulKRNA/TestSnakeMake/14_Stats_BAM/"
'
FILTER'
:
"
/Users/COHEN/Documents/BulKRNA/TestSnakeMake/13_Filtered_BAM/"
'
DUPL'
:
"
/Users/COHEN/Documents/BulKRNA/TestSnakeMake/15_MarkedDupli_BAM/"
'
HTSEQ'
:
"
/Users/COHEN/Documents/BulKRNA/TestSnakeMake/16_HTseq-count/"
'
STRINGTIE'
:
"
/Users/COHEN/Documents/BulKRNA/TestSnakeMake/17_Stringtie/"
### logiciels
### logiciels
'
FastQC'
:
"
fastqc"
'
FastQScreen_conf'
:
"
ConfigFastqScreen.txt"
'
FastQScreen'
:
"
fastq_screen"
'
name_hisat_index'
:
"
rice"
'
FastQScreen_conf'
:
"
/Users/COHEN/Documents/Logiciels/FastQ-Screen-0.14.1/fastq_screen.conf"
\ No newline at end of file
'
FastP'
:
"
fastp"
'
MultiQC'
:
"
multiQC"
'
hisat-build'
:
"
/Users/COHEN/Documents/Logiciels/hisat2-2.2.0/hisat2-build"
'
hisat'
:
"
/Users/COHEN/Documents/Logiciels/hisat2-2.2.0/hisat2"
'
gffread'
:
"
/Users/COHEN/Documents/Logiciels/gffread-0.12.2.OSX_x86_64/gffread"
'
star'
:
"
/Users/COHEN/Documents/Logiciels/STAR-2-7-5c/bin/MacOSX_x86_64/STAR"
'
samtools'
:
"
samtools"
'
picardtools'
:
"
/Users/COHEN/Documents/Logiciels/picard.jar"
'
htseq'
:
"
htseq-count"
'
stringtie'
:
"
/Users/COHEN/Documents/Logiciels/stringtie-2.1.4.OSX_x86_64/stringtie"
'
PREPDE'
:
"
/Users/COHEN/Documents/Logiciels/stringtie-2.1.4.OSX_x86_64/prepDE.py"
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment