Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
RNAja-pipeline
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
PHIM
RNAja-pipeline
Commits
af11c11d
Commit
af11c11d
authored
4 years ago
by
CamilleC
Browse files
Options
Downloads
Patches
Plain Diff
Delete ProjetT-Pip-RNAseq_v2
parent
a58eb0d0
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
Scripts/ProjetT-Pip-RNAseq_v2
+0
-585
0 additions, 585 deletions
Scripts/ProjetT-Pip-RNAseq_v2
with
0 additions
and
585 deletions
Scripts/ProjetT-Pip-RNAseq_v2
deleted
100644 → 0
+
0
−
585
View file @
a58eb0d0
# Importation des librairies glob, sys, et re
import glob,sys,re
## appel du fichier de config
configfile: "configPT.yaml"
## Definition des variables "constantes" à partir du fichier config (attention aux arborescences dans le yaml)
REFERENCE_DIR = config['REFERENCE']
FASTA_REF= config['FASTA']
GFF_REF= config['GFF']
GTF_REF= config['GTF']
READS_DIR = config['READS']
FASTQC_DIR = config['FASTQC']
FASTQSCREEN_DIR = config['FASTQSCREEN']
FASTP_DIR = config['FASTP']
FASTQC_AF_DIR = config['FASTQC_AF']
MULTIQC_DIR = config['MULTIQC']
INDEX_DIR = config['INDEX']
INDEX_STAR_DIR = config['INDEX_STAR']
MAPPINGH_DIR = config['MAPPINGH']
MAPPINGS_DIR = config['MAPPING_STAR']
BAM_DIR = config['BAMFILES']
STATS_DIR = config['STATS']
FILTER_DIR = config['FILTER']
DUPL_DIR = config['DUPL']
HTSEQ_DIR = config['HTSEQ']
STRINGTIE_DIR = config['STRINGTIE']
STRINGTIECC_DIR = config['STRINGTIECC']
PREPDE = config['PREPDE']
NREADS= config['NREADS']
#Wild cards
REFERENCE, = glob_wildcards(f"{REFERENCE_DIR}{{reference}}.fasta")
ANNOTATION, = glob_wildcards(f"{REFERENCE_DIR}{{annotation}}.gff")
READS, = glob_wildcards(f"{READS_DIR}{{reads}}_r1.fastq.gz")
#SAMPLEFQ, = glob_wildcards(f"{FASTQC_DIR}{{samplefq}}.html")
#SAMPLEFS, = glob_wildcards(f"{FASTQSCREEN_DIR}{{samplefs}}.html")
#SAMPLEFQAF, = glob_wildcards(f"{FASTQC_AF_DIR}{{samplefqaf}}.html")
#SAMPLEFP, = glob_wildcards(f"{FASTP_DIR}{{samplefp}}.html")
#BAM, = glob_wildcards(f"{BAM_DIR}{{bam}}.bam")
# Regle finale pour vérifier la présence des outputs et si ils sont présents ils ne lancent pas le job
rule final:
input:
out_fastqc = expand(f"{FASTQC_DIR}{{reads}}_r1_fastqc.html", reads = READS),
out_fastqs = expand(f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.html", reads = READS),
out_fastp = expand(f"{FASTP_DIR}{{reads}}_report_fastp.html", reads = READS),
out_fastqc_af = expand(f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.html", reads = READS),
out_multiqc = expand(f"{MULTIQC_DIR}multiqc_report.html"),
out_hisat_idx = expand(f"{INDEX_DIR}P.Falci_3D7"),
out_hisat_map = expand(f"{MAPPINGH_DIR}{{reads}}_HISAT.bam" , reads = READS),
out_star_map = expand(f"{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam" , reads = READS),
out_bam_sort = expand(f"{BAM_DIR}{{reads}}_STAR_sort.bam", reads = READS),
out_bam_filt = expand(f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam", reads = READS),
out_bam_index = expand(f"{BAM_DIR}{{reads}}_STAR_sort.bam.bai", reads = READS),
out_bam_stats = expand(f"{STATS_DIR}{{reads}}_STAR_sort_flagstat.txt", reads = READS),
#out_bam_md = expand(f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup_metric.txt", reads = READS),
out_htseq = expand(f"{HTSEQ_DIR}HISAT/{{reads}}.txt", reads = READS),
#out_gtf_list = expand(f'{STRINGTIE_DIR}STRINGTIE_MERGE/STAR_gtf_list.txt'),
#out_stringtie = expand(f'{STRINGTIE_DIR}STRINGTIE_MERGE/STAR_stringtie_merged.gtf'),
out_stringtiecc = expand(f'{STRINGTIECC_DIR}STAR/{{reads}}/{{reads}}.gtf', reads=READS),
out_gtf_list = expand(f'{STRINGTIECC_DIR}HISAT_gtf_list.txt', reads=READS),
out_csv = f'{STRINGTIECC_DIR}HISAT_transcript_count_matrix.csv'
# FastQC before filtering
rule fastqc:
"""
FastQC - Control Quality of reads
"""
threads:4
params:
out_fastqc=directory(f"{FASTQC_DIR}")
input:
r1 = f"{READS_DIR}{{reads}}_r1.fastq.gz",
r2 = f"{READS_DIR}{{reads}}_r2.fastq.gz"
output:
out_fastqc_r1 = f"{FASTQC_DIR}{{reads}}_r1_fastqc.html",
out_fastqc_r2 = f"{FASTQC_DIR}{{reads}}_r2_fastqc.html",
out_fastqc_r1_zip = f"{FASTQC_DIR}{{reads}}_r1_fastqc.zip",
out_fastqc_r2_zip = f"{FASTQC_DIR}{{reads}}_r2_fastqc.zip"
message:
"""
input=
r1 = {input.r1}
r2 = {input.r2}
threads={threads}
"""
shell:
"""
fastqc -o {params.out_fastqc} -t 4 {input.r1} {input.r2}
"""
# FastQScreen agaisnt 3D7 and human
rule fastqscreen:
"""
FastQScreen - Control Quality of reads
"""
threads:4
priority:10000000000000000
params:
out_fastqs=directory(f"{FASTQSCREEN_DIR}")
input:
r1 = f"{READS_DIR}{{reads}}_r1.fastq.gz",
r2 = f"{READS_DIR}{{reads}}_r2.fastq.gz"
output:
html_r1= f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.html",
html_r2=f"{FASTQSCREEN_DIR}{{reads}}_r2_screen.html",
txt_r1=f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.txt",
txt_r2=f"{FASTQSCREEN_DIR}{{reads}}_r2_screen.txt",
message:
"""
Execute {rule}
input=
r1 = {input.r1}
r2 = {input.r2}
threads={threads}
"""
shell:
"""
fastq_screen --conf /Users/COHEN/Documents/Logiciels/FastQ-Screen-0.14.1/fastq_screen.conf --aligner BOWTIE2 {input.r1} {input.r2} --outdir {params.out_fastqs}
"""
# Filtering and trimming with FastP
rule fastp:
"""
FastP - Filtering and trimming before mapping
"""
params:
out_fastp=directory(f"{FASTP_DIR}")
input:
r1 = f"{READS_DIR}{{reads}}_r1.fastq.gz",
r2 = f"{READS_DIR}{{reads}}_r2.fastq.gz"
output:
out_paired_r1 = f"{FASTP_DIR}{{reads}}_r1_paired.fastq.gz",
out_unpaired_r1 = f"{FASTP_DIR}{{reads}}_r1_unpaired.fastq.gz",
out_paired_r2 = f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz",
out_unpaired_r2 = f"{FASTP_DIR}{{reads}}_r2_unpaired.fastq.gz",
out_report= f"{FASTP_DIR}{{reads}}_report_fastp.html",
out_json= f"{FASTP_DIR}{{reads}}_fastp.json"
message:
"""
input={input.r1},{input.r2}
"""
shell:
"""
fastp --in1 {input.r1} --in2 {input.r2} --out1 {output.out_paired_r1} --out2 {output.out_paired_r2} --unpaired1 {output.out_unpaired_r1} --unpaired2 {output.out_unpaired_r2} -w 1 -h {output.out_report} -j {output.out_json}
"""
# FastQC after filtering
rule fastqc_af:
"""
FastQC - Control Quality of reads after FastP
"""
priority:100000000000000
params:
out_fastqc_af = directory(f"{FASTQC_AF_DIR}")
input:
r1 = f"{FASTP_DIR}{{reads}}_r1_paired.fastq.gz",
r2 = f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz"
output:
out_fastqc_af_r1 = f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.html",
out_fastqc_af_r2 = f"{FASTQC_AF_DIR}{{reads}}_r2_paired_fastqc.html",
out_fastqc_af_r1_zip = f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.zip",
out_fastqc_af_r2_zip = f"{FASTQC_AF_DIR}{{reads}}_r2_paired_fastqc.zip"
message:
"""
input={input.r1},{input.r2}
"""
shell:
"""
fastqc -o {params.out_fastqc_af} -t 4 {input.r1} {input.r2}
"""
# FastQScreen agaisnt 3D7 and human
rule multi_qc:
"""
MultiQC
"""
params:
out_multi = directory(f"{MULTIQC_DIR}")
input:
expand(f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.html", reads = READS),
expand(f"{FASTQSCREEN_DIR}{{reads}}_r2_screen.html", reads = READS),
expand(f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.txt", reads = READS),
expand(f"{FASTQSCREEN_DIR}{{reads}}_r2_screen.txt", reads = READS),
expand(f"{FASTQC_DIR}{{reads}}_r1_fastqc.html", reads=READS),
expand(f"{FASTQC_DIR}{{reads}}_r2_fastqc.html", reads=READS),
expand(f"{FASTQC_DIR}{{reads}}_r1_fastqc.zip", reads=READS),
expand(f"{FASTQC_DIR}{{reads}}_r2_fastqc.zip", reads=READS),
expand(f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.html", reads=READS),
expand(f"{FASTQC_AF_DIR}{{reads}}_r2_paired_fastqc.html", reads=READS),
expand(f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.zip", reads=READS),
expand(f"{FASTQC_AF_DIR}{{reads}}_r2_paired_fastqc.zip", reads=READS),
expand(f"{FASTP_DIR}{{reads}}_report_fastp.html", reads=READS),
expand(f"{FASTP_DIR}{{reads}}_fastp.json",reads=READS)
output:
f"{MULTIQC_DIR}multiqc_report.html",
shell:
"""
multiqc {input} -o {params.out_multi}
"""
# Indexing of 3D7 genome (environ 1 min)
rule hisat2_index:
"""
Make index with HISAT2 for 3D7
"""
input:
fasta = f"{FASTA_REF}"
output:
index = f"{INDEX_DIR}P.Falci_3D7",
message:
"""
Execute {rule}
input:
fasta : {input.fasta}
output:
index: {output.index}
"""
log:
output = f"{INDEX_DIR}LOG/3D7_HISAT-INDEX.o",
error = f"{INDEX_DIR}LOG/3D7_HISAT-INDEX.e",
shell:
"""
ln -s {input.fasta} {output.index} 1>{log.output} 2>{log.error}
/Users/COHEN/Documents/Logiciels/hisat2-2.2.0/hisat2-build {input.fasta} {output.index} --quiet
"""
# Mapping with HISAT2 on Pf 3D7 (max 2 min par échantillon, total 10 min)
rule hisat2_map:
"""
Map reads on the genome with HISAT2 on paired end
"""
threads: 4
input:
reference = rules.hisat2_index.output.index,
r1 = f"{FASTP_DIR}{{reads}}_r1_paired.fastq.gz",
r2 = f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz"
output:
summary = f"{MAPPINGH_DIR}{{reads}}_HISAT_summary.txt",
bam = f"{MAPPINGH_DIR}{{reads}}_HISAT.bam",
message:
"""
Execute {rule}
input:
reference : {input.reference}
R1 : {input.r1}
R2 : {input.r2}
output:
bam: {output.bam}
"""
shell:
"""
/Users/COHEN/Documents/Logiciels/hisat2-2.2.0/hisat2 -p {threads} -x {input.reference} -1 {input.r1} -2 {input.r2} --summary-file {output.summary} | samtools view -S -b >{output.bam}
"""
#Convert GFF in GTF with gffread for RNA-STAR
rule gff_to_gtf:
"""
Convert GFF in GTF for RNA-STAR
"""
threads: 8
input:
gff = f"{REFERENCE_DIR}{{annotation}}.gff"
output:
gtf = f"{REFERENCE_DIR}{{annotation}}.gtf"
message:
"""
Execute {rule}
input:
gff : {input.gff}
output:
gtf: {output.gtf}
"""
shell:
"""
/Users/COHEN/Documents/Logiciels/gffread-0.12.2.OSX_x86_64/gffread {input.gff} -T -o {output.gtf}
"""
#Indexing for RNA-STAR (environ 2 min)
rule star_index:
"""
Make index with STAR for 3D7
"""
threads: 4
params:
dir = directory(f"{INDEX_STAR_DIR}")
input:
fasta = f"{FASTA_REF}",
gtf = f"{GTF_REF}",
output:
index = f"{INDEX_STAR_DIR}Log.out",
message:
"""
Execute {rule}
input:
fasta : {input.fasta}
gtf : {input.gtf}
output:
index: {output.index}
"""
shell:
"""
ln -s {input.fasta} {output.index}
/Users/COHEN/Documents/Logiciels/STAR-2-7-5c/bin/MacOSX_x86_64/STAR --runMode genomeGenerate --runThreadN {threads} --genomeDir {params.dir} --genomeFastaFiles {input.fasta} --sjdbGTFfile {input.gtf}
"""
#Mapping with RNA-STAR on Pf 3D7 (environ 10 min par échantillon, total 1h)
rule star_mapping:
"""
Map reads on the genome with STAR on paired end
"""
threads: 4
params:
idx = directory(f"{INDEX_STAR_DIR}"),
bam = f"{MAPPINGS_DIR}{{reads}}"
input:
idx = rules.star_index.output.index,
gtf = f"{GTF_REF}",
r1 = f"{FASTP_DIR}{{reads}}_r1_paired.fastq.gz",
r2 = f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz"
output:
bam = f"{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam"
message:
"""
Execute {rule}
input:
idx : {params.idx}
gtf : {input.gtf}
reads :
r1: {input.r1}
r2: {input.r2}
output:
bam: {output.bam}
"""
shell:
"""
/Users/COHEN/Documents/Logiciels/STAR-2-7-5c/bin/MacOSX_x86_64/STAR --runThreadN 4 --genomeDir {params.idx} --readFilesIn {input.r1} {input.r2} --readFilesCommand gunzip -c --outFileNamePrefix {params.bam} --outSAMtype BAM SortedByCoordinate --sjdbGTFfile {input.gtf}
"""
#Sort the BAM files with samtools
rule bam_sort:
"""
Sort the BAM files
"""
threads: 4
input:
star =f'{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam',
hisat = f"{MAPPINGH_DIR}{{reads}}_HISAT.bam"
output:
out_star = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
out_hisat = f"{BAM_DIR}{{reads}}_HISAT_sort.bam"
message:
"""
Execute {rule}
input:
star : {input.star}
hisat : {input.hisat}
output:
bam: {output.out_star}, {output.out_hisat}
"""
shell:
"""
samtools sort {input.star} > {output.out_star};
samtools sort {input.hisat} > {output.out_hisat}
"""
#Filtering of the BAM files with samtools
rule bam_filter:
"""
Filtering BAM files
"""
threads: 4
input:
star_bam = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
hisat_bam = f"{BAM_DIR}{{reads}}_HISAT_sort.bam"
output:
mapped_star = f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam",
unmapped_star = f"{FILTER_DIR}{{reads}}_STAR_sort_unmapped.bam",
mapped_hisat = f"{FILTER_DIR}{{reads}}_HISAT_sort_mapped.bam",
unmapped_hisat = f"{FILTER_DIR}{{reads}}_HISAT_sort_unmapped.bam"
message:
"""
Execute {rule}
input: {input.star_bam}
"""
shell:
"""
samtools view -b -F 4 {input.star_bam} > {output.mapped_star}
samtools view -b -f 4 {input.star_bam} > {output.unmapped_star}
samtools view -b -F 4 {input.hisat_bam} > {output.mapped_hisat}
samtools view -b -f 4 {input.hisat_bam} > {output.unmapped_hisat}
"""
# Identify the duplicates in BAM with Picard tools
rule bam_markdup:
"""
Mark duplicates in BAM files
"""
threads: 4
#params:
# récupérer l'option dans le yaml . voir pour mettre un if . A regarder !!!
input:
bam_star=f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam",
bam_hisat = f"{FILTER_DIR}{{reads}}_HISAT_sort_mapped.bam",
output:
star = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
star_txt = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup_metric.txt",
hisat = f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup.bam",
hisat_txt = f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup_metric.txt"
message:
"""
Execute {rule}
input: {input.bam_star}, {input.bam_hisat}
"""
shell:
"""
java -jar /Users/COHEN/Documents/Logiciels/picard.jar MarkDuplicates I={input.bam_star} O={output.star} M={output.star_txt}
java -jar /Users/COHEN/Documents/Logiciels/picard.jar MarkDuplicates I={input.bam_hisat} O={output.hisat} M={output.hisat_txt}
"""
#Index the BAM files with samtools
rule bam_index:
"""
Index BAM files
"""
threads: 4
input:
bam_star = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
bam_hisat = f"{BAM_DIR}{{reads}}_HISAT_sort.bam",
bam_star_filt = f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam",
bam_hisat_filt =f"{FILTER_DIR}{{reads}}_HISAT_sort_mapped.bam",
bam_star_filt_md = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
bam_hisat_filt_md = f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup.bam",
output:
bai = f"{BAM_DIR}{{reads}}_STAR_sort.bam.bai"
message:
"""
Execute {rule}
input: {input.bam_star}
"""
shell:
"""
samtools index {input.bam_star}
samtools index {input.bam_hisat}
samtools index {input.bam_star_filt}
samtools index {input.bam_hisat_filt}
samtools index {input.bam_star_filt_md}
samtools index {input.bam_hisat_filt_md}
"""
#Stats on the BAM files with samtools
rule bam_stats:
"""
Statistics data on the BAM files
"""
threads: 4
input:
bam_star = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
bam_hisat = f"{BAM_DIR}{{reads}}_HISAT_sort.bam",
bam_star_filt = f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam",
bam_hisat_filt =f"{FILTER_DIR}{{reads}}_HISAT_sort_mapped.bam",
bam_star_filt_md = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
bam_hisat_filt_md = f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup.bam",
output:
flagstat_star = f"{STATS_DIR}{{reads}}_STAR_sort_flagstat.txt",
flagstat_hisat = f"{STATS_DIR}{{reads}}_HISAT_sort_flagstat.txt",
flagstat_star_filt= f"{STATS_DIR}{{reads}}_STAR_sort_mapped_flagstat.txt",
flagstat_hisat_filt = f"{STATS_DIR}{{reads}}_HISAT_sort_mapped_flagstat.txt",
flagstat_star_filt_md= f"{STATS_DIR}{{reads}}_STAR_sort_mapped_markdup_flagstat.txt",
flagstat_hisat_filt_md= f"{STATS_DIR}{{reads}}_HISAT_sort_mapped_markdup_flagstat.txt",
message:
"""
Execute {rule}
input: {input}
"""
shell:
"""
samtools flagstat {input.bam_star} > {output.flagstat_star}
samtools flagstat {input.bam_hisat} > {output.flagstat_hisat}
samtools flagstat {input.bam_star_filt} > {output.flagstat_star_filt}
samtools flagstat {input.bam_star_filt_md} > {output.flagstat_star_filt_md}
samtools flagstat {input.bam_hisat_filt} > {output.flagstat_hisat_filt}
samtools flagstat {input.bam_hisat_filt_md} > {output.flagstat_hisat_filt_md}
"""
#Create the count table from the BAM files with Picard tools
rule htseq_counts :
"""
Create the count table with HTseq-count
"""
threads: 4
input:
bam_star = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
bam_hisat = f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup.bam",
gtf = f"{GTF_REF}"
output:
count_table_star = f"{HTSEQ_DIR}STAR/{{reads}}.txt",
count_table_hisat = f"{HTSEQ_DIR}HISAT/{{reads}}.txt"
message:
"""
Execute {rule}
input:
bam: {input.bam_star}
gtf: {input.gtf}
output: {output.count_table_star}
"""
shell:
"""
htseq-count -t exon -i gene_id -f bam {input.bam_star} {input.gtf} > {output.count_table_star}
htseq-count -t exon -i gene_id -f bam {input.bam_hisat} {input.gtf} > {output.count_table_hisat}
"""
#Create the count table from the BAM files with Picard tools
rule stringtie_cc :
"""
Create the count table with HTseq-count
"""
threads: 4
input:
bam_star = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
bam_hisat = f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup.bam",
gtf = f"{GTF_REF}"
output:
star_gtf = f'{STRINGTIECC_DIR}STAR/{{reads}}/{{reads}}.gtf',
hisat_gtf= f'{STRINGTIECC_DIR}HISAT/{{reads}}/{{reads}}.gtf',
star_tsv = f'{STRINGTIECC_DIR}STAR/{{reads}}/{{reads}}.tsv',
hisat_tsv = f'{STRINGTIECC_DIR}HISAT/{{reads}}/{{reads}}.tsv'
message:
"""
Execute {rule}
input:
bam: {input.bam_star}
gtf: {input.gtf}
output: {output.star_tsv}
"""
shell:
"""
/Users/COHEN/Documents/Logiciels/stringtie-2.1.4.OSX_x86_64/stringtie -p 8 -G {input.gtf} -e -B -o {output.star_gtf} -A {output.star_tsv} {input.bam_star}
/Users/COHEN/Documents/Logiciels/stringtie-2.1.4.OSX_x86_64/stringtie -p 8 -G {input.gtf} -e -B -o {output.hisat_gtf} -A {output.hisat_tsv} {input.bam_hisat}
"""
rule gtf_list:
threads : 1
input:
list_gtf_star = expand(f'{STRINGTIECC_DIR}STAR/{{reads}}/{{reads}}.gtf', reads = READS),
list_gtf_hisat = expand(f'{STRINGTIECC_DIR}HISAT/{{reads}}/{{reads}}.gtf', reads = READS),
names_reads= f'{NREADS}'
output :
gtf_star = f'{STRINGTIECC_DIR}STAR_gtf_list.txt',
gtf_hisat = f'{STRINGTIECC_DIR}HISAT_gtf_list.txt',
prep_star = f'{STRINGTIECC_DIR}STAR_gtf_list_prep.txt',
prep_hisat = f'{STRINGTIECC_DIR}HISAT_gtf_list_prep.txt'
shell:
"""
find {input.list_gtf_star} >> {output.gtf_star}
find {input.list_gtf_hisat} >> {output.gtf_hisat}
paste {input.names_reads} {output.gtf_star} | sed "s/\t/ /" >> {output.prep_star}
paste {input.names_reads} {output.gtf_hisat} | sed "s/\t/ /" >> {output.prep_hisat}
"""
rule stringtie_csv:
threads : 1
input :
mergelist_star = rules.gtf_list.output.prep_star,
mergelist_hisat = rules.gtf_list.output.prep_hisat,
script = f"{PREPDE}"
output :
gcsv_star = f'{STRINGTIECC_DIR}STAR_gene_count_matrix.csv',
tcsv_star = f'{STRINGTIECC_DIR}STAR_transcript_count_matrix.csv',
gcsv_hisat = f'{STRINGTIECC_DIR}HISAT_gene_count_matrix.csv',
tcsv_hisat = f'{STRINGTIECC_DIR}HISAT_transcript_count_matrix.csv',
shell:
"""
python2 {input.script} -i {input.mergelist_star} -t {output.tcsv_star} -g {output.gcsv_star}
python2 {input.script} -i {input.mergelist_hisat} -t {output.tcsv_hisat} -g {output.gcsv_hisat}
"""
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment