Skip to content
Snippets Groups Projects
Commit b190eab3 authored by CamilleC's avatar CamilleC
Browse files

Create ProjetT-Pip-RNAseq_v2

parent 02cc753b
No related branches found
No related tags found
No related merge requests found
# Importation des librairies glob, sys, et re
import glob,sys,re
## appel du fichier de config
configfile: "configPT.yaml"
## Definition des variables "constantes" à partir du fichier config (attention aux arborescences dans le yaml)
REFERENCE_DIR = config['REFERENCE']
FASTA_REF= config['FASTA']
GFF_REF= config['GFF']
GTF_REF= config['GTF']
READS_DIR = config['READS']
FASTQC_DIR = config['FASTQC']
FASTQSCREEN_DIR = config['FASTQSCREEN']
FASTP_DIR = config['FASTP']
FASTQC_AF_DIR = config['FASTQC_AF']
MULTIQC_DIR = config['MULTIQC']
INDEX_DIR = config['INDEX']
INDEX_STAR_DIR = config['INDEX_STAR']
MAPPINGH_DIR = config['MAPPINGH']
MAPPINGS_DIR = config['MAPPING_STAR']
BAM_DIR = config['BAMFILES']
STATS_DIR = config['STATS']
FILTER_DIR = config['FILTER']
DUPL_DIR = config['DUPL']
HTSEQ_DIR = config['HTSEQ']
STRINGTIE_DIR = config['STRINGTIE']
STRINGTIECC_DIR = config['STRINGTIECC']
PREPDE = config['PREPDE']
NREADS= config['NREADS']
#Wild cards
REFERENCE, = glob_wildcards(f"{REFERENCE_DIR}{{reference}}.fasta")
ANNOTATION, = glob_wildcards(f"{REFERENCE_DIR}{{annotation}}.gff")
READS, = glob_wildcards(f"{READS_DIR}{{reads}}_r1.fastq.gz")
#SAMPLEFQ, = glob_wildcards(f"{FASTQC_DIR}{{samplefq}}.html")
#SAMPLEFS, = glob_wildcards(f"{FASTQSCREEN_DIR}{{samplefs}}.html")
#SAMPLEFQAF, = glob_wildcards(f"{FASTQC_AF_DIR}{{samplefqaf}}.html")
#SAMPLEFP, = glob_wildcards(f"{FASTP_DIR}{{samplefp}}.html")
#BAM, = glob_wildcards(f"{BAM_DIR}{{bam}}.bam")
# Regle finale pour vérifier la présence des outputs et si ils sont présents ils ne lancent pas le job
rule final:
input:
out_fastqc = expand(f"{FASTQC_DIR}{{reads}}_r1_fastqc.html", reads = READS),
out_fastqs = expand(f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.html", reads = READS),
out_fastp = expand(f"{FASTP_DIR}{{reads}}_report_fastp.html", reads = READS),
out_fastqc_af = expand(f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.html", reads = READS),
out_multiqc = expand(f"{MULTIQC_DIR}multiqc_report.html"),
out_hisat_idx = expand(f"{INDEX_DIR}P.Falci_3D7"),
out_hisat_map = expand(f"{MAPPINGH_DIR}{{reads}}_HISAT.bam" , reads = READS),
out_star_map = expand(f"{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam" , reads = READS),
out_bam_sort = expand(f"{BAM_DIR}{{reads}}_STAR_sort.bam", reads = READS),
out_bam_filt = expand(f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam", reads = READS),
out_bam_index = expand(f"{BAM_DIR}{{reads}}_STAR_sort.bam.bai", reads = READS),
out_bam_stats = expand(f"{STATS_DIR}{{reads}}_STAR_sort_flagstat.txt", reads = READS),
#out_bam_md = expand(f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup_metric.txt", reads = READS),
out_htseq = expand(f"{HTSEQ_DIR}HISAT/{{reads}}.txt", reads = READS),
#out_gtf_list = expand(f'{STRINGTIE_DIR}STRINGTIE_MERGE/STAR_gtf_list.txt'),
#out_stringtie = expand(f'{STRINGTIE_DIR}STRINGTIE_MERGE/STAR_stringtie_merged.gtf'),
out_stringtiecc = expand(f'{STRINGTIECC_DIR}STAR/{{reads}}/{{reads}}.gtf', reads=READS),
out_gtf_list = expand(f'{STRINGTIECC_DIR}HISAT_gtf_list.txt', reads=READS),
out_csv = f'{STRINGTIECC_DIR}HISAT_transcript_count_matrix.csv'
# FastQC before filtering
rule fastqc:
"""
FastQC - Control Quality of reads
"""
threads:4
params:
out_fastqc=directory(f"{FASTQC_DIR}")
input:
r1 = f"{READS_DIR}{{reads}}_r1.fastq.gz",
r2 = f"{READS_DIR}{{reads}}_r2.fastq.gz"
output:
out_fastqc_r1 = f"{FASTQC_DIR}{{reads}}_r1_fastqc.html",
out_fastqc_r2 = f"{FASTQC_DIR}{{reads}}_r2_fastqc.html",
out_fastqc_r1_zip = f"{FASTQC_DIR}{{reads}}_r1_fastqc.zip",
out_fastqc_r2_zip = f"{FASTQC_DIR}{{reads}}_r2_fastqc.zip"
message:
"""
input=
r1 = {input.r1}
r2 = {input.r2}
threads={threads}
"""
shell:
"""
fastqc -o {params.out_fastqc} -t 4 {input.r1} {input.r2}
"""
# FastQScreen agaisnt 3D7 and human
rule fastqscreen:
"""
FastQScreen - Control Quality of reads
"""
threads:4
priority:10000000000000000
params:
out_fastqs=directory(f"{FASTQSCREEN_DIR}")
input:
r1 = f"{READS_DIR}{{reads}}_r1.fastq.gz",
r2 = f"{READS_DIR}{{reads}}_r2.fastq.gz"
output:
html_r1= f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.html",
html_r2=f"{FASTQSCREEN_DIR}{{reads}}_r2_screen.html",
txt_r1=f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.txt",
txt_r2=f"{FASTQSCREEN_DIR}{{reads}}_r2_screen.txt",
message:
"""
Execute {rule}
input=
r1 = {input.r1}
r2 = {input.r2}
threads={threads}
"""
shell:
"""
fastq_screen --conf /Users/COHEN/Documents/Logiciels/FastQ-Screen-0.14.1/fastq_screen.conf --aligner BOWTIE2 {input.r1} {input.r2} --outdir {params.out_fastqs}
"""
# Filtering and trimming with FastP
rule fastp:
"""
FastP - Filtering and trimming before mapping
"""
params:
out_fastp=directory(f"{FASTP_DIR}")
input:
r1 = f"{READS_DIR}{{reads}}_r1.fastq.gz",
r2 = f"{READS_DIR}{{reads}}_r2.fastq.gz"
output:
out_paired_r1 = f"{FASTP_DIR}{{reads}}_r1_paired.fastq.gz",
out_unpaired_r1 = f"{FASTP_DIR}{{reads}}_r1_unpaired.fastq.gz",
out_paired_r2 = f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz",
out_unpaired_r2 = f"{FASTP_DIR}{{reads}}_r2_unpaired.fastq.gz",
out_report= f"{FASTP_DIR}{{reads}}_report_fastp.html",
out_json= f"{FASTP_DIR}{{reads}}_fastp.json"
message:
"""
input={input.r1},{input.r2}
"""
shell:
"""
fastp --in1 {input.r1} --in2 {input.r2} --out1 {output.out_paired_r1} --out2 {output.out_paired_r2} --unpaired1 {output.out_unpaired_r1} --unpaired2 {output.out_unpaired_r2} -w 1 -h {output.out_report} -j {output.out_json}
"""
# FastQC after filtering
rule fastqc_af:
"""
FastQC - Control Quality of reads after FastP
"""
priority:100000000000000
params:
out_fastqc_af = directory(f"{FASTQC_AF_DIR}")
input:
r1 = f"{FASTP_DIR}{{reads}}_r1_paired.fastq.gz",
r2 = f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz"
output:
out_fastqc_af_r1 = f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.html",
out_fastqc_af_r2 = f"{FASTQC_AF_DIR}{{reads}}_r2_paired_fastqc.html",
out_fastqc_af_r1_zip = f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.zip",
out_fastqc_af_r2_zip = f"{FASTQC_AF_DIR}{{reads}}_r2_paired_fastqc.zip"
message:
"""
input={input.r1},{input.r2}
"""
shell:
"""
fastqc -o {params.out_fastqc_af} -t 4 {input.r1} {input.r2}
"""
# FastQScreen agaisnt 3D7 and human
rule multi_qc:
"""
MultiQC
"""
params:
out_multi = directory(f"{MULTIQC_DIR}")
input:
expand(f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.html", reads = READS),
expand(f"{FASTQSCREEN_DIR}{{reads}}_r2_screen.html", reads = READS),
expand(f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.txt", reads = READS),
expand(f"{FASTQSCREEN_DIR}{{reads}}_r2_screen.txt", reads = READS),
expand(f"{FASTQC_DIR}{{reads}}_r1_fastqc.html", reads=READS),
expand(f"{FASTQC_DIR}{{reads}}_r2_fastqc.html", reads=READS),
expand(f"{FASTQC_DIR}{{reads}}_r1_fastqc.zip", reads=READS),
expand(f"{FASTQC_DIR}{{reads}}_r2_fastqc.zip", reads=READS),
expand(f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.html", reads=READS),
expand(f"{FASTQC_AF_DIR}{{reads}}_r2_paired_fastqc.html", reads=READS),
expand(f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.zip", reads=READS),
expand(f"{FASTQC_AF_DIR}{{reads}}_r2_paired_fastqc.zip", reads=READS),
expand(f"{FASTP_DIR}{{reads}}_report_fastp.html", reads=READS),
expand(f"{FASTP_DIR}{{reads}}_fastp.json",reads=READS)
output:
f"{MULTIQC_DIR}multiqc_report.html",
shell:
"""
multiqc {input} -o {params.out_multi}
"""
# Indexing of 3D7 genome (environ 1 min)
rule hisat2_index:
"""
Make index with HISAT2 for 3D7
"""
input:
fasta = f"{FASTA_REF}"
output:
index = f"{INDEX_DIR}P.Falci_3D7",
message:
"""
Execute {rule}
input:
fasta : {input.fasta}
output:
index: {output.index}
"""
log:
output = f"{INDEX_DIR}LOG/3D7_HISAT-INDEX.o",
error = f"{INDEX_DIR}LOG/3D7_HISAT-INDEX.e",
shell:
"""
ln -s {input.fasta} {output.index} 1>{log.output} 2>{log.error}
/Users/COHEN/Documents/Logiciels/hisat2-2.2.0/hisat2-build {input.fasta} {output.index} --quiet
"""
# Mapping with HISAT2 on Pf 3D7 (max 2 min par échantillon, total 10 min)
rule hisat2_map:
"""
Map reads on the genome with HISAT2 on paired end
"""
threads: 4
input:
reference = rules.hisat2_index.output.index,
r1 = f"{FASTP_DIR}{{reads}}_r1_paired.fastq.gz",
r2 = f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz"
output:
summary = f"{MAPPINGH_DIR}{{reads}}_HISAT_summary.txt",
bam = f"{MAPPINGH_DIR}{{reads}}_HISAT.bam",
message:
"""
Execute {rule}
input:
reference : {input.reference}
R1 : {input.r1}
R2 : {input.r2}
output:
bam: {output.bam}
"""
shell:
"""
/Users/COHEN/Documents/Logiciels/hisat2-2.2.0/hisat2 -p {threads} -x {input.reference} -1 {input.r1} -2 {input.r2} --summary-file {output.summary} | samtools view -S -b >{output.bam}
"""
#Convert GFF in GTF with gffread for RNA-STAR
rule gff_to_gtf:
"""
Convert GFF in GTF for RNA-STAR
"""
threads: 8
input:
gff = f"{REFERENCE_DIR}{{annotation}}.gff"
output:
gtf = f"{REFERENCE_DIR}{{annotation}}.gtf"
message:
"""
Execute {rule}
input:
gff : {input.gff}
output:
gtf: {output.gtf}
"""
shell:
"""
/Users/COHEN/Documents/Logiciels/gffread-0.12.2.OSX_x86_64/gffread {input.gff} -T -o {output.gtf}
"""
#Indexing for RNA-STAR (environ 2 min)
rule star_index:
"""
Make index with STAR for 3D7
"""
threads: 4
params:
dir = directory(f"{INDEX_STAR_DIR}")
input:
fasta = f"{FASTA_REF}",
gtf = f"{GTF_REF}",
output:
index = f"{INDEX_STAR_DIR}Log.out",
message:
"""
Execute {rule}
input:
fasta : {input.fasta}
gtf : {input.gtf}
output:
index: {output.index}
"""
shell:
"""
ln -s {input.fasta} {output.index}
/Users/COHEN/Documents/Logiciels/STAR-2-7-5c/bin/MacOSX_x86_64/STAR --runMode genomeGenerate --runThreadN {threads} --genomeDir {params.dir} --genomeFastaFiles {input.fasta} --sjdbGTFfile {input.gtf}
"""
#Mapping with RNA-STAR on Pf 3D7 (environ 10 min par échantillon, total 1h)
rule star_mapping:
"""
Map reads on the genome with STAR on paired end
"""
threads: 4
params:
idx = directory(f"{INDEX_STAR_DIR}"),
bam = f"{MAPPINGS_DIR}{{reads}}"
input:
idx = rules.star_index.output.index,
gtf = f"{GTF_REF}",
r1 = f"{FASTP_DIR}{{reads}}_r1_paired.fastq.gz",
r2 = f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz"
output:
bam = f"{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam"
message:
"""
Execute {rule}
input:
idx : {params.idx}
gtf : {input.gtf}
reads :
r1: {input.r1}
r2: {input.r2}
output:
bam: {output.bam}
"""
shell:
"""
/Users/COHEN/Documents/Logiciels/STAR-2-7-5c/bin/MacOSX_x86_64/STAR --runThreadN 4 --genomeDir {params.idx} --readFilesIn {input.r1} {input.r2} --readFilesCommand gunzip -c --outFileNamePrefix {params.bam} --outSAMtype BAM SortedByCoordinate --sjdbGTFfile {input.gtf}
"""
#Sort the BAM files with samtools
rule bam_sort:
"""
Sort the BAM files
"""
threads: 4
input:
star =f'{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam',
hisat = f"{MAPPINGH_DIR}{{reads}}_HISAT.bam"
output:
out_star = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
out_hisat = f"{BAM_DIR}{{reads}}_HISAT_sort.bam"
message:
"""
Execute {rule}
input:
star : {input.star}
hisat : {input.hisat}
output:
bam: {output.out_star}, {output.out_hisat}
"""
shell:
"""
samtools sort {input.star} > {output.out_star};
samtools sort {input.hisat} > {output.out_hisat}
"""
#Filtering of the BAM files with samtools
rule bam_filter:
"""
Filtering BAM files
"""
threads: 4
input:
star_bam = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
hisat_bam = f"{BAM_DIR}{{reads}}_HISAT_sort.bam"
output:
mapped_star = f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam",
unmapped_star = f"{FILTER_DIR}{{reads}}_STAR_sort_unmapped.bam",
mapped_hisat = f"{FILTER_DIR}{{reads}}_HISAT_sort_mapped.bam",
unmapped_hisat = f"{FILTER_DIR}{{reads}}_HISAT_sort_unmapped.bam"
message:
"""
Execute {rule}
input: {input.star_bam}
"""
shell:
"""
samtools view -b -F 4 {input.star_bam} > {output.mapped_star}
samtools view -b -f 4 {input.star_bam} > {output.unmapped_star}
samtools view -b -F 4 {input.hisat_bam} > {output.mapped_hisat}
samtools view -b -f 4 {input.hisat_bam} > {output.unmapped_hisat}
"""
# Identify the duplicates in BAM with Picard tools
rule bam_markdup:
"""
Mark duplicates in BAM files
"""
threads: 4
#params:
# récupérer l'option dans le yaml . voir pour mettre un if . A regarder !!!
input:
bam_star=f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam",
bam_hisat = f"{FILTER_DIR}{{reads}}_HISAT_sort_mapped.bam",
output:
star = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
star_txt = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup_metric.txt",
hisat = f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup.bam",
hisat_txt = f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup_metric.txt"
message:
"""
Execute {rule}
input: {input.bam_star}, {input.bam_hisat}
"""
shell:
"""
java -jar /Users/COHEN/Documents/Logiciels/picard.jar MarkDuplicates I={input.bam_star} O={output.star} M={output.star_txt}
java -jar /Users/COHEN/Documents/Logiciels/picard.jar MarkDuplicates I={input.bam_hisat} O={output.hisat} M={output.hisat_txt}
"""
#Index the BAM files with samtools
rule bam_index:
"""
Index BAM files
"""
threads: 4
input:
bam_star = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
bam_hisat = f"{BAM_DIR}{{reads}}_HISAT_sort.bam",
bam_star_filt = f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam",
bam_hisat_filt =f"{FILTER_DIR}{{reads}}_HISAT_sort_mapped.bam",
bam_star_filt_md = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
bam_hisat_filt_md = f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup.bam",
output:
bai = f"{BAM_DIR}{{reads}}_STAR_sort.bam.bai"
message:
"""
Execute {rule}
input: {input.bam_star}
"""
shell:
"""
samtools index {input.bam_star}
samtools index {input.bam_hisat}
samtools index {input.bam_star_filt}
samtools index {input.bam_hisat_filt}
samtools index {input.bam_star_filt_md}
samtools index {input.bam_hisat_filt_md}
"""
#Stats on the BAM files with samtools
rule bam_stats:
"""
Statistics data on the BAM files
"""
threads: 4
input:
bam_star = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
bam_hisat = f"{BAM_DIR}{{reads}}_HISAT_sort.bam",
bam_star_filt = f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam",
bam_hisat_filt =f"{FILTER_DIR}{{reads}}_HISAT_sort_mapped.bam",
bam_star_filt_md = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
bam_hisat_filt_md = f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup.bam",
output:
flagstat_star = f"{STATS_DIR}{{reads}}_STAR_sort_flagstat.txt",
flagstat_hisat = f"{STATS_DIR}{{reads}}_HISAT_sort_flagstat.txt",
flagstat_star_filt= f"{STATS_DIR}{{reads}}_STAR_sort_mapped_flagstat.txt",
flagstat_hisat_filt = f"{STATS_DIR}{{reads}}_HISAT_sort_mapped_flagstat.txt",
flagstat_star_filt_md= f"{STATS_DIR}{{reads}}_STAR_sort_mapped_markdup_flagstat.txt",
flagstat_hisat_filt_md= f"{STATS_DIR}{{reads}}_HISAT_sort_mapped_markdup_flagstat.txt",
message:
"""
Execute {rule}
input: {input}
"""
shell:
"""
samtools flagstat {input.bam_star} > {output.flagstat_star}
samtools flagstat {input.bam_hisat} > {output.flagstat_hisat}
samtools flagstat {input.bam_star_filt} > {output.flagstat_star_filt}
samtools flagstat {input.bam_star_filt_md} > {output.flagstat_star_filt_md}
samtools flagstat {input.bam_hisat_filt} > {output.flagstat_hisat_filt}
samtools flagstat {input.bam_hisat_filt_md} > {output.flagstat_hisat_filt_md}
"""
#Create the count table from the BAM files with Picard tools
rule htseq_counts :
"""
Create the count table with HTseq-count
"""
threads: 4
input:
bam_star = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
bam_hisat = f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup.bam",
gtf = f"{GTF_REF}"
output:
count_table_star = f"{HTSEQ_DIR}STAR/{{reads}}.txt",
count_table_hisat = f"{HTSEQ_DIR}HISAT/{{reads}}.txt"
message:
"""
Execute {rule}
input:
bam: {input.bam_star}
gtf: {input.gtf}
output: {output.count_table_star}
"""
shell:
"""
htseq-count -t exon -i gene_id -f bam {input.bam_star} {input.gtf} > {output.count_table_star}
htseq-count -t exon -i gene_id -f bam {input.bam_hisat} {input.gtf} > {output.count_table_hisat}
"""
#Create the count table from the BAM files with Picard tools
rule stringtie_cc :
"""
Create the count table with HTseq-count
"""
threads: 4
input:
bam_star = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
bam_hisat = f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup.bam",
gtf = f"{GTF_REF}"
output:
star_gtf = f'{STRINGTIECC_DIR}STAR/{{reads}}/{{reads}}.gtf',
hisat_gtf= f'{STRINGTIECC_DIR}HISAT/{{reads}}/{{reads}}.gtf',
star_tsv = f'{STRINGTIECC_DIR}STAR/{{reads}}/{{reads}}.tsv',
hisat_tsv = f'{STRINGTIECC_DIR}HISAT/{{reads}}/{{reads}}.tsv'
message:
"""
Execute {rule}
input:
bam: {input.bam_star}
gtf: {input.gtf}
output: {output.star_tsv}
"""
shell:
"""
/Users/COHEN/Documents/Logiciels/stringtie-2.1.4.OSX_x86_64/stringtie -p 8 -G {input.gtf} -e -B -o {output.star_gtf} -A {output.star_tsv} {input.bam_star}
/Users/COHEN/Documents/Logiciels/stringtie-2.1.4.OSX_x86_64/stringtie -p 8 -G {input.gtf} -e -B -o {output.hisat_gtf} -A {output.hisat_tsv} {input.bam_hisat}
"""
rule gtf_list:
threads : 1
input:
list_gtf_star = expand(f'{STRINGTIECC_DIR}STAR/{{reads}}/{{reads}}.gtf', reads = READS),
list_gtf_hisat = expand(f'{STRINGTIECC_DIR}HISAT/{{reads}}/{{reads}}.gtf', reads = READS),
names_reads= f'{NREADS}'
output :
gtf_star = f'{STRINGTIECC_DIR}STAR_gtf_list.txt',
gtf_hisat = f'{STRINGTIECC_DIR}HISAT_gtf_list.txt',
prep_star = f'{STRINGTIECC_DIR}STAR_gtf_list_prep.txt',
prep_hisat = f'{STRINGTIECC_DIR}HISAT_gtf_list_prep.txt'
shell:
"""
find {input.list_gtf_star} >> {output.gtf_star}
find {input.list_gtf_hisat} >> {output.gtf_hisat}
paste {input.names_reads} {output.gtf_star} | sed "s/\t/ /" >> {output.prep_star}
paste {input.names_reads} {output.gtf_hisat} | sed "s/\t/ /" >> {output.prep_hisat}
"""
rule stringtie_csv:
threads : 1
input :
mergelist_star = rules.gtf_list.output.prep_star,
mergelist_hisat = rules.gtf_list.output.prep_hisat,
script = f"{PREPDE}"
output :
gcsv_star = f'{STRINGTIECC_DIR}STAR_gene_count_matrix.csv',
tcsv_star = f'{STRINGTIECC_DIR}STAR_transcript_count_matrix.csv',
gcsv_hisat = f'{STRINGTIECC_DIR}HISAT_gene_count_matrix.csv',
tcsv_hisat = f'{STRINGTIECC_DIR}HISAT_transcript_count_matrix.csv',
shell:
"""
python2 {input.script} -i {input.mergelist_star} -t {output.tcsv_star} -g {output.gcsv_star}
python2 {input.script} -i {input.mergelist_hisat} -t {output.tcsv_hisat} -g {output.gcsv_hisat}
"""
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment