Create ProjetT-Pip-RNAseq_v2

b190eab3 · CamilleC · 02cc753b · b190eab3
Commit b190eab3 authored 4 years ago by CamilleC
--- a/Scripts/ProjetT-Pip-RNAseq_v2
+++ b/Scripts/ProjetT-Pip-RNAseq_v2
+# Importation des librairies glob, sys, et re
+import glob,sys,re
+
+## appel du fichier de config
+configfile: "configPT.yaml"
+
+## Definition des variables "constantes" à partir du fichier config (attention aux arborescences dans le yaml)
+REFERENCE_DIR = config['REFERENCE']
+FASTA_REF= config['FASTA']
+GFF_REF= config['GFF']
+GTF_REF= config['GTF']
+READS_DIR = config['READS']
+FASTQC_DIR = config['FASTQC']
+FASTQSCREEN_DIR = config['FASTQSCREEN']
+FASTP_DIR = config['FASTP']
+FASTQC_AF_DIR = config['FASTQC_AF']
+MULTIQC_DIR = config['MULTIQC']
+INDEX_DIR = config['INDEX']
+INDEX_STAR_DIR = config['INDEX_STAR']
+MAPPINGH_DIR = config['MAPPINGH']
+MAPPINGS_DIR = config['MAPPING_STAR']
+BAM_DIR = config['BAMFILES']
+STATS_DIR = config['STATS']
+FILTER_DIR = config['FILTER']
+DUPL_DIR = config['DUPL']
+HTSEQ_DIR = config['HTSEQ']
+STRINGTIE_DIR = config['STRINGTIE']
+STRINGTIECC_DIR = config['STRINGTIECC']
+PREPDE = config['PREPDE']
+NREADS= config['NREADS']
+
+#Wild cards
+REFERENCE, = glob_wildcards(f"{REFERENCE_DIR}{{reference}}.fasta")
+ANNOTATION, = glob_wildcards(f"{REFERENCE_DIR}{{annotation}}.gff")
+READS, = glob_wildcards(f"{READS_DIR}{{reads}}_r1.fastq.gz")
+#SAMPLEFQ, = glob_wildcards(f"{FASTQC_DIR}{{samplefq}}.html")
+#SAMPLEFS, = glob_wildcards(f"{FASTQSCREEN_DIR}{{samplefs}}.html")
+#SAMPLEFQAF, = glob_wildcards(f"{FASTQC_AF_DIR}{{samplefqaf}}.html")
+#SAMPLEFP, = glob_wildcards(f"{FASTP_DIR}{{samplefp}}.html")
+#BAM, = glob_wildcards(f"{BAM_DIR}{{bam}}.bam")
+
+
+# Regle finale pour vérifier la présence des outputs et si ils sont présents ils ne lancent pas le job
+rule final:
+     input:
+          out_fastqc = expand(f"{FASTQC_DIR}{{reads}}_r1_fastqc.html", reads = READS),
+          out_fastqs = expand(f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.html", reads = READS),
+          out_fastp = expand(f"{FASTP_DIR}{{reads}}_report_fastp.html", reads = READS),
+          out_fastqc_af = expand(f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.html", reads = READS),
+          out_multiqc = expand(f"{MULTIQC_DIR}multiqc_report.html"),
+          out_hisat_idx = expand(f"{INDEX_DIR}P.Falci_3D7"),
+          out_hisat_map = expand(f"{MAPPINGH_DIR}{{reads}}_HISAT.bam" , reads = READS),
+          out_star_map = expand(f"{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam" , reads = READS),
+          out_bam_sort = expand(f"{BAM_DIR}{{reads}}_STAR_sort.bam", reads = READS),
+          out_bam_filt = expand(f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam", reads = READS),
+          out_bam_index = expand(f"{BAM_DIR}{{reads}}_STAR_sort.bam.bai", reads = READS),
+          out_bam_stats = expand(f"{STATS_DIR}{{reads}}_STAR_sort_flagstat.txt", reads = READS),
+          #out_bam_md = expand(f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup_metric.txt", reads = READS),
+          out_htseq = expand(f"{HTSEQ_DIR}HISAT/{{reads}}.txt", reads = READS),
+          #out_gtf_list = expand(f'{STRINGTIE_DIR}STRINGTIE_MERGE/STAR_gtf_list.txt'),
+          #out_stringtie = expand(f'{STRINGTIE_DIR}STRINGTIE_MERGE/STAR_stringtie_merged.gtf'),
+          out_stringtiecc = expand(f'{STRINGTIECC_DIR}STAR/{{reads}}/{{reads}}.gtf', reads=READS),
+          out_gtf_list = expand(f'{STRINGTIECC_DIR}HISAT_gtf_list.txt', reads=READS),
+          out_csv = f'{STRINGTIECC_DIR}HISAT_transcript_count_matrix.csv'
+
+# FastQC before filtering
+rule fastqc:
+     """ 
+          FastQC - Control Quality of reads
+     """
+     threads:4
+     params:
+          out_fastqc=directory(f"{FASTQC_DIR}")
+     input:
+          r1 = f"{READS_DIR}{{reads}}_r1.fastq.gz",
+          r2 = f"{READS_DIR}{{reads}}_r2.fastq.gz"
+     output:
+        out_fastqc_r1 = f"{FASTQC_DIR}{{reads}}_r1_fastqc.html",
+        out_fastqc_r2 = f"{FASTQC_DIR}{{reads}}_r2_fastqc.html",
+        out_fastqc_r1_zip = f"{FASTQC_DIR}{{reads}}_r1_fastqc.zip",
+        out_fastqc_r2_zip = f"{FASTQC_DIR}{{reads}}_r2_fastqc.zip"
+     message:
+          """
+               input=
+                    r1 = {input.r1}
+                    r2 = {input.r2}
+               threads={threads}
+          """
+     shell:
+          """
+               fastqc -o {params.out_fastqc} -t 4 {input.r1} {input.r2}
+          """
+
+# FastQScreen agaisnt 3D7 and human
+rule fastqscreen:
+     """ 
+          FastQScreen - Control Quality of reads
+     """
+     threads:4
+     priority:10000000000000000
+     params:
+          out_fastqs=directory(f"{FASTQSCREEN_DIR}")
+     input:
+          r1 = f"{READS_DIR}{{reads}}_r1.fastq.gz",
+          r2 = f"{READS_DIR}{{reads}}_r2.fastq.gz"
+     output:
+        html_r1= f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.html",
+        html_r2=f"{FASTQSCREEN_DIR}{{reads}}_r2_screen.html",
+        txt_r1=f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.txt",
+        txt_r2=f"{FASTQSCREEN_DIR}{{reads}}_r2_screen.txt",
+     message:
+          """
+               Execute {rule}
+               input=
+                    r1 = {input.r1}
+                    r2 = {input.r2}
+               threads={threads}
+          """
+     shell:
+          """
+               fastq_screen --conf /Users/COHEN/Documents/Logiciels/FastQ-Screen-0.14.1/fastq_screen.conf --aligner BOWTIE2 {input.r1} {input.r2} --outdir {params.out_fastqs}
+          """
+
+# Filtering and trimming with FastP
+rule fastp:
+     """ 
+          FastP - Filtering and trimming before mapping
+     """
+     params:
+          out_fastp=directory(f"{FASTP_DIR}")
+     input:
+          r1 = f"{READS_DIR}{{reads}}_r1.fastq.gz",
+          r2 = f"{READS_DIR}{{reads}}_r2.fastq.gz"
+     output:
+          out_paired_r1 = f"{FASTP_DIR}{{reads}}_r1_paired.fastq.gz",
+          out_unpaired_r1 = f"{FASTP_DIR}{{reads}}_r1_unpaired.fastq.gz",
+          out_paired_r2 = f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz",
+          out_unpaired_r2 = f"{FASTP_DIR}{{reads}}_r2_unpaired.fastq.gz",
+          out_report= f"{FASTP_DIR}{{reads}}_report_fastp.html",
+          out_json= f"{FASTP_DIR}{{reads}}_fastp.json"
+
+     message:
+          """
+               input={input.r1},{input.r2}
+          """
+     shell:
+          """
+               fastp --in1 {input.r1} --in2 {input.r2} --out1 {output.out_paired_r1} --out2 {output.out_paired_r2}  --unpaired1 {output.out_unpaired_r1} --unpaired2 {output.out_unpaired_r2} -w 1 -h {output.out_report} -j {output.out_json}  
+          """
+
+# FastQC after filtering
+rule fastqc_af:
+     """ 
+          FastQC - Control Quality of reads after FastP
+     """
+     priority:100000000000000
+     params:
+          out_fastqc_af = directory(f"{FASTQC_AF_DIR}")
+     input:
+          r1 = f"{FASTP_DIR}{{reads}}_r1_paired.fastq.gz",
+          r2 = f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz"
+     output:
+          out_fastqc_af_r1 = f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.html",
+          out_fastqc_af_r2 = f"{FASTQC_AF_DIR}{{reads}}_r2_paired_fastqc.html",
+          out_fastqc_af_r1_zip = f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.zip",
+          out_fastqc_af_r2_zip = f"{FASTQC_AF_DIR}{{reads}}_r2_paired_fastqc.zip"
+     message:
+          """
+               input={input.r1},{input.r2}
+          """
+     shell:
+          """
+               fastqc -o {params.out_fastqc_af} -t 4 {input.r1} {input.r2}
+          """
+
+# FastQScreen agaisnt 3D7 and human
+rule multi_qc:
+    """
+    MultiQC
+    """
+    params:
+        out_multi = directory(f"{MULTIQC_DIR}")
+    input:
+        expand(f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.html", reads = READS),
+        expand(f"{FASTQSCREEN_DIR}{{reads}}_r2_screen.html", reads = READS),
+        expand(f"{FASTQSCREEN_DIR}{{reads}}_r1_screen.txt", reads = READS),
+        expand(f"{FASTQSCREEN_DIR}{{reads}}_r2_screen.txt", reads = READS),
+        expand(f"{FASTQC_DIR}{{reads}}_r1_fastqc.html", reads=READS),
+        expand(f"{FASTQC_DIR}{{reads}}_r2_fastqc.html", reads=READS),
+        expand(f"{FASTQC_DIR}{{reads}}_r1_fastqc.zip", reads=READS),
+        expand(f"{FASTQC_DIR}{{reads}}_r2_fastqc.zip", reads=READS),
+        expand(f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.html", reads=READS),
+        expand(f"{FASTQC_AF_DIR}{{reads}}_r2_paired_fastqc.html", reads=READS),
+        expand(f"{FASTQC_AF_DIR}{{reads}}_r1_paired_fastqc.zip", reads=READS),
+        expand(f"{FASTQC_AF_DIR}{{reads}}_r2_paired_fastqc.zip", reads=READS),
+        expand(f"{FASTP_DIR}{{reads}}_report_fastp.html", reads=READS),
+        expand(f"{FASTP_DIR}{{reads}}_fastp.json",reads=READS)
+
+    output:
+        f"{MULTIQC_DIR}multiqc_report.html",
+    shell:
+        """
+             multiqc {input} -o {params.out_multi}
+        """
+
+# Indexing of 3D7 genome (environ 1 min)
+rule hisat2_index:
+    """                                                                                                                                                                                       
+    Make index with HISAT2 for 3D7                                                                                                                                                 
+    """
+    input:
+        fasta = f"{FASTA_REF}"
+    output:
+        index = f"{INDEX_DIR}P.Falci_3D7",
+    message:
+        """                                                                                                                                                                                   
+        Execute {rule}                                                                                                                                                                    
+        input:                                                                                                                                                                            
+            fasta : {input.fasta}                                                                                                                                                       
+        output:                                                                                                                                                                           
+            index: {output.index}                                                                                                                     
+        """
+    log:
+        output = f"{INDEX_DIR}LOG/3D7_HISAT-INDEX.o",
+        error = f"{INDEX_DIR}LOG/3D7_HISAT-INDEX.e",
+    shell:
+        """
+        ln -s {input.fasta} {output.index} 1>{log.output} 2>{log.error}
+        /Users/COHEN/Documents/Logiciels/hisat2-2.2.0/hisat2-build {input.fasta} {output.index} --quiet
+        """
+
+# Mapping with HISAT2 on Pf 3D7 (max 2 min par échantillon, total 10 min)
+rule hisat2_map:
+    """
+    Map reads on the genome with HISAT2 on paired end
+    """
+    threads: 4
+    input:
+        reference = rules.hisat2_index.output.index,
+        r1 = f"{FASTP_DIR}{{reads}}_r1_paired.fastq.gz",
+        r2 = f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz"
+    output:
+        summary = f"{MAPPINGH_DIR}{{reads}}_HISAT_summary.txt",
+        bam = f"{MAPPINGH_DIR}{{reads}}_HISAT.bam",
+    message:
+        """                                                                                                                                                                            
+        Execute {rule}                                                                                                                                                                   
+        input:                                                                                                                                                                            
+            reference : {input.reference}
+            R1 : {input.r1}
+            R2 : {input.r2}                                                                                                                                                       
+        output:                                                                                                                                                                           
+            bam: {output.bam}                                                                                                 
+        """
+    shell:
+        """
+        /Users/COHEN/Documents/Logiciels/hisat2-2.2.0/hisat2 -p {threads} -x {input.reference} -1 {input.r1} -2 {input.r2} --summary-file {output.summary} | samtools view -S -b >{output.bam} 
+        """
+
+#Convert GFF in GTF with gffread for RNA-STAR
+rule gff_to_gtf:
+    """                                                                                                                                                                                       
+    Convert GFF in GTF for RNA-STAR                                                                                                                                                 
+    """
+    threads: 8
+    input:
+        gff = f"{REFERENCE_DIR}{{annotation}}.gff"
+    output:
+        gtf = f"{REFERENCE_DIR}{{annotation}}.gtf"
+    message:
+        """                                                                                                                                                                                   
+        Execute {rule}                                                                                                                                                                    
+        input:                                                                                                                                                                            
+            gff : {input.gff}                                                                                                                                                       
+        output:                                                                                                                                                                           
+            gtf: {output.gtf}                                                                                                                     
+        """
+    shell:
+        """
+        /Users/COHEN/Documents/Logiciels/gffread-0.12.2.OSX_x86_64/gffread {input.gff} -T -o {output.gtf}
+        """
+
+#Indexing for RNA-STAR (environ 2 min)
+rule star_index:
+    """
+    Make index with STAR for 3D7
+    """
+    threads: 4
+    params:
+        dir = directory(f"{INDEX_STAR_DIR}")
+    input:
+        fasta = f"{FASTA_REF}",
+        gtf = f"{GTF_REF}",
+    output:
+        index = f"{INDEX_STAR_DIR}Log.out",
+    message:
+        """
+        Execute {rule}
+        input:
+            fasta : {input.fasta}
+            gtf : {input.gtf}
+        output:
+            index: {output.index}
+        """
+    shell:
+        """
+        ln -s {input.fasta} {output.index}
+        /Users/COHEN/Documents/Logiciels/STAR-2-7-5c/bin/MacOSX_x86_64/STAR --runMode genomeGenerate --runThreadN {threads} --genomeDir {params.dir} --genomeFastaFiles {input.fasta} --sjdbGTFfile {input.gtf}
+   
+        """
+
+#Mapping with RNA-STAR on Pf 3D7 (environ 10 min par échantillon, total 1h)
+rule star_mapping:
+    """
+    Map reads on the genome with STAR on paired end
+    """
+    threads: 4
+    params:
+        idx = directory(f"{INDEX_STAR_DIR}"),
+        bam = f"{MAPPINGS_DIR}{{reads}}"
+    input:
+        idx = rules.star_index.output.index,
+        gtf = f"{GTF_REF}",
+        r1 = f"{FASTP_DIR}{{reads}}_r1_paired.fastq.gz",
+        r2 = f"{FASTP_DIR}{{reads}}_r2_paired.fastq.gz"
+    output:
+        bam = f"{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam"
+    message:
+        """
+        Execute {rule}
+        input:
+            idx : {params.idx}
+            gtf : {input.gtf}
+            reads : 
+                r1: {input.r1}
+                r2: {input.r2}
+        output:
+            bam: {output.bam}
+        """
+    shell:
+        """
+        /Users/COHEN/Documents/Logiciels/STAR-2-7-5c/bin/MacOSX_x86_64/STAR --runThreadN 4 --genomeDir {params.idx} --readFilesIn {input.r1} {input.r2} --readFilesCommand gunzip -c --outFileNamePrefix {params.bam} --outSAMtype BAM SortedByCoordinate --sjdbGTFfile {input.gtf} 
+        """
+
+#Sort the BAM files with samtools
+rule bam_sort:
+    """
+    Sort the BAM files     
+    """
+    threads: 4
+    input:
+        star =f'{MAPPINGS_DIR}{{reads}}Aligned.sortedByCoord.out.bam',
+        hisat = f"{MAPPINGH_DIR}{{reads}}_HISAT.bam"
+    output:
+        out_star = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
+        out_hisat = f"{BAM_DIR}{{reads}}_HISAT_sort.bam"
+    message:
+        """
+        Execute {rule}
+        input:
+            star : {input.star}
+            hisat : {input.hisat}
+        output:
+            bam: {output.out_star}, {output.out_hisat}
+        """
+    shell:
+        """
+        samtools sort  {input.star} > {output.out_star};
+        samtools sort  {input.hisat} > {output.out_hisat}
+        """
+
+#Filtering of the BAM files with samtools
+rule bam_filter:
+    """
+    Filtering BAM files      
+    """
+    threads: 4
+    input:
+        star_bam = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
+        hisat_bam = f"{BAM_DIR}{{reads}}_HISAT_sort.bam"
+    output:
+        mapped_star = f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam",
+        unmapped_star = f"{FILTER_DIR}{{reads}}_STAR_sort_unmapped.bam",
+        mapped_hisat = f"{FILTER_DIR}{{reads}}_HISAT_sort_mapped.bam",
+        unmapped_hisat = f"{FILTER_DIR}{{reads}}_HISAT_sort_unmapped.bam"
+    message:
+        """
+        Execute {rule}
+        input: {input.star_bam}
+        
+        """
+    shell:
+        """
+        samtools view -b -F 4 {input.star_bam} > {output.mapped_star}
+        samtools view -b -f 4 {input.star_bam} > {output.unmapped_star}
+        samtools view -b -F 4 {input.hisat_bam} > {output.mapped_hisat}
+        samtools view -b -f 4 {input.hisat_bam} > {output.unmapped_hisat}
+        """
+
+# Identify the duplicates in BAM with Picard tools
+rule bam_markdup:
+    """
+    Mark duplicates in BAM files      
+    """
+    threads: 4
+    #params:
+        # récupérer l'option dans le yaml . voir pour mettre un if . A regarder !!!
+    input:
+         bam_star=f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam",
+         bam_hisat = f"{FILTER_DIR}{{reads}}_HISAT_sort_mapped.bam",
+    output:
+        star = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
+        star_txt = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup_metric.txt",
+        hisat = f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup.bam",
+        hisat_txt = f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup_metric.txt"
+    message:
+        """
+        Execute {rule}
+        input: {input.bam_star}, {input.bam_hisat}
+        """
+    shell:
+        """
+        java -jar /Users/COHEN/Documents/Logiciels/picard.jar MarkDuplicates I={input.bam_star} O={output.star} M={output.star_txt}
+        java -jar /Users/COHEN/Documents/Logiciels/picard.jar MarkDuplicates I={input.bam_hisat} O={output.hisat} M={output.hisat_txt}
+        """
+
+#Index the BAM files with samtools
+rule bam_index:
+    """
+    Index BAM files      
+    """
+    threads: 4
+    input:
+        bam_star = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
+        bam_hisat = f"{BAM_DIR}{{reads}}_HISAT_sort.bam",
+        bam_star_filt = f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam",
+        bam_hisat_filt =f"{FILTER_DIR}{{reads}}_HISAT_sort_mapped.bam",
+        bam_star_filt_md = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
+        bam_hisat_filt_md = f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup.bam",
+    output:
+        bai = f"{BAM_DIR}{{reads}}_STAR_sort.bam.bai"
+    message:
+        """
+        Execute {rule}
+        input: {input.bam_star}
+        """
+    shell:
+        """
+        samtools index {input.bam_star}
+        samtools index {input.bam_hisat}
+        samtools index {input.bam_star_filt}
+        samtools index {input.bam_hisat_filt}
+        samtools index {input.bam_star_filt_md}
+        samtools index {input.bam_hisat_filt_md}
+        """
+
+#Stats on the BAM files with samtools
+rule bam_stats:
+    """
+    Statistics data on the BAM files      
+    """
+    threads: 4
+    input:
+        bam_star = f"{BAM_DIR}{{reads}}_STAR_sort.bam",
+        bam_hisat = f"{BAM_DIR}{{reads}}_HISAT_sort.bam",
+        bam_star_filt = f"{FILTER_DIR}{{reads}}_STAR_sort_mapped.bam",
+        bam_hisat_filt =f"{FILTER_DIR}{{reads}}_HISAT_sort_mapped.bam",
+        bam_star_filt_md = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
+        bam_hisat_filt_md = f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup.bam",
+    output:
+        flagstat_star = f"{STATS_DIR}{{reads}}_STAR_sort_flagstat.txt",
+        flagstat_hisat = f"{STATS_DIR}{{reads}}_HISAT_sort_flagstat.txt",
+        flagstat_star_filt= f"{STATS_DIR}{{reads}}_STAR_sort_mapped_flagstat.txt",
+        flagstat_hisat_filt = f"{STATS_DIR}{{reads}}_HISAT_sort_mapped_flagstat.txt",
+        flagstat_star_filt_md= f"{STATS_DIR}{{reads}}_STAR_sort_mapped_markdup_flagstat.txt",
+        flagstat_hisat_filt_md= f"{STATS_DIR}{{reads}}_HISAT_sort_mapped_markdup_flagstat.txt",
+    message:
+        """
+        Execute {rule}
+        input: {input}
+        """
+    shell:
+        """
+        samtools flagstat {input.bam_star} > {output.flagstat_star}
+        samtools flagstat {input.bam_hisat} > {output.flagstat_hisat}
+        samtools flagstat {input.bam_star_filt} > {output.flagstat_star_filt}
+        samtools flagstat {input.bam_star_filt_md} > {output.flagstat_star_filt_md}
+        samtools flagstat {input.bam_hisat_filt} > {output.flagstat_hisat_filt}
+        samtools flagstat {input.bam_hisat_filt_md} > {output.flagstat_hisat_filt_md}
+        """
+
+#Create the count table from the BAM files with Picard tools
+rule htseq_counts :
+    """
+    Create the count table with HTseq-count      
+    """
+    threads: 4
+    input:
+         bam_star = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
+         bam_hisat = f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup.bam",
+         gtf = f"{GTF_REF}"
+    output:
+         count_table_star = f"{HTSEQ_DIR}STAR/{{reads}}.txt",
+         count_table_hisat = f"{HTSEQ_DIR}HISAT/{{reads}}.txt"
+    message:
+        """
+        Execute {rule}
+        input: 
+            bam: {input.bam_star}
+            gtf: {input.gtf}
+        output: {output.count_table_star}
+        """
+    shell:
+        """
+        htseq-count -t exon -i gene_id -f bam {input.bam_star} {input.gtf} > {output.count_table_star}
+        htseq-count -t exon -i gene_id -f bam {input.bam_hisat} {input.gtf} > {output.count_table_hisat}
+        """
+
+#Create the count table from the BAM files with Picard tools
+rule stringtie_cc :
+    """
+    Create the count table with HTseq-count
+    """
+    threads: 4
+    input:
+         bam_star = f"{DUPL_DIR}{{reads}}_STAR_sort_mapped_markdup.bam",
+         bam_hisat = f"{DUPL_DIR}{{reads}}_HISAT_sort_mapped_markdup.bam",
+         gtf = f"{GTF_REF}"
+    output:
+         star_gtf = f'{STRINGTIECC_DIR}STAR/{{reads}}/{{reads}}.gtf',
+         hisat_gtf= f'{STRINGTIECC_DIR}HISAT/{{reads}}/{{reads}}.gtf',
+         star_tsv = f'{STRINGTIECC_DIR}STAR/{{reads}}/{{reads}}.tsv',
+         hisat_tsv = f'{STRINGTIECC_DIR}HISAT/{{reads}}/{{reads}}.tsv'
+    message:
+        """
+        Execute {rule}
+        input:
+            bam: {input.bam_star}
+            gtf: {input.gtf}
+        output: {output.star_tsv}
+        """
+    shell:
+        """
+        /Users/COHEN/Documents/Logiciels/stringtie-2.1.4.OSX_x86_64/stringtie -p 8 -G {input.gtf} -e -B -o {output.star_gtf} -A {output.star_tsv} {input.bam_star}
+        /Users/COHEN/Documents/Logiciels/stringtie-2.1.4.OSX_x86_64/stringtie -p 8 -G {input.gtf} -e -B -o {output.hisat_gtf} -A {output.hisat_tsv} {input.bam_hisat}
+
+        """
+
+rule gtf_list:
+    threads : 1
+    input:
+        list_gtf_star = expand(f'{STRINGTIECC_DIR}STAR/{{reads}}/{{reads}}.gtf', reads = READS),
+        list_gtf_hisat = expand(f'{STRINGTIECC_DIR}HISAT/{{reads}}/{{reads}}.gtf', reads = READS),
+        names_reads= f'{NREADS}'
+
+    output :
+        gtf_star = f'{STRINGTIECC_DIR}STAR_gtf_list.txt',
+        gtf_hisat = f'{STRINGTIECC_DIR}HISAT_gtf_list.txt',
+        prep_star = f'{STRINGTIECC_DIR}STAR_gtf_list_prep.txt',
+        prep_hisat = f'{STRINGTIECC_DIR}HISAT_gtf_list_prep.txt'
+    shell:
+        """
+        find {input.list_gtf_star} >> {output.gtf_star} 
+        find {input.list_gtf_hisat} >> {output.gtf_hisat}
+        paste {input.names_reads} {output.gtf_star} | sed "s/\t/ /"  >> {output.prep_star}
+        paste {input.names_reads} {output.gtf_hisat} | sed "s/\t/ /" >> {output.prep_hisat}
+        """
+
+rule stringtie_csv:
+    threads : 1
+    input :
+        mergelist_star = rules.gtf_list.output.prep_star,
+        mergelist_hisat = rules.gtf_list.output.prep_hisat,
+        script = f"{PREPDE}"
+    output :
+        gcsv_star = f'{STRINGTIECC_DIR}STAR_gene_count_matrix.csv',
+        tcsv_star = f'{STRINGTIECC_DIR}STAR_transcript_count_matrix.csv',
+        gcsv_hisat = f'{STRINGTIECC_DIR}HISAT_gene_count_matrix.csv',
+        tcsv_hisat = f'{STRINGTIECC_DIR}HISAT_transcript_count_matrix.csv',
+    shell:
+        """
+        python2 {input.script} -i {input.mergelist_star} -t {output.tcsv_star} -g {output.gcsv_star}
+        python2 {input.script} -i {input.mergelist_hisat} -t {output.tcsv_hisat} -g {output.gcsv_hisat}
+
+       """
\ No newline at end of file