From 723960c0c405206830134f9117add448d5d6b9e9 Mon Sep 17 00:00:00 2001
From: "julie.orjuela" <julie.orjuela@ird.fr>
Date: Tue, 12 Mar 2024 14:56:53 +0100
Subject: [PATCH] upgrading docs

---
 README.md | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 189 insertions(+)

diff --git a/README.md b/README.md
index a6a0a94..9bee2dc 100644
--- a/README.md
+++ b/README.md
@@ -94,6 +94,195 @@ If singularity was selected in installation of RNAja, it could be needed to give
 
 **Important Note** : In IRD "i-Trop" cluster, run RNAja using ONLY a node, data has to be in "/scratch" of chosen node. Use `nodelist : nodeX` parameter inside of `cluster_config.yaml`Ì€ file.
 
+
+## 3. Running your data
+
+### 3.1. Create a pipeline by configuring the `config.yaml` file
+
+Before running RNAja, create a `config.yaml` file with the command below, then adapt it to your data.
+
+```
+RNAja create_config
+```
+
+RNAja needs three sections in the `config.yaml` file: DATA, MODE and PARAMS.
+
+### DATA section
+
+Adapt `config.yaml` file with path to fastq files, reference and annotation file in the `DATA section` as well as the output directory.
+
+```
+DATA:
+    fastq_dir: "/path/to/FASTQ"
+    reference: "/path/to/reference/ref.fna"
+    annotation: "/path/to/reference/ref.gtf"
+    output_dir: "RNAJA_OUTPUT"
+    sample_info: "/path/to/sample_info.txt"
+    PAIRED : true
+    de_comparisons: "/path/to/treatmentsComparisons.csv"
+```
+
+#### sample_info key
+
+In the `sample_info` key of the DATA section, give the path to a comma-separated `sample_info.txt` file containing information about the samples. This file needs a header with the columns Forward,Reverse,SampleName,Direction,Treatment,Experiment.
+
+If you are in single mode, leave the `Reverse` column empty in the `sample_info.txt` file.
+
+Here is an example for an experiment in `single` mode:
+
+```commandline
+Forward,Reverse,SampleName,Direction,Treatment,Experiment
+/path/to/FASTQ/Batch-rep1_R1.fastq.gz,,Batch_1,R1,Batch,E1
+/path/to/FASTQ/Batch-rep2_R1.fastq.gz,,Batch_2,R1,Batch,E2
+/path/to/FASTQ/Batch-rep3_R1.fastq.gz,,Batch_3,R1,Batch,E3
+/path/to/FASTQ/CENPK-rep1_R1.fastq.gz,,CENPK_1,R1,CENPK,E1
+/path/to/FASTQ/CENPK-rep2_R1.fastq.gz,,CENPK_2,R1,CENPK,E2
+/path/to/FASTQ/CENPK-rep3_R1.fastq.gz,,CENPK_3,R1,CENPK,E3
+```
+
+Or `paired` mode ...
+
+```commandline
+Forward,Reverse,SampleName,Direction,Treatment,Experiment
+/path/to/FASTQ/Batch-rep1_R1.fastq.gz,/path/to/FASTQ/Batch-rep1_R2.fastq.gz,Batch_1,R1,Batch,E1
+/path/to/FASTQ/Batch-rep2_R1.fastq.gz,/path/to/FASTQ/Batch-rep2_R2.fastq.gz,Batch_2,R1,Batch,E2
+/path/to/FASTQ/Batch-rep3_R1.fastq.gz,/path/to/FASTQ/Batch-rep3_R2.fastq.gz,Batch_3,R1,Batch,E3
+/path/to/FASTQ/CENPK-rep1_R1.fastq.gz,/path/to/FASTQ/CENPK-rep1_R2.fastq.gz,CENPK_1,R1,CENPK,E1
+/path/to/FASTQ/CENPK-rep2_R1.fastq.gz,/path/to/FASTQ/CENPK-rep2_R2.fastq.gz,CENPK_2,R1,CENPK,E2
+/path/to/FASTQ/CENPK-rep3_R1.fastq.gz,/path/to/FASTQ/CENPK-rep3_R2.fastq.gz,CENPK_3,R1,CENPK,E3
+```
+
+Finally, confirm whether reads are paired or single by setting the `PAIRED` parameter to `true` or `false`. If `PAIRED: true`, sample file suffixes should be `_R1.fastq.gz` and `_R2.fastq.gz`.
+
+ATTENTION: If your reads are Illumina paired-end, you need to rename them to SAMPLE_R1.fastq.gz and SAMPLE_R2.fastq.gz. For single-end reads, use SAMPLE_R1.fastq.gz. RNAja accepts both compressed and uncompressed fastq files.
+
+
+#### de_comparisons key
+
+In the `de_comparisons` key, indicate the path to the `treatmentsComparisons.csv` file. This file is used for the differential expression analysis. Please declare the treatments you want to compare. RNAja expects a comma-separated file with a `condA,condB` header, as in this example:
+
+```commandline
+condA,condB
+Batch,CENPK
+```
+
+### MODE section
+
+Five pipelines can be run in parallel using RNAja!
+
+For instance, we have included two mappers (STAR and HISAT2) and three counters (STRINGTIE, HTSEQCOUNT, STAR). You can activate or deactivate the pipelines you want to run in the MODE section, for example:
+
+```
+MODE:
+    HISAT2_STRINGTIE: true
+    HISAT2_HTSEQCOUNT: true
+    STARmap_STARcount: true
+    STARmap_HTSEQCOUNT: true
+    STARmap_STRINGTIE: true
+```
+
+### PARAMS section
+
+In the PARAMS section, tools parameters can be modified and adapted.
+
+```
+PARAMS:
+    HISAT2:
+        indexation:
+            prefix: "REF"
+    STAR:
+        indexation:
+            params: "--sjdbOverhang 100 --genomeSAindexNbases 10"
+        mapping:
+            params: "--readFilesCommand zcat" # --outFilterMismatchNoverLmax 0.03
+    STRINGTIE:
+        discovery_mode : false
+    HTSEQCOUNT:
+        params: "-r pos -s reverse -m union -t gene "
+```
+
+You can modify the `HISAT2` indexation, the `STAR` indexation and mapping options, change the `STRINGTIE` mode (discovery or not), as well as the `HTSEQCOUNT` params. Feel free to check the documentation of these tools before running RNAja!
+
+
+#### output
+
+Here is an example of the `output_dir` content if you have activated all five pipelines proposed by RNAja.
+
+```commandline
+RNAJA_OUTPUT/
+├── COUNT
+│   ├── HTSEQCOUNT
+│   │   ├── HISAT2
+│   │   └── STAR
+│   ├── STAR
+│   │   ├── Batch-rep1Aligned.out.bam
+│   │   ├── Batch-rep1Aligned.toTranscriptome.out.bam
+│   │   ├── Batch-rep1Log.final.out
+│   │   ├── Batch-rep1Log.out
+│   │   ├── Batch-rep1Log.progress.out
+│   │   ├── Batch-rep1ReadsPerGene.out.tab
+│   │   ├── Batch-rep1SJ.out.tab
+...
+│   │   ├── CENPK-rep1Aligned.out.bam
+│   │   ├── CENPK-rep1Aligned.toTranscriptome.out.bam
+│   │   ├── CENPK-rep1Log.final.out
+│   │   ├── CENPK-rep1Log.out
+│   │   ├── CENPK-rep1Log.progress.out
+│   │   ├── CENPK-rep1ReadsPerGene.out.tab
+│   │   ├── CENPK-rep1SJ.out.tab
+...
+│   └── STRINGTIE
+│       ├── HISAT2_Batch-rep1.gtf
+│       ├── HISAT2_Batch-rep1.tsv
+...
+│       ├── HISAT2_CENPK-rep3.gtf
+│       ├── HISAT2_CENPK-rep3.tsv
+│       ├── STAR_Batch-rep1.gtf
+│       ├── STAR_Batch-rep1.tsv
+...
+│       ├── STAR_CENPK-rep3.gtf
+│       ├── STAR_CENPK-rep3.tsv
+├── LOGS
+├── MAPPING
+│   ├── HISAT2
+│   │   ├── Batch-rep1.bam
+│   │   ├── Batch-rep1.bam.csi
+│   │   ├── Batch-rep1_HISAT_summary.txt
+...
+│   │   ├── CENPK-rep3.bam
+│   │   ├── CENPK-rep3.bam.csi
+│   │   └── CENPK-rep3_HISAT_summary.txt
+│   └── STAR
+│       ├── Batch-rep1.bam
+│       ├── Batch-rep1.bam.csi
+...
+│       ├── CENPK-rep3.bam
+│       └── CENPK-rep3.bam.csi
+├── REF
+│   ├── HISAT2
+│   │   ├── GCF_000146045.2_R64_genomic.fasta -> /scratch/rnaja_test/TEST/DATA_TEST/REF/GCF_000146045.2_R64_genomic.fna
+│   └── STAR
+│       ├── chrLength.txt
+│       ├── chrNameLength.txt
+│       ├── chrName.txt
+│       ├── chrStart.txt
+│       ├── exonGeTrInfo.tab
+│       ├── exonInfo.tab
+│       ├── GCF_000146045.2_R64_genomic.fasta -> /scratch/rnaja_test/TEST/DATA_TEST/REF/GCF_000146045.2_R64_genomic.fna
+│       ├── geneInfo.tab
+│       ├── Genome
+│       ├── genomeParameters.txt
+│       ├── Log.out
+│       ├── SA
+│       ├── SAindex
+│       ├── sjdbInfo.txt
+│       ├── sjdbList.fromGTF.out.tab
+│       ├── sjdbList.out.tab
+│       └── transcriptInfo.tab
+└── slurm_logs
+
+```
+
 -----------------------
 ### Authors
 
-- 
GitLab