From 723960c0c405206830134f9117add448d5d6b9e9 Mon Sep 17 00:00:00 2001 From: "julie.orjuela" <julie.orjuela@ird.fr> Date: Tue, 12 Mar 2024 14:56:53 +0100 Subject: [PATCH] upgrading docs --- README.md | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) diff --git a/README.md b/README.md index a6a0a94..9bee2dc 100644 --- a/README.md +++ b/README.md @@ -94,6 +94,195 @@ If singularity was selected in installation of RNAja, it could be needed to give **Important Note** : In IRD "i-Trop" cluster, run RNAja using ONLY a node, data has to be in "/scratch" of chosen node. Use `nodelist : nodeX` parameter inside of `cluster_config.yaml`̀ file. + +## 3. Running your data + +### 3.1. Create a pipeline by configuring the `config.yaml` file + +Before running RNAja, create a `config.yaml` file with the command below, then adapt it to your data. + +``` +RNAja create_config +``` + +RNAja needs three sections in the `config.yaml` file: DATA, MODE and PARAMS. + +### DATA section + +In the `DATA` section of the `config.yaml` file, set the paths to the fastq files, the reference and the annotation file, as well as the output directory. + +``` +DATA: + fastq_dir: "/path/to/FASTQ" + reference: "/path/to/reference/ref.fna" + annotation: "/path/to/reference/ref.gtf" + output_dir: "RNAJA_OUTPUT" + sample_info: "/path/to/sample_info.txt" + PAIRED : true + de_comparisons: "/path/to/treatmentsComparisons.csv" +``` + +#### sample_info key + +In the `sample_info` DATA key, give the path to a comma-separated `sample_info.txt` file containing information about the samples. This file needs a header with the Forward,Reverse,SampleName,Direction,Treatment,Experiment columns. 
+ +If you are in single mode, leave the `Reverse` column empty in the `sample_info.txt` file. + +Here is an example for an experiment in `single` mode: + +```commandline +Forward,Reverse,SampleName,Direction,Treatment,Experiment +/path/to/FASTQ/Batch-rep1_R1.fastq.gz,,Batch_1,R1,Batch,E1 +/path/to/FASTQ/Batch-rep2_R1.fastq.gz,,Batch_2,R1,Batch,E2 +/path/to/FASTQ/Batch-rep3_R1.fastq.gz,,Batch_3,R1,Batch,E3 +/path/to/FASTQ/CENPK-rep1_R1.fastq.gz,,CENPK_1,R1,CENPK,E1 +/path/to/FASTQ/CENPK-rep2_R1.fastq.gz,,CENPK_2,R1,CENPK,E2 +/path/to/FASTQ/CENPK-rep3_R1.fastq.gz,,CENPK_3,R1,CENPK,E3 +``` + +Or in `paired` mode: + +```commandline +Forward,Reverse,SampleName,Direction,Treatment,Experiment +/path/to/FASTQ/Batch-rep1_R1.fastq.gz,/path/to/FASTQ/Batch-rep1_R2.fastq.gz,Batch_1,R1,Batch,E1 +/path/to/FASTQ/Batch-rep2_R1.fastq.gz,/path/to/FASTQ/Batch-rep2_R2.fastq.gz,Batch_2,R1,Batch,E2 +/path/to/FASTQ/Batch-rep3_R1.fastq.gz,/path/to/FASTQ/Batch-rep3_R2.fastq.gz,Batch_3,R1,Batch,E3 +/path/to/FASTQ/CENPK-rep1_R1.fastq.gz,/path/to/FASTQ/CENPK-rep1_R2.fastq.gz,CENPK_1,R1,CENPK,E1 +/path/to/FASTQ/CENPK-rep2_R1.fastq.gz,/path/to/FASTQ/CENPK-rep2_R2.fastq.gz,CENPK_2,R1,CENPK,E2 +/path/to/FASTQ/CENPK-rep3_R1.fastq.gz,/path/to/FASTQ/CENPK-rep3_R2.fastq.gz,CENPK_3,R1,CENPK,E3 +``` + +Finally, you need to confirm whether reads are paired or single by setting the `PAIRED` param to the boolean `true` or `false`. If `PAIRED : true`, sample file suffixes should be `_R1.fastq.gz` and `_R2.fastq.gz`. + +ATTENTION : If your reads are Illumina paired-end reads, you need to rename them SAMPLE_R1.fastq.gz and SAMPLE_R2.fastq.gz. For single reads use SAMPLE_R1.fastq.gz. RNAja accepts both compressed and uncompressed fastq files. + + +#### de_comparisons key + +In the `de_comparisons` key, indicate the path to the `treatmentsComparisons.csv` file. This file is used for the differential expression analysis. Please declare the treatments you want to compare. Here is an example. 
RNAja expects a header with comma-separated `condA` and `condB` columns: + +```commandline +condA,condB +Batch,CENPK +``` + +### MODE section + +Five pipelines can be run in parallel with RNAja! + +RNAja currently includes two mappers (STAR and HISAT2) and three counters (STRINGTIE, HTSEQCOUNT and STAR). You can activate or deactivate the pipelines you want to run in the MODE section, for example: + +``` +MODE: + HISAT2_STRINGTIE: true + HISAT2_HTSEQCOUNT: true + STARmap_STARcount: true + STARmap_HTSEQCOUNT: true + STARmap_STRINGTIE: true +``` + +### PARAMS section + +In the PARAMS section, tool parameters can be modified and adapted. + +``` +PARAMS: + HISAT2: + indexation: + prefix: "REF" + STAR: + indexation: + params: "--sjdbOverhang 100 --genomeSAindexNbases 10" + mapping: + params: "--readFilesCommand zcat" # --outFilterMismatchNoverLmax 0.03 + STRINGTIE: + discovery_mode : false + HTSEQCOUNT: + params: "-r pos -s reverse -m union -t gene " +``` + +You can modify `HISAT2` indexation, `STAR` indexation and mapping options, change `STRINGTIE` mode (discovery or not) as well as `HTSEQCOUNT` params. Feel free to check the documentation of these tools before running RNAja! + + +#### output + +Here is an example of the `output_dir` content if you have activated all five pipelines proposed by RNAja. + +```commandline +RNAJA_OUTPUT/ +├── COUNT +│  ├── HTSEQCOUNT +│  │  ├── HISAT2 +│  │  └── STAR +│  ├── STAR +│  │  ├── Batch-rep1Aligned.out.bam +│  │  ├── Batch-rep1Aligned.toTranscriptome.out.bam +│  │  ├── Batch-rep1Log.final.out +│  │  ├── Batch-rep1Log.out +│  │  ├── Batch-rep1Log.progress.out +│  │  ├── Batch-rep1ReadsPerGene.out.tab +│  │  ├── Batch-rep1SJ.out.tab +... +│  │  ├── CENPK-rep1Aligned.out.bam +│  │  ├── CENPK-rep1Aligned.toTranscriptome.out.bam +│  │  ├── CENPK-rep1Log.final.out +│  │  ├── CENPK-rep1Log.out +│  │  ├── CENPK-rep1Log.progress.out +│  │  ├── CENPK-rep1ReadsPerGene.out.tab +│  │  ├── CENPK-rep1SJ.out.tab +... 
+│  └── STRINGTIE +│  ├── HISAT2_Batch-rep1.gtf +│  ├── HISAT2_Batch-rep1.tsv +... +│  ├── HISAT2_CENPK-rep3.gtf +│  ├── HISAT2_CENPK-rep3.tsv +│  ├── STAR_Batch-rep1.gtf +│  ├── STAR_Batch-rep1.tsv +... +│  ├── STAR_CENPK-rep3.gtf +│  ├── STAR_CENPK-rep3.tsv +├── LOGS +├── MAPPING +│  ├── HISAT2 +│  │  ├── Batch-rep1.bam +│  │  ├── Batch-rep1.bam.csi +│  │  ├── Batch-rep1_HISAT_summary.txt +... +│  │  ├── CENPK-rep3.bam +│  │  ├── CENPK-rep3.bam.csi +│  │  └── CENPK-rep3_HISAT_summary.txt +│  └── STAR +│  ├── Batch-rep1.bam +│  ├── Batch-rep1.bam.csi +... +│  ├── CENPK-rep3.bam +│  └── CENPK-rep3.bam.csi +├── REF +│  ├── HISAT2 +│  │  ├── GCF_000146045.2_R64_genomic.fasta -> /scratch/rnaja_test/TEST/DATA_TEST/REF/GCF_000146045.2_R64_genomic.fna +│  └── STAR +│  ├── chrLength.txt +│  ├── chrNameLength.txt +│  ├── chrName.txt +│  ├── chrStart.txt +│  ├── exonGeTrInfo.tab +│  ├── exonInfo.tab +│  ├── GCF_000146045.2_R64_genomic.fasta -> /scratch/rnaja_test/TEST/DATA_TEST/REF/GCF_000146045.2_R64_genomic.fna +│  ├── geneInfo.tab +│  ├── Genome +│  ├── genomeParameters.txt +│  ├── Log.out +│  ├── SA +│  ├── SAindex +│  ├── sjdbInfo.txt +│  ├── sjdbList.fromGTF.out.tab +│  ├── sjdbList.out.tab +│  └── transcriptInfo.tab +└── slurm_logs + +``` + ----------------------- ### Authors -- GitLab