diff --git a/README.md b/README.md index 003fea17ffb0efec87598e912a7be40108eda132..9fae089959a5ccaef570d3a5b52b9ba8e3cdfbed 100644 --- a/README.md +++ b/README.md @@ -31,22 +31,26 @@ _Good idea to include screenshots or GIFs (see ttygif or Asciinema)_ ### Conda _(prior!)_ ### Download and install **Conda**: [Latest Miniconda Installer](https://docs.conda.io/en/latest/miniconda.html#latest-miniconda-installer-links) -1. Donwload conda installer _(i.e. for Miniconda3 with Python 3.9 on Linux-64-bit)_: +1. Download conda installer _(e.g. for Miniconda3 with Python 3.9 on MacOSX-64-bit)_: ```shel -wget https://repo.anaconda.com/miniconda/Miniconda3-py39_4.10.3-Linux-x86_64.sh +wget https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh ``` + 2. Install conda using installer bash script: _Follow the prompts on the installer screens_ ```shell -bash Miniconda3-latest-Linux-x86_64.sh +bash Miniconda3-latest-MacOSX-x86_64.sh ``` + 3. Remove conda installer: +```shell +rm Miniconda3-latest-MacOSX-x86_64.sh ``` -rm Miniconda3-latest-Linux-x86_64.sh -``` + 4. Restart shell, close and reopen new terminal window ### Snakemake _(prior!)_ ### + Install **Snakemake** using Conda package management system _Follow the prompts on the installer screens_ ```shell @@ -54,29 +58,41 @@ conda install -c bioconda -c conda-forge snakemake ``` ### RQCP ### -Download _or_ clone the **Reads Quality Control Pipeline** project + +**Download** _OR_ clone the **Reads Quality Control Pipeline** project #### Download #### -1. 
Download source code archive (_zip_, **tar.gz**, _tar.bz2_, _tar_): [RQCP on GitLab](https://gitlab.com/ird_transvihmi/Reads_Quality_Control_Pipeline) +- Download source code archive (_zip_, **tar.gz**, _tar.bz2_, _tar_): [RQCP on GitLab](https://gitlab.com/ird_transvihmi/Reads_Quality_Control_Pipeline) +```shell +wget https://gitlab.com/ird_transvihmi/Reads_Quality_Control_Pipeline/-/archive/main/Reads_Quality_Control_Pipeline-main.tar.gz -P ~/Desktop/ +``` + +_alternatively_:  -2. Extract and remove the the archive (i.e. tar.gz): +- Extract and remove the archive (e.g. tar.gz): ```shell -tar -xzvf path/to/archive/Reads_Quality_Control_Pipeline.tar.gz -rm path/to/archive/Reads_Quality_Control_Pipeline.tar.gz +tar -xzvf path/to/archive/Reads_Quality_Control_Pipeline-main.tar.gz +rm path/to/archive/Reads_Quality_Control_Pipeline-main.tar.gz +mv ~/Desktop/Reads_Quality_Control_Pipeline-main ~/Desktop/Reads_Quality_Control_Pipeline +cd ~/Desktop/Reads_Quality_Control_Pipeline ``` #### Clone #### -Clone with **SSH** when you want to authenticate only one time + +- Clone with **SSH** when you want to authenticate only one time Authenticate with GitLab by following the instructions in the [SSH documentation](https://docs.gitlab.com/ee/ssh/index.html) ```shell -git clone git@gitlab.com:ird_transvihmi/Reads_Quality_Control_Pipeline.git path/to/workdir/ +git clone git@gitlab.com:ird_transvihmi/Reads_Quality_Control_Pipeline.git + +cd Reads_Quality_Control_Pipeline ``` Clone with **HTTPS** when you want to authenticate each time you perform an operation between your computer and GitLab ```shell -git clone https://gitlab.com/ird_transvihmi/Reads_Quality_Control_Pipeline.git path/to/workdir/ +git clone https://gitlab.com/ird_transvihmi/Reads_Quality_Control_Pipeline.git +cd Reads_Quality_Control_Pipeline ``` #### Difference between download and clone #### @@ -191,8 +207,8 @@ You can also ask for new databases, for genomes references not yet included, to 5. 
Call me to `+33.(0)4.67.41.55.xx` (No don't please _O\_o_!) ## Roadmap ## -Finish documentation about "terminal" and "results" Add a wiki ! +Finish documentation about "terminal" and "results" Add new features ## Contributing ## diff --git a/config/config.yaml b/config/config.yaml index fa3036426e43740606475eea2edaf62f687589ff..3439dc259110e4fe30f99ce659c3d80adc9ebb17 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -34,11 +34,11 @@ datasets: #- 'sickle-trim_single-end' #- 'fastq-join_single-end' #- 'fastq-join_paired-end' - #- 'all_merged' + #- 'all_merged_single-end' ## CUTADAPT ------------------------------------------------------------------------------------------ cutadapt: - length: '75' # Discard reads shorter than length, after trim + length: '35' # Discard reads shorter than length, after trim kits: # Sequence of an adapter ligated to the 3' end of the first read truseq: 'AGATCGGAAGAGC' # Illumina TruSeq / ScriptSeq based kits libraries nextera: 'CTGTCTCTTATACACATC' # Illumina Nextera / TruSight based kits libraries @@ -59,17 +59,17 @@ sickle-trim: #- 'illumina' # if illumina (CASAVA 1.3 to 1.7) #- 'solexa' # if solexa (CASAVA < 1.3) quality: '30' # phred score limit - length: '75' # read length limit, after trim + length: '35' # read length limit, after trim ## FASTQ-JOIN ---------------------------------------------------------------------------------------- fastq-join: - percent: '5' # N-percent maximum difference (default: 8) - overlap: '25' # N-minimum overlap (default: 6) + percent: '8' # N-percent maximum difference (default: 8) + overlap: '6' # N-minimum overlap (default: 6) ## FASTQSCREEN --------------------------------------------------------------------------------------- fastq-screen: config: 'config/fastq-screen.conf' # path to the fastq-screen configuration file - subset: '1000' # Don't use the whole sequence file, but create a temporary dataset of this specified number of read (default: '100000', set '0' for all dataset) + 
subset: '10000' # Don't use the whole sequence file, but create a temporary dataset of this specified number of read (default: '100000', set '0' for all dataset) aligner: - 'bwa' # Burrows-Wheeler Aligner, for mapping low-divergent sequences against a large reference genome #- 'bowtie' # Bowtie, an ultrafast, memory-efficient short read aligner diff --git a/config/fastq-screen.conf b/config/fastq-screen.conf index 7cff70c5395d170567a97ff44d56faa57e7ec102..2a6f366e17f9269f7fb3122ed18b7ef4a0266c50 100644 --- a/config/fastq-screen.conf +++ b/config/fastq-screen.conf @@ -48,15 +48,15 @@ THREADS 1 ## ##---------- ## Human h38 -DATABASE Human resources/databases/bwa/Human/Homo_sapiens_h38 +#DATABASE Human resources/databases/bwa/Human/Homo_sapiens_h38 #DATABASE Human resources/databases/bowtie2/Human/Homo_sapiens_h38 ## ## Mouse m39 -DATABASE Mouse resources/databases/bwa/Mouse/Mus_musculus_m39 +#DATABASE Mouse resources/databases/bwa/Mouse/Mus_musculus_m39 #DATABASE Mouse resources/databases/bowtie2/Mouse/Mus_musculus_m39 ## ## Arabidopsis thaliana - sequence from NCBI: ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/735/GCF_000001735.3_TAIR10/GCF_000001735.3_TAIR10_genomic.fna.gz -DATABASE Arabidopsis resources/databases/bwa/Arabidopsis/Arabidopsis_thaliana_t10 +#DATABASE Arabidopsis resources/databases/bwa/Arabidopsis/Arabidopsis_thaliana_t10 #DATABASE Arabidopsis resources/databases/bowtie2/Arabidopsis/Arabidopsis_thaliana_t10 ## ## Ecoli - sequence available from EMBL accession U00096.2 @@ -78,15 +78,15 @@ DATABASE Vectors resources/databases/bwa/Vectors/UniVec_wo_phi-X174 ## ##----------- ## Gorilla g4 -DATABASE Gorilla resources/databases/bwa/Gorilla/Gorilla_gorilla_g4 +#DATABASE Gorilla resources/databases/bwa/Gorilla/Gorilla_gorilla_g4 #DATABASE Gorilla resources/databases/bowtie2/Gorilla/Gorilla_gorilla_g4 ## ## Chimpanzee -DATABASE Chimpanzee resources/databases/bwa/Chimpanzee/Pan_troglodytes_t3 +#DATABASE Chimpanzee 
resources/databases/bwa/Chimpanzee/Pan_troglodytes_t3 #DATABASE Chimpanzee resources/databases/bowtie2/Chimpanzee/Pan_troglodytes_t3 ## ## Bat 10 -DATABASE Bat resources/databases/bwa/Bat/Pteropus_vampyrus_v1 +#DATABASE Bat resources/databases/bwa/Bat/Pteropus_vampyrus_v1 #DATABASE Bat resources/databases/bowtie2/Bat/Pteropus_vampyrus_v1 ## ## HIV - HXB2