Skip to content
Snippets Groups Projects
Commit b303a555 authored by jacques.dainat_ird.fr's avatar jacques.dainat_ird.fr
Browse files

first working version

parent 84fde2c6
Branches main
No related tags found
No related merge requests found
......@@ -7,10 +7,10 @@ executor {
process {
cpus = 1
maxForks = 20
maxForks = 4
shell = ['/bin/bash', '-euo', 'pipefail']
stageOutMode = 'rsync'
withName: 'agat' {
withLabel: 'agat' {
cpus = 1
}
withName: 'prokka' {
......
process {
withName: 'agat' {
withLabel: 'agat' {
conda = { singularity.enabled || docker.enabled ? '' : "$baseDir/conda/process_fastp.yml" }
container = 'quay.io/biocontainers/agat:1.0.0--pl5321hdfd78af_0'
}
......
#! /usr/bin/env nextflow
/*
* Jacques Dainat 2023
*/
nextflow.enable.dsl=2
/*
......@@ -15,6 +17,7 @@ params.species = "coli"
params.strain = "POO247"
params.codon_table = 11
params.evalue = 1e-9
params.output = "results"
params.bakta_db="/path/to/bakta_db" // optional
params.busco_db = "/path/to/busco_db" // optional
......@@ -34,10 +37,10 @@ if(params.genus){
}
// Deal with BAKTA Parameters
params.bakta = ' '
params.bakta = ''
File bakta_db_file = new File( params.bakta_db );
if (bakta_db_file.exists() ){
params.bakta = '--db ${bakta_db}'
if (!bakta_db_file.exists() ){
exit 1, "Bakta database name ${params.bakta_db} does not exists! It is mandatory if you wish to run Bakta. Otherwise deactivate bakta (--skip_bakta).\n"
}
// Deal with BUSCO Parameters
......@@ -52,6 +55,10 @@ if(params.busco_lineage){
params.busco = '${params.busco} --auto-lineage-prok'
}
// ****************************** MODULES ****************************
include { busco_assembly; busco_annotation ; busco_annotation as busco_meta_annotation} from "${baseDir}/modules/busco.nf"
include { agat_sp_extract_sequences; agat_sp_extract_sequences as extractMetaAA; agat_sp_merge_annotations } from "${baseDir}/modules/agat.nf"
log.info """
IRD
.-./`) .-------. ______
......@@ -76,21 +83,28 @@ MbaP - Mutualize Bacterial Predictors
"""
/*if( !params.skip_trimming && !(params.trimmer in trimming_tools) ){
exit 1, "Error: ${params.trimmer} is not a valid trimming tool option.\n Please provide a valid option from ${trimming_tools}.\n"
}
if( !params.skip_trimming && params.trimmer == 'trimmomatic' && !file(params.trimmomatic_adapter_path).exists() ){
exit 1, "The adapter file '${params.trimmomatic_adapter_path}' does not exist!\n"
}
*/
// Entry workflow: turns params.assembly into a channel of assembly files and
// passes that channel to the mbap sub-workflow.
// NOTE(review): this span comes from a diff view and appears to contain both the
// earlier channel construction (the first fromPath/mbap(assembly) pair) and the
// later file-or-folder variant ending in mbap(assemblies) — confirm which of the
// two belongs to the current revision before running it as-is.
workflow {
main:
// Single-pattern variant: aborts when nothing matches params.assembly.
Channel.fromPath(params.assembly, checkIfExists: true)
.ifEmpty { exit 1, "Cannot find assembly file matching ${params.assembly}!\n" }
.set {assembly}
mbap(assembly)
// File-or-folder variant: inspect params.assembly on disk to decide how to glob.
File input = new File( "${params.assembly}");
if ( input.exists() ){
// Folder case: every regular file directly inside the folder becomes one channel item.
if ( input.isDirectory()) {
Channel.fromPath( "${params.assembly}/*", type: 'file', checkIfExists: true)
.ifEmpty { exit 1, "No file within the ${params.assembly } folder found !\n" }
.set {assemblies}
}
// File case: the path itself is the only channel item.
else {
Channel.fromPath( params.assembly, type: 'file', checkIfExists: true)
.ifEmpty { exit 1, "No file with ${params.assembly } found !\n" }
.set {assemblies}
}
}
// Neither a file nor a folder exists at params.assembly: stop with an error.
else {
exit 1, "${params.assembly} input does not exists!"
}
mbap(assemblies)
}
workflow mbap {
......@@ -99,30 +113,71 @@ workflow mbap {
assembly
main:
//busco(assembly)
prokka(assembly)
bakta(assembly)
make_id(assembly)
make_id.out[0].set{id_assembly}
make_id.out[1].set{id_assembly_bis}
//id_assembly.view()
busco_assembly(id_assembly)
prokka(id_assembly)
prokka.out[0].set{annotations}
bakta(id_assembly)
annotations.concat(bakta.out[0]).set{annotations}
annotations.multiMap { it ->
first: it
second: it
}.set{annotationsForked}
//annotations.view()
// annotations.concat(bakta.out[0]).groupTuple().set{annotations} // GOOD
id_assembly.combine(annotationsForked.first, by:0).set{id_assembly_annot}
//id_assembly.join(annotations).set{id_assembly_annot} // GOOD
//id_assembly_annot.view()
agat_sp_extract_sequences(id_assembly_annot).set{proteomes}
busco_annotation(proteomes)
//proteomes.map{fa, annotation -> [ fa.baseName, fa, annotation ]}.groupTuple().set{proteomesByAssembly}
annotationsForked.second.groupTuple().set{annotations_group}
id_assembly_bis.join(annotations_group).set{id_assembly_annots}
agat_sp_merge_annotations(id_assembly_annots)
extractMetaAA(agat_sp_merge_annotations.out)
busco_meta_annotation(extractMetaAA.out)
}
/*
 * make_id
 * Pairs an assembly file with an identifier taken from its base name.
 *
 * input : path to one assembly file
 * output: two channels carrying the same (id, assembly) tuple; the mbap
 *         workflow reads them as make_id.out[0] and make_id.out[1].
 * The script section is intentionally empty: nothing is executed, the
 * process only re-emits its input together with the derived id.
 */
process make_id {

    tag "${assembly}"

    input:
    path(assembly)

    output:
    tuple val("${assembly.baseName}"), path("${assembly}")
    tuple val("${assembly.baseName}"), path("${assembly}")

    script:
    """
    """
}
process prokka {
tag "${assembly}"
publishDir "${params.outdir}/${assembly}/prokka", mode: 'copy'
publishDir "${params.outdir}/${id}/prokka", mode: 'copy'
input:
path(assembly)
tuple val(id), path(assembly)
output:
tuple val(id), file('*/*.gff*')
file('*')
script:
"""
prokka \
--addgenes --locustag ${params.locus_prefix} \
--increment 10 --compliant \
--addgenes --addmrna --locustag ${params.locus_prefix} \
--outdir ${id}_prokka --prefix ${id}_annotation_prokka \
--kingdom Bacteria --gcode ${params.codon_table} \
--evalue ${params.evalue} --rfam \
--evalue ${params.evalue} --rfam --cpus ${task.cpus}\
${params.prokka} \
${assembly}
"""
......@@ -132,40 +187,33 @@ process prokka {
// Runs Bakta on one assembly and, as the last script step, moves the GFF3 it
// produced to <id>_annotation_bakta.gff so it can be emitted next to the id.
// NOTE(review): this span comes from a diff view and appears to show removed and
// added lines together (two publishDir lines, two input declarations, repeated
// command lines inside the script) — confirm which lines form the current revision.
// NOTE(review): publishDir reads params.outdir here, whereas the busco/agat
// processes read params.output — confirm params.outdir is defined somewhere.
process bakta {
tag "$assembly"
publishDir "${params.outdir}/${assembly}/bakta", mode: 'copy'
publishDir "${params.outdir}/${id}/bakta", mode: 'copy'
conda 'bakta'
input:
path(assembly)
tuple val(id), path(assembly)
output:
path("${assembly}_bakta")
// (id, renamed GFF) tuple; the mbap workflow consumes it as bakta.out[0]
tuple val(id), file("${id}_annotation_bakta.gff")
// whole Bakta output directory
path("${id}_bakta")
script:
"""
bakta \
bakta \
--keep-contig-headers --genus ${params.genus} \
--species ${params.species} --prefix ${assembly} --locus-tag ${params.locus_prefix} --output ${assembly}_bakta \
--verbose \
--species ${params.species} --prefix ${id} \
--locus-tag ${params.locus_prefix} --db ${params.bakta_db} \
--verbose --threads ${task.cpus} \
--output ${id}_bakta \
${params.bakta} \
${assembly}
mv ${id}_bakta/${id}.gff3 ${id}_annotation_bakta.gff
"""
}
/*
 * busco
 * Runs BUSCO in genome mode on one assembly.
 *
 * input : (id, contigs) tuple - sample identifier and the assembly FASTA
 *         lineage            - accepted for interface compatibility; the
 *                              lineage options actually passed to BUSCO come
 *                              from params.busco
 * output: the BUSCO result directory busco_<id>
 *
 * The previous body referred to `${assembly}`, a variable that is not declared
 * by this process (its inputs are `id` and `contigs`), so the publishDir, the
 * output declaration and the command line could not be resolved. They now use
 * the declared inputs, mirroring the naming used by busco_assembly.
 */
process busco {
    label 'busco'
    publishDir "${params.output}/${id}/assembly", mode: 'copy'

    input:
    tuple val(id), path(contigs)
    val(lineage)

    output:
    path("busco_${id}")

    script:
    """
    busco -i ${contigs} -o busco_${id} --mode genome -f ${params.busco}
    """
}
/************** onComplete ****************/
......
/* ****************** AGAT ****************** */
/*
 * agat_sp_extract_sequences
 * Calls agat_sp_extract_sequences.pl with -p on an annotation and its assembly,
 * writing the sequences to <annotation base name>.fa (the mbap workflow feeds
 * this output to BUSCO in protein mode).
 *
 * input : (id, assembly, annotation)
 * output: (id, assembly, extracted FASTA)
 */
process agat_sp_extract_sequences {

    label 'agat'
    tag "${id}"
    publishDir "${params.output}/${assembly.baseName}/agat", mode: 'copy'

    input:
    tuple val(id), file(assembly), file(annotation)

    output:
    tuple val(id), file(assembly), file("${annotation.baseName}.fa")

    script:
    """
    agat_sp_extract_sequences.pl --gff ${annotation} --fasta ${assembly} -p -o ${annotation.baseName}.fa
    """
}
/*
 * agat_sp_merge_annotations
 * Merges every annotation file of one assembly into <id>_complemented.gff
 * with agat_sp_merge_annotations.pl.
 *
 * input : (id, assembly, annotations) - annotations is one or several GFF files
 * output: (id, assembly, merged GFF matching *_complemented.gff)
 */
process agat_sp_merge_annotations {
    tag "${id}"
    label 'agat'
    publishDir "${params.output}/${assembly.baseName}/agat", mode: 'copy'

    input:
    tuple val(id), file(assembly), file(annotations)

    output:
    tuple val(id), file(assembly), file("*_complemented.gff")

    script:
    // When only one file is staged the input variable may hold a single path
    // instead of a list; wrap it so both cases are handled the same way.
    def annotation_list = annotations instanceof java.nio.file.Path ? [annotations] : annotations
    // One "--gff <file>" pair per annotation, built here rather than in a bash loop.
    def gff_args = annotation_list.collect { "--gff ${it}" }.join(' ')
    """
    agat_sp_merge_annotations.pl ${gff_args} -o ${id}_complemented.gff
    """
}
/* ****************** BUSCO ****************** */
/*
 * busco_assembly
 * Runs BUSCO in genome mode on one assembly, using the CPUs allotted to the
 * task and the extra options held in params.busco.
 *
 * input : (id, assembly)
 * output: the BUSCO result directory busco_<id>
 */
process busco_assembly {

    label 'busco'
    tag "${id}"
    publishDir "${params.output}/${id}/busco_assembly", mode: 'copy'

    input:
    tuple val(id), path(assembly)

    output:
    path("busco_${id}")

    script:
    """
    busco -i ${assembly} -o busco_${id} --cpu ${task.cpus} --mode genome -f ${params.busco}
    """
}
/*
 * busco_annotation
 * Runs BUSCO in protein mode on the sequence file received as the third tuple
 * element and names the result directory after a token parsed from that
 * file's name.
 *
 * input : (id, assembly, annotation)
 * output: the BUSCO result directory busco_<annotation_tool>
 */
process busco_annotation {

    label 'busco'
    tag "${id}"
    publishDir "${params.output}/${id}/busco_annotation", mode: 'copy'

    input:
    tuple val(id), file(assembly), file(annotation)

    output:
    path("busco_${annotation_tool}")

    script:
    // Split the file name on "." and keep the second-to-last field, then keep
    // the last "_"-separated token of it,
    // e.g. "<id>_annotation_prokka.fa" -> "prokka".
    // annotation_tool is left undeclared on purpose: the output block reads it.
    def dot_fields = annotation.toString().tokenize(".")
    annotation_tool = dot_fields[-2].tokenize("_")[-1]
    """
    busco -i ${annotation} -o busco_${annotation_tool} --cpu ${task.cpus} --mode protein -f ${params.busco}
    """
}
......@@ -27,6 +27,10 @@ profiles {
includeConfig "$baseDir/config/software_packages.config"
}
res_local {
includeConfig "$baseDir/config/compute_resources.config"
}
itrop {
executor {
name = 'slurm'
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment