configfile: "config/config.yaml"

# --- Locations read from the DATA section of the configuration -------------
# Input directories.
fasta_prot_dir = config["DATA"]["PROTEIN"]
gff_dir = config["DATA"]["GFF"]

# Helper scripts and external databases.
script_dir = config["DATA"]["SCRIPTS"]
dbcan_db = config["DATA"]["DBCAN_DB"]
phibase_db = config["DATA"]["BDD_PHIBASE"]

# Output root; logs go to "<output_dir>LOGS/". The value is concatenated
# as-is, so it presumably has to end with a path separator -- confirm in
# the config file.
output_dir = config["DATA"]["OUTPUT"]
log_dir = f"{output_dir}LOGS/"

# One sample per "<fasta_prot_dir><samples>.fasta" file found on disk
# (symlinks are followed).
PROTEIN = glob_wildcards(fasta_prot_dir + "{samples}.fasta", followlinks=True).samples




def get_threads(rule, default):
    """
    Return the number of threads to use for ``rule``.

    When a cluster configuration is loaded, the first entry found in this
    order wins (its value is converted with ``int``):

    1. ``cluster_config[rule]['threads']``            (SGE-style key)
    2. ``cluster_config[rule]['cpus-per-task']``      (SLURM-style key)
    3. ``cluster_config['__default__']['cpus-per-task']``
    4. ``cluster_config['__default__']['threads']``

    Otherwise the workflow-wide core count
    (``workflow.global_resources["_cores"]``) is returned when it is truthy,
    and ``default`` is only used as the last resort.
    """
    if cluster_config:
        # (section, key) pairs, listed in lookup priority order.
        lookup_order = (
            (rule, 'threads'),
            (rule, 'cpus-per-task'),
            ('__default__', 'cpus-per-task'),
            ('__default__', 'threads'),
        )
        for section, key in lookup_order:
            if section in cluster_config and key in cluster_config[section]:
                return int(cluster_config[section][key])

    # No usable cluster entry: fall back to the global core budget, then to
    # the caller-supplied default.
    cores = workflow.global_resources["_cores"]
    return cores if cores else default



# Target rule: it has no action of its own and only lists, for every sample
# in PROTEIN, the files the workflow is expected to deliver.
rule finale:
    input:
        # 3_HMMER_PFAM: raw hmmsearch table and its parsed CSV.
        domain_prot = expand(
            f"{output_dir}3_HMMER_PFAM/{{samples}}_secreted.tbl",
            samples=PROTEIN,
        ),
        hmmer_parsed = expand(
            f"{output_dir}3_HMMER_PFAM/PARSED_FILE/{{samples}}_parsed.csv",
            samples=PROTEIN,
        ),
        # 5_FINAL_RESULT: effector counts per contig.
        effector_contig = expand(
            f"{output_dir}5_FINAL_RESULT/EFFECTOR/{{samples}}/{{samples}}_effector_per_contig.txt",
            samples=PROTEIN,
        ),
        # 6_CAZYMES: dbCAN overview and derived counts.
        dbcan_list = expand(
            f"{output_dir}6_CAZYMES/dbcan_{{samples}}/overview.txt",
            samples=PROTEIN,
        ),
        cazyme_counts_list = expand(
            f"{output_dir}6_CAZYMES/dbcan_{{samples}}/{{samples}}_cazyme_count.csv",
            samples=PROTEIN,
        ),
        # 7_ORTHOFINDER: sample-specific sequences and orthogroup tables.
        orthogroups_sequences = expand(
            f"{output_dir}7_ORTHOFINDER/Results_orthofinder/sequences_specific/prot_specific_{{samples}}.fasta",
            samples=PROTEIN,
        ),
        csv_orthogroups = expand(
            f"{output_dir}7_ORTHOFINDER/Results_orthofinder/sequences_specific/OG_specific_{{samples}}.csv",
            samples=PROTEIN,
        ),
        # 8_INTERPROSCAN and 9_PHIBASE results.
        interpro_gff_list = expand(
            f"{output_dir}8_INTERPROSCAN/{{samples}}/{{samples}}.fasta.gff3",
            samples=PROTEIN,
        ),
        blast_phibase = expand(
            f"{output_dir}9_PHIBASE/{{samples}}/{{samples}}_blast_phibase.out",
            samples=PROTEIN,
        )
        # Disabled target (kept for reference):
        #gff_cazymes_list = expand(f"{output_dir}GFF_with_cazymes/{{samples}}_gff.csv",samples=PROTEIN)


# Runs rename_prot.py on one input proteome, passing the sample name, and
# writes the result to 1_PROTEIN_SORTED/. Every downstream rule starts from
# this file.
rule rename_protein:
    input:
        protein = f"{fasta_prot_dir}{{samples}}.fasta"
    output:
        sorted_protein = f"{output_dir}1_PROTEIN_SORTED/{{samples}}.fasta"
    log:
        error = f"{log_dir}protein_sorted/protein_sorted_{{samples}}.e",
        output = f"{log_dir}protein_sorted/protein_sorted_{{samples}}.o"
    threads: get_threads("rename_protein",1)
    message:
            f"""
             Running {{rule}}
                Input:
                    - Fasta : {{input.protein}}
                Output:
                    - Protein_sorted: {{output.sorted_protein}}
                Others
                    - Threads : {{threads}}
                    - LOG error: {{log.error}}
                    - LOG output: {{log.output}}

            """
    shell:
        # script_dir is resolved when the Snakefile is parsed; the
        # double-braced fields are filled in by Snakemake per job.
        f"python {script_dir}rename_prot.py"
        f" -p {{input.protein}}"
        f" -o {{output.sorted_protein}}"
        f" -name {{wildcards.samples}}"
        f" 1>{{log.output}} 2>{{log.error}}"


# Runs phobius.pl in -short mode on the renamed proteome. The tool's stdout
# is the rule output, so log.output is declared but this command never
# writes to it; only stderr is logged.
rule phobius:
    input:
        protein = rules.rename_protein.output.sorted_protein
    output:
        output_phobius = f"{output_dir}2_SECRETED_PROTEIN/PHOBIUS/{{samples}}/{{samples}}_phobius.tsv"
    log:
        error = f"{log_dir}phobius/phobius_{{samples}}.e",
        output = f"{log_dir}phobius/phobius_{{samples}}.o"
    threads: get_threads("phobius",5)
    message:
            f"""
             Running {{rule}}
                Input:
                    - Fasta : {{input.protein}}
                Output:
                    - Phobius_TSV: {{output.output_phobius}}
                Others
                    - Threads : {{threads}}
                    - LOG error: {{log.error}}
                    - LOG output: {{log.output}}

            """
    envmodules:
        "phobius_local"
    shell:
        f"phobius.pl -short {{input.protein}}"
        f" 1>{{output.output_phobius}}"
        f" 2>{{log.error}}"

# Runs signalp6 (organism: eukarya) on the renamed proteome and keeps the
# output.gff3 written into the per-sample SignalP directory. The
# output_*.txt files that the run leaves in that directory are removed
# afterwards.
rule signalP:
    threads: get_threads("signalP",10)
    input:
        protein = rules.rename_protein.output.sorted_protein
    output:
        output_signalP = f"{output_dir}2_SECRETED_PROTEIN/SignalP/{{samples}}/output.gff3"
    params:
        # Per-sample SignalP output directory. The "_phobius" suffix is a
        # historical misnomer; the name is kept so that any external
        # reference to rules.signalP.params keeps working.
        output_dir_phobius =f"{output_dir}2_SECRETED_PROTEIN/SignalP/{{samples}}"
    log :
        # Log files were previously named "phobius_<sample>" (copy-paste
        # from the phobius rule); they now carry this rule's name.
        error =  f'{log_dir}signalP/signalP_{{samples}}.e',
        output = f'{log_dir}signalP/signalP_{{samples}}.o'
    message:
            f"""
             Running {{rule}}
                Input:
                    - Fasta : {{input.protein}}
                Output:
                    - SignalP_GFF3: {{output.output_signalP}}
                Others
                    - Threads : {{threads}}
                    - LOG error: {{log.error}}
                    - LOG output: {{log.output}}

            """
    envmodules:
            "signalp/"
    shell:
        f"signalp6 --fastafile {{input.protein}} --output_dir {{params.output_dir_phobius}} --organism eukarya 1>{{log.output}} 2>{{log.error}}"
        # Same directory as before, now taken from the rule's own param
        # instead of an escaped reference to the global output_dir.
        f"\nrm -rf {{params.output_dir_phobius}}/output_*.txt"

# Runs targetp on the renamed proteome with -stdout and captures that
# stream as the summary file. log.output is declared but not written by
# this command; only stderr is logged.
rule targetp:
    input:
        protein = rules.rename_protein.output.sorted_protein
    output:
        output_targetp = f"{output_dir}2_SECRETED_PROTEIN/TargetP/{{samples}}/{{samples}}_summary.targetp2"
    log:
        error = f"{log_dir}targetp/targetp_{{samples}}.e",
        output = f"{log_dir}targetp/targetp_{{samples}}.o"
    threads: get_threads("targetp",10)
    message:
            f"""
             Running {{rule}}
                Input:
                    - Protein : {{input.protein}}
                Output:
                    - TargetP_summary: {{output.output_targetp}}
                Others
                    - Threads : {{threads}}
                    - LOG error: {{log.error}}
                    - LOG output: {{log.output}}

            """
    envmodules:
        "targetp_local"
    shell:
        f"targetp -fasta {{input.protein}} -stdout"
        f" 1>{{output.output_targetp}}"
        f" 2>{{log.error}}"

# Runs predgpi.py on the renamed proteome and writes its result file to
# 2_SECRETED_PROTEIN/PredGPI/<sample>/.
rule predgpi:
    input:
        protein = rules.rename_protein.output.sorted_protein
    output:
        output_predgpi = f"{output_dir}2_SECRETED_PROTEIN/PredGPI/{{samples}}/{{samples}}.predgpi"
    log:
        error = f"{log_dir}predgpi/predgpi_{{samples}}.e",
        output = f"{log_dir}predgpi/predgpi_{{samples}}.o"
    threads: get_threads("predgpi",10)
    message:
        f"""
                 Running {{rule}}
                    Input:
                        - Protein : {{input.protein}}
                    Output:
                        - Predgpi_summary: {{output.output_predgpi}}
                    Others
                        - Threads : {{threads}}
                        - LOG error: {{log.error}}
                        - LOG output: {{log.output}}

                """
    envmodules:
        "predgpi_local"
    shell:
        f"predgpi.py -f {{input.protein}}"
        f" -o {{output.output_predgpi}}"
        f" 1>{{log.output}} 2>{{log.error}}"

# Feeds the Phobius table to parse_phobius.py, which writes the
# *_secretedID.phobius file used later by intersect_tools.
rule parse_phobius:
    input:
        result_phobius = rules.phobius.output.output_phobius
    output:
        secreted_phobius = f"{output_dir}2_SECRETED_PROTEIN/ID_SECRETED/{{samples}}/PHOBIUS/{{samples}}_secretedID.phobius"
    log:
        error = f"{log_dir}parse_phobius/parse_phobius_{{samples}}.e",
        output = f"{log_dir}parse_phobius/parse_phobius_{{samples}}.o"
    threads: get_threads("parse_phobius",1)
    message:
        f"""
                 Running {{rule}}
                    Input:
                        - Results_phobius : {{input.result_phobius}}
                    Output:
                        - Parse_phobius: {{output.secreted_phobius}}
                    Others
                        - Threads : {{threads}}
                        - LOG error: {{log.error}}
                        - LOG output: {{log.output}}

                """
    shell:
        f"python {script_dir}parse_phobius.py"
        f" -t {{input.result_phobius}}"
        f" -o {{output.secreted_phobius}}"
        f" 1>{{log.output}} 2>{{log.error}}"

# Feeds the SignalP GFF3 to parse_signalp.py together with the threshold
# configured under TOOLS_PARAMS / PARSE_SIGNALP_TRESHOLD.
rule parse_signalp:
    input:
        result_signalp = rules.signalP.output.output_signalP
    output:
        secreted_signalp = f"{output_dir}2_SECRETED_PROTEIN/ID_SECRETED/{{samples}}/SIGNALP/{{samples}}_secretedID.signalp"
    params:
        # The key is spelled "TRESHOLD" in the config file; keep it in sync.
        threshold = config["TOOLS_PARAMS"]["PARSE_SIGNALP_TRESHOLD"]
    log:
        error = f"{log_dir}parse_signalp/parse_signalp_{{samples}}.e",
        output = f"{log_dir}parse_signalp/parse_signalp_{{samples}}.o"
    threads: get_threads("parse_signalp",1)
    message:
        f"""
                 Running {{rule}}
                    Input:
                        - Results_signalp : {{input.result_signalp}}
                    Output:
                        - Parse_signalp: {{output.secreted_signalp}}
                    Others
                        - Threads : {{threads}}
                        - LOG error: {{log.error}}
                        - LOG output: {{log.output}}

                """
    shell:
        f"python {script_dir}parse_signalp.py"
        f" -s {{input.result_signalp}}"
        f" -o {{output.secreted_signalp}}"
        f" -th {{params.threshold}}"
        f" 1>{{log.output}} 2>{{log.error}}"

# Feeds the TargetP summary to parse_targetp.py together with the threshold
# configured under TOOLS_PARAMS / PARSE_TARGETP_TRESHOLD.
rule parse_targetp:
    input:
        result_targetp = rules.targetp.output.output_targetp
    output:
        secreted_targetp = f"{output_dir}2_SECRETED_PROTEIN/ID_SECRETED/{{samples}}/TARGETP/{{samples}}_secretedID.targetp"
    params:
        # The key is spelled "TRESHOLD" in the config file; keep it in sync.
        threshold = config["TOOLS_PARAMS"]["PARSE_TARGETP_TRESHOLD"]
    log:
        error = f"{log_dir}parse_targetp/parse_targetp_{{samples}}.e",
        output = f"{log_dir}parse_targetp/parse_targetp_{{samples}}.o"
    threads: get_threads("parse_targetp",1)
    message:
        f"""
                 Running {{rule}}
                    Input:
                        - Results_targetp : {{input.result_targetp}}
                    Output:
                        - Parse_targetp: {{output.secreted_targetp}}
                    Others
                        - Threads : {{threads}}
                        - LOG error: {{log.error}}
                        - LOG output: {{log.output}}

                """
    shell:
        f"python {script_dir}parse_targetp.py"
        f" -t {{input.result_targetp}}"
        f" -o {{output.secreted_targetp}}"
        f" -th {{params.threshold}}"
        f" 1>{{log.output}} 2>{{log.error}}"


# Feeds the PredGPI result to parse_predgpi.py, which writes the
# *_noanchorID.predgpi file used later by intersect_tools.
#
# This rule used to declare a `threshold` param read from the TargetP
# config key (copy-paste from parse_targetp). The command never used it, so
# it only tied this rule to an unrelated setting; it has been removed.
rule parse_predgpi:
    threads: get_threads("parse_predgpi",1)
    input:
        result_predgpi = rules.predgpi.output.output_predgpi
    output:
        noanchor_predgpi = f"{output_dir}2_SECRETED_PROTEIN/ID_SECRETED/{{samples}}/PREDGPI/{{samples}}_noanchorID.predgpi"
    log:
        error =  f'{log_dir}parse_predgpi/parse_predgpi_{{samples}}.e',
        output = f'{log_dir}parse_predgpi/parse_predgpi_{{samples}}.o'
    message:
        f"""
                 Running {{rule}}
                    Input:
                        - Result_predgpi : {{input.result_predgpi}}
                    Output:
                        - Parse_predgpi: {{output.noanchor_predgpi}}
                    Others
                        - Threads : {{threads}}
                        - LOG error: {{log.error}}
                        - LOG output: {{log.output}}

                """
    shell:
        f"python {script_dir}parse_predgpi.py -p {{input.result_predgpi}} -o {{output.noanchor_predgpi}} 1>{{log.output}} 2>{{log.error}}"

# Passes the four parsed ID lists to intersect.sh in the order PredGPI,
# SignalP, Phobius, TargetP. The script's stdout becomes the rule output,
# so log.output is declared but not written by this command.
rule intersect_tools:
    input:
        result_parse_predgpi = rules.parse_predgpi.output.noanchor_predgpi,
        result_parse_targetp = rules.parse_targetp.output.secreted_targetp,
        result_parse_signalp = rules.parse_signalp.output.secreted_signalp,
        result_parse_phobius = rules.parse_phobius.output.secreted_phobius
    output:
        signalpeptide_id = f"{output_dir}2_SECRETED_PROTEIN/ID_SECRETED/{{samples}}/{{samples}}_intersect.signalpeptide"
    log:
        error = f"{log_dir}intersect_tools/intersect_tools_{{samples}}.e",
        output = f"{log_dir}intersect_tools/intersect_tools_{{samples}}.o"
    threads: get_threads("intersect_tools",1)
    message:
        f"""
                     Running {{rule}}
                        Input:
                            - Parse_phobius : {{input.result_parse_phobius}}
                            - Parse_targetp : {{input.result_parse_targetp}}
                            - Parse_signalp : {{input.result_parse_signalp}}
                            - Parse_predgpi : {{input.result_parse_predgpi}}
                        Output:
                            - Intersect_signalpeptide: {{output.signalpeptide_id}}
                        Others
                            - Threads : {{threads}}
                            - LOG error: {{log.error}}
                            - LOG output: {{log.output}}

                    """
    shell:
        # Positional arguments: their order is part of the script's
        # interface and must not be changed here.
        f"bash {script_dir}intersect.sh"
        f" {{input.result_parse_predgpi}}"
        f" {{input.result_parse_signalp}}"
        f" {{input.result_parse_phobius}}"
        f" {{input.result_parse_targetp}}"
        f" 1>{{output.signalpeptide_id}}"
        f" 2>{{log.error}}"


# Calls fasta_intersect.py with the renamed proteome (-p) and the
# intersected ID list (-s) to produce the *_intersect.fasta file.
rule fasta_intersect:
    input:
        fasta_protein = rules.rename_protein.output.sorted_protein,
        intersect_id = rules.intersect_tools.output.signalpeptide_id
    output:
        intersect_fasta_prot = f"{output_dir}2_SECRETED_PROTEIN/ID_SECRETED/{{samples}}/{{samples}}_intersect.fasta"
    log:
        error = f"{log_dir}fasta_intersect/fasta_intersect_{{samples}}.e",
        output = f"{log_dir}fasta_intersect/fasta_intersect_{{samples}}.o"
    threads: get_threads("fasta_intersect",1)
    message:
        f"""
                         Running {{rule}}
                            Input:
                                - Fasta_protein : {{input.fasta_protein}}
                                - Intersect_id : {{input.intersect_id}}
                            Output:
                                - Fasta_intersect_protein : {{output.intersect_fasta_prot}}
                            Others
                                - Threads : {{threads}}
                                - LOG error: {{log.error}}
                                - LOG output: {{log.output}}

                        """
    shell:
        f"python {script_dir}fasta_intersect.py"
        f" -p {{input.fasta_protein}}"
        f" -s {{input.intersect_id}}"
        f" -o {{output.intersect_fasta_prot}}"
        f"  1>{{log.output}} 2>{{log.error}}"

# Runs tmhmm in -short mode on the intersected FASTA. The tool's stdout is
# the rule output, so log.output is declared but not written by this
# command; only stderr is logged.
rule tmhmm:
    input:
        fasta_intersect_prot = rules.fasta_intersect.output.intersect_fasta_prot
    output:
        tmhmm_output = f"{output_dir}2_SECRETED_PROTEIN/TMHMM/{{samples}}/{{samples}}.tmhmm"
    log:
        error = f"{log_dir}tmhmm/tmhmm_{{samples}}.e",
        output = f"{log_dir}tmhmm/tmhmm_{{samples}}.o"
    threads: get_threads("tmhmm",5)
    message:
        f"""
                             Running {{rule}}
                                Input:
                                    - Fasta_protein_intersect : {{input.fasta_intersect_prot}}
                                Output:
                                    - TMHMM output : {{output.tmhmm_output}}
                                Others
                                    - Threads : {{threads}}
                                    - LOG error: {{log.error}}
                                    - LOG output: {{log.output}}

                            """
    envmodules:
        "tmhmm/2.0c"
    shell:
        f"tmhmm -short {{input.fasta_intersect_prot}}"
        f" 1>{{output.tmhmm_output}}"
        f" 2>{{log.error}}"

# Feeds the TMHMM short output to parse_tmhmm.py together with the
# threshold configured under TOOLS_PARAMS / PARSE_TMHMM_TRESHOLD.
rule parse_tmhmm:
    input:
        tmhmm_outfile = rules.tmhmm.output.tmhmm_output
    output:
        tmhmm_parsed_file = f"{output_dir}2_SECRETED_PROTEIN/TMHMM/{{samples}}/{{samples}}_tmhmm_parsed.tsv"
    params:
        # The key is spelled "TRESHOLD" in the config file; keep it in sync.
        threshold = config["TOOLS_PARAMS"]["PARSE_TMHMM_TRESHOLD"]
    log:
        error = f"{log_dir}parse_tmhmm/parse_tmhmm_{{samples}}.e",
        output = f"{log_dir}parse_tmhmm/parse_tmhmm_{{samples}}.o"
    threads: get_threads("parse_tmhmm",1)
    message:
        f"""
                                 Running {{rule}}
                                    Input:
                                        - TMHMM output : {{input.tmhmm_outfile}}
                                    Output:
                                        - TMHMM parsed : {{output.tmhmm_parsed_file}}
                                    Others
                                        - Threads : {{threads}}
                                        - LOG error: {{log.error}}
                                        - LOG output: {{log.output}}

                                """
    shell:
        f"python {script_dir}parse_tmhmm.py"
        f" -in {{input.tmhmm_outfile}}"
        f" -tm {{params.threshold}}"
        f" -o {{output.tmhmm_parsed_file}}"
        f"  1>{{log.output}} 2>{{log.error}}"

# Calls tmhmm_to_fasta.py with the intersected FASTA (-p) and the parsed
# TMHMM table (-tmhmm) to produce the *_tmhmm_parsed.fasta file.
rule tmhmm_fasta:
    threads: get_threads("tmhmm_fasta",1)
    input:
        tmhmm_parsed = rules.parse_tmhmm.output.tmhmm_parsed_file,
        protein_intersected = rules.fasta_intersect.output.intersect_fasta_prot
    output:
        fasta_parsed = f"{output_dir}2_SECRETED_PROTEIN/TMHMM/{{samples}}/{{samples}}_tmhmm_parsed.fasta"
    log:
        # The .o name was missing the "_" separator before the sample
        # name; it now matches the .e file and every other rule.
        error=f'{log_dir}tmhmm_fasta/tmhmm_fasta_{{samples}}.e',
        output=f'{log_dir}tmhmm_fasta/tmhmm_fasta_{{samples}}.o'
    message:
        f"""
                                 Running {{rule}}
                                    Input:
                                        - TMHMM parsed : {{input.tmhmm_parsed}}
                                        - Fasta intersect : {{input.protein_intersected}}
                                    Output:
                                        - Fasta protein : {{output.fasta_parsed}}
                                    Others
                                        - Threads : {{threads}}
                                        - LOG error: {{log.error}}
                                        - LOG output: {{log.output}}

                                """
    shell:
        f"python {script_dir}tmhmm_to_fasta.py -p {{input.protein_intersected}} -tmhmm {{input.tmhmm_parsed}} -o {{output.fasta_parsed}}  1>{{log.output}} 2>{{log.error}}"

# Runs runWolfPsortSummary in "fungi" mode, reading the TMHMM-filtered FASTA
# on stdin. The tool's stdout is the rule output, so log.output is declared
# but not written by this command; only stderr is logged.
rule wolfpsort:
    threads: get_threads("wolfpsort",10)
    input:
        protein_tmhmm = rules.tmhmm_fasta.output.fasta_parsed
    output:
        result_wolfpsort = f"{output_dir}2_SECRETED_PROTEIN/WOLFPSORT/{{samples}}/{{samples}}_wolfpsort.txt"
    log:
        # The .o name was missing the "_" separator before the sample
        # name; it now matches the .e file and every other rule.
        error=f'{log_dir}wolfpsort/wolfpsort_{{samples}}.e',
        output=f'{log_dir}wolfpsort/wolfpsort_{{samples}}.o'
    message:
        f"""
                                    Running {{rule}}
                                       Input:
                                           - Fasta TMHMM : {{input.protein_tmhmm}}
                                       Output:
                                           - Result WOLFPSORT : {{output.result_wolfpsort}}
                                       Others
                                           - Threads : {{threads}}
                                           - LOG error: {{log.error}}
                                           - LOG output: {{log.output}}

                                   """
    envmodules:
        "wolfpsort/0.2"
    shell:
        f"runWolfPsortSummary fungi < {{input.protein_tmhmm}} 1>{{output.result_wolfpsort}} 2>{{log.error}}"
# Feeds the WoLF PSORT summary to parse_wolfpsort.py together with the
# threshold configured under TOOLS_PARAMS / PARSE_WOLFPOSORT_TRESHOLD, and
# writes the list of secreted protein IDs into 5_FINAL_RESULT/.
rule parse_wolfpsort:
    threads: get_threads("parse_wolfpsort",1)
    input:
        wolfpsort_output = rules.wolfpsort.output.result_wolfpsort
    output:
        id_secreted_prot = f"{output_dir}5_FINAL_RESULT/SECRETED_PROTEIN/{{samples}}/{{samples}}_secreted.id"
    params:
        # The key is spelled "WOLFPOSORT_TRESHOLD" in the config file; keep
        # it in sync.
        threshold=config["TOOLS_PARAMS"]["PARSE_WOLFPOSORT_TRESHOLD"]
    log:
        # The .o name was missing the "_" separator before the sample
        # name; it now matches the .e file and every other rule.
        error=f'{log_dir}parse_wolfpsort/parse_wolfpsort_{{samples}}.e',
        output=f'{log_dir}parse_wolfpsort/parse_wolfpsort_{{samples}}.o'
    message:
        f"""
                                        Running {{rule}}
                                           Input:
                                               - WOLFPSORT OUTPUT : {{input.wolfpsort_output}}
                                           Output:
                                               - ID SECRETED PROTEIN : {{output.id_secreted_prot}}
                                           Others
                                               - Threads : {{threads}}
                                               - LOG error: {{log.error}}
                                               - LOG output: {{log.output}}

                                       """

    shell:
        f"python {script_dir}parse_wolfpsort.py -in {{input.wolfpsort_output}} -th {{params.threshold}} -o {{output.id_secreted_prot}}  1>{{log.output}} 2>{{log.error}}"

# Calls id_secreted_to_fasta.py with the TMHMM-filtered FASTA (-fasta) and
# the secreted-ID list (-id) to write the final secreted-protein FASTA.
rule id_tofasta_secreted:
    threads: get_threads("id_tofasta_secreted",1)
    input:
        id_secreted = rules.parse_wolfpsort.output.id_secreted_prot,
        fasta_tmhmm = rules.tmhmm_fasta.output.fasta_parsed
    output:
        fasta_prot_secreted = f"{output_dir}5_FINAL_RESULT/SECRETED_PROTEIN/{{samples}}/{{samples}}_secreted.fasta"
    log:
        # These paths were copied verbatim from parse_wolfpsort, so both
        # rules wrote to the same log files. This rule now has its own.
        error=f'{log_dir}id_tofasta_secreted/id_tofasta_secreted_{{samples}}.e',
        output=f'{log_dir}id_tofasta_secreted/id_tofasta_secreted_{{samples}}.o'
    message:
        f"""
                                            Running {{rule}}
                                               Input:
                                                   - FASTA PROTEIN : {{input.fasta_tmhmm}}
                                                   - ID SECRETED : {{input.id_secreted}}
                                               Output:
                                                   - FASTA SECRETED PROTEIN : {{output.fasta_prot_secreted}}
                                               Others
                                                   - Threads : {{threads}}
                                                   - LOG error: {{log.error}}
                                                   - LOG output: {{log.output}}

                                           """
    shell:
        f"python {script_dir}id_secreted_to_fasta.py -fasta {{input.fasta_tmhmm}} -id {{input.id_secreted}} -o {{output.fasta_prot_secreted}} 1>{{log.output}} 2>{{log.error}}"

# Runs hmmsearch with the Pfam database from the config (DATA / BDD_PFAM)
# against the secreted-protein FASTA and keeps the --tblout table. Extra
# command-line options come from TOOLS_PARAMS / HMMER.
rule hmmer_pfam:
    input:
        fasta_secreted = rules.id_tofasta_secreted.output.fasta_prot_secreted,
        bdd_pfam = config["DATA"]["BDD_PFAM"]
    output:
        protein_secreted_domain = f"{output_dir}3_HMMER_PFAM/{{samples}}_secreted.tbl"
    params:
        param_hmmer = config["TOOLS_PARAMS"]["HMMER"]
    log:
        error = f"{log_dir}hmmer_pfam/hmmer_pfam_{{samples}}.e",
        output = f"{log_dir}hmmer_pfam/hmmer_pfam_{{samples}}.o"
    threads: get_threads("hmmer_pfam", 8)
    message:
        f"""
                                                Running {{rule}}
                                                   Input:
                                                       - FASTA PROTEIN : {{input.fasta_secreted}}
                                                       - BDD PFAM : {{input.bdd_pfam}}
                                                   Output:
                                                       - DOMMAINES PROTEINES : {{output.protein_secreted_domain}}
                                                   Others
                                                       - Threads : {{threads}}
                                                       - LOG error: {{log.error}}
                                                       - LOG output: {{log.output}}

                                               """
    envmodules:
        "hmmer/3.2.1"
    shell:
        # The rule's thread count is not passed to hmmsearch here; any CPU
        # setting would have to come from params.param_hmmer.
        f"hmmsearch --tblout {{output.protein_secreted_domain}}"
        f" {{params.param_hmmer}}"
        f" {{input.bdd_pfam}}"
        f" {{input.fasta_secreted}}"
        f" 1>{{log.output}} 2>{{log.error}}"

# Converts the hmmsearch table into a CSV via hmmer_parse.py. The logs share
# the hmmer_pfam/ log directory but carry a "_parsed" infix.
rule parse_hmmer:
    input:
        result_hmmer = rules.hmmer_pfam.output.protein_secreted_domain
    output:
        hmmer_parsed = f"{output_dir}3_HMMER_PFAM/PARSED_FILE/{{samples}}_parsed.csv"
    log:
        error = f"{log_dir}hmmer_pfam/hmmer_pfam_parsed_{{samples}}.e",
        output = f"{log_dir}hmmer_pfam/hmmer_pfam_parsed_{{samples}}.o"
    threads: get_threads("parse_hmmer",1)
    message:
        f"""
                                                    Running {{rule}}
                                                       Input:
                                                           - HMMER RESULT : {{input.result_hmmer}}
                                                       Output:
                                                           - HMMER PARSED : {{output.hmmer_parsed}}
                                                       Others
                                                           - Threads : {{threads}}
                                                           - LOG error: {{log.error}}
                                                           - LOG output: {{log.output}}

                                                   """
    shell:
        f"python {script_dir}hmmer_parse.py"
        f" -f {{input.result_hmmer}}"
        f" -o {{output.hmmer_parsed}}"
        f" 1>{{log.output}} 2>{{log.error}}"


# Runs EffectorP.py on the secreted-protein FASTA. Three files are written:
# the tool report (-o), the effector FASTA (-E) and the non-effector
# FASTA (-N).
rule effectorP:
    input:
        fasta_secreted = rules.id_tofasta_secreted.output.fasta_prot_secreted
    output:
        fasta_effectors = f"{output_dir}5_FINAL_RESULT/EFFECTOR/{{samples}}/{{samples}}_effector.fasta",
        effectorP_out = f"{output_dir}5_FINAL_RESULT/EFFECTOR/{{samples}}/{{samples}}_effectorP.out",
        no_effector_fasta = f"{output_dir}5_FINAL_RESULT/EFFECTOR/{{samples}}/{{samples}}_non_effector.fasta"
    log:
        error = f"{log_dir}effectorP/effectorP_{{samples}}.e",
        output = f"{log_dir}effectorP/effectorP_{{samples}}.o"
    threads: get_threads("effectorP", 10)
    message:
        f"""
                                                    Running {{rule}}
                                                       Input:
                                                           - FASTA PROTEIN : {{input.fasta_secreted}}
                                                       Output:
                                                           - EFFECTOR FASTA : {{output.fasta_effectors}}
                                                           - NON EFFECTOR FASTA : {{output.no_effector_fasta}}
                                                           - EFFECTORP_OUT : {{output.effectorP_out}}
                                                       Others
                                                           - Threads : {{threads}}
                                                           - LOG error: {{log.error}}
                                                           - LOG output: {{log.output}}

                                                   """
    envmodules:
        "effectorp_local"
    shell:
        f"EffectorP.py"
        f" -o {{output.effectorP_out}}"
        f" -E {{output.fasta_effectors}}"
        f" -N {{output.no_effector_fasta}}"
        f" -i {{input.fasta_secreted}}"
        f" 1>{{log.output}} 2>{{log.error}}"

# Runs gff_sort.py on "<gff_dir><sample>.gff3", passing the sample name, and
# writes the result to 4_GFF_SORTED/. The GFF must therefore be named like
# the protein FASTA of the same sample.
rule sort_gff:
    input:
        gff_file = f"{gff_dir}{{samples}}.gff3"
    output:
        gff_sorted = f"{output_dir}4_GFF_SORTED/{{samples}}/{{samples}}_sorted.gff3"
    log:
        error = f"{log_dir}sort_gff/sort_gff_{{samples}}.e",
        output = f"{log_dir}sort_gff/sort_gff_{{samples}}.o"
    threads: get_threads("sort_gff",1)
    message:
        f"""
                                                        Running {{rule}}
                                                           Input:
                                                               - GFF FILE : {{input.gff_file}}
                                                           Output:
                                                               - GFF SORTED : {{output.gff_sorted}}
                                                           Others
                                                               - Threads : {{threads}}
                                                               - LOG error: {{log.error}}
                                                               - LOG output: {{log.output}}

                                                       """
    shell:
        f"python {script_dir}gff_sort.py"
        f" -g {{input.gff_file}}"
        f" -o {{output.gff_sorted}}"
        f" -name {{wildcards.samples}}"
        f" 1>{{log.output}} 2>{{log.error}}"

# Runs count_effectors.py on the effector FASTA and the sorted GFF, then
# sorts the resulting file in place with a version sort (sort -V).
rule count_effector:
    input:
        fasta_effectors = rules.effectorP.output.fasta_effectors,
        gff_protein = rules.sort_gff.output.gff_sorted
    output:
        effector_per_contig = f"{output_dir}5_FINAL_RESULT/EFFECTOR/{{samples}}/{{samples}}_effector_per_contig.txt"
    log:
        error = f"{log_dir}count_effector/count_effector_{{samples}}.e",
        output = f"{log_dir}count_effector/count_effector_{{samples}}.o"
    threads: get_threads("count_effector", 1)
    message:
        f"""
                                                        Running {{rule}}
                                                           Input:
                                                               - FASTA EFFECTOR : {{input.fasta_effectors}}
                                                               - GFF SORTED : {{input.gff_protein}}
                                                           Output:
                                                               - EFFECTOR PER CONTING : {{output.effector_per_contig}}
                                                           Others
                                                               - Threads : {{threads}}
                                                               - LOG error: {{log.error}}
                                                               - LOG output: {{log.output}}

                                                       """
    # Plain (non-f) string: {script_dir} is left for Snakemake to substitute
    # at run time, together with the input/output/log fields. Only the
    # first command is redirected to the log files.
    shell:
        """
        python {script_dir}count_effectors.py -g {input.gff_protein} -o {output.effector_per_contig} -fasta {input.fasta_effectors} 1>{log.output} 2>{log.error}
        sort -V {output.effector_per_contig} -o {output.effector_per_contig}
        """

# Runs run_dbcan.py in "protein" mode on the renamed proteome. The tool
# writes into the per-sample dbcan_<sample>/ directory; the rule tracks only
# overview.txt from it.
rule dbcan2:
    input:
        fasta_proteins = rules.rename_protein.output.sorted_protein
    output:
        dbcan_result = f"{output_dir}6_CAZYMES/dbcan_{{samples}}/overview.txt"
    params:
        dbcan_dir = f"{output_dir}6_CAZYMES/dbcan_{{samples}}/",
        database_dir = dbcan_db
    log:
        error = f"{log_dir}dbcan/{{samples}}.e",
        output = f"{log_dir}dbcan/{{samples}}.o"
    threads: get_threads("dbcan2", 2)
    message:
        f"""
                 Running {{rule}}
                    Input:
                        - prot : {{input.fasta_proteins}}
                    Output:
                        - result: {{output.dbcan_result}}
                    Others
                        - Threads : {{threads}}
                        - LOG error: {{log.error}}
                        - LOG output: {{log.output}}
                """
    envmodules:
        "run-dbcan/2.0.11"
    shell:
        # Plain (non-f) strings: every field is filled in by Snakemake.
        "run_dbcan.py {input.fasta_proteins} protein"
        " --out_dir {params.dbcan_dir}"
        " --db_dir {params.database_dir}"
        " 1>{log.output} 2>{log.error}"


# Runs dbcan.py on the dbCAN overview (-i) and the sorted GFF (-g) to write
# the per-sample CAZyme count CSV.
rule cazyme_count:
    input:
        dbcan_file = rules.dbcan2.output.dbcan_result,
        gff_renamed = rules.sort_gff.output.gff_sorted
    output:
        cazyme_count = f"{output_dir}6_CAZYMES/dbcan_{{samples}}/{{samples}}_cazyme_count.csv"
    log:
        error = f"{log_dir}caz_count/{{samples}}.e",
        output = f"{log_dir}caz_count/{{samples}}.o"
    threads: get_threads("cazyme_count", 1)
    message:
        f"""
                     Running {{rule}}
                        Input:
                            - dbcan_file : {{input.dbcan_file}}
                        Output:
                            - cazyme_count: {{output.cazyme_count}}
                        Others
                            - Threads : {{threads}}
                            - LOG error: {{log.error}}
                            - LOG output: {{log.output}}
                    """
    shell:
        f"python {script_dir}dbcan.py"
        f" -i {{input.dbcan_file}}"
        f" -g {{input.gff_renamed}}"
        f" -o {{output.cazyme_count}}"
        f" 1>{{log.output}} 2>{{log.error}}"

# NOTE: the rule below is disabled. The whole definition is wrapped in a
# triple-quoted string, so it is evaluated as a bare string literal and no
# rule named add_caz_to_gff is registered.
# Before re-enabling, note that its shell command calls cazymes_add_gff.py
# without the {script_dir} prefix and does not redirect to the declared log
# files, and that it reads rules.rename_id_gff (not visible in this part of
# the file -- confirm that rule still exists).
'''
rule add_caz_to_gff:
    threads: get_threads("add_caz_to_gff", 1)
    input:
        dbcan_file = rules.dbcan2.output.dbcan_result,
        gff_renamed = rules.rename_id_gff.output.gff_renamed
    output:
        gff_caz = f"{output_dir}GFF_with_cazymes/{{samples}}_gff.csv"
    log:
        error=f'{log_dir}caz_gff/{{samples}}.e',
        output=f'{log_dir}caz_gff/{{samples}}.o'
    message:
        f"""
                         Running {{rule}}
                            Input:
                                - dbcan_file : {{input.dbcan_file}}
                            Output:
                                - gff: {{output.gff_caz}}
                            Others
                                - Threads : {{threads}}
                                - LOG error: {{log.error}}
                                - LOG output: {{log.output}}
                        """
    shell: "python cazymes_add_gff.py -i {input.dbcan_file} -g {input.gff_renamed} -o {output.gff_caz}"
'''

rule orthofinder:
    # Run OrthoFinder once over the sorted proteomes of all samples (not per
    # sample: the rule has no wildcard). The tracked outputs are
    # Orthogroups.txt and Orthogroups.GeneCount.tsv under
    # 7_ORTHOFINDER/Results_orthofinder/Orthogroups/.
    #
    # The declared inputs are the individual sorted FASTA files, but
    # OrthoFinder itself is pointed at the directory params.dir_prot
    # (assumed to be where rename_protein writes them -- confirm).
    #
    # -t and -a follow the rule's `threads` value instead of a hard-coded 10,
    # so the tool uses exactly the CPUs Snakemake / the cluster reserved.
    threads: get_threads("orthofinder", 10)
    input:
        prot_files = expand(rules.rename_protein.output.sorted_protein, samples = PROTEIN)
    params:
        dir_prot = f"{output_dir}1_PROTEIN_SORTED/",
        dir_out = f"{output_dir}7_ORTHOFINDER/",
        name_dir = "orthofinder"
    output:
        orthogroups_file = f"{output_dir}7_ORTHOFINDER/Results_orthofinder/Orthogroups/Orthogroups.txt",
        orthogroups_table= f"{output_dir}7_ORTHOFINDER/Results_orthofinder/Orthogroups/Orthogroups.GeneCount.tsv"
    log:
        error=f'{log_dir}orthofinder/orthofinder.e',
        output=f'{log_dir}orthofinder/orthofinder.o'
    message:
        f"""
                         Running {{rule}}
                            Input:
                                - prot_files : {{input.prot_files}}
                            Output:
                                - orthogroups: {{output.orthogroups_file}}
                            Others
                                - Threads : {{threads}}
                                - LOG error: {{log.error}}
                                - LOG output: {{log.output}}
                        """
    envmodules:
        "orthofinder/2.5.2"
    # The output directory is wiped first: Snakemake pre-creates the parent
    # directories of declared outputs, and OrthoFinder presumably refuses to
    # write into an already existing -o directory (confirm against the tool).
    shell:
        """
        rm -rf {params.dir_out}
        orthofinder -o {params.dir_out} -n {params.name_dir} -f {params.dir_prot} -t {threads} -M msa -a {threads} 1>{log.output} 2>{log.error}
        """

rule orthofinder_parse:
    # For one sample, call the project script orthofinder_parse.py with the
    # OrthoFinder gene-count table, the sample name and the
    # Orthogroup_Sequences directory; the script writes a FASTA file and a CSV
    # into Results_orthofinder/sequences_specific/.
    input:
        orthogroups_table = rules.orthofinder.output.orthogroups_table
    output:
        csv_orthogroups_specific = output_dir + "7_ORTHOFINDER/Results_orthofinder/sequences_specific/OG_specific_{samples}.csv",
        fasta_og = output_dir + "7_ORTHOFINDER/Results_orthofinder/sequences_specific/prot_specific_{samples}.fasta"
    params:
        path_seq = output_dir + "7_ORTHOFINDER/Results_orthofinder/Orthogroup_Sequences/",
        strain_name = "{samples}"
    log:
        output = log_dir + "orthofinder_parse/{samples}.o",
        error = log_dir + "orthofinder_parse/{samples}.e"
    threads: get_threads("orthofinder_parse", 1)
    message:
        """
                             Running {rule}
                                Input:
                                    - orthogroups_table : {input.orthogroups_table}
                                Output:
                                    - fasta: {output.fasta_og}
                                Others
                                    - Threads : {threads}
                                    - LOG error: {log.error}
                                    - LOG output: {log.output}
                            """
    # {script_dir} is resolved by Snakemake from the workflow globals when the
    # command is formatted.
    shell:
        "python {script_dir}orthofinder_parse.py -t {input.orthogroups_table} "
        "-n {params.strain_name} -p {params.path_seq} -f {output.fasta_og} "
        "-c {output.csv_orthogroups_specific} 1>{log.output} 2>{log.error}"

rule interproscan:
    # Run interproscan.sh on one sample's sorted protein FASTA, writing into
    # 8_INTERPROSCAN/<sample>/; the tracked output is <sample>.fasta.gff3.
    # Extra command-line options are taken verbatim from
    # config["TOOLS_PARAMS"]["INTERPROSCAN"].
    # NOTE(review): `threads` is not passed on the command line here --
    # presumably the CPU count is part of the configured options; confirm.
    input:
        fasta_proteins = rules.rename_protein.output.sorted_protein
    output:
        gff = output_dir + "8_INTERPROSCAN/{samples}/{samples}.fasta.gff3"
    params:
        interpro_params = config["TOOLS_PARAMS"]["INTERPROSCAN"],
        directory_output = output_dir + "8_INTERPROSCAN/{samples}/"
    log:
        output = log_dir + "interproscan/{samples}.o",
        error = log_dir + "interproscan/{samples}.e"
    threads: get_threads("interproscan", 4)
    envmodules:
        "interproscan/5.54-87.0"
    message:
        """
                                 Running {rule}
                                    Input:
                                        - fasta_proteins : {input.fasta_proteins}
                                    Output:
                                        - gff: {output.gff}
                                    Others
                                        - Threads : {threads}
                                        - LOG error: {log.error}
                                        - LOG output: {log.output}
                                """
    shell:
        "interproscan.sh -i {input.fasta_proteins} -d {params.directory_output} "
        "{params.interpro_params} 1>{log.output} 2>{log.error}"

rule makeblast_db_phibase:
    # Build a protein BLAST database from phi-base_current.fasta with
    # makeblastdb. Only the .phr file is declared as output; it acts as the
    # marker that the database has been built.
    input:
        phibase_database = phibase_db + "phi-base_current.fasta"
    output:
        complete_database = phibase_db + "phi-base_current.fasta.phr"
    log:
        output = log_dir + "phi_base_db.o",
        error = log_dir + "phi_base_db.e"
    threads: get_threads("makeblast_db_phibase", 1)
    envmodules:
        "blast"
    message:
        """
                                     Running {rule}
                                        Input:
                                            - database : {input.phibase_database}
                                        Output:
                                            - complete_database: {output.complete_database}
                                        Others
                                            - Threads : {threads}
                                            - LOG error: {log.error}
                                            - LOG output: {log.output}
                                    """
    shell:
        "makeblastdb -in {input.phibase_database} -dbtype prot "
        "1>{log.output} 2>{log.error}"

rule phibase:
    # blastp one sample's effector FASTA against the PHI-base protein database
    # and write a tabular (outfmt 6) report with an e-value cutoff of 0.001.
    #
    # `verif` is never used in the command: it only forces
    # makeblast_db_phibase to run first so the BLAST index exists.
    #
    # The command passes the reserved `threads` to blastp via -num_threads
    # and redirects stdout/stderr to the declared log files, like the other
    # rules of this workflow (both were previously declared but unused).
    #
    # NOTE(review): "title" in -outfmt does not look like one of the
    # documented tabular specifiers (stitle / salltitles are) -- confirm the
    # intended column before relying on it.
    threads: get_threads("phibase", 4)
    input:
        fasta_effectors = rules.effectorP.output.fasta_effectors,
        phibase_database = f"{phibase_db}phi-base_current.fasta",
        verif = rules.makeblast_db_phibase.output.complete_database
    output:
        blast_result = f"{output_dir}9_PHIBASE/{{samples}}/{{samples}}_blast_phibase.out"
    log:
        error=f'{log_dir}phi_base/{{samples}}.e',
        output=f'{log_dir}phi_base/{{samples}}.o'
    message:
        f"""
                                         Running {{rule}}
                                            Input:
                                                - database : {{input.phibase_database}}
                                            Output:
                                                - blast_results: {{output.blast_result}}
                                            Others
                                                - Threads : {{threads}}
                                                - LOG error: {{log.error}}
                                                - LOG output: {{log.output}}
                                        """
    envmodules:
        "blast"
    shell:
        "blastp -db {input.phibase_database} -query {input.fasta_effectors} "
        "-out {output.blast_result} "
        "-outfmt '6 qacc sacc length evalue score title' -evalue 0.001 "
        "-num_threads {threads} 1>{log.output} 2>{log.error}"