Skip to content
Snippets Groups Projects
Commit 060713b0 authored by tdurand's avatar tdurand
Browse files

add rules and script parse hmmer

parent e95a84a3
No related branches found
No related tags found
No related merge requests found
......@@ -93,3 +93,35 @@ effectorP:
count_effector:
cpus-per-task: 1
partition: fast
dbcan2:
cpus-per-task: 2
partition: fast
cazyme_count:
cpus-per-task: 1
partition: fast
orthofinder:
cpus-per-task: 10
partition: fast
orthofinder_parse:
cpus-per-task: 1
partition: fast
interproscan:
cpus-per-task: 4
partition: fast
makeblast_db_phibase:
cpus-per-task: 1
partition: fast
phibase:
cpus-per-task: 1
partition: fast
parse_hmmer:
cpus-per-task: 1
partition: fast
import re
import sys
from collections import defaultdict
from Bio import SeqIO
import pandas as pd
import click
from Bio import SearchIO
@click.command(context_settings={'help_option_names': ('-h', '--help'), "max_content_width": 800})
@click.option('--hmmer_file', '-f', default=None,
type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True, resolve_path=True),
required=True, show_default=True, help='Path to output of hmmer file')
@click.option('--parse_output', '-o', default=None,
type=click.Path(exists=False, file_okay=True, dir_okay=False, readable=True, resolve_path=True),
required=True, show_default=True, help='Path to create output of hmmer results parsed')
def main(hmmer_file, parse_output):
""" This program parse the output .tbl of hmmer """
hits = defaultdict(list)
test = set()
attribs = ['id','bitscore','evalue']
query = defaultdict(list)
with open(hmmer_file) as f1:
for lignes in f1:
if re.search('^[^#]',lignes):
ligne = lignes.split()
query["query_name"].append(ligne[2])
query["accession"].append(ligne[3])
test.add(ligne[0])
with open(hmmer_file) as f2:
for record in SearchIO.parse(f2, 'hmmer3-tab'):
for hit in record.hits:
for attrib in attribs:
hits[attrib].append(getattr(hit, attrib))
table = pd.DataFrame.from_dict(hits)
table2 = pd.DataFrame.from_dict(query)
frames = [table,table2]
results = pd.concat(frames,axis=1)
print(results)
sort_table = (results.sort_values(by='evalue'))
sort_table = sort_table.drop_duplicates(subset=["id"],keep='first')
#print(sort_table)
#print(len(test))
sort_table.to_csv(parse_output,index=False)
if __name__ == '__main__':
main()
......@@ -41,7 +41,8 @@ rule finale:
csv_orthogroups = expand(f"{output_dir}7_ORTHOFINDER/Results_orthofinder/sequences_specific/OG_specific_{{samples}}.csv",samples=PROTEIN),
dbcan_list = expand(f"{output_dir}6_CAZYMES/dbcan_{{samples}}/overview.txt", samples = PROTEIN),
interpro_gff_list = expand(f"{output_dir}8_INTERPROSCAN/{{samples}}/{{samples}}.fasta.gff3", samples = PROTEIN),
blast_phibase = expand(f"{output_dir}9_PHIBASE/{{samples}}/{{samples}}_blast_phibase.out", samples = PROTEIN)
blast_phibase = expand(f"{output_dir}9_PHIBASE/{{samples}}/{{samples}}_blast_phibase.out", samples = PROTEIN),
hmmer_parsed= expand(f"{output_dir}3_HMMER_PFAM/PARSED_FILE/{{samples}}_parsed.csv",samples=PROTEIN)
#gff_cazymes_list = expand(f"{output_dir}GFF_with_cazymes/{{samples}}_gff.csv",samples=PROTEIN)
......@@ -541,6 +542,32 @@ rule hmmer_pfam :
shell:
f"hmmsearch --tblout {{output.protein_secreted_domain}} {{params.param_hmmer}} {{input.bdd_pfam}} {{input.fasta_secreted}} 1>{{log.output}} 2>{{log.error}}"
rule parse_hmmer :
threads: get_threads("parse_hmmer",1)
input:
result_hmmer = rules.hmmer_pfam.output.protein_secreted_domain
output:
hmmer_parsed = f"{output_dir}3_HMMER_PFAM/PARSED_FILE/{{samples}}_parsed.csv"
log:
error=f'{log_dir}hmmer_pfam/hmmer_pfam_parsed_{{samples}}.e',
output=f'{log_dir}hmmer_pfam/hmmer_pfam_parsed_{{samples}}.o'
message:
f"""
Running {{rule}}
Input:
- HMMER RESULT : {{input.result_hmmer}}
Output:
- HMMER PARSED : {{output.hmmer_parsed}}
Others
- Threads : {{threads}}
- LOG error: {{log.error}}
- LOG output: {{log.output}}
"""
shell:
f"python {script_dir}hmmer_parse.py -f {{input.result_hmmer}} -o {{output.hmmer_parsed}} 1>{{log.output}} 2>{{log.error}}"
rule effectorP :
threads: get_threads("effectorP", 10)
input:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment