Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import re
import sys
from collections import defaultdict
from Bio import SeqIO
import click
@click.command(context_settings={'help_option_names': ('-h', '--help'), "max_content_width": 800})
@click.option('--protein_file', '-p', default=None,
type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True, resolve_path=True),
required=True, show_default=True, help='Path to fasta protein sorted with the 4 predicted tools (PredGPI , Phobius, SignalP, TargetP')
@click.option('--tmhmm_parsed', '-tmhmm', default=None,
type=click.Path(exists=False, file_okay=True, dir_okay=False, readable=True, resolve_path=True),
required=True, show_default=True, help='Path to output file of tmhmm parsed')
@click.option('--fasta_output', '-o', default=None,
type=click.Path(exists=False, file_okay=True, dir_okay=False, readable=True, resolve_path=True),
required=True, show_default=True, help='Path to fasta protein output')
def main(protein_file, tmhmm_parsed, fasta_output):
"""This programme use ID of protein with signal peptide and protein fasta file to generate fasta protein with this ID"""
id_tmhmm = []
fasta_prot = defaultdict(str)
with open(tmhmm_parsed, "r") as f1:
for lignes in f1:
without_backspace = lignes.rstrip("\n")
col = without_backspace.split("\t")
id_tmhmm.append(col[0])
for record in SeqIO.parse(protein_file, "fasta"):
fasta_prot[record.id] = record
fasta_secretion = []
for cle in fasta_prot:
for elem in id_tmhmm:
if elem == cle:
fasta_secretion.append(fasta_prot[elem])
SeqIO.write(fasta_secretion,fasta_output,"fasta")
if __name__ == '__main__':
main()