Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
EffiCAZ
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
PHIM
sravel
EffiCAZ
Commits
060713b0
Commit
060713b0
authored
2 years ago
by
tdurand
Browse files
Options
Downloads
Patches
Plain Diff
add rules and script parse hmmer
parent
e95a84a3
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
cluster_config_SLURM.yaml
+32
-0
32 additions, 0 deletions
cluster_config_SLURM.yaml
scripts/hmmer_parse.py
+52
-0
52 additions, 0 deletions
scripts/hmmer_parse.py
snakefile
+28
-1
28 additions, 1 deletion
snakefile
with
112 additions
and
1 deletion
cluster_config_SLURM.yaml
+
32
−
0
View file @
060713b0
...
...
@@ -93,3 +93,35 @@ effectorP:
count_effector
:
cpus-per-task
:
1
partition
:
fast
dbcan2
:
cpus-per-task
:
2
partition
:
fast
cazyme_count
:
cpus-per-task
:
1
partition
:
fast
orthofinder
:
cpus-per-task
:
10
partition
:
fast
orthofinder_parse
:
cpus-per-task
:
1
partition
:
fast
interproscan
:
cpus-per-task
:
4
partition
:
fast
makeblast_db_phibase
:
cpus-per-task
:
1
partition
:
fast
phibase
:
cpus-per-task
:
1
partition
:
fast
parse_hmmer
:
cpus-per-task
:
1
partition
:
fast
This diff is collapsed.
Click to expand it.
scripts/hmmer_parse.py
0 → 100644
+
52
−
0
View file @
060713b0
import
re
import
sys
from
collections
import
defaultdict
from
Bio
import
SeqIO
import
pandas
as
pd
import
click
from
Bio
import
SearchIO
@click.command
(
context_settings
=
{
'
help_option_names
'
:
(
'
-h
'
,
'
--help
'
),
"
max_content_width
"
:
800
})
@click.option
(
'
--hmmer_file
'
,
'
-f
'
,
default
=
None
,
type
=
click
.
Path
(
exists
=
True
,
file_okay
=
True
,
dir_okay
=
False
,
readable
=
True
,
resolve_path
=
True
),
required
=
True
,
show_default
=
True
,
help
=
'
Path to output of hmmer file
'
)
@click.option
(
'
--parse_output
'
,
'
-o
'
,
default
=
None
,
type
=
click
.
Path
(
exists
=
False
,
file_okay
=
True
,
dir_okay
=
False
,
readable
=
True
,
resolve_path
=
True
),
required
=
True
,
show_default
=
True
,
help
=
'
Path to create output of hmmer results parsed
'
)
def
main
(
hmmer_file
,
parse_output
):
"""
This program parse the output .tbl of hmmer
"""
hits
=
defaultdict
(
list
)
test
=
set
()
attribs
=
[
'
id
'
,
'
bitscore
'
,
'
evalue
'
]
query
=
defaultdict
(
list
)
with
open
(
hmmer_file
)
as
f1
:
for
lignes
in
f1
:
if
re
.
search
(
'
^[^#]
'
,
lignes
):
ligne
=
lignes
.
split
()
query
[
"
query_name
"
].
append
(
ligne
[
2
])
query
[
"
accession
"
].
append
(
ligne
[
3
])
test
.
add
(
ligne
[
0
])
with
open
(
hmmer_file
)
as
f2
:
for
record
in
SearchIO
.
parse
(
f2
,
'
hmmer3-tab
'
):
for
hit
in
record
.
hits
:
for
attrib
in
attribs
:
hits
[
attrib
].
append
(
getattr
(
hit
,
attrib
))
table
=
pd
.
DataFrame
.
from_dict
(
hits
)
table2
=
pd
.
DataFrame
.
from_dict
(
query
)
frames
=
[
table
,
table2
]
results
=
pd
.
concat
(
frames
,
axis
=
1
)
print
(
results
)
sort_table
=
(
results
.
sort_values
(
by
=
'
evalue
'
))
sort_table
=
sort_table
.
drop_duplicates
(
subset
=
[
"
id
"
],
keep
=
'
first
'
)
#print(sort_table)
#print(len(test))
sort_table
.
to_csv
(
parse_output
,
index
=
False
)
if
__name__
==
'
__main__
'
:
main
()
This diff is collapsed.
Click to expand it.
snakefile
+
28
−
1
View file @
060713b0
...
...
@@ -41,7 +41,8 @@ rule finale:
csv_orthogroups = expand(f"{output_dir}7_ORTHOFINDER/Results_orthofinder/sequences_specific/OG_specific_{{samples}}.csv",samples=PROTEIN),
dbcan_list = expand(f"{output_dir}6_CAZYMES/dbcan_{{samples}}/overview.txt", samples = PROTEIN),
interpro_gff_list = expand(f"{output_dir}8_INTERPROSCAN/{{samples}}/{{samples}}.fasta.gff3", samples = PROTEIN),
blast_phibase = expand(f"{output_dir}9_PHIBASE/{{samples}}/{{samples}}_blast_phibase.out", samples = PROTEIN)
blast_phibase = expand(f"{output_dir}9_PHIBASE/{{samples}}/{{samples}}_blast_phibase.out", samples = PROTEIN),
hmmer_parsed= expand(f"{output_dir}3_HMMER_PFAM/PARSED_FILE/{{samples}}_parsed.csv",samples=PROTEIN)
#gff_cazymes_list = expand(f"{output_dir}GFF_with_cazymes/{{samples}}_gff.csv",samples=PROTEIN)
...
...
@@ -541,6 +542,32 @@ rule hmmer_pfam :
shell:
f"hmmsearch --tblout {{output.protein_secreted_domain}} {{params.param_hmmer}} {{input.bdd_pfam}} {{input.fasta_secreted}} 1>{{log.output}} 2>{{log.error}}"
rule parse_hmmer :
threads: get_threads("parse_hmmer",1)
input:
result_hmmer = rules.hmmer_pfam.output.protein_secreted_domain
output:
hmmer_parsed = f"{output_dir}3_HMMER_PFAM/PARSED_FILE/{{samples}}_parsed.csv"
log:
error=f'{log_dir}hmmer_pfam/hmmer_pfam_parsed_{{samples}}.e',
output=f'{log_dir}hmmer_pfam/hmmer_pfam_parsed_{{samples}}.o'
message:
f"""
Running {{rule}}
Input:
- HMMER RESULT : {{input.result_hmmer}}
Output:
- HMMER PARSED : {{output.hmmer_parsed}}
Others
- Threads : {{threads}}
- LOG error: {{log.error}}
- LOG output: {{log.output}}
"""
shell:
f"python {script_dir}hmmer_parse.py -f {{input.result_hmmer}} -o {{output.hmmer_parsed}} 1>{{log.output}} 2>{{log.error}}"
rule effectorP :
threads: get_threads("effectorP", 10)
input:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment