# Provenance (from the original patch header):
#   commit  ca0706b9b6aa1b8642d3a2537f8f6044a6f106f7
#   From:    SimonBache <simon.bache@etu.unistra.fr>
#   Date:    Tue, 28 Jun 2022 11:39:35 +0200
#   Subject: [PATCH] add dbcan parse script
#   File:    scripts/dbcan.py (new file)
"""Count, per contig, the GFF gene features that a dbcan result file reports
with ``#ofTools == 3``, and write the counts to a CSV file."""

import pandas as pd
import click


# Column names forced onto the dbcan result table (it must have 5 columns).
DBCAN_COLUMNS = ["gene", "HMMER", "Hotpep", "Diamond", "#ofTools"]

# Column names forced onto the GFF table (it must have 9 columns).
GFF_COLUMNS = [
    "contig", "source", "feature", "start", "stop",
    "score_1", "strand", "score_2", "id",
]


def _read_dbcan_genes(path):
    """Return the ``gene`` column of the dbcan rows where ``#ofTools`` is 3."""
    table = pd.read_table(path)
    table.columns = DBCAN_COLUMNS
    return table.loc[table["#ofTools"] == 3, "gene"]


def _gene_ids_from_attributes(attributes):
    """Reduce a GFF attribute column to a bare identifier per row.

    The ``Name=`` and ``Parent=`` prefixes are dropped, then every
    ``;<word>`` run, then the ``ID=`` prefix -- the same sequence of
    substitutions as before.  ``regex`` is passed explicitly because the
    default of ``Series.str.replace`` changed from True to False in
    pandas 2.0, which would silently turn the pattern into a literal.
    """
    ids = attributes.str.replace(r'Name=', "", regex=False)
    ids = ids.str.replace(r'Parent=', "", regex=False)
    ids = ids.str.replace(r'\;[0-9a-zA-Z_]*', "", regex=True)
    return ids.str.replace(r'ID=', "", regex=False)


def _read_gff_genes(path):
    """Return the ``gene`` feature rows of the GFF file with cleaned ids."""
    # NOTE(review): read_table consumes the first line as a header row.
    # GFF files normally have no header (and may start with '#' directives);
    # if that applies to the files used here, header=None / comment="#" is
    # needed -- confirm against the pipeline's actual GFF input.
    table = pd.read_table(path)
    table.columns = GFF_COLUMNS
    # .copy() so the column assignment below writes to an independent frame
    # instead of a slice of ``table``.
    genes = table[table["feature"] == "gene"].copy()
    genes["id"] = _gene_ids_from_attributes(genes["id"])
    return genes


@click.command(context_settings={'help_option_names': ('-h', '--help'), "max_content_width": 800})
@click.option('--dbcan', '-i',
              type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True, resolve_path=True),
              required=True, help='Path to input dbcan result file')
@click.option('--gff', '-g',
              type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True, resolve_path=True),
              required=True, help='Path to input gff file')
@click.option('--output', '-o',
              type=click.Path(exists=False, file_okay=True, dir_okay=False, writable=True, readable=False,
                              resolve_path=True),
              required=True, help='Path to output count csv file')
def main(dbcan, gff, output):
    """Count dbcan-supported genes per contig and write the counts as CSV.

    Genes are taken from the dbcan rows whose '#ofTools' value is 3 and
    matched against the ids of the 'gene' features in the gff file.
    """
    supported_genes = _read_dbcan_genes(dbcan)
    gene_features = _read_gff_genes(gff)

    # Preview of the parsed gene features (kept from the original script).
    print(gene_features.head())

    matched = gene_features[gene_features["id"].isin(supported_genes)]

    # Count directly on the matched GFF rows.  The previous column-wise
    # concat with the dbcan table aligned rows on unrelated positions and
    # only contributed NaN padding, which value_counts() ignored.
    count = matched["contig"].value_counts()
    count.to_csv(str(output))


if __name__ == '__main__':
    main()