From 073eb2a83f3d1dce315b17f5d754c75fe95f01f6 Mon Sep 17 00:00:00 2001 From: tdurand <theo.durand@etu.uca.fr> Date: Thu, 9 Jun 2022 11:54:34 +0200 Subject: [PATCH] ADD good scripts --- scripts/count_effectors.py | 6 +++--- scripts/gff_sort.py | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/count_effectors.py b/scripts/count_effectors.py index 1afcf32..97def27 100644 --- a/scripts/count_effectors.py +++ b/scripts/count_effectors.py @@ -36,9 +36,9 @@ def main(gff, output, fasta_file): for lignes in f1: ligne = lignes.rstrip("\n") col = ligne.split("\t") - id_1 = re.sub("ID=", "", col[8]) - id_2 = re.sub(";Name=\w+", "", id_1) - if col[2] =="gene": + if col[2] == "gene": + id_1 = re.sub("ID=", "", col[8]) + id_2 = re.sub(";Name=\w+", "", id_1) gff_parse.append(col[0]+" "+col[1]+" "+col[2]+" "+id_2) dico_gff[col[0]].append(id_2) diff --git a/scripts/gff_sort.py b/scripts/gff_sort.py index 6c465f3..fe5996c 100644 --- a/scripts/gff_sort.py +++ b/scripts/gff_sort.py @@ -25,7 +25,8 @@ def main(gff, output, strain_name): if re.search("gene",col[2]): id_strain = re.sub("ID=","ID="+strain_name+"_",col[8]) prot_gff = re.sub(";","T0;",id_strain) - gene_gff.append(col[0]+"\t"+col[1]+"\t"+col[2]+"\t"+col[3]+"\t"+col[4]+"\t"+col[5]+"\t"+col[6]+"\t"+col[7]+"\t"+prot_gff) + all_gff = re.sub("T0T0;","T0;",prot_gff) + gene_gff.append(col[0]+"\t"+col[1]+"\t"+col[2]+"\t"+col[3]+"\t"+col[4]+"\t"+col[5]+"\t"+col[6]+"\t"+col[7]+"\t"+all_gff) output_file = open(output,"w") for elem in gene_gff: -- GitLab