diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..866714fe81e98aa68f069ae19229d1b40c40e9f0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +#IDE +.idea +.project +.pydevproject +.settings/ +venv +config.yaml + +# snakemake +.snakemake \ No newline at end of file diff --git a/scripts/count_effectors.py b/scripts/count_effectors.py index 97def27588e4794a04909fbcc283969a5676a0e0..1afcf32ef960cb09cf46ac42d7a75bf7acd0b724 100644 --- a/scripts/count_effectors.py +++ b/scripts/count_effectors.py @@ -36,9 +36,9 @@ def main(gff, output, fasta_file): for lignes in f1: ligne = lignes.rstrip("\n") col = ligne.split("\t") - if col[2] == "gene": - id_1 = re.sub("ID=", "", col[8]) - id_2 = re.sub(";Name=\w+", "", id_1) + id_1 = re.sub("ID=", "", col[8]) + id_2 = re.sub(";Name=\w+", "", id_1) + if col[2] =="gene": gff_parse.append(col[0]+" "+col[1]+" "+col[2]+" "+id_2) dico_gff[col[0]].append(id_2) diff --git a/scripts/gff_sort.py b/scripts/gff_sort.py index e90487d9a33b549d3c57477ce6f3af3bb70f8597..6c465f322ff13802e1726fe5ae821cf364440ae6 100644 --- a/scripts/gff_sort.py +++ b/scripts/gff_sort.py @@ -22,10 +22,10 @@ def main(gff, output, strain_name): with open(gff, "r") as f1: for lignes in f1: col = lignes.split("\t") - id_strain = re.sub("ID=","ID="+strain_name+"_",col[8]) - prot_gff = re.sub(";","T0;",id_strain) - prot_gff = re.sub("T0T0", "T0", prot_gff) - gene_gff.append(col[0]+"\t"+col[1]+"\t"+col[2]+"\t"+col[3]+"\t"+col[4]+"\t"+col[5]+"\t"+col[6]+"\t"+col[7]+"\t"+prot_gff) + if re.search("gene",col[2]): + id_strain = re.sub("ID=","ID="+strain_name+"_",col[8]) + prot_gff = re.sub(";","T0;",id_strain) + gene_gff.append(col[0]+"\t"+col[1]+"\t"+col[2]+"\t"+col[3]+"\t"+col[4]+"\t"+col[5]+"\t"+col[6]+"\t"+col[7]+"\t"+prot_gff) output_file = open(output,"w") for elem in gene_gff: