modified the output of the variations affecting the start codon

f900c6e4 · nina.marthe_ird.fr · 35e00a52 · f900c6e4
Commit f900c6e4 authored 1 year ago by nina.marthe_ird.fr
--- a/inference.py
+++ b/inference.py
@@ -154,7 +154,10 @@ def traduction(sequence_arn): # translate rna
    list_codons=cut_codon(sequence_arn)
    prot=list()
    for codon in list_codons:
-        prot.append(get_aa(codon))
+        if len(codon)==3:
+            prot.append(get_aa(codon))
+        else:
+            print("attempt to get the amino acid for an incomplete codon")
    return prot
 def get_sequence_on_genome(feature,segments_on_target_genome): # returns the sequence of the feature on the target genome
@@ -242,7 +245,6 @@ def print_variation_change(deleted_sequence,inserted_sequence): # print the cons
    return stop
 [paths,seg_seq]=get_segments_sequence_and_paths(gfa)
 segments_on_target_genome=get_segments_positions_on_genome(pos_seg)
 cds_var=get_cds_variations(var_file)
@@ -253,7 +255,7 @@ for feature in Features.values(): # add the sequence of all features
 # analysing the variations for all the cds :
 for cds_id in cds_var.keys():
    cds=Features[cds_id]
-    print("analysis of the variations in the CDS",cds_id,"\n")
+    print("analysis of the variations in the CDS",cds_id,":\n")
    frame_shift=0
    for index, var in enumerate(cds_var[cds_id]): # for each variation in the current cds :
        type_var=var[8]
@@ -271,6 +273,11 @@ for cds_id in cds_var.keys():
                length_alt=len(var[10])
            print("variation",index, ":")
+            if posVar[0]<=3:
+                print("variation of the start codon, mRNA most likely wont be translated")
+                #findOtherStart(cds,segments_on_target_genome) # for now we don't look for another start codon
+                break
            if abs(length_alt-length_ref)%3 == 0: # size diff 3k -> no frame shift.
@@ -311,7 +318,6 @@ for cds_id in cds_var.keys():
                # possible that it prints too many variations : for ex if we have a snp on the first and the last base of a codon, 
                # while printing the effect of the first snp we also use the second one.
            else: # size diff !=3k
                print("frameshift variation")
                old_frameshift=frame_shift
@@ -373,9 +379,6 @@ for cds_id in cds_var.keys():
                        if stop:
                            break
-            if posVar[0]<=3:
-                print("start codon affected, mRNA most likely wont be translated")
-                #findOtherStart(cds,segments_on_target_genome) # for now we don't look for another start codon
-                break
        print("\n")
\ No newline at end of file