Skip to content
Snippets Groups Projects
Commit 2dff4cf5 authored by nina.marthe_ird.fr's avatar nina.marthe_ird.fr
Browse files

corrected the inference when there is a frameshift. it simplfied the algorithm (less conditions)

parent 13c4e598
No related branches found
No related tags found
No related merge requests found
......@@ -213,7 +213,7 @@ def findOtherStart(cds,segments_on_target_genome): # look for another start codo
if True not in ( (stop_pos%3==start_pos_frame) & (stop_pos>start_pos) for stop_pos in stop_pos_list) :
#print("codon start candidat trouvé dans l'arn messager,",n,"bases en amont du cds")
# calculer le décalage : si on en trouve un 2 bases en amont, ça décale le cadre de lecture !
frame_shift=(frame_shift+n)%3 # vérifier le frame shift !!
frame_shift=(frame_shift+n)%3 # vérifier le frameshift !!
print("the start codon at the position",start_pos,",",n,"bases before the CDS, doesn't have a stop codon after in the same reading frame")
else:
print("the start codon at the position",start_pos,",",n,"bases before the CDS, has a stop codon after in the same reading frame")
......@@ -256,7 +256,7 @@ for feature in Features.values(): # add the sequence of all features
for cds_id in cds_var.keys():
cds=Features[cds_id]
print("analysis of the variations in the CDS",cds_id,":\n")
frame_shift=0
frameshift=0
for index, var in enumerate(cds_var[cds_id]): # for each variation in the current cds :
type_var=var[8]
if type_var!="no_var": # if there is a variation
......@@ -279,106 +279,72 @@ for cds_id in cds_var.keys():
#findOtherStart(cds,segments_on_target_genome) # for now we don't look for another start codon
break
if abs(length_alt-length_ref)%3 == 0: # size diff 3k -> no frame shift.
if (posVar[0])%3==0: #size diff 3k, position 3k
print("variation between two codons not causing a frameshift")
if type_var=="insertion":
print("insertion of",var[10])
elif type_var=="deletion":
print("deletion of",var[9])
else:
print("substitution of",var[9],"by",var[10])
old_frameshift=frameshift
frameshift=(frameshift+length_ref-length_alt)%3
if old_frameshift!=frameshift:
print("variation causing a frameshift")
# frameshift=0 -> reading frame recovered. may need to get a base before.
# frameshift=1 -> frameshift of 1 base to the right
# frameshift=2 -> frameshift of 2 bases to the right
if frameshift==0:
print("recovery of the original reading frame")
if old_frameshift==0:
print("loss of the original reading frame")
#else:
# print("variation not causing a frameshift")
len_fragment_after=(3-length_ref)%3
deleted_sequence=cds.sequence[posVar[0]:posVar[0]+length_ref+len_fragment_after]
inserted_sequence=sequence_target[posVar[1]:posVar[1]+length_alt+len_fragment_after]
stop=print_variation_change(deleted_sequence,inserted_sequence)
if stop:
break
else: # size diff 3k, position !=3k
print("variation in the middle of a codon not causing a frameshift")
if type_var=="insertion":
print("insertion of",var[10])
elif type_var=="deletion":
print("deletion of",var[9])
else:
print("substitution of",var[9],"by",var[10])
if type_var=="insertion":
print("insertion of",var[10])
elif type_var=="deletion":
print("deletion of",var[9])
else:
print("substitution of",var[9],"by",var[10])
len_fragment_before=(posVar[0])%3
len_fragment_after=(3-(len_fragment_before+length_ref))%3
len_fragment_before_del=(posVar[0])%3
len_fragment_before_ins=(posVar[1])%3
total_ins=sequence_target[posVar[1]-len_fragment_before:posVar[1]+length_alt+len_fragment_after]
total_del=cds.sequence[posVar[0]-len_fragment_before:posVar[0]+length_ref+len_fragment_after]
if frameshift==0:
# print only the local change.
len_fragment_after_del=(3-(len_fragment_before_del+length_ref))%3
len_fragment_after_ins=(3-(len_fragment_before_ins+length_alt))%3
total_ins=sequence_target[posVar[1]-len_fragment_before_ins:posVar[1]+length_alt+len_fragment_after_ins]
total_del=cds.sequence[posVar[0]-len_fragment_before_del:posVar[0]+length_ref+len_fragment_after_del]
stop=print_variation_change(total_del,total_ins)
if stop:
break
stop=print_variation_change(total_del,total_ins)
else:
# print changes from local var to next var. at the next var, we will see if the reading frame is recovered.
print("frameshift of",frameshift,"base(s) to the right.")
if index==len(cds_var[cds_id])-1: # it is the last variation. translate until the end of the cds.
total_total_del=cds.sequence[posVar[0]-len_fragment_before_del:]
total_total_ins=sequence_target[posVar[1]-len_fragment_before_ins:]
stop=print_variation_change(total_total_del,total_total_ins)
if stop:
break
# possible that it prints too many variations : for ex if we have a snp on the first and the last base of a codon,
# while printing the effect of the first snp we aso use the second one.
else: # not the last variation. translate until the next var.
nextVar=cds_var[cds_id][index+1]
posNextVar=[int(nextVar[12]),int(nextVar[13])]
else: # size diff !=3k
print("variation causing a frameshift")
old_frameshift=frame_shift
frame_shift=(frame_shift+length_ref-length_alt)%3
# frameshift=0 -> reading frame recovered. may need to get a base before.
# frameshift=1 -> frame shift of 1 base to the right
# frameshift=2 -> frame shift of 2 bases to the right
if type_var=="insertion":
print("insertion of",var[10])
elif type_var=="deletion":
print("deletion of",var[9])
else:
print("substitution of",var[9],"by",var[10])
len_fragment_before_del=(posVar[0])%3
len_fragment_before_ins=(posVar[1])%3
print(len_fragment_before_ins,len_fragment_before_del)
if frame_shift==0:
# print only the local change.
len_fragment_after_del=(3-(len_fragment_before_del+length_ref))%3
len_fragment_after_ins=(3-(len_fragment_before_ins+length_alt))%3
total_ins=sequence_target[posVar[1]-len_fragment_before_ins:posVar[1]+length_alt+len_fragment_after_ins]
total_del=cds.sequence[posVar[0]-len_fragment_before_del:posVar[0]+length_ref+len_fragment_after_del]
print("recovery of the original reading frame")
stop=print_variation_change(total_del,total_ins)
if nextVar[8]=="insertion":
length_ref_nextvar=0
else:
length_ref_nextvar:len(nextVar[9])
if nextVar[8]=="deletion":
length_alt_nextvar=0
else:
length_alt_nextvar=len(nextVar[10])
len_fragment_before_del_nextvar=(posNextVar[0])%3
len_fragment_before_ins_nextvar=(posNextVar[1])%3
total_total_del=cds.sequence[posVar[0]-len_fragment_before_del:posNextVar[0]-len_fragment_before_del_nextvar]
total_total_ins=sequence_target[posVar[1]-len_fragment_before_ins:posNextVar[1]-len_fragment_before_ins_nextvar]
stop=print_variation_change(total_total_del,total_total_ins)
if stop:
break
else:
# print changes from local var to next var. at the next var, we will see if the reading frame is recovered.
print("creating frame shift of",frame_shift,"base(s) to the right.")
if old_frameshift==0:
print("loss of the original reading frame")
if index==len(cds_var[cds_id])-1: # it is the last variation. translate until the end of the cds.
total_total_del=cds.sequence[posVar[0]-len_fragment_before_del:]
total_total_ins=sequence_target[posVar[1]-len_fragment_before_ins:]
stop=print_variation_change(total_total_del,total_total_ins)
if stop:
break
else:
nextVar=cds_var[cds_id][index+1]
posNextVar=[int(nextVar[12]),int(nextVar[13])]
if nextVar[8]=="insertion":
length_ref_nextvar=0
else:
length_ref_nextvar:len(nextVar[9])
if nextVar[8]=="deletion":
length_alt_nextvar=0
else:
length_alt_nextvar=len(nextVar[10])
len_fragment_before_del_nextvar=(posNextVar[0])%3
len_fragment_before_ins_nextvar=(posNextVar[1])%3
total_total_del=cds.sequence[posVar[0]-len_fragment_before_del:posNextVar[0]-len_fragment_before_del_nextvar]
total_total_ins=sequence_target[posVar[1]-len_fragment_before_ins:posNextVar[1]-len_fragment_before_ins_nextvar]
stop=print_variation_change(total_total_del,total_total_ins)
if stop:
break
# possible that it prints too many variations : for ex if we have a snp on the first and the last base of a codon,
# while printing the effect of the first snp we aso use the second one.
print("\n")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment