Skip to content
Snippets Groups Projects
Commit 2dff4cf5 authored by nina.marthe_ird.fr's avatar nina.marthe_ird.fr
Browse files

Corrected the inference when there is a frameshift. This simplified the algorithm (fewer conditions).

parent 13c4e598
No related branches found
No related tags found
No related merge requests found
...@@ -213,7 +213,7 @@ def findOtherStart(cds,segments_on_target_genome): # look for another start codo ...@@ -213,7 +213,7 @@ def findOtherStart(cds,segments_on_target_genome): # look for another start codo
if True not in ( (stop_pos%3==start_pos_frame) & (stop_pos>start_pos) for stop_pos in stop_pos_list) : if True not in ( (stop_pos%3==start_pos_frame) & (stop_pos>start_pos) for stop_pos in stop_pos_list) :
#print("codon start candidat trouvé dans l'arn messager,",n,"bases en amont du cds") #print("codon start candidat trouvé dans l'arn messager,",n,"bases en amont du cds")
# calculer le décalage : si on en trouve un 2 bases en amont, ça décale le cadre de lecture ! # calculer le décalage : si on en trouve un 2 bases en amont, ça décale le cadre de lecture !
frame_shift=(frame_shift+n)%3 # vérifier le frame shift !! frame_shift=(frame_shift+n)%3 # vérifier le frameshift !!
print("the start codon at the position",start_pos,",",n,"bases before the CDS, doesn't have a stop codon after in the same reading frame") print("the start codon at the position",start_pos,",",n,"bases before the CDS, doesn't have a stop codon after in the same reading frame")
else: else:
print("the start codon at the position",start_pos,",",n,"bases before the CDS, has a stop codon after in the same reading frame") print("the start codon at the position",start_pos,",",n,"bases before the CDS, has a stop codon after in the same reading frame")
...@@ -256,7 +256,7 @@ for feature in Features.values(): # add the sequence of all features ...@@ -256,7 +256,7 @@ for feature in Features.values(): # add the sequence of all features
for cds_id in cds_var.keys(): for cds_id in cds_var.keys():
cds=Features[cds_id] cds=Features[cds_id]
print("analysis of the variations in the CDS",cds_id,":\n") print("analysis of the variations in the CDS",cds_id,":\n")
frame_shift=0 frameshift=0
for index, var in enumerate(cds_var[cds_id]): # for each variation in the current cds : for index, var in enumerate(cds_var[cds_id]): # for each variation in the current cds :
type_var=var[8] type_var=var[8]
if type_var!="no_var": # if there is a variation if type_var!="no_var": # if there is a variation
...@@ -279,106 +279,72 @@ for cds_id in cds_var.keys(): ...@@ -279,106 +279,72 @@ for cds_id in cds_var.keys():
#findOtherStart(cds,segments_on_target_genome) # for now we don't look for another start codon #findOtherStart(cds,segments_on_target_genome) # for now we don't look for another start codon
break break
if abs(length_alt-length_ref)%3 == 0: # size diff 3k -> no frame shift. old_frameshift=frameshift
frameshift=(frameshift+length_ref-length_alt)%3
if (posVar[0])%3==0: #size diff 3k, position 3k if old_frameshift!=frameshift:
print("variation between two codons not causing a frameshift") print("variation causing a frameshift")
if type_var=="insertion": # frameshift=0 -> reading frame recovered. may need to get a base before.
print("insertion of",var[10]) # frameshift=1 -> frameshift of 1 base to the right
elif type_var=="deletion": # frameshift=2 -> frameshift of 2 bases to the right
print("deletion of",var[9]) if frameshift==0:
else: print("recovery of the original reading frame")
print("substitution of",var[9],"by",var[10]) if old_frameshift==0:
print("loss of the original reading frame")
#else:
# print("variation not causing a frameshift")
len_fragment_after=(3-length_ref)%3
deleted_sequence=cds.sequence[posVar[0]:posVar[0]+length_ref+len_fragment_after]
inserted_sequence=sequence_target[posVar[1]:posVar[1]+length_alt+len_fragment_after]
stop=print_variation_change(deleted_sequence,inserted_sequence)
if stop:
break
else: # size diff 3k, position !=3k if type_var=="insertion":
print("variation in the middle of a codon not causing a frameshift") print("insertion of",var[10])
if type_var=="insertion": elif type_var=="deletion":
print("insertion of",var[10]) print("deletion of",var[9])
elif type_var=="deletion": else:
print("deletion of",var[9]) print("substitution of",var[9],"by",var[10])
else:
print("substitution of",var[9],"by",var[10])
len_fragment_before=(posVar[0])%3 len_fragment_before_del=(posVar[0])%3
len_fragment_after=(3-(len_fragment_before+length_ref))%3 len_fragment_before_ins=(posVar[1])%3
total_ins=sequence_target[posVar[1]-len_fragment_before:posVar[1]+length_alt+len_fragment_after] if frameshift==0:
total_del=cds.sequence[posVar[0]-len_fragment_before:posVar[0]+length_ref+len_fragment_after] # print only the local change.
len_fragment_after_del=(3-(len_fragment_before_del+length_ref))%3
len_fragment_after_ins=(3-(len_fragment_before_ins+length_alt))%3
total_ins=sequence_target[posVar[1]-len_fragment_before_ins:posVar[1]+length_alt+len_fragment_after_ins]
total_del=cds.sequence[posVar[0]-len_fragment_before_del:posVar[0]+length_ref+len_fragment_after_del]
stop=print_variation_change(total_del,total_ins)
if stop:
break
stop=print_variation_change(total_del,total_ins) else:
# print changes from local var to next var. at the next var, we will see if the reading frame is recovered.
print("frameshift of",frameshift,"base(s) to the right.")
if index==len(cds_var[cds_id])-1: # it is the last variation. translate until the end of the cds.
total_total_del=cds.sequence[posVar[0]-len_fragment_before_del:]
total_total_ins=sequence_target[posVar[1]-len_fragment_before_ins:]
stop=print_variation_change(total_total_del,total_total_ins)
if stop: if stop:
break break
# possible that it prints too many variations : for ex if we have a snp on the first and the last base of a codon, else: # not the last variation. translate until the next var.
# while printing the effect of the first snp we aso use the second one. nextVar=cds_var[cds_id][index+1]
posNextVar=[int(nextVar[12]),int(nextVar[13])]
else: # size diff !=3k if nextVar[8]=="insertion":
print("variation causing a frameshift") length_ref_nextvar=0
old_frameshift=frame_shift else:
frame_shift=(frame_shift+length_ref-length_alt)%3 length_ref_nextvar:len(nextVar[9])
# frameshift=0 -> reading frame recovered. may need to get a base before. if nextVar[8]=="deletion":
# frameshift=1 -> frame shift of 1 base to the right length_alt_nextvar=0
# frameshift=2 -> frame shift of 2 bases to the right else:
length_alt_nextvar=len(nextVar[10])
if type_var=="insertion":
print("insertion of",var[10]) len_fragment_before_del_nextvar=(posNextVar[0])%3
elif type_var=="deletion": len_fragment_before_ins_nextvar=(posNextVar[1])%3
print("deletion of",var[9]) total_total_del=cds.sequence[posVar[0]-len_fragment_before_del:posNextVar[0]-len_fragment_before_del_nextvar]
else: total_total_ins=sequence_target[posVar[1]-len_fragment_before_ins:posNextVar[1]-len_fragment_before_ins_nextvar]
print("substitution of",var[9],"by",var[10]) stop=print_variation_change(total_total_del,total_total_ins)
len_fragment_before_del=(posVar[0])%3
len_fragment_before_ins=(posVar[1])%3
print(len_fragment_before_ins,len_fragment_before_del)
if frame_shift==0:
# print only the local change.
len_fragment_after_del=(3-(len_fragment_before_del+length_ref))%3
len_fragment_after_ins=(3-(len_fragment_before_ins+length_alt))%3
total_ins=sequence_target[posVar[1]-len_fragment_before_ins:posVar[1]+length_alt+len_fragment_after_ins]
total_del=cds.sequence[posVar[0]-len_fragment_before_del:posVar[0]+length_ref+len_fragment_after_del]
print("recovery of the original reading frame")
stop=print_variation_change(total_del,total_ins)
if stop: if stop:
break break
# possible that it prints too many variations : for ex if we have a snp on the first and the last base of a codon,
else: # while printing the effect of the first snp we aso use the second one.
# print changes from local var to next var. at the next var, we will see if the reading frame is recovered.
print("creating frame shift of",frame_shift,"base(s) to the right.")
if old_frameshift==0:
print("loss of the original reading frame")
if index==len(cds_var[cds_id])-1: # it is the last variation. translate until the end of the cds.
total_total_del=cds.sequence[posVar[0]-len_fragment_before_del:]
total_total_ins=sequence_target[posVar[1]-len_fragment_before_ins:]
stop=print_variation_change(total_total_del,total_total_ins)
if stop:
break
else:
nextVar=cds_var[cds_id][index+1]
posNextVar=[int(nextVar[12]),int(nextVar[13])]
if nextVar[8]=="insertion":
length_ref_nextvar=0
else:
length_ref_nextvar:len(nextVar[9])
if nextVar[8]=="deletion":
length_alt_nextvar=0
else:
length_alt_nextvar=len(nextVar[10])
len_fragment_before_del_nextvar=(posNextVar[0])%3
len_fragment_before_ins_nextvar=(posNextVar[1])%3
total_total_del=cds.sequence[posVar[0]-len_fragment_before_del:posNextVar[0]-len_fragment_before_del_nextvar]
total_total_ins=sequence_target[posVar[1]-len_fragment_before_ins:posNextVar[1]-len_fragment_before_ins_nextvar]
stop=print_variation_change(total_total_del,total_total_ins)
if stop:
break
print("\n") print("\n")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment