Skip to content
Snippets Groups Projects
Commit 040a09c5 authored by nina.marthe_ird.fr's avatar nina.marthe_ird.fr
Browse files

tried to adapt the code to use several cds. detected a problem in the...

tried to adapt the code to use several cds. detected a problem in the coordinate system to correct in main
parent 18259c64
No related branches found
No related tags found
No related merge requests found
......@@ -53,10 +53,15 @@ def get_segments_positions_on_genome(pos_seg): # creates a dict with the positio
def add_feature_sequence(feature,seg_seq): # add the feature's sequence in the Feature object
feature_sequence=""
for segment in feature.segments_list:
if (segment==feature.segments_list[0]) & (segment==feature.segments_list[-1]):# the segment is the first AND the last of the feature
feature_sequence=get_segment_sequence(seg_seq,segment)[feature.pos_start-2:feature.pos_stop-1]
if segment==feature.segments_list[0]:
feature_sequence+=get_segment_sequence(seg_seq,segment)[feature.pos_start-2:] # revérifier les +/- 1 pour la position, avec de vraies données
if feature.type=="CDS":
print(get_segment_sequence(seg_seq,segment), feature.pos_start)
print(feature.segments_list)
elif segment==feature.segments_list[-1]:
feature_sequence+=get_segment_sequence(seg_seq,segment)[0:feature.pos_stop] # revérifier les +/- 1 pour la position, avec de vraies données
feature_sequence+=get_segment_sequence(seg_seq,segment)[0:feature.pos_stop-1] # revérifier les +/- 1 pour la position, avec de vraies données
else:
feature_sequence+=get_segment_sequence(seg_seq,segment)
feature.sequence=feature_sequence
......@@ -171,7 +176,7 @@ def get_sequence_on_genome(feature,segments_on_target_genome): # returns the seq
if segment==cds.segments_list[0]:
new_sequence+=get_segment_sequence(seg_seq,segment)[cds.pos_start-2:]
elif segment==cds.segments_list[-1]:
new_sequence+=get_segment_sequence(seg_seq,segment)[0:cds.pos_stop]
new_sequence+=get_segment_sequence(seg_seq,segment)[0:cds.pos_stop-1]
else:
new_sequence+=get_segment_sequence(seg_seq,segment)
return new_sequence
......@@ -255,103 +260,125 @@ segments_on_target_genome=get_segments_positions_on_genome(pos_seg)
mrna_var=get_mrna_var(var_file)
for feature in Features.values(): # add the sequence of all features
add_feature_sequence(feature,seg_seq)
if feature.type=="CDS":
print(feature.id,feature.sequence)
# analysing the variations for all the mrna :
for mrna_id in mrna_var.keys():
print("analysis of the variations in the mRNA",mrna_id,":\n")
cds_var=mrna_var[mrna_id]
for cds_id in cds_var:
cds=Features[cds_id]
print("\nCDS",cds_id,":")
analysis=False
if analysis==True:
# analysing the variations for all the mrna :
for mrna_id in mrna_var.keys():
print("analysis of the variations in the mRNA",mrna_id,":\n")
leftover_ref=""
leftover_target=""
frameshift=0
for index, var in enumerate(cds_var[cds_id]): # for each variation in the current cds :
type_var=var[8]
if type_var!="no_var": # if there is a variation
posVar=[int(var[12]),int(var[13])]
sequence_target=get_sequence_on_genome(cds_id,segments_on_target_genome)
if type_var=="insertion":
length_ref=0
else:
length_ref=len(var[9])
if type_var=="deletion":
length_alt=0
else:
length_alt=len(var[10])
print("\nvariation",index+1, ":")
if posVar[0]<3:
print("variation of the start codon, mRNA most likely wont be translated")
#findOtherStart(cds,segments_on_target_genome) # for now we don't look for another start codon
break
old_frameshift=frameshift
frameshift=(frameshift+length_ref-length_alt)%3
if old_frameshift!=frameshift:
print("variation causing a frameshift")
# frameshift=0 -> reading frame recovered. may need to get a base before.
# frameshift=1 -> frameshift of 1 base to the right
# frameshift=2 -> frameshift of 2 bases to the right
if frameshift==0:
print("recovery of the original reading frame")
if old_frameshift==0:
print("loss of the original reading frame")
#else:
# print("variation not causing a frameshift")
if type_var=="insertion":
print("insertion of",var[10])
elif type_var=="deletion":
print("deletion of",var[9])
else:
print("substitution of",var[9],"by",var[10])
len_fragment_before_del=(posVar[0])%3
len_fragment_before_ins=(posVar[1])%3
if frameshift==0:
# print only the local change.
len_fragment_after_del=(3-(len_fragment_before_del+length_ref))%3
len_fragment_after_ins=(3-(len_fragment_before_ins+length_alt))%3
total_ins=sequence_target[posVar[1]-len_fragment_before_ins:posVar[1]+length_alt+len_fragment_after_ins]
total_del=cds.sequence[posVar[0]-len_fragment_before_del:posVar[0]+length_ref+len_fragment_after_del]
stop=print_variation_change(total_del,total_ins)
if stop:
cds_var=mrna_var[mrna_id]
for index_cds,cds_id in enumerate(cds_var):
cds=Features[cds_id]
print("\nCDS",cds_id,":")
for index_var, var in enumerate(cds_var[cds_id]): # for each variation in the current cds :
type_var=var[8]
if type_var!="no_var": # if there is a variation
posVar=[int(var[12]),int(var[13])]
sequence_target=get_sequence_on_genome(cds_id,segments_on_target_genome)
if type_var=="insertion":
length_ref=0
else:
length_ref=len(var[9])
if type_var=="deletion":
length_alt=0
else:
length_alt=len(var[10])
print("\nvariation",index_var+1, ":")
if (index_cds==0) & (posVar[0]<3):
print("variation of the start codon, mRNA most likely wont be translated")
#findOtherStart(cds,segments_on_target_genome) # for now we don't look for another start codon
break
else:
# print changes from local var to next var. at the next var, we will see if the reading frame is recovered.
print("frameshift of",frameshift,"base(s) to the right.")
if index==len(cds_var[cds_id])-1: # it is the last variation. translate until the end of the cds.
total_total_del=cds.sequence[posVar[0]-len_fragment_before_del:]
total_total_ins=sequence_target[posVar[1]-len_fragment_before_ins:]
stop=print_variation_change(total_total_del,total_total_ins)
if stop:
break
else: # not the last variation. translate until the next var.
nextVar=cds_var[cds_id][index+1]
posNextVar=[int(nextVar[12]),int(nextVar[13])]
if nextVar[8]=="insertion":
length_ref_nextvar=0
else:
length_ref_nextvar:len(nextVar[9])
if nextVar[8]=="deletion":
length_alt_nextvar=0
else:
length_alt_nextvar=len(nextVar[10])
len_fragment_before_del_nextvar=(posNextVar[0])%3
len_fragment_before_ins_nextvar=(posNextVar[1])%3
total_total_del=cds.sequence[posVar[0]-len_fragment_before_del:posNextVar[0]-len_fragment_before_del_nextvar]
total_total_ins=sequence_target[posVar[1]-len_fragment_before_ins:posNextVar[1]-len_fragment_before_ins_nextvar]
stop=print_variation_change(total_total_del,total_total_ins)
old_frameshift=frameshift
frameshift=(frameshift+length_ref-length_alt)%3
if old_frameshift!=frameshift:
print("variation causing a frameshift")
# frameshift=0 -> reading frame recovered. may need to get a base before.
# frameshift=1 -> frameshift of 1 base to the right
# frameshift=2 -> frameshift of 2 bases to the right
if frameshift==0:
print("recovery of the original reading frame")
if old_frameshift==0:
print("loss of the original reading frame")
#else:
# print("variation not causing a frameshift")
if type_var=="insertion":
print("insertion of",var[10])
elif type_var=="deletion":
print("deletion of",var[9])
else:
print("substitution of",var[9],"by",var[10])
len_fragment_before_del=(posVar[0])%3
len_fragment_before_ins=(posVar[1])%3
if frameshift==0:
# print only the local change.
print(cds.sequence,sequence_target)
len_fragment_after_del=(3-(len_fragment_before_del+length_ref))%3
len_fragment_after_ins=(3-(len_fragment_before_ins+length_alt))%3
total_ins=sequence_target[posVar[1]-len_fragment_before_ins:posVar[1]+length_alt+len_fragment_after_ins]
total_del=cds.sequence[posVar[0]-len_fragment_before_del:posVar[0]+length_ref+len_fragment_after_del]
stop=print_variation_change(total_del,total_ins)
print("a",total_del,total_ins)
if stop:
break
# possible that it prints too many variations : for ex if we have a snp on the first and the last base of a codon,
# while printing the effect of the first snp we aso use the second one.
else:
# print changes from local var to next var. at the next var, we will see if the reading frame is recovered.
print("frameshift of",frameshift,"base(s) to the right.")
if index_var==len(cds_var[cds_id])-1: # it is the last variation. translate until the end of the cds while having full codons
total_total_del=cds.sequence[posVar[0]-len_fragment_before_del:]
print(cds.sequence,sequence_target)
total_total_ins=sequence_target[posVar[1]-len_fragment_before_ins:]
# supprimer taille%3 des sequences ins et del.
# en fait non. il faut récupérer les séquences sur les seq des cds (sans l'intron dcp), prendre la taille sur ça et supprimer sur ça
del_leftover_len=len(total_total_del)%3
ins_leftover_len=len(total_total_ins)%3
print("b",ins_leftover_len,total_total_ins)
if del_leftover_len!=0:
total_total_del=total_total_del[0:-del_leftover_len]
if ins_leftover_len!=0:
total_total_ins=total_total_ins[0:-ins_leftover_len]
stop=print_variation_change(total_total_del,total_total_ins)
print("b",total_total_del,total_total_ins)
if stop:
break
else: # not the last variation. translate until the next var.
nextVar=cds_var[cds_id][index_var+1]
posNextVar=[int(nextVar[12]),int(nextVar[13])]
if nextVar[8]=="insertion":
length_ref_nextvar=0
else:
length_ref_nextvar:len(nextVar[9])
if nextVar[8]=="deletion":
length_alt_nextvar=0
else:
length_alt_nextvar=len(nextVar[10])
len_fragment_before_del_nextvar=(posNextVar[0])%3
len_fragment_before_ins_nextvar=(posNextVar[1])%3
total_total_del=cds.sequence[posVar[0]-len_fragment_before_del:posNextVar[0]-len_fragment_before_del_nextvar]
total_total_ins=sequence_target[posVar[1]-len_fragment_before_ins:posNextVar[1]-len_fragment_before_ins_nextvar]
stop=print_variation_change(total_total_del,total_total_ins)
print("c",total_total_del,total_total_ins)
if stop:
break
if index_var==len(cds_var[cds_id])-1: # it is the last variation, get the end of the cds
leftover_ref=cds.sequence[len(cds.sequence)-3:]
leftover_target=sequence_target[len(sequence_target)-3:]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment