Skip to content
Snippets Groups Projects
Commit 2bbe863d authored by NMarthe's avatar NMarthe
Browse files

amélioré la gestion des substitutions quand le nombre de segments est différent

parent 409c59ea
No related branches found
No related tags found
No related merge requests found
......@@ -2,6 +2,10 @@ from Graph_gff import Segments, Features, get_feature_start_on_segment, get_feat
global segments_on_target_genome
segments_on_target_genome={}
global target_genome_name
target_genome_name="CM020642.1_Azucena_chromosome10"
target_genome_name="genome2_chr10"
# get the start position of the features on the linear genome, using their coordinates on the graph and the coordinantes of the segments on the genome
def get_feature_start_on_genome(start_seg,feat_id):
seg_start_pos=segments_on_target_genome[start_seg][1]
......@@ -247,11 +251,11 @@ def get_feature_path(paths,first_seg,last_seg):
first_seg_stranded=first_strand+first_seg
last_strand=convert_strand(segments_on_target_genome[last_seg][3])
last_seg_stranded=last_strand+last_seg
index_first_seg=int(paths["CM020642.1_Azucena_chromosome10"].index(first_seg_stranded))
index_last_seg=int(paths["CM020642.1_Azucena_chromosome10"].index(last_seg_stranded))
index_first_seg=int(paths[target_genome_name].index(first_seg_stranded))
index_last_seg=int(paths[target_genome_name].index(last_seg_stranded))
first_index=min(index_first_seg,index_last_seg)
last_index=max(index_last_seg,index_first_seg)
list_segfeat_azu=paths["CM020642.1_Azucena_chromosome10"][first_index:last_index+1]
list_segfeat_azu=paths[target_genome_name][first_index:last_index+1]
list_segfeat_azu_corrected=[convert_strand(segment_stranded[0])+segment_stranded[1:] for segment_stranded in list_segfeat_azu]
return list_segfeat_azu_corrected
......@@ -318,7 +322,7 @@ def reset_var(variation):
variation.ref=''
variation.alt=''
def get_old_new_pos_substitution(feat_start,variation,list_segfeat_azu,feat,j):
def get_old_new_pos_substitution(feat_start,variation,list_segfeat_azu,feat):
pos_old=str(int(Segments[variation.start_var].start)-int(feat_start)+1)
start_feat=get_feature_start_on_genome(list_segfeat_azu[0][1:],feat)
start_var=int(segments_on_target_genome[variation.start_on_target][1])
......@@ -368,11 +372,16 @@ def init_new_var(variation,type,list_segfeat_nb,list_segfeat_azu,i,j,seg_seq,fea
variation.ref=seg_seq[list_segfeat_nb[i][1:]]
variation.alt="-"
def continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j):
def continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j,genome_to_continue):
if variation.type=="substitution":
variation.ref+=seg_seq[list_segfeat_nb[i][1:]]
variation.alt+=seg_seq[list_segfeat_azu[j][1:]]
if genome_to_continue==0: # genome_to_continue allows to choose if the substitution continues for the original or the target genome, or both.
variation.ref+=seg_seq[list_segfeat_nb[i][1:]]
variation.alt+=seg_seq[list_segfeat_azu[j][1:]]
elif genome_to_continue==1: # deletion
variation.ref+=seg_seq[list_segfeat_nb[i][1:]]
elif genome_to_continue==2: # insertion
variation.alt+=seg_seq[list_segfeat_azu[j][1:]]
elif variation.type=="insertion":
variation.alt+=seg_seq[list_segfeat_azu[j][1:]]
elif variation.type=="deletion":
......
......@@ -55,14 +55,14 @@ def genome_gff(pos_seg, gff, out, gfa):
print("generation of the genome's gff ")
# create variables and open files
[once,detail,var,stats]=[False,False,True,False]
[once,detail,var,stats]=[True,True,True,False]
if var:
[paths,seg_seq]=get_segments_sequence_and_paths(gfa)
file_out_var = open("variations_chr10.txt", 'w')
file_out_var = open("test_data/variations.txt", 'w')
global output_variations
output_variations=[0,"",file_out_var]
if detail:
file_out_detail = open("azucena_detail_chr10.gff", 'w')
file_out_detail = open("test_data/genome2_detail.gff", 'w')
global output_detail_gff
output_detail_gff=[0,"",file_out_detail]
if once:
......@@ -160,34 +160,55 @@ def print_variations(first_seg,last_seg,feat,paths,seg_seq):
# if both segments are absent in the other genome, its a substitution
variation.last_seg_in_target=list_segfeat_azu[j][1:]
if (variation.type=='insertion') | (variation.type=='deletion'): # print the current variation before treating the substitution
print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
print_current_var(variation,feat_start,list_segfeat_azu,feat,i)
print(list_segfeat_nb[i],list_segfeat_azu[j],variation.type,"\n")
reset_var(variation)
var_count+=1
if variation.type=='substitution':
continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j)
continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j,0)
else: # initiate substitution
init_new_var(variation,"substitution",list_segfeat_nb,list_segfeat_azu,i,j,seg_seq,feature)
i+=1;j+=1
else: # azu segment not in nb, but nb segment in azu : insertion
if (variation.type=='substitution') | (variation.type=='deletion'): # print the current variation before treating the insertion
print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
else: # azu segment not in nb, but nb segment in azu : insertion or continue substitution
if variation.type=='deletion': # print the current variation before treating the insertion
print_current_var(variation,feat_start,list_segfeat_azu,feat,i)
print(list_segfeat_nb[i],list_segfeat_azu[j],variation.type,"\n")
reset_var(variation)
var_count+=1
variation.last_seg_in_target=list_segfeat_azu[j][1:]
if variation.type=='insertion':
continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j)
continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j,0)
elif variation.type=="substitution":
while list_segfeat_azu[j]!=list_segfeat_nb[i]:
continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j,2)
j+=1
print_current_var(variation,feat_start,list_segfeat_azu,feat,i)
print(list_segfeat_nb[i],list_segfeat_azu[j],variation.type,"\n")
reset_var(variation)
var_count+=1
j-=1
else: # intiate insertion
init_new_var(variation,"insertion",list_segfeat_nb,list_segfeat_azu,i,j,seg_seq,feature)
j+=1
elif list_segfeat_nb[i] not in list_segfeat_azu: # nb segment not in azu, but azu segment in nb : deletion
if (variation.type=='substitution') | (variation.type=='insertion'): # print the current variation before treating the deletion
print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
elif list_segfeat_nb[i] not in list_segfeat_azu: # nb segment not in azu, but azu segment in nb : deletion or continue substitution
if variation.type=='insertion': # print the current variation before treating the deletion
print_current_var(variation,feat_start,list_segfeat_azu,feat,i)
print(list_segfeat_nb[i],list_segfeat_azu[j],variation.type,"\n")
reset_var(variation)
var_count+=1
if variation.type=='deletion':
continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j)
continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j,0)
elif variation.type=="substitution":
while list_segfeat_azu[j]!=list_segfeat_nb[i]:
continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j,1)
i+=1
print_current_var(variation,feat_start,list_segfeat_azu,feat,i)
print(list_segfeat_nb[i],list_segfeat_azu[j],variation.type,"\n")
reset_var(variation)
var_count+=1
i-=1
else: # intiate deletion
init_new_var(variation,"deletion",list_segfeat_nb,list_segfeat_azu,i,j,seg_seq,feature)
i+=1
......@@ -199,19 +220,22 @@ def print_variations(first_seg,last_seg,feat,paths,seg_seq):
else: # segment present in both, no variation. print the running indel if there is one
if variation.type!='': # print the current variation if there is one
print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
print_current_var(variation,feat_start,list_segfeat_azu,feat,i)
print(list_segfeat_nb[i],list_segfeat_azu[j],variation.type,"\n")
var_count+=1
reset_var(variation)
variation.last_seg_in_target=list_segfeat_azu[j][1:]
i+=1;j+=1
if (variation.type!=''): # if there was a current variation when we reached the end, print it
print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
print_current_var(variation,feat_start,list_segfeat_azu,feat,i)
print(list_segfeat_nb[i],list_segfeat_azu[j-1],variation.type,"\n")
var_count+=1
reset_var(variation)
if i<=len(list_segfeat_nb)-1: # if we didn't reach the length of the segment list for the first genome, the end is missing for the second genome
print_last_deletion(variation,list_segfeat_nb,i,feat_start,feature,seg_seq)
print(list_segfeat_nb[i],"end deletion","\n")
var_count+=1
reset_var(variation)
......@@ -219,7 +243,7 @@ def print_variations(first_seg,last_seg,feat,paths,seg_seq):
print_novar(variation)
def print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j):
def print_current_var(variation,feat_start,list_segfeat_azu,feat,i):
if variation.type=='insertion':
[pos_old,pos_new]=get_old_new_pos_insertion(variation,feat_start,list_segfeat_azu,feat)
line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tinsertion\t-\t{variation.alt}\t{len(variation.alt)}\t{pos_old}\t{pos_new}\n'
......@@ -229,7 +253,7 @@ def print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j):
line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tdeletion\t{variation.ref}\t-\t{len(variation.ref)}\t{pos_old}\t{pos_new}\n'
write_line(line,output_variations,False)
elif variation.type=='substitution':
[pos_old,pos_new]=get_old_new_pos_substitution(feat_start,variation,list_segfeat_azu,feat,j)
[pos_old,pos_new]=get_old_new_pos_substitution(feat_start,variation,list_segfeat_azu,feat)
if len(variation.ref) == len(variation.alt): # if the substituion is between two segment of the same size, print it
size_subs=len(variation.ref)
line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tsubstitution\t{variation.ref}\t{variation.alt}\t{size_subs}\t{pos_old}\t{pos_new}\n'
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment