Skip to content
Snippets Groups Projects
Commit fc25da27 authored by NMarthe's avatar NMarthe
Browse files

supprimé fonctions non utilisées

parent cf864aac
Branches
No related tags found
No related merge requests found
...@@ -273,40 +273,6 @@ def get_sequence_list_seg(list_segfeat_nb,i,feature,seg_seq): ...@@ -273,40 +273,6 @@ def get_sequence_list_seg(list_segfeat_nb,i,feature,seg_seq):
del_sequence+=seg_seq[list_segfeat_nb[k]] del_sequence+=seg_seq[list_segfeat_nb[k]]
return del_sequence return del_sequence
def get_old_new_pos_substitution(feat_start,list_segfeat_nb,list_segfeat_azu,feat,i,j):
    """Return ``[pos_old, pos_new]`` for a substitution.

    Args:
        feat_start: start of the feature; converted with ``int()``.
        list_segfeat_nb: segment ids of the feature path indexed by ``i``.
        list_segfeat_azu: segment ids of the feature path indexed by ``j``;
            its first element is used to locate the feature start.
        feat: feature id, forwarded to ``get_feature_start_on_genome``.
        i: index of the substituted segment in ``list_segfeat_nb``.
        j: index of the substituting segment in ``list_segfeat_azu``.

    Returns:
        A two-element list: ``pos_old`` as an ``int`` and ``pos_new`` as a
        ``str``.
    """
    # Offset of the substituted segment's start from the feature start, plus one.
    substituted_segment = Segments[list_segfeat_nb[i]]
    pos_old = int(substituted_segment.start) - int(feat_start) + 1

    feature_start = get_feature_start_on_genome(list_segfeat_azu[0], feat)

    # The variation begins right after field 2 of the segment preceding
    # list_segfeat_azu[j] (presumably its stop coordinate -- confirm against
    # the layout of segments_on_target_genome).
    preceding_segment = segments_on_target_genome[list_segfeat_azu[j - 1]]
    variation_start = int(preceding_segment[2]) + 1

    return [pos_old, str(variation_start - feature_start + 1)]
def get_old_new_pos_insertion(last_start,feat_start,list_segfeat_azu,feat):
    """Return ``[pos_old, pos_new]`` for an insertion.

    Args:
        last_start: id of the segment recorded when the insertion began; used
            as a key in both ``Segments`` and ``segments_on_target_genome``.
        feat_start: start of the feature; converted with ``int()``.
        list_segfeat_azu: segment ids of the feature path whose first element
            is used to locate the feature start.
        feat: feature id, forwarded to ``get_feature_start_on_genome``.

    Returns:
        A two-element list of strings ``[pos_old, pos_new]``.
    """
    anchor_segment = Segments[last_start]
    pos_old = str(int(anchor_segment.start) - int(feat_start) + 1)

    feature_start = get_feature_start_on_genome(list_segfeat_azu[0], feat)

    # The insertion sits just before field 1 of the anchor segment
    # (presumably its start coordinate -- confirm against the layout of
    # segments_on_target_genome).
    anchor_on_target = segments_on_target_genome[last_start]
    variation_start = int(anchor_on_target[1]) - 1

    return [pos_old, str(variation_start - feature_start + 1)]
def get_old_new_pos_deletion(last_start,feat_start,list_segfeat_azu,feat,last_in_azu,i):
    """Return ``[pos_old, pos_new]`` for a deletion.

    Args:
        last_start: id of the first deleted segment (key into ``Segments``).
        feat_start: start of the feature; converted with ``int()``.
        list_segfeat_azu: segment ids of the feature path whose first element
            is used to locate the feature start.
        feat: feature id (key into ``Features``).
        last_in_azu: id of the last segment already placed, or ``""`` when
            none has been placed yet.
        i: index value supplied by the caller; ``0`` means the deletion starts
            on the first segment of the feature.

    Returns:
        A two-element list: ``pos_old`` as a non-negative ``int`` and
        ``pos_new`` as a ``str``.
    """
    offset = int(Segments[last_start].start) - int(feat_start) + 1
    if i == 0:
        # On the first segment, shift by the position of the feature on it.
        offset += Features[feat].pos_start

    # NOTE(review): the source lost its indentation; the clamp is assumed to
    # apply to both branches above -- confirm against the repository.
    pos_old = max(offset, 0)

    if last_in_azu == "":
        # Deletion of the beginning of the feature: no segment has been
        # placed in the new genome yet.
        return [pos_old, "1"]

    feature_start = get_feature_start_on_genome(list_segfeat_azu[0], feat)
    variation_start = int(segments_on_target_genome[last_in_azu][2]) + 1
    return [pos_old, str(variation_start - feature_start + 1)]
class Variation: class Variation:
def __init__(self,feature_id,feature_type,chr,start_new,stop_new,inversion,size_diff): def __init__(self,feature_id,feature_type,chr,start_new,stop_new,inversion,size_diff):
self.feature_id=feature_id self.feature_id=feature_id
...@@ -349,7 +315,7 @@ def reset_var(variation): ...@@ -349,7 +315,7 @@ def reset_var(variation):
variation.ref='' variation.ref=''
variation.alt='' variation.alt=''
def get_old_new_pos_substitution_2(feat_start,variation,list_segfeat_azu,feat,j): def get_old_new_pos_substitution(feat_start,variation,list_segfeat_azu,feat,j):
#pos_old=int(Segments[list_segfeat_nb[i]].start)-int(feat_start)+1 #pos_old=int(Segments[list_segfeat_nb[i]].start)-int(feat_start)+1
pos_old=str(int(Segments[variation.start_var].start)-int(feat_start)+1) pos_old=str(int(Segments[variation.start_var].start)-int(feat_start)+1)
start_feat=get_feature_start_on_genome(list_segfeat_azu[0],feat) start_feat=get_feature_start_on_genome(list_segfeat_azu[0],feat)
...@@ -357,7 +323,7 @@ def get_old_new_pos_substitution_2(feat_start,variation,list_segfeat_azu,feat,j) ...@@ -357,7 +323,7 @@ def get_old_new_pos_substitution_2(feat_start,variation,list_segfeat_azu,feat,j)
pos_new=str(start_var-start_feat+1) pos_new=str(start_var-start_feat+1)
return [pos_old,pos_new] return [pos_old,pos_new]
def get_old_new_pos_insertion_2(variation,feat_start,list_segfeat_azu,feat): def get_old_new_pos_insertion(variation,feat_start,list_segfeat_azu,feat):
pos_old=str(int(Segments[variation.start_var].start)-int(feat_start)+1) pos_old=str(int(Segments[variation.start_var].start)-int(feat_start)+1)
start_feat=get_feature_start_on_genome(list_segfeat_azu[0],feat) start_feat=get_feature_start_on_genome(list_segfeat_azu[0],feat)
...@@ -365,7 +331,7 @@ def get_old_new_pos_insertion_2(variation,feat_start,list_segfeat_azu,feat): ...@@ -365,7 +331,7 @@ def get_old_new_pos_insertion_2(variation,feat_start,list_segfeat_azu,feat):
pos_new=str(start_var-start_feat+1) pos_new=str(start_var-start_feat+1)
return [pos_old,pos_new] return [pos_old,pos_new]
def get_old_new_pos_deletion_2(variation,feat_start,list_segfeat_azu,feat,i): def get_old_new_pos_deletion(variation,feat_start,list_segfeat_azu,feat,i):
if i==0: if i==0:
pos_old=int(Segments[variation.start_var].start)-int(feat_start)+1+Features[feat].pos_start pos_old=int(Segments[variation.start_var].start)-int(feat_start)+1+Features[feat].pos_start
else: else:
......
...@@ -112,7 +112,7 @@ def genome_gff(pos_seg, gff, out, gfa): ...@@ -112,7 +112,7 @@ def genome_gff(pos_seg, gff, out, gfa):
# outputs the detail of variations of the feature : # outputs the detail of variations of the feature :
if var: if var:
print_variations_2(first_seg,last_seg,feat,paths,seg_seq) print_variations(first_seg,last_seg,feat,paths,seg_seq)
write_line("",output_variations,True) write_line("",output_variations,True)
if stats==True: if stats==True:
...@@ -143,162 +143,6 @@ def genome_gff(pos_seg, gff, out, gfa): ...@@ -143,162 +143,6 @@ def genome_gff(pos_seg, gff, out, gfa):
def print_variations(first_seg,last_seg,feat,paths,seg_seq):
    """Write the variations of one feature to ``output_variations``.

    The feature's own segment list (``Features[feat].segments_list``, called
    "nb" below) is walked in parallel with the path returned by
    ``get_feature_path(paths, first_seg, last_seg)`` (called "azu" below).
    Each difference is written through ``write_line`` as one tab-separated
    line:

        feature id, type, chr, start, stop and size on the new genome,
        inversion, size difference, variation type, ref, alt, length,
        pos_old, pos_new

    Consecutive inserted (or deleted) segments are merged into one running
    indel, which is written when a different kind of event is met. If nothing
    was written, a single ``no_var`` line is emitted.

    Args:
        first_seg: first segment of the feature on the new genome; ``''``
            means the feature is completely absent and nothing is written.
        last_seg: last segment of the feature on the new genome.
        feat: feature id (key into ``Features``).
        paths: forwarded to ``get_feature_path``.
        seg_seq: mapping from segment id to its sequence.

    Returns:
        None.
    """
    if first_seg == '':
        # TODO: output completely absent features as well.
        return

    feature = Features[feat]
    feat_start = feature.start

    # Lengths of the feature on the original genome and on the new one.
    start_new_genome = get_feature_start_on_genome(first_seg, feat)
    stop_new_genome = get_feature_stop_on_genome(last_seg, feat)
    size_new_genome = int(stop_new_genome) - int(start_new_genome) + 1
    size_diff = str(size_new_genome - feature.size)

    # Feature paths on both genomes; detect_inversion also removes strands.
    list_segfeat_azu = get_feature_path(paths, first_seg, last_seg)
    list_segfeat_nb = feature.segments_list
    [list_segfeat_nb, list_segfeat_azu, inversion] = detect_inversion(list_segfeat_nb, list_segfeat_azu)

    # The first eight columns are identical for every line of this feature.
    prefix = "\t".join([
        feat,
        feature.type,
        feature.chr,
        str(start_new_genome),
        str(stop_new_genome),
        str(size_new_genome),
        str(inversion),
        size_diff,
    ])

    var = 0  # number of variations written, to detect the no_var case
    i = 0    # index in list_segfeat_nb
    j = 0    # index in list_segfeat_azu
    last = ''         # kind of the running indel: '', 'insertion' or 'deletion'
    last_seq = ''     # sequence accumulated for the running indel
    last_start = ''   # nb segment at which the running indel started
    last_in_azu = ''  # last azu segment seen so far

    def format_line(var_type, ref, alt, length, pos_old, pos_new):
        """Build one output line from the shared prefix and variation columns."""
        return "\t".join([prefix, var_type, ref, alt, str(length), str(pos_old), pos_new]) + "\n"

    def flush_pending_indel():
        """Write the running indel, if any, and return how many lines were written."""
        if last == 'insertion':
            [pos_old, pos_new] = get_old_new_pos_insertion(last_start, feat_start, list_segfeat_azu, feat)
            line = format_line("insertion", "-", last_seq, len(last_seq), pos_old, pos_new)
        elif last == 'deletion':
            [pos_old, pos_new] = get_old_new_pos_deletion(last_start, feat_start, list_segfeat_azu, feat, last_in_azu, i)
            line = format_line("deletion", last_seq, "-", len(last_seq), pos_old, pos_new)
        else:
            return 0
        write_line(line, output_variations, False)
        return 1

    # Detect and print variations, ignoring the strands.
    while i < len(list_segfeat_nb) and j < len(list_segfeat_azu):
        seg_nb = list_segfeat_nb[i]
        seg_azu = list_segfeat_azu[j]

        if seg_nb == seg_azu:
            # Segment present in both paths: print the running indel, if any.
            var += flush_pending_indel()
            last_in_azu = seg_azu
            last = ''
            last_start = ''
            last_seq = ''
            i += 1
            j += 1

        elif seg_azu not in list_segfeat_nb and seg_nb not in list_segfeat_azu:
            # Both segments are absent from the other path: substitution.
            # last_in_azu is updated before flushing, as in the original code.
            last_in_azu = seg_azu
            var += flush_pending_indel()
            last = ''
            last_seq = ''
            last_start = ''

            # TODO: several substitutions in a row are printed one by one.
            [pos_old, pos_new] = get_old_new_pos_substitution(feat_start, list_segfeat_nb, list_segfeat_azu, feat, i, j)
            ref = seg_seq[seg_nb]
            alt = seg_seq[seg_azu]
            if len(ref) == len(alt):
                line = format_line("substitution", ref, alt, len(ref), pos_old, pos_new)
            else:
                # Different sizes: print a deletion then an insertion at the
                # same position, in a single write_line call.
                line = format_line("deletion", ref, "-", len(ref), pos_old, pos_new)
                line += format_line("insertion", "-", alt, len(alt), pos_old, pos_new)
                var += 1
            write_line(line, output_variations, False)
            var += 1
            i += 1
            j += 1

        elif seg_azu not in list_segfeat_nb:
            # azu segment not in nb, but nb segment in azu: insertion.
            if last == 'deletion':
                var += flush_pending_indel()
                last = ''
                last_start = ''
                last_seq = ''
            last_in_azu = seg_azu
            if last == 'insertion':
                last_seq = last_seq + seg_seq[seg_azu]
            else:
                last = 'insertion'
                last_seq = seg_seq[seg_azu]
                last_start = seg_nb
            j += 1

        elif seg_nb not in list_segfeat_azu:
            # nb segment not in azu, but azu segment in nb: deletion.
            if last == 'insertion':
                var += flush_pending_indel()
                last = ''
                last_start = ''
                last_seq = ''
            if last == 'deletion':
                last_seq = last_seq + seg_seq[seg_nb]
            else:
                last = 'deletion'
                last_start = seg_nb
                if i == 0:
                    # A deletion at the start of the feature does not begin at
                    # the start of the first segment: use pos_start, the
                    # position of the feature on its first segment.
                    last_seq = seg_seq[seg_nb][feature.pos_start-1:]
                else:
                    last_seq = seg_seq[seg_nb]
            i += 1

        else:
            # Both segments exist in the other path but at another position.
            write_line("weird order change\n", output_variations, False)
            var += 1
            i += 1
            j += 1

    # Finish printing the current indel. The length column is now
    # len(last_seq) for insertions too; it used to be a counter that was
    # never incremented, so trailing insertions were reported with length 0.
    var += flush_pending_indel()

    # See if the end is missing for one of the two genomes.
    # NOTE(review): if the loop stopped because list_segfeat_nb was exhausted
    # while more than one azu segment remains, list_segfeat_nb[i] below raises
    # IndexError. Left unchanged because the intended output is not clear.
    if not (i >= len(list_segfeat_nb)-1 and j >= len(list_segfeat_azu)-1):
        pos_old = int(Segments[list_segfeat_nb[i]].start) - int(feat_start) + 1
        del_sequence = get_sequence_list_seg(list_segfeat_nb, i, feature, seg_seq)
        # The deletion is at the end of the feature on the new genome.
        pos_new = str(size_new_genome + 1)
        line = format_line("deletion", del_sequence, "-", len(del_sequence), pos_old, pos_new)
        write_line(line, output_variations, False)
        var += 1

    if var == 0:
        write_line(format_line("no_var", "-", "-", "-", "-", "-"), output_variations, False)
def print_variations_2(first_seg,last_seg,feat,paths,seg_seq):
if (first_seg!=''): # if the feature is not completly absent # add the else, output absent features if (first_seg!=''): # if the feature is not completly absent # add the else, output absent features
[variation,list_segfeat_nb,list_segfeat_azu]=create_var(feat,first_seg,last_seg,paths) # removes the strands in the segment lists [variation,list_segfeat_nb,list_segfeat_azu]=create_var(feat,first_seg,last_seg,paths) # removes the strands in the segment lists
feature=Features[feat] feature=Features[feat]
...@@ -376,15 +220,15 @@ def print_variations_2(first_seg,last_seg,feat,paths,seg_seq): ...@@ -376,15 +220,15 @@ def print_variations_2(first_seg,last_seg,feat,paths,seg_seq):
def print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j): def print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j):
if variation.type=='insertion': if variation.type=='insertion':
[pos_old,pos_new]=get_old_new_pos_insertion_2(variation,feat_start,list_segfeat_azu,feat) [pos_old,pos_new]=get_old_new_pos_insertion(variation,feat_start,list_segfeat_azu,feat)
line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tinsertion\t-\t"+variation.alt+"\t"+str(len(variation.alt))+"\t"+str(pos_old)+"\t"+pos_new+"\n" line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tinsertion\t-\t"+variation.alt+"\t"+str(len(variation.alt))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
write_line(line,output_variations,False) write_line(line,output_variations,False)
elif variation.type=='deletion': elif variation.type=='deletion':
[pos_old,pos_new]=get_old_new_pos_deletion_2(variation,feat_start,list_segfeat_azu,feat,i) [pos_old,pos_new]=get_old_new_pos_deletion(variation,feat_start,list_segfeat_azu,feat,i)
line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tdeletion\t"+variation.ref+"\t-\t"+str(len(variation.ref))+"\t"+str(pos_old)+"\t"+pos_new+"\n" line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tdeletion\t"+variation.ref+"\t-\t"+str(len(variation.ref))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
write_line(line,output_variations,False) write_line(line,output_variations,False)
elif variation.type=='substitution': elif variation.type=='substitution':
[pos_old,pos_new]=get_old_new_pos_substitution_2(feat_start,variation,list_segfeat_azu,feat,j) [pos_old,pos_new]=get_old_new_pos_substitution(feat_start,variation,list_segfeat_azu,feat,j)
if len(variation.ref) == len(variation.alt): # if the substituion is between two segment of the same size, print it if len(variation.ref) == len(variation.alt): # if the substituion is between two segment of the same size, print it
size_subs=len(variation.ref) size_subs=len(variation.ref)
line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tsubstitution\t"+variation.ref+"\t"+variation.alt+"\t"+str(size_subs)+"\t"+str(pos_old)+"\t"+pos_new+"\n" line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tsubstitution\t"+variation.ref+"\t"+variation.alt+"\t"+str(size_subs)+"\t"+str(pos_old)+"\t"+pos_new+"\n"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment