Skip to content
Snippets Groups Projects
Commit 3b364c8e authored by nina.marthe_ird.fr's avatar nina.marthe_ird.fr
Browse files

changed how the inversion info is passed

parent 24e078d9
No related branches found
No related tags found
No related merge requests found
......@@ -46,7 +46,7 @@ def create_line_target_gff(first_seg,last_seg,feature_id,size_diff,inversion):
var_count=count_variations(feature_id)
annotation=f'{feature.annot};Size_diff={size_diff};Nb_variants={var_count}'
if inversion=='1':
if inversion:
start=get_feature_start_on_target_genome_inv(last_seg,feature_id)
stop=get_feature_stop_on_target_genome_inv(first_seg,feature_id)
strand=invert_strand(strand)
......@@ -344,22 +344,6 @@ def get_first_seg(list_seg): # get the first segment of the list that is in the
# functions to get the detail of the variations in the features
def compare_strand(list_1,list_2,list_1_unstrand,list_2_unstrand):
    """Find the segments shared by two segment lists and count those with the same strand.

    list_1_unstrand and list_2_unstrand are used to locate a segment; the
    position found there is then used to read the stranded entry at the same
    position in list_1 / list_2, so each stranded list must be at least as
    long as the positions looked up in its unstranded counterpart.
    Segments must be hashable (they are used as dict keys).

    Returns a two-element list [seg_common, same_strand_count]:
    - seg_common: the elements of list_1_unstrand that also appear in
      list_2_unstrand, in list_1_unstrand order (repeated elements are kept).
    - same_strand_count: how many elements of seg_common have equal stranded
      entries in list_1 and list_2 (compared at the first occurrence of the
      segment in each unstranded list).
    """
    # remember the first position of every segment, i.e. the position list.index()
    # would return, so that each lookup below is a dict access instead of a list scan
    first_index_1={}
    for index,segment in enumerate(list_1_unstrand):
        first_index_1.setdefault(segment,index)
    first_index_2={}
    for index,segment in enumerate(list_2_unstrand):
        first_index_2.setdefault(segment,index)

    # get the list of segments in common
    seg_common=[segment for segment in list_1_unstrand if segment in first_index_2]

    # for each segment in common, check if the strand is the same:
    # the index in the unstranded list gives the entry to read in the stranded list
    same_strand_count=0
    for segment in seg_common:
        if list_1[first_index_1[segment]]==list_2[first_index_2[segment]]:
            same_strand_count+=1
    return [seg_common,same_strand_count]
# find on what target path the segments of the feature are (ie what chromosome/contig)
def find_feature_target_path(first_seg,last_seg,target_genome_paths):
feature="not_found"
......@@ -407,7 +391,7 @@ def count_variations(feature_id):
if len(target_list)!=0:
source_list=feature.segments_list_source
inversion=detect_feature_inversion(source_list,target_list)
if inversion=="1":
if inversion:
target_list=invert_segment_list(target_list)
target_dict=dict.fromkeys(target_list,"")
source_dict=dict.fromkeys(source_list,"") # convert list into dict to search segments in dict quicker.
......@@ -495,7 +479,7 @@ def create_var(feature_id,first_seg,last_seg):
feature_path_source_genome=feature.segments_list_source
inversion=detect_feature_inversion(feature_path_source_genome,feature_path_target_genome)
if inversion=="1":
if inversion:
feature_path_target_genome=invert_segment_list(feature_path_target_genome)
start_new_genome=get_feature_start_on_target_genome_inv(last_seg,feature_id)
stop_new_genome=get_feature_stop_on_target_genome_inv(first_seg,feature_id)
......@@ -525,7 +509,7 @@ def get_old_new_pos_substitution(feat_start,variation,feature_path_target_genome
start_feat_seg=feature_path_target_genome[0]
var_start_seg=variation.start_on_target
if variation.inversion=='1':
if variation.inversion:
start_feat_seg=invert_seg(start_feat_seg)
var_start_seg=invert_seg(var_start_seg)
end_var=segments_on_target_genome[var_start_seg][2]
......@@ -543,7 +527,7 @@ def get_old_new_pos_insertion(variation,feat_start,feature_path_target_genome,fe
start_feat_seg=feature_path_target_genome[0]
start_var_seg=variation.start_var
if variation.inversion=='1':
if variation.inversion:
start_feat_seg=invert_seg(start_feat_seg)
start_var_seg=invert_seg(start_var_seg)
end_var=segments_on_target_genome[start_var_seg][2]+len(variation.alt) # start_var_seg is the segment AFTER the insertion
......@@ -571,7 +555,7 @@ def get_old_new_pos_deletion(variation,feat_start,feature_path_target_genome,fea
else:
start_feat_seg=feature_path_target_genome[0]
start_var_seg=variation.last_seg_in_target
if variation.inversion=='1':
if variation.inversion:
start_feat_seg=invert_seg(start_feat_seg)
start_var_seg=invert_seg(start_var_seg)
start_var=segments_on_target_genome[start_var_seg][1]-1
......@@ -637,6 +621,22 @@ def get_common_segments(list1,list2):
list_output.append(elem)
return list_output
def compare_strand(list_1,list_2,list_1_unstrand,list_2_unstrand):
    """Find the segments shared by two segment lists and count those with the same strand.

    list_1_unstrand and list_2_unstrand are used to locate a segment; the
    position found there is then used to read the stranded entry at the same
    position in list_1 / list_2, so each stranded list must be at least as
    long as the positions looked up in its unstranded counterpart.
    Segments must be hashable (they are used as dict keys).

    Returns a two-element list [seg_common, same_strand_count]:
    - seg_common: the elements of list_1_unstrand that also appear in
      list_2_unstrand, in list_1_unstrand order (repeated elements are kept).
    - same_strand_count: how many elements of seg_common have equal stranded
      entries in list_1 and list_2 (compared at the first occurrence of the
      segment in each unstranded list).
    """
    # remember the first position of every segment, i.e. the position list.index()
    # would return, so that each lookup below is a dict access instead of a list scan
    first_index_1={}
    for index,segment in enumerate(list_1_unstrand):
        first_index_1.setdefault(segment,index)
    first_index_2={}
    for index,segment in enumerate(list_2_unstrand):
        first_index_2.setdefault(segment,index)

    # get the list of segments in common
    seg_common=[segment for segment in list_1_unstrand if segment in first_index_2]

    # for each segment in common, check if the strand is the same:
    # the index in the unstranded list gives the entry to read in the stranded list
    same_strand_count=0
    for segment in seg_common:
        if list_1[first_index_1[segment]]==list_2[first_index_2[segment]]:
            same_strand_count+=1
    return [seg_common,same_strand_count]
def detect_segment_order_inversion(list_1,list_2):
if (len(list_1)==1) | (len(list_2)==1):
return False
......@@ -664,6 +664,8 @@ def detect_orient_inversion(list_1,list_2):
# takes two lists of segments for two genes, check if the first list is an inversion of the second one (if the segments in common are on the opposite strand)
def detect_feature_inversion(list_1,list_2):
# target_dict=dict.fromkeys(target_list,"")
# check if we have an inversion of the orientation of the segments
[strand_inversion,list_1_unstrand,list_2_unstrand]=detect_orient_inversion(list_1,list_2)
......@@ -672,12 +674,9 @@ def detect_feature_inversion(list_1,list_2):
# if we have both inversions, the gene is in an inverted region. reverse the second list for the comparison.
if segment_order_inversion & strand_inversion:
inversion=1
list_2=invert_segment_list(list_2)
return True
else :
inversion=0
return str(inversion)
return False
def invert_segment_list(seg_list):
list_inverted=list()
......
......@@ -151,7 +151,7 @@ def print_alignment(first_seg,feat,seg_seq):
feature_path_target_genome=feature.segments_list_target
feature_path_source_genome=feature.segments_list_source
inversion=detect_feature_inversion(feature_path_source_genome,feature_path_target_genome)
if inversion=="1":
if inversion:
feature_path_target_genome=invert_segment_list(feature_path_target_genome)
line_aln=create_line_aln(feature_path_source_genome,feature_path_target_genome,seg_seq,feat)
......@@ -266,27 +266,27 @@ def print_current_var(variation,feat_start,feature_path_target_genome,feat):
warning=''
if variation.type=='insertion':
[pos_old,pos_new]=get_old_new_pos_insertion(variation,feat_start,feature_path_target_genome,feat)
line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tinsertion\t-\t{variation.alt}\t{len(variation.alt)}\t{pos_old}\t{pos_new}{warning}\n'
line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{print_inversion(variation.inversion)}\t{variation.size_diff}\tinsertion\t-\t{variation.alt}\t{len(variation.alt)}\t{pos_old}\t{pos_new}{warning}\n'
write_line(line,output_variations,False)
elif variation.type=='deletion':
[pos_old,pos_new]=get_old_new_pos_deletion(variation,feat_start,feature_path_target_genome,feat)
line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tdeletion\t{variation.ref}\t-\t{len(variation.ref)}\t{pos_old}\t{pos_new}{warning}\n'
line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{print_inversion(variation.inversion)}\t{variation.size_diff}\tdeletion\t{variation.ref}\t-\t{len(variation.ref)}\t{pos_old}\t{pos_new}{warning}\n'
write_line(line,output_variations,False)
elif variation.type=='substitution':
warning=detect_small_inversion(variation)
[pos_old,pos_new]=get_old_new_pos_substitution(feat_start,variation,feature_path_target_genome,feat)
size_subs=f'{len(variation.ref)}/{len(variation.alt)}'
line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tsubstitution\t{variation.ref}\t{variation.alt}\t{size_subs}\t{pos_old}\t{pos_new}{warning}\n'
line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{print_inversion(variation.inversion)}\t{variation.size_diff}\tsubstitution\t{variation.ref}\t{variation.alt}\t{size_subs}\t{pos_old}\t{pos_new}{warning}\n'
# print the substitutions of different size as deletion+insertion.
#if len(variation.ref) == len(variation.alt): # if the substitution is between two segments of the same size, print it
# size_subs=len(variation.ref)
# line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tsubstitution\t{variation.ref}\t{variation.alt}\t{size_subs}\t{pos_old}\t{pos_new}{warning}\n'
# line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{inversion}\t{variation.size_diff}\tsubstitution\t{variation.ref}\t{variation.alt}\t{size_subs}\t{pos_old}\t{pos_new}{warning}\n'
#else :
# # if the segments of the substitution have a different size, print deletion then insertion at the same position.
# line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tdeletion\t{variation.ref}\t-\t{len(variation.ref)}\t{pos_old}\t{pos_new}{warning}\n'
# line+=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tinsertion\t-\t{variation.alt}\t{len(variation.alt)}\t{pos_old}\t{pos_new}{warning}\n'
# line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{inversion}\t{variation.size_diff}\tdeletion\t{variation.ref}\t-\t{len(variation.ref)}\t{pos_old}\t{pos_new}{warning}\n'
# line+=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{inversion}\t{variation.size_diff}\tinsertion\t-\t{variation.alt}\t{len(variation.alt)}\t{pos_old}\t{pos_new}{warning}\n'
write_line(line,output_variations,False)
def detect_small_inversion(variation):
......@@ -314,13 +314,23 @@ def print_last_deletion(variation,feature_path_source_genome,i,feat_start,featur
length=len(del_sequence)
pos_new=str(int(variation.size_new)+1) # the deletion is at the end of the feature on the new genome
line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tdeletion\t{del_sequence}\t-\t{length}\t{pos_old}\t{pos_new}\n'
if variation.inversion:
inversion='1'
else:
inversion='0'
line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{inversion}\t{variation.size_diff}\tdeletion\t{del_sequence}\t-\t{length}\t{pos_old}\t{pos_new}\n'
write_line(line,output_variations,False)
def print_novar(variation):
    """Write the output line of a feature for which no variation is reported.

    Builds one tab-separated, newline-terminated line from the attributes of
    `variation` (feature_id, feature_type, chr, start_new, stop_new, size_new,
    inversion, size_diff), followed by the literal type 'no_var' and five '-'
    placeholder columns, then hands it to write_line with output_variations.
    The inversion attribute is rendered through print_inversion rather than
    interpolated directly.
    """
    line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{print_inversion(variation.inversion)}\t{variation.size_diff}\tno_var\t-\t-\t-\t-\t-\n'
    # NOTE(review): the meaning of the third write_line argument (False) is not visible here - confirm against write_line
    write_line(line,output_variations,False)
def print_inversion(bool):
    """Return the text form of an inversion flag.

    Gives '1' when the argument compares equal to True and '0' for any other
    value (a truthy value that is not equal to True also gives '0').
    """
    return '1' if bool==True else '0'
# not used.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment