Skip to content
Snippets Groups Projects
Commit 356c8ff6 authored by NMarthe's avatar NMarthe
Browse files

corrigé (en partie) la détection des inversions

parent 1d4bba0d
No related branches found
No related tags found
No related merge requests found
......@@ -33,7 +33,7 @@ class Feature:
self.annot=annot
self.childs=childs # liste of child features (exons, cds, etc)
self.parent=parent
self.segments_list=seg # list of oriented segments on which the feature is (-s1/+s1, depending on the strand)
self.segments_list=seg # list of oriented segments on which the feature is (>s1/<s1, depending on the path of the gene in the graph)
self.note=""
......
......@@ -166,20 +166,12 @@ def stats_features(feature_missing_segments):
# functions to generate the different gffs
def get_segments_positions_on_genome(pos_seg):
#global segments_on_target_genome
#segments_on_target_genome={}
bed=open(pos_seg,'r')
lines=bed.readlines() # read line by line ?
bed.close()
for line in lines:
line=line.split()
[seg,chrom,start,stop]=[line[3][1:],line[0],line[1],line[2]]
if line[3][0:1]=='>':
strand='+'
elif line[3][0:1]=='<':
strand='-'
else:
strand=''
[seg,chrom,start,stop,strand]=[line[3][1:],line[0],line[1],line[2],line[3][0:1]]
segments_on_target_genome[seg]=[chrom,start,stop,strand]
def get_segments_sequence_and_paths(gfa):
......@@ -251,16 +243,30 @@ def compare_strand(list_1,list_2,list_1_unstrand,list_2_unstrand):
def get_feature_path(paths,first_seg,last_seg):
# find the path in azucena.
first_strand=segments_on_target_genome[first_seg][3]
first_strand=convert_strand(segments_on_target_genome[first_seg][3])
first_seg_stranded=first_strand+first_seg
last_strand=segments_on_target_genome[last_seg][3]
last_strand=convert_strand(segments_on_target_genome[last_seg][3])
last_seg_stranded=last_strand+last_seg
id_first_seg=int(paths["CM020642.1_Azucena_chromosome10"].index(first_seg_stranded))
id_last_seg=int(paths["CM020642.1_Azucena_chromosome10"].index(last_seg_stranded))
first_index=min(id_first_seg,id_last_seg)
last_index=max(id_last_seg,id_first_seg)
index_first_seg=int(paths["CM020642.1_Azucena_chromosome10"].index(first_seg_stranded))
index_last_seg=int(paths["CM020642.1_Azucena_chromosome10"].index(last_seg_stranded))
first_index=min(index_first_seg,index_last_seg)
last_index=max(index_last_seg,index_first_seg)
list_segfeat_azu=paths["CM020642.1_Azucena_chromosome10"][first_index:last_index+1]
return list_segfeat_azu
list_segfeat_azu_corrected=[convert_strand(segment_stranded[0])+segment_stranded[1:] for segment_stranded in list_segfeat_azu]
return list_segfeat_azu_corrected
def convert_strand(strand):
match strand:
case "+":
return ">"
case "-":
return "<"
case ">":
return "+"
case "<":
return "-"
case default:
return ""
def get_sequence_list_seg(list_segfeat_nb,i,feature,seg_seq):
del_sequence=""
......@@ -299,7 +305,6 @@ def create_var(feature_id,first_seg,last_seg,paths):
# get feature paths on the original genome and on the target genome
list_segfeat_azu=get_feature_path(paths,first_seg,last_seg)
list_segfeat_nb=feature.segments_list
[list_segfeat_nb,list_segfeat_azu,inversion]=detect_inversion(list_segfeat_nb,list_segfeat_azu)
variation=Variation(feature_id,feature.type,feature.chr,start_new_genome,stop_new_genome,inversion,size_diff)
......@@ -382,26 +387,23 @@ def get_common_segments(list1,list2):
return list_output
def detect_segment_order_inversion(list_1,list_2):
if (len(list_1)==1) | (len(list_2)==1):
return [False,list_1,list_2]
cpt=0
i=0
list_1_common=get_common_segments(list_1,list_2)
list_2_common=get_common_segments(list_2,list_1)
list_2_common_reversed=list(reversed(list_2_common))
while i<len(list_1_common):
#if list_2_common[i]!=list_1_common[i]: pour l'option 1 qui semble fonctionner ??
# cpt+=1
#i+=1
if list_2_common_reversed[i]==list_1_common[i]: # pour l'option 3
if list_2_common_reversed[i]==list_1_common[i]:
cpt+=1
i+=1
#if cpt>len(list_1_common)/2: # if more than half of the common segments are not at the same position ?? semble fonctionner
#if list_1_common==reversed(list_2_common): fonctionne pas
if cpt>len(list_1_common)*0.9:# if more than 90% of the segments are on the same position when the lists are reversed, there is an inversion.
#print("ee")
return [True,list_1,list(reversed(list_2))]
else:
return [False,list_1,list_2]
# takes two lists of segments for two genes, check if the first list is an inversion of the second one (if the segments in common are on the opposite strand)
def detect_inversion(list_1,list_2):
# removes strand for the lists of stranded segments
......
......@@ -153,6 +153,8 @@ def print_variations(first_seg,last_seg,feat,paths,seg_seq):
# detect and print variations ignoring the strands
while (i<len(list_segfeat_nb)) & (j<len(list_segfeat_azu)):
if list_segfeat_nb[i] != list_segfeat_azu[j]: # if there is a difference between the two paths
# if the difference is only a matter of strand, print the current variation, then init the variation "inversion".
# continue the inversion until no difference or the difference is not a matter of strand, then print it and start the new variation.
if list_segfeat_azu[j] not in list_segfeat_nb: # if the segment in azu is absent in nb
if list_segfeat_nb[i] not in list_segfeat_azu: # if the segment in nb is absent is azu
......
......@@ -247,6 +247,6 @@ def get_segments_length(gfa):
segment_encountered=True
seg_id='s'+line[1]
seg_len[seg_id]=len(line[2])
elif segment_encountered==True:
elif segment_encountered==True: # so we don't have to go through all the L lines. once we found one S, if we find something else than S it stops
return seg_len
return seg_len
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment