Skip to content
Snippets Groups Projects
Commit 5a555dc2 authored by nina.marthe_ird.fr's avatar nina.marthe_ird.fr
Browse files

removed functions for path indexing and binary search

parent dc97c74a
No related branches found
No related tags found
No related merge requests found
......@@ -253,7 +253,6 @@ def get_segments_sequence_and_paths(gfa,genomes_to_index):
file_gfa.close()
seg_seq={}
paths={}
paths_index={}
for line in lines_gfa:
line=line.split()
if (line[0]=="S"): # get the sequence of the segment
......@@ -267,22 +266,15 @@ def get_segments_sequence_and_paths(gfa,genomes_to_index):
path=line[6].replace(">",";>")
path=path.replace("<",";<").split(';')
list_segments=[]
#list_index=[]
#index=0
for segment in path:
if segment[0:1]=='>':
list_segments.append('>s'+segment[1:])
elif segment[0:1]=='<':
list_segments.append('<s'+segment[1:])
#list_index.append(index)
#index+=1
#zipped_lists=zip(list_segments,list_index)
#sorted_pairs=sorted(zipped_lists,key=key_segment)
#paths_index[line[3]]=list(sorted_pairs)
paths[line[3]]=list_segments
return [paths,paths_index,seg_seq]
return [paths,seg_seq]
def get_first_seg(list_seg): # get the first segment of the list that is in the target genome
first_seg_found=''
......@@ -311,44 +303,8 @@ def compare_strand(list_1,list_2,list_1_unstrand,list_2_unstrand):
same_strand_count+=1
return [seg_common,same_strand_count]
def key_segment(pair):
return int(pair[0][2:])
def path_binary_search(indexed_path,segment): # add the option to use the strand or not. rn the strand is not used. it will complicate the comparisons.
index=-1
if len(indexed_path)==0:
print("empty path")
return index
[found,lower,upper]=[False,0,len(indexed_path)]
check_index=(upper+lower)//2
while found==False:
if upper-lower<=1:
if int(indexed_path[check_index][0][2:])==int(segment[2:]):
index=indexed_path[check_index][1]
break
if int(segment[2:])==int(indexed_path[check_index][0][2:]):
index=indexed_path[check_index][1]
found=True
elif int(segment[2:])>int(indexed_path[check_index][0][2:]):
lower=check_index+1
check_index=(upper+lower)//2
else:
upper=check_index
check_index=(upper+lower)//2
return index
def get_feature_path_binary(target_genome_path_index,target_genome_path,first_seg,last_seg):
def get_feature_path(target_genome_path,first_seg,last_seg):
# find the path in target genome
first_strand=segments_on_target_genome[first_seg][3]
first_seg_stranded=first_strand+first_seg
last_strand=segments_on_target_genome[last_seg][3]
last_seg_stranded=last_strand+last_seg
#first_seg_index=path_binary_search(target_genome_path_index,first_seg_stranded)
#last_seg_index=path_binary_search(target_genome_path_index,last_seg_stranded)
first_seg_index=segments_on_target_genome[first_seg][4]
last_seg_index=segments_on_target_genome[last_seg][4]
first_index=min(first_seg_index,last_seg_index)
......@@ -422,7 +378,7 @@ class Variation:
#def __str__(self):
# return f"id={self.id}, position on the original genome={self.chr}:{self.start}-{self.stop}, size={self.size}, features={self.features}"
def create_var(feature_id,first_seg,last_seg,target_genome_path,target_genome_path_index):
def create_var(feature_id,first_seg,last_seg,target_genome_path):
feature=Features[feature_id]
start_new_genome=get_feature_start_on_target_genome(first_seg,feature_id)-1
stop_new_genome=get_feature_stop_on_target_genome(last_seg,feature_id)
......@@ -430,7 +386,7 @@ def create_var(feature_id,first_seg,last_seg,target_genome_path,target_genome_pa
size_diff=str(size_new_genome-feature.size)
# get feature paths on the original genome and on the target genome
feature_path_target_genome=get_feature_path_binary(target_genome_path_index,target_genome_path,first_seg,last_seg)
feature_path_target_genome=get_feature_path(target_genome_path,first_seg,last_seg)
feature_path_source_genome=feature.segments_list
[feature_path_source_genome,feature_path_target_genome,inversion]=detect_feature_inversion(feature_path_source_genome,feature_path_target_genome)
variation=Variation(feature_id,feature.type,feature.chr,start_new_genome,stop_new_genome,inversion,size_diff,size_new_genome)
......
......@@ -31,9 +31,8 @@ def transfer_on_target(pos_seg, gfa, out_once, out_var,out_clustal,target_genome
[once,var,stats,clustal]=[True,True,False,True]
genomes_to_index=[target_genome_name]
if var:
[paths,paths_index,seg_seq]=get_segments_sequence_and_paths(gfa,genomes_to_index)
[paths,seg_seq]=get_segments_sequence_and_paths(gfa,genomes_to_index)
target_genome_path=paths[target_genome_name]
target_genome_path_index=''#paths_index[target_genome_name]
file_out_var = open(out_var, 'w')
global output_variations
output_variations=[0,"",file_out_var]
......@@ -84,7 +83,7 @@ def transfer_on_target(pos_seg, gfa, out_once, out_var,out_clustal,target_genome
# outputs the detail of variations of the feature :
if var:
print_variations(first_seg,last_seg,feat,target_genome_path,target_genome_path_index,seg_seq)
print_variations(first_seg,last_seg,feat,target_genome_path,seg_seq)
write_line("",output_variations,True)
if stats==True:
......@@ -111,10 +110,10 @@ def transfer_on_target(pos_seg, gfa, out_once, out_var,out_clustal,target_genome
# functions to get the detail of the variations in the features
def print_variations(first_seg,last_seg,feat,target_genome_path,target_genome_path_index,seg_seq):
def print_variations(first_seg,last_seg,feat,target_genome_path,seg_seq):
if (first_seg!=''): # if the feature is not completly absent # add the else, output absent features
[variation,feature_path_source_genome,feature_path_target_genome]=create_var(feat,first_seg,last_seg,target_genome_path,target_genome_path_index) # removes the strands in the segment lists
[variation,feature_path_source_genome,feature_path_target_genome]=create_var(feat,first_seg,last_seg,target_genome_path) # removes the strands in the segment lists
feature=Features[feat]
feat_start=feature.start
# loop to go through both paths with i and j
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment