Skip to content
Snippets Groups Projects
Commit 66d5db2d authored by nina.marthe_ird.fr's avatar nina.marthe_ird.fr
Browse files

changed the detection of the target genome path in which each feature is

parent 2259c63a
No related branches found
No related tags found
No related merge requests found
......@@ -243,10 +243,11 @@ def get_segments_positions_on_genome(pos_seg):
seg_count=0
for line in lines:
line=line.split()
file_name='.'.join(pos_seg.split('/')[-1].split('.')[0:-1])
[seg,chrom,start,stop,strand,index]=[line[3][1:],line[0],int(line[1])+1,int(line[2]),line[3][0:1],seg_count] # +1 in the start to convert the bed 0-based coordinate to a 1-based system
# if seg in segments_on_target_genome:
# print("seg already present on target genome")
segments_on_target_genome[seg]=[chrom,start,stop,strand,index]
segments_on_target_genome[seg]=[chrom,start,stop,strand,index,file_name]
seg_count+=1
def get_segments_sequence(segments_file,segments_list):
......@@ -283,7 +284,7 @@ def get_first_seg(list_seg): # get the first segment of the list that is in the
first_seg_found=''
for segment in list_seg:
if segment[1:] in segments_on_target_genome:
first_seg_found=segment[1:]
first_seg_found=segment
break
return first_seg_found
......@@ -306,29 +307,34 @@ def compare_strand(list_1,list_2,list_1_unstrand,list_2_unstrand):
same_strand_count+=1
return [seg_common,same_strand_count]
def get_chr_path(feature_id,target_genome_paths):
feat_chr_number=Features[feature_id].chr[3:]
for path_name in target_genome_paths.keys():
if feat_chr_number in path_name[-3:]:
return target_genome_paths[path_name]
return False
def find_feature_target_path(first_seg,last_seg,target_genome_paths):
feature="not_found"
path_first_seg=segments_on_target_genome[first_seg[1:]][5]
path_last_seg=segments_on_target_genome[last_seg[1:]][5]
if path_first_seg==path_last_seg:
for path in target_genome_paths:
if path in path_first_seg:
return path
return feature
def add_target_genome_path(feature_id,target_genome_paths):
target_genome_path=get_chr_path(feature_id,target_genome_paths)
if target_genome_path!=False:
feature=Features[feature_id]
list_seg=feature.segments_list_source
first_seg=get_first_seg(list_seg)
last_seg=get_first_seg(reversed(list_seg))
feature_path=[]
if first_seg!='':
feature_path=get_feature_path(target_genome_path,first_seg,last_seg)
feature.segments_list_target=feature_path
feature=Features[feature_id]
list_seg=feature.segments_list_source
first_seg=get_first_seg(list_seg)
last_seg=get_first_seg(reversed(list_seg))
feature_path=[]
if first_seg!='':
path=find_feature_target_path(first_seg,last_seg,target_genome_paths)
if path=="frag":
print(f'feature {feature_id} fragmented')
elif path!="not_found":
feature_path=get_feature_path(target_genome_paths[path],first_seg,last_seg)
feature.segments_list_target=feature_path
def get_feature_path(target_genome_path,first_seg,last_seg):
# find the path in target genome
first_seg_index=segments_on_target_genome[first_seg][4]
last_seg_index=segments_on_target_genome[last_seg][4]
first_seg_index=segments_on_target_genome[first_seg[1:]][4]
last_seg_index=segments_on_target_genome[last_seg[1:]][4]
first_index=min(first_seg_index,last_seg_index)
last_index=max(first_seg_index,last_seg_index)
feature_path_target_genome=target_genome_path[first_index:last_index+1]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment