Skip to content
Snippets Groups Projects
Commit 1f552e7b authored by nina.marthe_ird.fr's avatar nina.marthe_ird.fr
Browse files

added a function that finds all the copies of a gene in a given walk

parent 688f0324
No related branches found
No related tags found
No related merge requests found
......@@ -398,14 +398,110 @@ def add_target_genome_paths(feature_id,target_genome_paths):
for match in list_first_last_segs:
[first_seg,last_seg,walk_name]=match
feature_path=[walk_name]
# first check if several copies in the walk
# [first_segs_list,last_segs_list]=detect_gene_copies(list_seg,walk_name)
# copy_number=len(first_segs_list)
# # then get first_seg+last_seg pairs to create the paths
# if copy_number==1:
# feature_path.append(get_feature_path(target_genome_paths[walk_name],first_seg,last_seg,walk_name))
# feature.segments_list_target.append(feature_path)
# else: # several copies.
# # ADAPT TO ALL PAIRS OF SEGS
feature_path.append(get_feature_path(target_genome_paths[walk_name],first_seg,last_seg,walk_name))
feature.segments_list_target.append(feature_path)
if len(list_first_last_segs)==0: # the latter steps expect this list to not be empty.
feature.segments_list_target.append(['',[]])
def detect_gene_copies(list_seg_source,walk_name):
# find all copies of all segments from the gene in the target genome (in both orientations)
index=0
list_seg_target=[] # contains list of info for each seg [seg_id,seg_strand,start_pos,index_on_source_walk]
list_seg_source_unstranded=[]
for seg in list_seg_source:
list_seg_source_unstranded.append([seg[1:],seg[0]]) # seg_id,seg_strand : [s24,>]
seg_inverted=invert_seg(seg)
# look for all the segment copies in the target genome walk, in both orientations
if (seg in segments_on_target_genome) and (walk_name in segments_on_target_genome[seg]):
for copy in segments_on_target_genome[seg][walk_name]:
seg_info=[seg[1:],seg[0],int(copy[1]),index] # [s24,>,584425,4]
list_seg_target.append(seg_info)
if (seg_inverted in segments_on_target_genome) and (walk_name in segments_on_target_genome[seg_inverted]) :
for copy in segments_on_target_genome[seg_inverted][walk_name]:
seg_info=[seg_inverted[1:],seg_inverted[0],int(copy[1]),index]
list_seg_target.append(seg_info)
index+=1
list_seg_target.sort(key=sort_seg_info) # order the list of segments by start position
old_index=list_seg_target[0][3]
old_strand=list_seg_target[0][1]
copy_number=1
first_segs_list=[]
last_segs_list=[]
old_seg_id=list_seg_target[0][1]+list_seg_target[0][0]
first_segs_list.append(old_seg_id)
# adjust old_index for the first iteration of the loop
first_inversion=(old_strand!=list_seg_source_unstranded[old_index][1])
if first_inversion:
old_index+=1
else:
old_index-=1
# find each copy of the gene in the ordered list of segments
for seg in list_seg_target:
new_seg_id=seg[1]+seg[0]
new_index=seg[3] # index in the list_source
new_strand=seg[1]
seg_start=seg[2]
inversion=(seg[1]!=list_seg_source_unstranded[new_index][1]) # inversion if this segment's strand is not the same as in the source walk
if inversion :
if (old_strand==new_strand) and (old_index>new_index): # if the index decreases and the strand stays the same, it is the same gene copy
for segment in segments_on_target_genome[new_seg_id][walk_name]:
if segment[1]==seg_start:
copy_id="copy_"+str(copy_number)
segment.append(copy_id)
seg.append(copy_id)
else: # end of the copy
copy_number+=1
last_segs_list.append(old_seg_id)
first_segs_list.append(new_seg_id)
for segment in segments_on_target_genome[new_seg_id][walk_name]:
if segment[1]==seg_start:
copy_id="copy_"+str(copy_number)
segment.append(copy_id)
seg.append(copy_id)
else:
if (old_strand==new_strand) and (old_index<new_index): # if the index increases and the strand stays the same, it is the same gene copy
for segment in segments_on_target_genome[new_seg_id][walk_name]:
if segment[1]==seg_start:
copy_id="copy_"+str(copy_number)
segment.append(copy_id)
seg.append(copy_id)
else: # end of the copy
copy_number+=1
last_segs_list.append(old_seg_id)
first_segs_list.append(new_seg_id)
for segment in segments_on_target_genome[new_seg_id][walk_name]:
if segment[1]==seg_start:
copy_id="copy_"+str(copy_number)
segment.append(copy_id)
seg.append(copy_id)
# if the strand changes, it is possible that it is an inversion inside the gene. treat this case later
old_strand=new_strand
old_index=new_index
old_seg_id=new_seg_id
last_segs_list.append(old_seg_id)
return [first_segs_list,last_segs_list] # return two lists, first_segs and last_segs
def sort_seg_info(seg_info):
return seg_info[2]
# find the feature's path in target genome walk
def get_feature_path(target_genome_path,first_seg,last_seg,walk_name):
# if there is several occurences of first_seg and last_seg ???
first_seg_index=segments_on_target_genome[first_seg][walk_name][-1][4]
last_seg_index=segments_on_target_genome[last_seg][walk_name][-1][4]
first_index=min(first_seg_index,last_seg_index)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment