Skip to content
Snippets Groups Projects
Commit fc06a1b6 authored by nina.marthe_ird.fr's avatar nina.marthe_ird.fr
Browse files

Added the paths of all the copies on the target genome to the Feature objects

parent 1f552e7b
No related branches found
No related tags found
No related merge requests found
......@@ -399,22 +399,20 @@ def add_target_genome_paths(feature_id,target_genome_paths):
[first_seg,last_seg,walk_name]=match
feature_path=[walk_name]
# first check if several copies in the walk
# [first_segs_list,last_segs_list]=detect_gene_copies(list_seg,walk_name)
# copy_number=len(first_segs_list)
# # then get first_seg+last_seg pairs to create the paths
# if copy_number==1:
# feature_path.append(get_feature_path(target_genome_paths[walk_name],first_seg,last_seg,walk_name))
# feature.segments_list_target.append(feature_path)
# else: # several copies.
# # ADAPT TO ALL PAIRS OF SEGS
feature_path.append(get_feature_path(target_genome_paths[walk_name],first_seg,last_seg,walk_name))
feature.segments_list_target.append(feature_path)
# get the first and last segments of all the copies
[first_last_segs_list]=detect_gene_copies(list_seg,walk_name,feature_id)
copy_number=0
for first_seg,last_seg in first_last_segs_list: # get the feature path for all the copies
copy_number+=1
copy_id="copy_"+str(copy_number) # get the copy that corresponds to this pair of first_seg,last_seg
feature_path.append(copy_id)
feature_path.append(get_feature_path(target_genome_paths[walk_name],first_seg,last_seg,walk_name,copy_id,feature_id))
feature.segments_list_target.append(feature_path)
if len(list_first_last_segs)==0: # the latter steps expect this list to not be empty.
feature.segments_list_target.append(['',[]])
def detect_gene_copies(list_seg_source,walk_name):
def detect_gene_copies(list_seg_source,walk_name,feature_id):
# find all copies of all segments from the gene in the target genome (in both orientations)
index=0
......@@ -462,8 +460,9 @@ def detect_gene_copies(list_seg_source,walk_name):
for segment in segments_on_target_genome[new_seg_id][walk_name]:
if segment[1]==seg_start:
copy_id="copy_"+str(copy_number)
segment.append(copy_id)
seg.append(copy_id)
feat_copy=(feature_id,copy_id)
segment.append(feat_copy)
break
else: # end of the copy
copy_number+=1
last_segs_list.append(old_seg_id)
......@@ -471,15 +470,17 @@ def detect_gene_copies(list_seg_source,walk_name):
for segment in segments_on_target_genome[new_seg_id][walk_name]:
if segment[1]==seg_start:
copy_id="copy_"+str(copy_number)
segment.append(copy_id)
seg.append(copy_id)
feat_copy=(feature_id,copy_id)
segment.append(feat_copy)
break
else:
if (old_strand==new_strand) and (old_index<new_index): # if the index increases and the strand stays the same, it is the same gene copy
for segment in segments_on_target_genome[new_seg_id][walk_name]:
if segment[1]==seg_start:
copy_id="copy_"+str(copy_number)
segment.append(copy_id)
seg.append(copy_id)
feat_copy=(feature_id,copy_id)
segment.append(feat_copy)
break
else: # end of the copy
copy_number+=1
last_segs_list.append(old_seg_id)
......@@ -487,23 +488,42 @@ def detect_gene_copies(list_seg_source,walk_name):
for segment in segments_on_target_genome[new_seg_id][walk_name]:
if segment[1]==seg_start:
copy_id="copy_"+str(copy_number)
segment.append(copy_id)
seg.append(copy_id)
feat_copy=(feature_id,copy_id)
segment.append(feat_copy)
break
# if the strand changes, it is possible that it is an inversion inside the gene. treat this case later
old_strand=new_strand
old_index=new_index
old_seg_id=new_seg_id
last_segs_list.append(old_seg_id)
return [first_segs_list,last_segs_list] # return two lists, first_segs and last_segs
first_last_segs_list=[]
index=0
for first_seg in first_segs_list:
last_seg=last_segs_list[index]
pair=(first_seg,last_seg)
first_last_segs_list.append(pair)
index+=1
return [first_last_segs_list] # return a list of pairs (first_seg,last_seg)
def sort_seg_info(seg_info):
    """Return the element at index 2 of ``seg_info``.

    The name suggests this is meant to be passed as a ``key=`` callable when
    sorting segment-info records, so that they are ordered by their third
    field -- the call sites are not visible in this part of the file.
    """
    third_field = seg_info[2]
    return third_field
# find the feature's path in target genome walk
def get_feature_path(target_genome_path,first_seg,last_seg,walk_name):
first_seg_index=segments_on_target_genome[first_seg][walk_name][-1][4]
last_seg_index=segments_on_target_genome[last_seg][walk_name][-1][4]
def get_feature_path(target_genome_path,first_seg,last_seg,walk_name,copy_id,feature_id):
# look for first_seg and last_seg that has the right copy_id for this feature
seg_in_walk=segments_on_target_genome[first_seg][walk_name]
for seg_occurence in seg_in_walk:
for feat_seg in seg_occurence[5:]:
if (feat_seg[0]==feature_id) & (feat_seg[1]==copy_id):
first_seg_index=seg_occurence[4] # find first_seg_index
seg_in_walk=segments_on_target_genome[last_seg][walk_name]
for seg_occurence in seg_in_walk:
for feat_seg in seg_occurence[5:]:
if (feat_seg[0]==feature_id) & (feat_seg[1]==copy_id):
last_seg_index=seg_occurence[4] # find last_seg_index
first_index=min(first_seg_index,last_seg_index)
last_index=max(first_seg_index,last_seg_index)
feature_path_target_genome=target_genome_path[first_index:last_index+1]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment