Skip to content
Snippets Groups Projects
Commit 7b975a9c authored by nina.marthe_ird.fr's avatar nina.marthe_ird.fr
Browse files

fixed errors between feature key and feature id; this fixes a bug when a...

fixed errors between feature key and feature id; this fixes a bug when a feature is split on several lines int he source gff
parent 399f408d
No related branches found
No related tags found
No related merge requests found
......@@ -6,15 +6,15 @@ class Segment:
self.feat_info_list=[[chr,int(start),int(stop),strand]] # position on the original genome for the features on this segment
# add a feature to an existing segment
def add_feature(self,line,new_feat_id,new_strand):
if not self.find_feat(new_feat_id)[0]:
def add_feature(self,line,new_feat_key,new_strand):
if not self.find_feat(new_feat_key)[0]: # if feature not in list
feat_info=[line[0],int(line[1])+1,int(line[2]),new_strand] # +1 in the start to convert the bed 0-based coordinate to a 1-based system
if feat_info in self.feat_info_list:
feat_tuple=new_feat_id,self.feat_info_list.index(feat_info)
feat_tuple=new_feat_key,self.feat_info_list.index(feat_info)
self.feature_set.add(feat_tuple)
else:
feat_tuple=(new_feat_id,len(self.feat_info_list))
feat_tuple=(new_feat_key,len(self.feat_info_list))
self.feat_info_list.append(feat_info)
self.feature_set.add(feat_tuple)
......@@ -58,8 +58,9 @@ class Segment:
class Feature:
def __init__(self,id,type,chr,start,stop,annot,childs,seg_list,strand,complete,discontinuous):
def __init__(self,id,key,type,chr,start,stop,annot,childs,seg_list,strand,complete,discontinuous):
self.id=id
self.key=key # unique key in the Features dict
self.type=type
# position on the original genome
......
......@@ -71,7 +71,7 @@ def add_childs_paths(feature_id,feature_target_path,walk_name,copy_id,target_gen
# get the path of the child feature
child_path=get_feature_path(target_genome_paths[walk_name],child_first_seg,child_last_seg,walk_name,copy_id,feature_id,segments_on_target_genome)
feat_copy=(child.id,copy_id)
feat_copy=(child.key,copy_id)
feature_path=[walk_name,copy_id]
feature_path.append(child_path)
......
......@@ -62,15 +62,15 @@ def transfer_childs(feature_id,feature_target_path,walk,copy_id,inversion,seg_si
break
if inversion: # calculate the child feature size
child_size_on_new_genome=get_feature_start_on_target_genome_inv(child_last_seg,child.id,walk,copy_id,segments_on_target_genome)-get_feature_stop_on_target_genome_inv(child_first_seg,child.id,walk,copy_id,segments_on_target_genome)+1
child_size_on_new_genome=get_feature_start_on_target_genome_inv(child_last_seg,child.key,walk,copy_id,segments_on_target_genome)-get_feature_stop_on_target_genome_inv(child_first_seg,child.key,walk,copy_id,segments_on_target_genome)+1
else:
child_size_on_new_genome=get_feature_stop_on_target_genome(child_last_seg,child.id,walk,copy_id,segments_on_target_genome)-get_feature_start_on_target_genome(child_first_seg,child.id,walk,copy_id,segments_on_target_genome)+1
child_size_on_new_genome=get_feature_stop_on_target_genome(child_last_seg,child.key,walk,copy_id,segments_on_target_genome)-get_feature_start_on_target_genome(child_first_seg,child.key,walk,copy_id,segments_on_target_genome)+1
child_size_diff=abs(child_size_on_new_genome-child.size)
for match in child.segments_list_target: # look for the right occurence of the child feature
if (match[0]==walk) and (match[1]==copy_id):
seg_list=match[2]
[cov,id]=get_id_cov(child.id,seg_size,seg_list)
[cov,id]=get_id_cov(child.key,seg_size,seg_list)
break
if inversion:
......@@ -79,7 +79,7 @@ def transfer_childs(feature_id,feature_target_path,walk,copy_id,inversion,seg_si
child_last_seg=temp
# transfer the child feature:
line_gff=create_line_target_gff(child_first_seg,child_last_seg,child.id,child_size_diff,inversion,walk,cov,id,copy_id,segments_on_target_genome)
line_gff=create_line_target_gff(child_first_seg,child_last_seg,child.key,child_size_diff,inversion,walk,cov,id,copy_id,segments_on_target_genome)
file_out_gff.write(line_gff)
......
......@@ -79,19 +79,18 @@ def load_intersect(intersect_path,verbose):
strand=line[10]
segment_oriented=line[3]
load_feature_intersect(line,feature_id,segment_oriented)
load_segment_intersect(line,segment_id,feature_id,strand)
feature_key=load_feature_intersect(line,feature_id,segment_oriented)
load_segment_intersect(line,segment_id,feature_key,strand)
# for all the features, add the position of the feature on its first and last segment, and the note.
# cant do it before because for that i need to have all the segments in the list segments_list for each feature.
for feat_id in Features:
add_feature_obj(Features[feat_id])
for feat_id in Features:
feature=Features[feat_id]
for feat_key in Features:
add_feature_obj(Features[feat_key])
for feat_key in Features:
feature=Features[feat_key]
if feature.complete==False:
print('***',feat_id,feature)
#print(feature.id,feat_id)
add_pos(feature)
print('***',feat_key,feature)
add_pos(feat_key)
feature.set_note()
# order the dictionnary
order_dict(Features)
......@@ -100,6 +99,7 @@ def load_intersect(intersect_path,verbose):
def load_feature_intersect(line,feature_id,seg_oriented):
if (feature_id not in Features) or (not Features[feature_id].complete): # if the feature doesn't exist or is not complete, create it or complete it
init_feature(line,feature_id,seg_oriented)
return feature_id
else: # if it exists, add the current segment to the list of segments that have the existing feature
# if the feature existing starts and stops at the same positions as the current feature, add_seg
......@@ -108,15 +108,17 @@ def load_feature_intersect(line,feature_id,seg_oriented):
feature=Features[feature_id]
if (current_start==feature.start) and (current_stop==feature.stop):
feature.add_seg(seg_oriented)
return feature_id
else: # same feature_id, but not same part of the feature -> need to store them in two separate objects
add_segment_to_discontinuous_feature(line,feature,seg_oriented,current_start,current_stop)
feature_key=add_segment_to_discontinuous_feature(line,feature,seg_oriented,current_start,current_stop)
return feature_key
# handle the Segment object creation
def load_segment_intersect(line,segment_id,feat_id,strand):
def load_segment_intersect(line,segment_id,feature_key,strand):
if segment_id not in Segments: # if the segment doesn't exist, create it and add the current feature to its feat list
init_seg(line,segment_id,feat_id,strand)
init_seg(line,segment_id,feature_key,strand)
else: # if it exists, add the current feature to the list of features on the existing segment
Segments[segment_id].add_feature(line,feat_id,strand)
Segments[segment_id].add_feature(line,feature_key,strand)
# create a feature with the first segment its on
def init_feature(line,feature_id,segment_oriented):
......@@ -128,14 +130,14 @@ def init_feature(line,feature_id,segment_oriented):
# add the current segment to the list of segments that have the feature
segments_list=[segment_oriented]
# create the feature, store it in the dict Features
Features[feature_id]=Feature(feature_id,type,chr,start,stop,annot,childs,segments_list,strand,True,False)
Features[feature_id]=Feature(feature_id,feature_id,type,chr,start,stop,annot,childs,segments_list,strand,True,False)
add_child(Features[feature_id].parent,feature_id) # add this feature to its parent's child list
# create a segment with its first feature
def init_seg(line,segment_id,feat_id,strand):
def init_seg(line,segment_id,feature_key,strand):
[chr,start,stop]=[line[0],int(line[1])+1,int(line[2])] # +1 in the start to convert the bed 0-based coordinate to a 1-based system
# create the segment, store it in the Segments dict
Segments[segment_id]=Segment(segment_id,feat_id,chr,start,stop,strand)
Segments[segment_id]=Segment(segment_id,feature_key,chr,start,stop,strand)
# add a child feature to an existing feature
......@@ -144,21 +146,21 @@ def add_child(parent_id,new_child):
if parent_id in Features: # if the feature exists, add new_child
Features[parent_id].childs.append(new_child)
else: # create the feature "empty", only with child
Features[parent_id]=Feature(parent_id,"","",0,4,"",[new_child],[],"",False,False)
Features[parent_id]=Feature(parent_id,parent_id,"","",0,4,"",[new_child],[],"",False,False)
def add_segment_to_discontinuous_feature(line,feature,seg_oriented,current_start,current_stop):
added=False
for other_part_id in feature.other_parts_list:
other_part=Features[other_part_id]
for other_part_key in feature.other_parts_list:
other_part=Features[other_part_key]
if (other_part.start==current_start) and (other_part.stop==current_stop): # look for the current part of the feature to add the segment
other_part.add_seg(seg_oriented)
added=True
break
return other_part_key
if added==False : # if the right part of the feature was not found, create a new part for the feature
new_part_name=feature.id+''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(7))
[chr,start,stop]=[line[4],int(line[7]),int(line[8])]
segments_list=[seg_oriented]
Features[new_part_name]=Feature(feature.id,feature.type,chr,start,stop,feature.annot,feature.childs,segments_list,feature.strand,True,True)
Features[new_part_name]=Feature(feature.id,new_part_name,feature.type,chr,start,stop,feature.annot,feature.childs,segments_list,feature.strand,True,True)
new_part_feature=Features[new_part_name]
new_part_feature.first=False
new_part_feature.first_part=feature.id
......@@ -166,27 +168,28 @@ def add_segment_to_discontinuous_feature(line,feature,seg_oriented,current_start
feature.discontinuous=True
if feature.parent in Features:
add_child(feature.parent,new_part_name)
return new_part_name
# get the feature's start position on the segment (among segments on the source genome)
def get_feature_start_on_segment(seg_id,feat_id):
s=Segments[search_segment(seg_id,feat_id)]
f=Features[feat_id].find_part_segment_source(s.id)
if s.get_start(f.id)>=f.start:
def get_feature_start_on_segment(seg_id,feat_key):
s=Segments[search_segment(seg_id,feat_key)]
f=Features[feat_key].find_part_segment_source(s.id)
if s.get_start(f.key)>=f.start:
result=1
else:
result=f.start-s.get_start(f.id)+1
result=f.start-s.get_start(f.key)+1
return result
# get the feature's stop position on the segment (among segments on the source genome)
def get_feature_stop_on_segment(seg_id,feat_id):
s=Segments[search_segment(seg_id,feat_id)]
f=Features[feat_id].find_part_segment_source(s.id)
if s.get_stop(f.id)<=f.stop:
result=s.get_size(f.id)
def get_feature_stop_on_segment(seg_id,feat_key):
s=Segments[search_segment(seg_id,feat_key)]
f=Features[feat_key].find_part_segment_source(s.id)
if s.get_stop(f.key)<=f.stop:
result=s.get_size(f.key)
else:
result=f.stop-s.get_start(f.id)+1
result=f.stop-s.get_start(f.key)+1
return result
# look for a segment in the dict Segments, in one orientation or the other
......@@ -202,7 +205,8 @@ def search_segment(segment,feat_id):
elif invert_seg(segment) in Segments:
if invert_seg(segment) in list_seg_source:
return invert_seg(segment)
print("segment not found")
print(segment,list_seg_source,feat_id)
# get the inverted segment
......@@ -234,9 +238,10 @@ def add_feature_obj(feature):
feature.other_parts_list=other_parts
# add the position of the feature on its first and last segment
def add_pos(feature):
feature.pos_start=get_feature_start_on_segment(feature.segments_list_source[0],feature.id)
feature.pos_stop=get_feature_stop_on_segment(feature.segments_list_source[-1],feature.id)
def add_pos(feat_key):
feature=Features[feat_key]
feature.pos_start=get_feature_start_on_segment(feature.segments_list_source[0],feat_key)
feature.pos_stop=get_feature_stop_on_segment(feature.segments_list_source[-1],feat_key)
# orders the features so that each features is always preceded by its parent feature (gene -> mrna -> exons, cds, utr). useful for the transfer later.
def order_dict(Features):
......@@ -252,11 +257,11 @@ def add_feature_dict(feature_id,copy_Features,Features):
feature=copy_Features[feature_id]
# check that the parent is present
if feature.parent!='': # recursively find the parent (most of the time a gene)
add_feature_dict(feature.parent.id,copy_Features,Features)
add_feature_dict(feature.parent.key,copy_Features,Features)
# if the feature is discontinuous, check that the other parts are present :
if feature.discontinuous and feature.first:
for part in feature.other_parts_list:
add_feature_dict(feature.parent.id,copy_Features,Features)
add_feature_dict(feature.parent.key,copy_Features,Features)
# once the parent is present and the other parts are added, add the feature
Features[feature_id]=feature
from .intersect import Features,get_feature_start_on_segment,get_feature_stop_on_segment,invert_seg
# find the occurence of the segment that corresponds to the feature's copy
def get_seg_occ(seg,walk,feat,copy,segments_on_target_genome):
seg_in_walk=segments_on_target_genome[seg][walk]
for seg_occurence in seg_in_walk:
......
......@@ -25,7 +25,7 @@ def child_variation_details(feature_id,copy_id,walk,seg_seq,feature_target_path,
child_target_path=find_child_target_path(child.segments_list_target,walk,copy_id)
# treat the child feature:
print_variations(child.id,child_first_seg,child_last_seg,copy_id,walk,seg_seq,child_target_path,inversion,file_out_var,segments_on_target_genome)
print_variations(child.key,child_first_seg,child_last_seg,copy_id,walk,seg_seq,child_target_path,inversion,file_out_var,segments_on_target_genome)
def get_child_first_last_seg(child_segments_list_source,feature_target_path,inversion):
[child_first_seg,child_last_seg]=['','']
......@@ -249,8 +249,8 @@ class FeatureVariation:
# print a deletion at the end of a feature
def print_last_deletion(self,i,feature,seg_seq):
seg_del=search_segment(self.feature_path_source_genome[i],feature.id)
pos_old=int(Segments[seg_del].get_start(feature.id))-self.feat_start+1
seg_del=search_segment(self.feature_path_source_genome[i],feature.key)
pos_old=int(Segments[seg_del].get_start(feature.key))-self.feat_start+1
pos_new=str(self.size_new+1) # the deletion is at the end of the feature on the new genome
del_sequence=FeatureVariation.get_sequence_list_seg(self.feature_path_source_genome,i,feature.pos_stop,seg_seq)
......@@ -323,7 +323,7 @@ class FeatureVariation:
def get_old_new_pos_substitution(self,segments_on_target_genome):
feat=self.feature_key
seg_pos=search_segment(self.start_var,feat)
pos_old=str(int(Segments[seg_pos].get_start(Features[feat].id))-self.feat_start)
pos_old=str(int(Segments[seg_pos].get_start(Features[feat].key))-self.feat_start)
var_start_seg=self.start_on_target
if self.inversion:
......@@ -341,7 +341,7 @@ class FeatureVariation:
def get_old_new_pos_insertion(self,segments_on_target_genome):
feat=self.feature_key
seg_pos=search_segment(self.start_var,feat) # start_var is the segment AFTER the insertion
pos_old=str(int(Segments[seg_pos].get_start(Features[feat].id))-self.feat_start)
pos_old=str(int(Segments[seg_pos].get_start(Features[feat].key))-self.feat_start)
start_var_seg=self.start_var
if self.inversion:
......@@ -360,9 +360,9 @@ class FeatureVariation:
feat=self.feature_key
seg_pos=search_segment(self.start_var,feat)
if self.start_var_index==0:
pos_old=int(Segments[seg_pos].get_start(Features[feat].id))-self.feat_start+Features[feat].pos_start-1
pos_old=int(Segments[seg_pos].get_start(Features[feat].key))-self.feat_start+Features[feat].pos_start-1
else:
pos_old=int(Segments[seg_pos].get_start(Features[feat].id))-self.feat_start
pos_old=int(Segments[seg_pos].get_start(Features[feat].key))-self.feat_start
if pos_old<0:
pos_old=0
print("error with variation position",self.inversion,"***")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment