Skip to content
Snippets Groups Projects
Commit 1bcd358b authored by nina.marthe_ird.fr's avatar nina.marthe_ird.fr
Browse files

the dict segments_on_target_genome now stores the segment twice if it is...

the dict segments_on_target_genome now stores the segment twice if it is present twice in a given walk. adapted the rest of the code to use the last occurence (like before, will change that later)
parent 4ef67a6f
No related branches found
No related tags found
No related merge requests found
......@@ -5,30 +5,42 @@ segments_on_target_genome={}
# get the start position of the features on the linear genome, using their coordinates on the graph and the coordinantes of the segments on the genome
def get_feature_start_on_target_genome(start_seg,feat_id,walk):
seg_start_pos=segments_on_target_genome[start_seg][walk][1]
seg_start_pos=segments_on_target_genome[start_seg][walk][-1][1]
feat_start_pos=get_feature_start_on_segment(start_seg,feat_id)
if len(segments_on_target_genome[start_seg][walk])>1:
print("*** several start_seg found ",start_seg)
return seg_start_pos+feat_start_pos-1
# get the stop position of the features on the linear genome, using their coordinates on the graph and the coordinantes of the segments on the genome
def get_feature_stop_on_target_genome(stop_seg,feat_id,walk):
seg_start_pos=segments_on_target_genome[stop_seg][walk][1]
seg_start_pos=segments_on_target_genome[stop_seg][walk][-1][1]
feat_stop_pos=get_feature_stop_on_segment(stop_seg,feat_id)
if len(segments_on_target_genome[stop_seg][walk])>1:
print("*** several stop_seg found ",stop_seg)
return seg_start_pos+feat_stop_pos-1
# get the start position of the features on the linear genome for inverted features
def get_feature_start_on_target_genome_inv(start_seg,feat_id,walk):
seg_end_pos=segments_on_target_genome[start_seg][walk][2]
seg_end_pos=segments_on_target_genome[start_seg][walk][-1][2]
feat_start_pos=get_feature_start_on_segment(start_seg,feat_id)
if len(segments_on_target_genome[start_seg][walk])>1:
print("*** several start_seg found ",start_seg)
return seg_end_pos-feat_start_pos+1
# get the stop position of the features on the linear genome for inverted features
def get_feature_stop_on_target_genome_inv(stop_seg,feat_id,walk):
seg_end_pos=segments_on_target_genome[stop_seg][walk][2]
seg_end_pos=segments_on_target_genome[stop_seg][walk][-1][2]
feat_stop_pos=get_feature_stop_on_segment(stop_seg,feat_id)
if len(segments_on_target_genome[stop_seg][walk])>1:
print("*** several stop_seg found ",stop_seg)
return seg_end_pos-feat_stop_pos+1
......@@ -41,7 +53,7 @@ def right_size(size,max_diff,feat):
return not ((size>Features[feat].size*max_diff) or (size<Features[feat].size/max_diff))
def create_line_target_gff(first_seg,last_seg,feature_id,size_diff,inversion,walk,cov,id):
[chr,strand,feature]=[segments_on_target_genome[first_seg][walk][0],Features[feature_id].strand,Features[feature_id]]
[chr,strand,feature]=[segments_on_target_genome[first_seg][walk][-1][0],Features[feature_id].strand,Features[feature_id]]
annotation=f'{feature.annot};Size_diff={size_diff};coverage={cov};sequence_ID={id}' # Nb_variants={var_count};
......@@ -289,7 +301,11 @@ def get_segments_positions_on_genome(pos_seg): # add to the dict the info about
#segments_on_target_genome[seg]=[chrom,start,stop,strand,index,file_name]
if seg not in segments_on_target_genome:
segments_on_target_genome[seg]={} # dict of walks->segment_info; you get the info about the segment for each walk
segments_on_target_genome[seg][file_name]=[chrom,start,stop,strand,index]
if file_name not in segments_on_target_genome[seg]:
segments_on_target_genome[seg][file_name]=list()
else:
print("doublon",file_name,seg)
segments_on_target_genome[seg][file_name].append([chrom,start,stop,strand,index])
seg_count+=1
# look for the segment on either strand of the target genome
......@@ -375,10 +391,15 @@ def add_target_genome_path(feature_id,target_genome_paths):
# find the feature's path in target genome
def get_feature_path(target_genome_path,first_seg,last_seg,walk_name):
#first_seg=search_seg_on_target_genome(first_seg)
#last_seg=search_seg_on_target_genome(last_seg)
first_seg_index=segments_on_target_genome[first_seg][walk_name][4]
last_seg_index=segments_on_target_genome[last_seg][walk_name][4]
# si on a plusieurs occurences de first_seg et last_seg, ???
# prendre la dernière et print qu'il y en a d'autre.
if len(segments_on_target_genome[first_seg][walk_name])>1:
print("*** several first_seg found ",first_seg)
if len(segments_on_target_genome[last_seg][walk_name])>1:
print("*** several last_seg found ",last_seg)
first_seg_index=segments_on_target_genome[first_seg][walk_name][-1][4]
last_seg_index=segments_on_target_genome[last_seg][walk_name][-1][4]
first_index=min(first_seg_index,last_seg_index)
last_index=max(first_seg_index,last_seg_index)
feature_path_target_genome=target_genome_path[first_index:last_index+1]
......@@ -568,7 +589,7 @@ def create_var(feature_id,first_seg,last_seg,walk):
stop_new_genome=get_feature_stop_on_target_genome(last_seg,feature_id,walk)
size_new_genome=stop_new_genome-start_new_genome+1
size_diff=str(size_new_genome-feature.size)
sequence_name=segments_on_target_genome[first_seg][walk][0]
sequence_name=segments_on_target_genome[first_seg][walk][-1][0]
variation=Variation(feature_id,feature.type,sequence_name,start_new_genome,stop_new_genome,inversion,size_diff,size_new_genome)
return(variation,feature_path_source_genome,feature_path_target_genome)
......@@ -590,11 +611,11 @@ def get_old_new_pos_substitution(feat_start,variation,start_feat_seg_target,feat
if variation.inversion:
start_feat_seg_target=invert_seg(start_feat_seg_target)
var_start_seg=invert_seg(var_start_seg)
end_var=segments_on_target_genome[var_start_seg][walk][2]
end_var=segments_on_target_genome[var_start_seg][walk][-1][2]
start_feat=get_feature_start_on_target_genome_inv(start_feat_seg_target,feat,walk)
pos_new=str(start_feat-end_var)
else:
start_var=segments_on_target_genome[var_start_seg][walk][1]
start_var=segments_on_target_genome[var_start_seg][walk][-1][1]
start_feat=get_feature_start_on_target_genome(start_feat_seg_target,feat,walk)
pos_new=str(start_var-start_feat)
return [pos_old,pos_new] # pos_old and pos_new are the base before the change
......@@ -607,11 +628,11 @@ def get_old_new_pos_insertion(variation,feat_start,start_feat_seg_target,feat,wa
if variation.inversion:
start_feat_seg_target=invert_seg(start_feat_seg_target)
start_var_seg=invert_seg(start_var_seg)
end_var=segments_on_target_genome[start_var_seg][walk][2]+len(variation.alt) # start_var_seg is the segment AFTER the insertion
end_var=segments_on_target_genome[start_var_seg][walk][-1][2]+len(variation.alt) # start_var_seg is the segment AFTER the insertion
start_feat=get_feature_start_on_target_genome_inv(start_feat_seg_target,feat,walk)
pos_new=str(start_feat-end_var)
else:
start_var=segments_on_target_genome[start_var_seg][walk][1]-len(variation.alt) # start_var_seg is the segment AFTER the insertion
start_var=segments_on_target_genome[start_var_seg][walk][-1][1]-len(variation.alt) # start_var_seg is the segment AFTER the insertion
start_feat=get_feature_start_on_target_genome(start_feat_seg_target,feat,walk)
pos_new=str(start_var-start_feat)
return [pos_old,pos_new] # pos_old and pos_new are the base before the change
......@@ -634,11 +655,11 @@ def get_old_new_pos_deletion(variation,feat_start,start_feat_seg_target,feat,wal
if variation.inversion:
start_feat_seg_target=invert_seg(start_feat_seg_target)
start_var_seg=invert_seg(start_var_seg)
start_var=segments_on_target_genome[start_var_seg][walk][1]-1
start_var=segments_on_target_genome[start_var_seg][walk][-1][1]-1
start_feat=get_feature_start_on_target_genome_inv(start_feat_seg_target,feat,walk)
pos_new=str(start_feat-start_var)
else:
start_var=segments_on_target_genome[start_var_seg][walk][2]+1
start_var=segments_on_target_genome[start_var_seg][walk][-1][2]+1
start_feat=get_feature_start_on_target_genome(start_feat_seg_target,feat,walk)
pos_new=str(start_var-start_feat)
return [pos_old,pos_new] # pos_old and pos_new are the base before the change
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment