Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from Graph_gff import Segments, Features, write_line
from Functions import *
### functions to generate a genome's gff from the graph's gff
# functions to get the detailed gff with all the fragments of the features
# outputs each fragment of the feature in a gff
def gff_detail(list_seg,feature_id):
# loop that goes through all the segments that have the current feature
# keeps the first segment present in the genome found, and when it finds a segment absent in the genome, prints the current part of the fragment, and resets the first segment present.
# continues to go through the segments, keeps the next segment present in the genome, and when it finds a segment absent, prints the second part of the feature, etc.
# at the end of the loop, prints the last part of the fragment.
[feat_start,seg_start,last_seg]=["","",""] # first segment of the feature, first segment of the current part of the feature, last segment in the part of the feature, for loop below
for segment in list_seg:
if segment[1:] in segments_on_target_genome:
if feat_start=="":
feat_start=segment[1:]
if seg_start=="": # if we dont have a start, take the current segment for the start of the part of the feature
seg_start=segment[1:]
#else: if we already have a start, keep going though the segments until we find a stop (segment not in azucena)
else:
if seg_start!="": # found a stop and we have a start. print the line, reset seg_start, and keep going through the segments to find the next seg_start
out_line=create_line_detail(last_seg,feature_id,seg_start,feat_start)
write_line(out_line,output_detail_gff,False)
seg_start=""
#else: if the current segment is not in azucena but there is no start, keep looking for a start
last_seg=segment
if last_seg[1:] in segments_on_target_genome:
out_line=create_line_detail(list_seg[-1],feature_id,seg_start,feat_start)
write_line(out_line,output_detail_gff,False)
# functions to get the gff with one line per feature
# outputs the feature once in a gff, from the first to the last segment present on the new genome (if the size is ok) :
def gff_one(first_seg,last_seg,feature_id,list_seg,max_diff):
if (first_seg!=''): # feature present on the target genome
size_on_new_genome=get_feature_stop_on_genome(last_seg,feature_id)-get_feature_start_on_genome(first_seg,feature_id)+1
size_diff=abs(size_on_new_genome-Features[feature_id].size)
if right_size(size_on_new_genome,max_diff,feature_id):
line=create_line_gff_one(first_seg,last_seg,feature_id,size_diff,list_seg)
write_line(line,output_gff_one,False)
return size_diff
else:
return "bad_size"
else :
return "absent"
# writes the gff of azucena using the gff of the graph
def genome_gff(pos_seg, gff, out, gfa):
print("generation of the genome's gff ")
# create variables and open files
[once,detail,var,stats]=[False,False,True,False]
if var:
[paths,seg_seq]=get_segments_sequence_and_paths(gfa)
file_out_var = open("variations_chr10.txt", 'w')
global output_variations
output_variations=[0,"",file_out_var]
if detail:
file_out_detail = open("azucena_detail_chr10.gff", 'w')
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
global output_detail_gff
output_detail_gff=[0,"",file_out_detail]
if once:
file_out=open(out,'w')
global output_gff_one
output_gff_one=[0,"",file_out]
bad_size_features=0
absent_features=0
diff_size_transfered_features=[0,0] # [count,sum], to get the average
if stats:
# create objects for stats on how many segments are absent in azucena, their average length, etc
feature_missing_segments=[[],[],[],[],[],[],[]] # [feature_missing_first,feature_missing_middle,feature_missing_last,feature_missing_all,feature_missing_total,feature_total,feature_ok]
# the fist segment of the feature is missing - feature_missing_first
# the last segment of the feature is missing - feature_missing_last
# at least one middle segment of the feature is missing - feature_missing_middle
# the entire feature is missing - feature_missing_all
# at least one segment is missing first, last, or middle) - feature_missing_total
# no segment is missing, the feature is complete - feature_ok
# total number of features, with missing segments or not - feature_total
get_segments_positions_on_genome(pos_seg)
list_feature_to_transfer=get_all_features_in_gff(gff) # get the list of all the features to transfer from the gff
for feat in list_feature_to_transfer:
# for each feature, get list of the segments where it is and the first and last segment of the feature on the new genome
list_seg=Features[feat].segments_list
first_seg=get_first_seg(list_seg)
last_seg=get_first_seg(reversed(list_seg))
# outputs the feature once in a gff, from the first to the last segment present on the new genome
if once:
max_diff=2 # maximum difference size (n times bigger of smaller)
transfer_stat=gff_one(first_seg,last_seg,feat,list_seg,max_diff) # insertion not considered !!!
if transfer_stat=="bad_size":
bad_size_features+=1
elif transfer_stat=="absent":
absent_features+=1
else:
diff_size_transfered_features[0]+=1
diff_size_transfered_features[1]+=transfer_stat
write_line("",output_gff_one,True)
# outputs each fragment of the feature in a gff
if detail:
gff_detail(list_seg,feat) # insertions !
write_line("",output_detail_gff,True)
# outputs the detail of variations of the feature :
if var:
print_variations_2(first_seg,last_seg,feat,paths,seg_seq)
write_line("",output_variations,True)
if stats==True:
stats_feature_missing_segment(feature_missing_segments,first_seg,last_seg,list_seg,feat)
# close output_files
if detail:
file_out_detail.close()
if once:
file_out.close()
if var:
file_out_var.close()
# print stats
if stats==True:
if once:
print(len(Features)-(bad_size_features+absent_features),"out of",len(Features),"features are transfered.")
print(bad_size_features,"out of",len(Features), "features are not transfered because they are too big or too small compared to the original genome.")
print(absent_features,"out of",len(Features),"features are not transfered because they are absent in the new genome.")
print("average length difference of the transfered genes : ",diff_size_transfered_features[1]/diff_size_transfered_features[0])
stats_features(feature_missing_segments)
# functions to get the detail of the variations in the features
def print_variations(first_seg,last_seg,feat,paths,seg_seq):
if (first_seg!=''): # if the feature is not completly absent # add the else, output absent features
var=0 # count variations, to see if there is any
feature=Features[feat]
feat_start=feature.start
# get the lengths of the feature, on the original genome and on the new one
start_new_genome=get_feature_start_on_genome(first_seg,feat)
stop_new_genome=get_feature_stop_on_genome(last_seg,feat)
size_new_genome=int(stop_new_genome)-int(start_new_genome)+1
size_diff=str(size_new_genome-feature.size)
# get feature paths on the original genome and on the target genome
list_segfeat_azu=get_feature_path(paths,first_seg,last_seg)
list_segfeat_nb=feature.segments_list
# loop to go through both paths
i=0
j=0
[last,last_taille,last_seq,last_start,last_in_azu]=['',0,'','',''] # rename last_taille
# check if there is an inversion and remove strands
[list_segfeat_nb,list_segfeat_azu,inversion]=detect_inversion(list_segfeat_nb,list_segfeat_azu)
# detect and print variations ignoring the strands
while (i<len(list_segfeat_nb)) & (j<len(list_segfeat_azu)):
if list_segfeat_nb[i] != list_segfeat_azu[j]: # if there is a difference between the two paths
if list_segfeat_azu[j] not in list_segfeat_nb: # if the segment in azu is absent in nb
if list_segfeat_nb[i] not in list_segfeat_azu: # if the segment in nb is absent is azu
# is both segments are absent in the other genome, its a substitution
last_in_azu=list_segfeat_azu[j]
# print if we had an insertion or deletion running
if last=='insertion': # last : type énuméré. # fct compare cas précedent à cas courant, si différent imprimer cas précédent.
[pos_old,pos_new]=get_old_new_pos_insertion(last_start,feat_start,list_segfeat_azu,feat)
# line : formated line f"{feat}\t"
line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tinsertion\t-\t"+last_seq+"\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
write_line(line,output_variations,False)
var+=1
elif last=='deletion':
[pos_old,pos_new]=get_old_new_pos_deletion(last_start,feat_start,list_segfeat_azu,feat,last_in_azu,i)
line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tdeletion\t"+last_seq+"\t-\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
write_line(line,output_variations,False)
var+=1
last='';last_taille=0;last_seq='';last_start=''
# print the substitution # what if plusieurs substitutions à la suite ?
# substitution of segment list_segfeat_nb[i][1:] by segment list_segfeat_azu[j][1:]
[pos_old,pos_new]=get_old_new_pos_substitution(feat_start,list_segfeat_nb,list_segfeat_azu,feat,i,j)
if len(seg_seq[list_segfeat_nb[i]]) == len(seg_seq[list_segfeat_azu[j]]): # if the substituion is between two segment of the same size, print it
size_subs=len(seg_seq[list_segfeat_nb[i]])
line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tsubstitution\t"+seg_seq[list_segfeat_nb[i]]+"\t"+seg_seq[list_segfeat_azu[j]]+"\t"+str(size_subs)+"\t"+str(pos_old)+"\t"+pos_new+"\n"
else :
# if the segments of the substitution have a different size, print deletion then insertion at the same position.
line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tdeletion\t"+seg_seq[list_segfeat_nb[i]]+"\t-\t"+str(len(seg_seq[list_segfeat_nb[i]]))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
line+=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tinsertion\t-\t"+seg_seq[list_segfeat_azu[j]]+"\t"+str(len(seg_seq[list_segfeat_azu[j]]))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
var+=1
write_line(line,output_variations,False)
var+=1;i+=1;j+=1
else: # azu segment not in nb, but nb segment in azu : insertion
if last=='deletion':
[pos_old,pos_new]=get_old_new_pos_deletion(last_start,feat_start,list_segfeat_azu,feat,last_in_azu,i)
line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tdeletion\t"+last_seq+"\t-\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
write_line(line,output_variations,False)
var+=1;last='';last_taille=0;last_start='';last_seq=''
last_in_azu=list_segfeat_azu[j]
if last=='insertion':
last_seq=last_seq+seg_seq[list_segfeat_azu[j]]
else:
last='insertion'
last_seq=seg_seq[list_segfeat_azu[j]]
last_start=list_segfeat_nb[i]
j+=1
elif list_segfeat_nb[i] not in list_segfeat_azu: # nb segment not in azu, but azu segment in nb : deletion
if last=='insertion':
[pos_old,pos_new]=get_old_new_pos_insertion(last_start,feat_start,list_segfeat_azu,feat)
line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tinsertion\t-\t"+last_seq+"\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
write_line(line,output_variations,False)
var+=1;last='';last_start='';last_taille=0;last_seq=''
if last=='deletion':
last_seq=last_seq+seg_seq[list_segfeat_nb[i]]
else:
last='deletion'
last_start=list_segfeat_nb[i]
if i==0: # if the deletion is at the start of the feature, the deletion doesnt start at the start at the first segment :
#use pos_start, position of the feature on its first segment
last_seq=seg_seq[list_segfeat_nb[i]][feature.pos_start-1:]
else: # else, the deletion will always start at the start of the first segment.
last_seq=seg_seq[list_segfeat_nb[i]]
i+=1
else : # idk yet. if both segments are present in the other genome but not at the same position. probably substitution then
line="weird order change\n"
write_line(line,output_variations,False)
var+=1;i+=1;j+=1
else: # segment present in both. print the running indel if there is one
if last=='insertion':
[pos_old,pos_new]=get_old_new_pos_insertion(last_start,feat_start,list_segfeat_azu,feat)
line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tinsertion\t-\t"+last_seq+"\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
write_line(line,output_variations,False)
var+=1
elif last=='deletion':
[pos_old,pos_new]=get_old_new_pos_deletion(last_start,feat_start,list_segfeat_azu,feat,last_in_azu,i)
line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tdeletion\t"+last_seq+"\t-\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
write_line(line,output_variations,False)
var+=1
last_in_azu=list_segfeat_azu[j]
last='';last_taille=0;last_start='';last_seq='';i+=1;j+=1
# finish printing the current indel
if last=='insertion':
[pos_old,pos_new]=get_old_new_pos_insertion(last_start,feat_start,list_segfeat_azu,feat)
line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tinsertion\t-\t"+last_seq+"\t"+str(last_taille)+"\t"+str(pos_old)+"\t"+pos_new+"\n"
write_line(line,output_variations,False)
var+=1
elif last=='deletion':
[pos_old,pos_new]=get_old_new_pos_deletion(last_start,feat_start,list_segfeat_azu,feat,last_in_azu,i)
line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tdeletion\t"+last_seq+"\t-\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
write_line(line,output_variations,False)
var+=1
# see if the end is missing for one of the two genomes
if not((i>=len(list_segfeat_nb)-1) & (j>=len(list_segfeat_azu)-1)):
pos_old=int(Segments[list_segfeat_nb[i]].start)-int(feat_start)+1
del_sequence=get_sequence_list_seg(list_segfeat_nb,i,feature,seg_seq)
length=len(del_sequence)
pos_new=str(size_new_genome+1) # the deletion is at the end of the feature on the new genome
line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tdeletion\t"+del_sequence+"\t-\t"+str(length)+"\t"+str(pos_old)+"\t"+pos_new+"\n"
write_line(line,output_variations,False)
var+=1
if var==0:
line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tno_var\t-\t-\t-\t-\t-\n"
write_line(line,output_variations,False)
def print_variations_2(first_seg,last_seg,feat,paths,seg_seq):
if (first_seg!=''): # if the feature is not completly absent # add the else, output absent features
[variation,list_segfeat_nb,list_segfeat_azu]=create_var(feat,first_seg,last_seg,paths) # removes the strands in the segment lists
feature=Features[feat]
feat_start=feature.start
# loop to go through both paths with i and j
[i,j,var_count]=[0,0,0]
# detect and print variations ignoring the strands
while (i<len(list_segfeat_nb)) & (j<len(list_segfeat_azu)):
if list_segfeat_nb[i] != list_segfeat_azu[j]: # if there is a difference between the two paths
if list_segfeat_azu[j] not in list_segfeat_nb: # if the segment in azu is absent in nb
if list_segfeat_nb[i] not in list_segfeat_azu: # if the segment in nb is absent is azu
# if both segments are absent in the other genome, its a substitution
variation.last_seg_in_target=list_segfeat_azu[j]
if (variation.type=='insertion') | (variation.type=='deletion'): # print the current variation before treating the substitution
print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
reset_var(variation)
var_count+=1
if variation.type=='substitution':
continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j)
init_new_var(variation,"substitution",list_segfeat_nb,list_segfeat_azu,i,j,seg_seq,feature)
i+=1;j+=1
else: # azu segment not in nb, but nb segment in azu : insertion
if (variation.type=='substitution') | (variation.type=='deletion'): # print the current variation before treating the insertion
print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
reset_var(variation)
var_count+=1
variation.last_seg_in_target=list_segfeat_azu[j]
if variation.type=='insertion':
continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j)
init_new_var(variation,"insertion",list_segfeat_nb,list_segfeat_azu,i,j,seg_seq,feature)
j+=1
elif list_segfeat_nb[i] not in list_segfeat_azu: # nb segment not in azu, but azu segment in nb : deletion
if (variation.type=='substitution') | (variation.type=='insertion'): # print the current variation before treating the deletion
print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
reset_var(variation)
var_count+=1
if variation.type=='deletion':
continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j)
init_new_var(variation,"deletion",list_segfeat_nb,list_segfeat_azu,i,j,seg_seq,feature)
i+=1
else : # if both segments are present in the other genome but not at the same position. probably substitution ? weird case never found yet
line="weird order change\n"
write_line(line,output_variations,False)
var_count+=1;i+=1;j+=1
else: # segment present in both, no variation. print the running indel if there is one
if variation.type!='': # print the current variation if there is one
print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
var_count+=1
reset_var(variation)
variation.last_seg_in_target=list_segfeat_azu[j]
i+=1;j+=1
if (variation.type!=''): # if there was a current variation when we reached the end, print it
print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
var_count+=1
reset_var(variation)
if i<=len(list_segfeat_nb)-1: # if we didn't reach the length of the segment list for the first genome, the end is missing for the second genome
print_last_deletion(variation,list_segfeat_nb,i,feat_start,feature,seg_seq)
var_count+=1
reset_var(variation)
if var_count==0: # if no variation was encountered
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
print_novar(variation)
def print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j):
if variation.type=='insertion':
[pos_old,pos_new]=get_old_new_pos_insertion_2(variation,feat_start,list_segfeat_azu,feat)
line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tinsertion\t-\t"+variation.alt+"\t"+str(len(variation.alt))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
write_line(line,output_variations,False)
elif variation.type=='deletion':
[pos_old,pos_new]=get_old_new_pos_deletion_2(variation,feat_start,list_segfeat_azu,feat,i)
line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tdeletion\t"+variation.ref+"\t-\t"+str(len(variation.ref))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
write_line(line,output_variations,False)
elif variation.type=='substitution':
[pos_old,pos_new]=get_old_new_pos_substitution_2(feat_start,variation,list_segfeat_azu,feat,j)
if len(variation.ref) == len(variation.alt): # if the substituion is between two segment of the same size, print it
size_subs=len(variation.ref)
line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tsubstitution\t"+variation.ref+"\t"+variation.alt+"\t"+str(size_subs)+"\t"+str(pos_old)+"\t"+pos_new+"\n"
else :
# if the segments of the substitution have a different size, print deletion then insertion at the same position.
line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tdeletion\t"+variation.ref+"\t-\t"+str(len(variation.ref))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
line+=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tinsertion\t-\t"+variation.alt+"\t"+str(len(variation.alt))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
write_line(line,output_variations,False)
def print_last_deletion(variation,list_segfeat_nb,i,feat_start,feature,seg_seq):
pos_old=int(Segments[list_segfeat_nb[i]].start)-int(feat_start)+1
del_sequence=get_sequence_list_seg(list_segfeat_nb,i,feature,seg_seq)
length=len(del_sequence)
pos_new=str(int(variation.size_new)+1) # the deletion is at the end of the feature on the new genome
line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tdeletion\t"+del_sequence+"\t-\t"+str(length)+"\t"+str(pos_old)+"\t"+pos_new+"\n"
write_line(line,output_variations,False)
def print_novar(variation):
line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tno_var\t-\t-\t-\t-\t-\n"
write_line(line,output_variations,False)
# not used.
def get_list_segments_missing(list_seg,segments_on_target_genome):
segments_missing=[]
for segment in list_seg:
if segment[1:] not in segments_on_target_genome:
segments_missing.append(Segments[segment[1:]])
return segments_missing
# takes a feature and a feature type, returns a list of child features that have the wanted type. currently not used.
def get_child_list(feature,child_type):
if type=="":
return feature.childs
list_childs=[]
for child in feature.childs:
if Features[child].type==child_type:
list_childs.append(child)
return list_childs