Newer
Older
from .usefull_little_functions import invert_segment_list
from .intersect import Features
from .detect_inversion import detect_feature_inversion
# functions to compute coverage and sequence id scores
# get the coverage and sequence id of a feature
def get_id_cov(feature_id,seg_size,target_list): # seg_size has unoriented segments : s25
feature=Features[feature_id]
source_list=feature.segments_list_source
inversion=detect_feature_inversion(source_list,target_list)
if inversion:
target_list=invert_segment_list(target_list)
[match,subs,inser,delet]=[0,0,0,0]
[i,j]=[0,0]
first=True # when writing the first part of the feature, dont take the whole segment, only the part that the feature is on
last=False # same for the last part of the feature # for id and ins.

nina.marthe_ird.fr
committed
# make dictionnaries with the two segments lists to search into.
dict_source = {source_list[i]: "" for i in range(len(source_list))}
dict_target = {target_list[i]: "" for i in range(len(target_list))}
while (i<len(source_list)) and (j<len(target_list)):
if i==len(source_list)-1:
last=True
if source_list[i] != target_list[j]: # if there is a difference between the two paths

nina.marthe_ird.fr
committed
if target_list[j] not in dict_source: # if the segment in target genome is absent in source genome
if source_list[i] not in dict_target: # if the segment in source genome is absent is target genome : substitution
add=segment_id_cov("substitution",seg_size,source_list[i],target_list[j],first,feature,last)
match+=add[0];subs+=add[1];inser+=add[2];delet+=add[3];first=add[4]
i+=1;j+=1
else: # target genome segment not in source_genome, but source_genome segment in target genome : insertion
add=segment_id_cov("insertion",seg_size,source_list[i],target_list[j],first,feature,last)
match+=add[0];subs+=add[1];inser+=add[2];delet+=add[3];first=add[4]
j+=1

nina.marthe_ird.fr
committed
elif source_list[i] not in dict_target: # source_genome segment not in target genome, but target genome segment in source_genome : deletion
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
add=segment_id_cov("deletion",seg_size,source_list[i],target_list[j],first,feature,last)
match+=add[0];subs+=add[1];inser+=add[2];delet+=add[3];first=add[4]
i+=1
else : # if both segments are present in the other genome but not at the same position. weird case never found yet
add=segment_id_cov("substitution",seg_size,source_list[i],target_list[j],first,feature,last)
match+=add[0];subs+=add[1];inser+=add[2];delet+=add[3];first=add[4]
i+=1;j+=1
else: # segment present in both, no variation.
add=segment_id_cov("identity",seg_size,source_list[i],target_list[j],first,feature,last)
match+=add[0];subs+=add[1];inser+=add[2];delet+=add[3];first=add[4]
i+=1;j+=1
if i<=len(source_list)-1: # if we didn't reach the length of the segment list for the first genome, the end is missing for the second genome
add=segment_id_cov("end_deletion",seg_size,source_list[i:],'',first,feature,last)
match+=add[0];subs+=add[1];inser+=add[2];delet+=add[3];first=add[4]
denom_cov=match+subs+delet
denom_id=denom_cov+inser
[cov,id]=[0,0]
if denom_cov>0:
cov=round((match+subs)/(match+subs+delet),3)
if denom_id>0:
id=round(match/(match+subs+inser+delet),3)
return [cov,id]
# computes the cov/id calculation for a segment pair
def segment_id_cov(type,seg_size,seg_a,seg_b,first,feature,last):
[match,subs,inser,delet]=[0,0,0,0]
match type:
case "identity":
if first:
match+=seg_size[seg_a[1:]]-feature.pos_start+1
elif last:
match+=feature.pos_stop
else:
match+=seg_size[seg_a[1:]]
case "substitution":
if seg_size[seg_b[1:]]!=seg_size[seg_a[1:]]: # substitution can be between segments of different size
if seg_size[seg_b[1:]]>seg_size[seg_a[1:]]:
subs+=seg_size[seg_a[1:]]
inser+=seg_size[seg_b[1:]]-seg_size[seg_a[1:]]
elif seg_size[seg_b[1:]]<seg_size[seg_a[1:]]:
subs+=seg_size[seg_b[1:]]
delet+=seg_size[seg_a[1:]]-seg_size[seg_b[1:]]
else:
subs+=seg_size[seg_a[1:]]
case "insertion":
inser+=seg_size[seg_b[1:]]
case "deletion":
if first:
delet+=seg_size[seg_a[1:]]-feature.pos_start+1
else:
delet+=seg_size[seg_a[1:]]
case "end_deletion":
for seg in seg_a[:-1]:
delet+=seg_size[seg[1:]]
delet+=feature.pos_stop
return [match,subs,inser,delet,False] # check the orientation of the segment later