Newer
Older

nina.marthe_ird.fr
committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
from .usefull_little_functions import *
from .intersect import invert_seg,Features
from .detect_inversion import detect_feature_inversion
from .Functions import get_id_cov
# outputs the gff line of the target genome for a feature occurence (a feature can be transfered at several positions)
def generate_target_gff(feature_id,seg_size,args,reason_features_not_transfered,match,file_out_gff,segments_on_target_genome):
feature=Features[feature_id]
# get list of the segments where it is and the first and last segment of the feature on the new genome
[first_seg,last_seg,walk,copy_id,feature_target_path]=get_featurePath_ends(match)
inversion=detect_feature_inversion(feature.segments_list_source,feature_target_path)
if first_seg!='': # feature present on the target genome
if inversion:
size_on_new_genome=get_feature_start_on_target_genome_inv(last_seg,feature_id,walk,copy_id,segments_on_target_genome)-get_feature_stop_on_target_genome_inv(first_seg,feature_id,walk,copy_id,segments_on_target_genome)+1
else:
size_on_new_genome=get_feature_stop_on_target_genome(last_seg,feature_id,walk,copy_id,segments_on_target_genome)-get_feature_start_on_target_genome(first_seg,feature_id,walk,copy_id,segments_on_target_genome)+1
size_diff=abs(size_on_new_genome-feature.size)
[cov,id]=match[3]
if (cov*100<args.coverage) or (id*100<args.identity):
reason_features_not_transfered[1]+=1
match.append(False) # store the information that this gene copy didnt pass the filters
return "no"
else:
line_gff=create_line_target_gff(first_seg,last_seg,feature_id,size_diff,inversion,walk,cov,id,copy_id,segments_on_target_genome) # transfer the gene
file_out_gff.write(line_gff)
# transfer all the gene's childs
transfer_childs(feature_id,feature_target_path,walk,copy_id,inversion,seg_size,file_out_gff,segments_on_target_genome)
match.append(True) # store the information that this gene copy was transfered
return size_diff
else :
reason_features_not_transfered[0]+=1
match.append(False) # store the information that this gene copy didnt pass the filters
return "no"
def transfer_childs(feature_id,feature_target_path,walk,copy_id,inversion,seg_size,file_out_gff,segments_on_target_genome):
childs_list=Features[feature_id].get_child_list(Features)
for child_id in childs_list:
child=Features[child_id]
# look for the first and last seg of the child in its parent path
[child_first_seg,child_last_seg]=['','']
for seg in child.segments_list_source: # get first seg in the parent path
if seg in feature_target_path: # parent path
child_first_seg=seg
break
elif invert_seg(seg) in feature_target_path:
child_first_seg=invert_seg(seg)
break
if child_first_seg!='': # check that the child feature is not absent in this occurence of the parent feature
for seg in reversed(child.segments_list_source): # get last seg in the parent path
if seg in feature_target_path: # parent path
child_last_seg=seg
break
elif invert_seg(seg) in feature_target_path:
child_last_seg=invert_seg(seg)
break
if inversion: # calculate the child feature size
child_size_on_new_genome=get_feature_start_on_target_genome_inv(child_last_seg,child_id,walk,copy_id,segments_on_target_genome)-get_feature_stop_on_target_genome_inv(child_first_seg,child_id,walk,copy_id,segments_on_target_genome)+1
else:
child_size_on_new_genome=get_feature_stop_on_target_genome(child_last_seg,child_id,walk,copy_id,segments_on_target_genome)-get_feature_start_on_target_genome(child_first_seg,child_id,walk,copy_id,segments_on_target_genome)+1
child_size_diff=abs(child_size_on_new_genome-child.size)
for match in child.segments_list_target: # look for the right occurence of the child feature
if (match[0]==walk) and (match[1]==copy_id):
seg_list=match[2]
[cov,id]=get_id_cov(child_id,seg_size,seg_list)
break
if inversion:
temp=child_first_seg
child_first_seg=child_last_seg
child_last_seg=temp
# transfer the child feature:
line_gff=create_line_target_gff(child_first_seg,child_last_seg,child_id,child_size_diff,inversion,walk,cov,id,copy_id,segments_on_target_genome)
file_out_gff.write(line_gff)
# generates the line for the gff of the target genome
def create_line_target_gff(first_seg,last_seg,feature_id,size_diff,inversion,walk,cov,id,copy_id,segments_on_target_genome):
[chr,strand,feature]=[get_seg_occ(first_seg,walk,feature_id,copy_id,segments_on_target_genome)[0],Features[feature_id].strand,Features[feature_id]]
annotation=f'{feature.annot};Size_diff={size_diff};coverage={cov};sequence_ID={id}' # Nb_variants={var_count};
if inversion:
start=get_feature_start_on_target_genome_inv(last_seg,feature_id,walk,copy_id,segments_on_target_genome)
stop=get_feature_stop_on_target_genome_inv(first_seg,feature_id,walk,copy_id,segments_on_target_genome)
strand=invert_strand(strand)
else:
start=get_feature_start_on_target_genome(first_seg,feature_id,walk,copy_id,segments_on_target_genome)
stop=get_feature_stop_on_target_genome(last_seg,feature_id,walk,copy_id,segments_on_target_genome)
if start>stop:
temp=start
start=stop
stop=temp
if strand=='':
strand='.'
output_line=f'{chr}\tGrAnnoT\t{feature.type}\t{start}\t{stop}\t.\t{strand}\t.\t{annotation}\n'
return output_line