Newer
Older

nina.marthe_ird.fr
committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from .ClassSegFeat import *
from .load_intersect import search_segment,Segments,Features
from tqdm import tqdm
# functions to generate the graph's gff from the segments and features created with create_seg_feat
# get the feature's start position on the segment (among segments on the source genome)
def get_feature_start_on_segment(seg_id,feat_id):
s=Segments[search_segment(seg_id)]
f=Features[find_part_segment_source(s.id,feat_id)]
if s.get_start(f.id)>=f.start:
result=1
else:
result=f.start-s.get_start(f.id)+1
return result
# get the feature's stop position on the segment (among segments on the source genome)
def get_feature_stop_on_segment(seg_id,feat_id):
s=Segments[search_segment(seg_id)]
f=Features[find_part_segment_source(s.id,feat_id)]
if s.get_stop(f.id)<=f.stop:
result=s.get_size(f.id)
else:
result=f.stop-s.get_start(f.id)+1
return result
# generates the annotation for the gff of the graph, with the rank of the segment in the feature
def make_annot_graph(feature_id,segment):
# get the rank and the total number of ocurrences for the feature
feature=Features[find_part_segment_source(segment,feature_id)]
rank=str(feature.segments_list_source.index(segment)+1)
total=str(len(feature.segments_list_source))
# create the annotation with the rank information
annotation=feature.annot+";Rank_occurrence="+rank+";Total_occurrences="+total
return annotation
def find_part_segment_source(segment,feature_id):
first_feature=Features[feature_id]
if segment in first_feature.segments_list_source:
return feature_id
else:
list_parts=first_feature.other_parts_list
for part in list_parts:
if segment in Features[part].segments_list_source:
return part
# create the line for the gff of the graph
def create_line_graph_gff(feature_stranded,segment):
[strand,feature_id]=[feature_stranded[0:1],feature_stranded[1:]]
annotation=make_annot_graph(feature_id,segment)
type=Features[feature_id].type
start=get_feature_start_on_segment(segment,feature_id)
stop=get_feature_stop_on_segment(segment,feature_id)
line=segment+"\tGrAnnot\t"+type+"\t"+str(start)+"\t"+str(stop)+"\t.\t"+strand+"\t.\t"+annotation+"\n"
return line
# go through all the segments in Segments and prints the gff of the graph, with one line per segment/feature intersection
def graph_gff(graph_gff_path,verbose):
if not verbose:
print("Generation of the graph's gff")
with open(graph_gff_path,'w') as graph_gff_file:
for segment_stranded in tqdm(Segments,desc="Generation of the graph's gff",unit=" segment",disable=not verbose):
features_on_seg=Segments[segment_stranded].feature_list # get the list of the features on the segment
for feature_tuple in features_on_seg: # go through all the segment's features, and print the gff line for each
feature_id=feature_tuple[0]
strand=Segments[segment_stranded].get_strand(feature_id)
feature_stranded=strand+feature_id
line = create_line_graph_gff(feature_stranded,segment_stranded)
graph_gff_file.write(line)
# go through all the features in Features and print the gaf of the graph, with one line per feature
def graph_gaf(graph_gaf_path,seg_len,verbose):
if not verbose:
print("Generation of the graph's gaf")
with open(graph_gaf_path,'w') as graph_gaf_file:
for feature_id in tqdm(Features,desc="Generation of the graph's gaf",unit=" feature",disable=not verbose):
feature=Features[feature_id]
feature_segments=feature.segments_list_source
feature_path=""
path_length=0
for segment in feature_segments:
feature_path+=segment
path_length+=seg_len[segment[1:]]
strand=feature.strand
size=feature.size
start_on_path=feature.pos_start-1 # -1 to get the position 0-based bed-like
stop_on_path=start_on_path+size
line=f'{feature.id}\t{size}\t0\t{size}\t{strand}\t{feature_path}\t{path_length}\t{start_on_path}\t{stop_on_path}\t{size}\t{size}\t255\n'
graph_gaf_file.write(line)
# creates a dictionnary with the length of the segments : s5->8
def get_segments_length(segments,verbose):
seg_len={}
total_lines=0
if verbose:
total_lines=len(["" for line in open(segments,"r")])
with open(segments,'r') as segments_file:
for line in tqdm(segments_file, desc="Computing the lengths of the segments", total=total_lines, unit=" segment", disable=not verbose):
line=line.split()
seg_id=line[1]
seg_len[seg_id]=len(line[2])
return seg_len