from .ClassSegFeat import * from .load_intersect import search_segment,Segments,Features from tqdm import tqdm # functions to generate the graph's gff from the segments and features created with create_seg_feat # get the feature's start position on the segment (among segments on the source genome) def get_feature_start_on_segment(seg_id,feat_id): s=Segments[search_segment(seg_id)] f=Features[find_part_segment_source(s.id,feat_id)] if s.get_start(f.id)>=f.start: result=1 else: result=f.start-s.get_start(f.id)+1 return result # get the feature's stop position on the segment (among segments on the source genome) def get_feature_stop_on_segment(seg_id,feat_id): s=Segments[search_segment(seg_id)] f=Features[find_part_segment_source(s.id,feat_id)] if s.get_stop(f.id)<=f.stop: result=s.get_size(f.id) else: result=f.stop-s.get_start(f.id)+1 return result # generates the annotation for the gff of the graph, with the rank of the segment in the feature def make_annot_graph(feature_id,segment): # get the rank and the total number of ocurrences for the feature feature=Features[find_part_segment_source(segment,feature_id)] rank=str(feature.segments_list_source.index(segment)+1) total=str(len(feature.segments_list_source)) # create the annotation with the rank information annotation=feature.annot+";Rank_occurrence="+rank+";Total_occurrences="+total return annotation def find_part_segment_source(segment,feature_id): first_feature=Features[feature_id] if segment in first_feature.segments_list_source: return feature_id else: list_parts=first_feature.other_parts_list for part in list_parts: if segment in Features[part].segments_list_source: return part # create the line for the gff of the graph def create_line_graph_gff(feature_stranded,segment): [strand,feature_id]=[feature_stranded[0:1],feature_stranded[1:]] annotation=make_annot_graph(feature_id,segment) type=Features[feature_id].type start=get_feature_start_on_segment(segment,feature_id) stop=get_feature_stop_on_segment(segment,feature_id) line=segment+"\tGrAnnot\t"+type+"\t"+str(start)+"\t"+str(stop)+"\t.\t"+strand+"\t.\t"+annotation+"\n" return line # go through all the segments in Segments and prints the gff of the graph, with one line per segment/feature intersection def graph_gff(graph_gff_path,verbose): if not verbose: print("Generation of the graph's gff") with open(graph_gff_path,'w') as graph_gff_file: for segment_stranded in tqdm(Segments,desc="Generation of the graph's gff",unit=" segment",disable=not verbose): features_on_seg=Segments[segment_stranded].feature_list # get the list of the features on the segment for feature_tuple in features_on_seg: # go through all the segment's features, and print the gff line for each feature_id=feature_tuple[0] strand=Segments[segment_stranded].get_strand(feature_id) feature_stranded=strand+feature_id line = create_line_graph_gff(feature_stranded,segment_stranded) graph_gff_file.write(line) # go through all the features in Features and print the gaf of the graph, with one line per feature def graph_gaf(graph_gaf_path,seg_len,verbose): if not verbose: print("Generation of the graph's gaf") with open(graph_gaf_path,'w') as graph_gaf_file: for feature_id in tqdm(Features,desc="Generation of the graph's gaf",unit=" feature",disable=not verbose): feature=Features[feature_id] feature_segments=feature.segments_list_source feature_path="" path_length=0 for segment in feature_segments: feature_path+=segment path_length+=seg_len[segment[1:]] strand=feature.strand size=feature.size start_on_path=feature.pos_start-1 # -1 to get the position 0-based bed-like stop_on_path=start_on_path+size line=f'{feature.id}\t{size}\t0\t{size}\t{strand}\t{feature_path}\t{path_length}\t{start_on_path}\t{stop_on_path}\t{size}\t{size}\t255\n' graph_gaf_file.write(line) # creates a dictionnary with the length of the segments : s5->8 def get_segments_length(segments,verbose): seg_len={} total_lines=0 if verbose: total_lines=len(["" for line in open(segments,"r")]) with open(segments,'r') as segments_file: for line in tqdm(segments_file, desc="Computing the lengths of the segments", total=total_lines, unit=" segment", disable=not verbose): line=line.split() seg_id=line[1] seg_len[seg_id]=len(line[2]) return seg_len