Skip to content
Snippets Groups Projects
graph_annot.py 3.18 KiB
Newer Older
from .load_intersect import Segments,Features,find_part_segment_source
from .usefull_little_functions import *
from tqdm import tqdm

# functions to generate the graph's gff from the segments and features created with create_seg_feat

# generates the annotation for the gff of the graph, with the rank of the segment in the feature
def make_annot_graph(feature_id,segment):
    # get the rank and the total number of ocurrences for the feature
    feature=Features[find_part_segment_source(segment,feature_id)]
    rank=str(feature.segments_list_source.index(segment)+1)
    total=str(len(feature.segments_list_source))
    # create the annotation with the rank information
    annotation=feature.annot+";Rank_occurrence="+rank+";Total_occurrences="+total
    return annotation

# create the line for the gff of the graph
def create_line_graph_gff(feature_stranded,segment):
    [strand,feature_id]=[feature_stranded[0:1],feature_stranded[1:]]

    annotation=make_annot_graph(feature_id,segment)
    type=Features[feature_id].type
    start=get_feature_start_on_segment(segment,feature_id)
    stop=get_feature_stop_on_segment(segment,feature_id)
    line=segment+"\tGrAnnot\t"+type+"\t"+str(start)+"\t"+str(stop)+"\t.\t"+strand+"\t.\t"+annotation+"\n"
    return line

# go through all the segments in Segments and prints the gff of the graph, with one line per segment/feature intersection
def graph_gff(graph_gff_path,verbose):
    if not verbose:
        print("Generation of the graph's gff")

    with open(graph_gff_path,'w') as graph_gff_file:
        for segment_stranded in tqdm(Segments,desc="Generation of the graph's gff",unit=" segment",disable=not verbose):
            features_on_seg=Segments[segment_stranded].feature_list # get the list of the features on the segment
            for feature_tuple in features_on_seg: # go through all the segment's features, and print the gff line for each
                feature_id=feature_tuple[0]
                strand=Segments[segment_stranded].get_strand(feature_id)
                feature_stranded=strand+feature_id
                line = create_line_graph_gff(feature_stranded,segment_stranded)
                graph_gff_file.write(line)

# go through all the features in Features and print the gaf of the graph, with one line per feature
def graph_gaf(graph_gaf_path,seg_len,verbose):
    if not verbose:
        print("Generation of the graph's gaf")

    with open(graph_gaf_path,'w') as graph_gaf_file:
        for feature_id in tqdm(Features,desc="Generation of the graph's gaf",unit=" feature",disable=not verbose):
            feature=Features[feature_id]
            feature_segments=feature.segments_list_source
            feature_path=""
            path_length=0
            for segment in feature_segments:
                feature_path+=segment
                path_length+=seg_len[segment[1:]]
            strand=feature.strand
            size=feature.size
            start_on_path=feature.pos_start-1 # -1 to get the position 0-based bed-like
            stop_on_path=start_on_path+size

            line=f'{feature.id}\t{size}\t0\t{size}\t{strand}\t{feature_path}\t{path_length}\t{start_on_path}\t{stop_on_path}\t{size}\t{size}\t255\n'
            graph_gaf_file.write(line)