Skip to content
Snippets Groups Projects
ClassSegFeat.py 4.39 KiB
Newer Older
class Segment:
    """A segment and the features that are located on it.

    Attributes:
        id: identifier of the segment.
        feature_list: list of ``(feature_id, info_index)`` tuples, where
            ``info_index`` is the index in ``feat_info_list`` of the record
            holding that feature's coordinates.
        feat_info_list: list of ``[chr, start, stop, strand]`` records;
            ``start`` and ``stop`` are stored as ``int``. Several features
            may share one record.
    """

    def __init__(self, id, feature_id, chr, start, stop, strand):
        """Create the segment with its first feature.

        ``start`` and ``stop`` are converted with ``int()``, so numeric
        strings are accepted.
        """
        self.id = id
        # list of the features on this segment. if the feature is on the
        # + strand, it will be named '+feature_id', else '-feature_id'
        self.feature_list = [(feature_id, 0)]
        # position on the original genome for the first feature
        self.feat_info_list = [[chr, int(start), int(stop), strand]]

    def add_feature(self, feat_id, chr, start, stop, strand):
        """Register another feature on this segment.

        If an identical ``[chr, start, stop, strand]`` record already
        exists it is reused; otherwise a new record is appended.
        """
        # Normalise coordinates exactly like __init__ does, so that the
        # equality lookup below (and get_size) behave the same whether the
        # caller passes ints or numeric strings.
        feat_info = [chr, int(start), int(stop), strand]
        try:
            info_index = self.feat_info_list.index(feat_info)
        except ValueError:
            info_index = len(self.feat_info_list)
            self.feat_info_list.append(feat_info)
        self.feature_list.append((feat_id, info_index))

    def find_feat(self, feat_id):
        """Look up ``feat_id`` among the features of the segment.

        Returns:
            ``(True, info_index)`` for the first matching entry of
            ``feature_list``, or ``(False, 0)`` when there is none.
        """
        for known_id, info_index in self.feature_list:
            if known_id == feat_id:
                return (True, info_index)
        return (False, 0)

    def _feat_info(self, feat_id):
        """Return the ``[chr, start, stop, strand]`` record of ``feat_id``, or None."""
        found, info_index = self.find_feat(feat_id)
        return self.feat_info_list[info_index] if found else None

    def get_start(self, feat_id):
        """Return the start of ``feat_id``, or None if it is not on this segment."""
        info = self._feat_info(feat_id)
        return None if info is None else info[1]

    def get_stop(self, feat_id):
        """Return the stop of ``feat_id``, or None if it is not on this segment."""
        info = self._feat_info(feat_id)
        return None if info is None else info[2]

    def get_chr(self, feat_id):
        """Return the chr value of ``feat_id``, or None if it is not on this segment."""
        info = self._feat_info(feat_id)
        return None if info is None else info[0]

    def get_strand(self, feat_id):
        """Return the strand of ``feat_id``, or None if it is not on this segment."""
        info = self._feat_info(feat_id)
        return None if info is None else info[3]

    def get_size(self, feat_id):
        """Return ``stop - start + 1`` for ``feat_id``, or None if it is not on this segment."""
        info = self._feat_info(feat_id)
        return None if info is None else info[2] - info[1] + 1

    def __str__(self):
        return f"id={self.id}, features={self.feature_list}, features info={self.feat_info_list}"


class Feature:
    """An annotated feature and its placement on segments.

    NOTE(review): the ``class`` header for this second group of methods was
    missing, which made them override ``Segment.__init__`` and
    ``Segment.__str__``. The class name ``Feature`` is restored here; confirm
    it matches the name used by callers.
    """

    def __init__(self, id, type, chr, start, stop, annot, childs, parent, seg_list, strand, complete, discontinuous):
        """Store the description of the feature.

        ``start`` and ``stop`` are used as given (no conversion) and must
        support ``stop - start + 1``, which is stored as ``size``.
        """
        self.id = id
        self.type = type

        # position on the original genome
        self.chr = chr
        self.start = start
        self.stop = stop
        self.strand = strand
        self.size = stop - start + 1

        self.pos_start = 0  # position on the first segment, setup later
        self.pos_stop = 0  # position on the last segment, setup later

        self.annot = annot
        self.childs = childs  # list of child features (exons, cds, etc)
        self.parent = parent
        # list of oriented segments on which the feature is (>1/<1, depending
        # on the path of the gene in the graph)
        self.segments_list_source = seg_list
        # list of lists, as there can be several occurrences of a feature on
        # a given target genome
        self.segments_list_target = []

        self.note = ""  # from the annotation file
        self.sequence = ""

        # boolean, is the feature initialised with all the info or not
        self.complete = complete

        # boolean, is the feature discontinuous in the gff (split in several
        # lines, like a fragmented cds for instance)
        self.discontinuous = discontinuous
        self.first = ''  # boolean, is it the first part of the feature encountered
        self.first_part = ''  # if first=False, string, the id of the first part encountered
        self.other_parts_list = []  # if first=True, list of the id of the other parts encountered

    def get_child_list(self, Features):
        """Return the ids of all descendants of this feature, depth first.

        Args:
            Features: mapping from feature id to feature object; every id
                reachable through ``childs`` must be a key of it.

        Returns:
            A list where each child id is immediately followed by the ids
            of that child's own descendants.
        """
        list_childs = []
        for child_id in self.childs:
            list_childs.append(child_id)
            # recurse so that the child's childs follow it in the list
            list_childs += Features[child_id].get_child_list(Features)
        return list_childs

    def __str__(self):
        location = (
            f"id={self.id}, type={self.type}, segments={self.segments_list_source}, "
            f"position on the original genome={self.chr}:{self.start}-{self.stop}"
        )
        details = f"childs={self.childs}, annotation={self.annot}"
        if self.parent == "":
            return f"{location}, {details}"
        # A feature with a parent must still return a string: previously this
        # branch was unreachable and __str__ returned None.
        return f"{location}, parent={self.parent}, {details}"