Skip to content
Snippets Groups Projects
genome_transfer.py 5.86 KiB
Newer Older
from .usefull_little_functions import *
from .intersect import invert_seg,Features
from .detect_inversion import detect_feature_inversion
from .Functions import get_id_cov

# outputs the gff line of the target genome for a feature occurence (a feature can be transfered at several positions)
def generate_target_gff(feature_id,seg_size,args,reason_features_not_transfered,match,file_out_gff,segments_on_target_genome):
    feature=Features[feature_id]

    # get list of the segments where it is and the first and last segment of the feature on the new genome
    [first_seg,last_seg,walk,copy_id,feature_target_path]=get_featurePath_ends(match)
    inversion=detect_feature_inversion(feature.segments_list_source,feature_target_path)
    if first_seg!='': # feature present on the target genome

        if inversion:
            size_on_new_genome=get_feature_start_on_target_genome_inv(last_seg,feature_id,walk,copy_id,segments_on_target_genome)-get_feature_stop_on_target_genome_inv(first_seg,feature_id,walk,copy_id,segments_on_target_genome)+1
        else:
            size_on_new_genome=get_feature_stop_on_target_genome(last_seg,feature_id,walk,copy_id,segments_on_target_genome)-get_feature_start_on_target_genome(first_seg,feature_id,walk,copy_id,segments_on_target_genome)+1
        size_diff=abs(size_on_new_genome-feature.size)

        [cov,id]=match[3]

        if (cov*100<args.coverage) or (id*100<args.identity):
            reason_features_not_transfered[1]+=1
            match.append(False) # store the information that this gene copy didnt pass the filters
            return "no"
        else:
            line_gff=create_line_target_gff(first_seg,last_seg,feature_id,size_diff,inversion,walk,cov,id,copy_id,segments_on_target_genome) # transfer the gene
            file_out_gff.write(line_gff)

            # transfer all the gene's childs
            transfer_childs(feature_id,feature_target_path,walk,copy_id,inversion,seg_size,file_out_gff,segments_on_target_genome)

            match.append(True) # store the information that this gene copy was transfered
            return size_diff
    else :
        reason_features_not_transfered[0]+=1
        match.append(False) # store the information that this gene copy didnt pass the filters
        return "no"
    

def transfer_childs(feature_id,feature_target_path,walk,copy_id,inversion,seg_size,file_out_gff,segments_on_target_genome):
    childs_list=Features[feature_id].get_child_list(Features)
    for child_id in childs_list:
        child=Features[child_id]
        # look for the first and last seg of the child in its parent path
        [child_first_seg,child_last_seg]=['','']
        for seg in child.segments_list_source: # get first seg in the parent path
            if seg in feature_target_path: # parent path
                child_first_seg=seg
                break
            elif invert_seg(seg) in feature_target_path:
                child_first_seg=invert_seg(seg)
                break

        if child_first_seg!='': # check that the child feature is not absent in this occurence of the parent feature
            for seg in reversed(child.segments_list_source): # get last seg in the parent path
                if seg in feature_target_path: # parent path
                    child_last_seg=seg
                    break
                elif invert_seg(seg) in feature_target_path:
                    child_last_seg=invert_seg(seg)
                    break

            if inversion: # calculate the child feature size
                child_size_on_new_genome=get_feature_start_on_target_genome_inv(child_last_seg,child_id,walk,copy_id,segments_on_target_genome)-get_feature_stop_on_target_genome_inv(child_first_seg,child_id,walk,copy_id,segments_on_target_genome)+1
            else:
                child_size_on_new_genome=get_feature_stop_on_target_genome(child_last_seg,child_id,walk,copy_id,segments_on_target_genome)-get_feature_start_on_target_genome(child_first_seg,child_id,walk,copy_id,segments_on_target_genome)+1
            child_size_diff=abs(child_size_on_new_genome-child.size)

            for match in child.segments_list_target: # look for the right occurence of the child feature
                if (match[0]==walk) and (match[1]==copy_id):
                    seg_list=match[2]
                    [cov,id]=get_id_cov(child_id,seg_size,seg_list)
                    break

            if inversion:
                temp=child_first_seg
                child_first_seg=child_last_seg
                child_last_seg=temp

            # transfer the child feature:
            line_gff=create_line_target_gff(child_first_seg,child_last_seg,child_id,child_size_diff,inversion,walk,cov,id,copy_id,segments_on_target_genome)
            file_out_gff.write(line_gff)


# generates the line for the gff of the target genome
def create_line_target_gff(first_seg,last_seg,feature_id,size_diff,inversion,walk,cov,id,copy_id,segments_on_target_genome):
    [chr,strand,feature]=[get_seg_occ(first_seg,walk,feature_id,copy_id,segments_on_target_genome)[0],Features[feature_id].strand,Features[feature_id]]

    annotation=f'{feature.annot};Size_diff={size_diff};coverage={cov};sequence_ID={id}' # Nb_variants={var_count};

    if inversion:
        start=get_feature_start_on_target_genome_inv(last_seg,feature_id,walk,copy_id,segments_on_target_genome)
        stop=get_feature_stop_on_target_genome_inv(first_seg,feature_id,walk,copy_id,segments_on_target_genome)
        strand=invert_strand(strand)
    else:
        start=get_feature_start_on_target_genome(first_seg,feature_id,walk,copy_id,segments_on_target_genome)
        stop=get_feature_stop_on_target_genome(last_seg,feature_id,walk,copy_id,segments_on_target_genome)

    if start>stop:
        temp=start
        start=stop
        stop=temp

    if strand=='':
        strand='.'

    output_line=f'{chr}\tGrAnnoT\t{feature.type}\t{start}\t{stop}\t.\t{strand}\t.\t{annotation}\n'
    return output_line