from .usefull_little_functions import *
from .intersect import Features,Segments,search_segment,invert_seg
from .detect_inversion import detect_feature_inversion,invert_segment_list

# functions to get the detail of the variations in the features

# handle the variations analysis between the feature on the source genome and the feature on the target genome
def generate_variations_details(feature_id,seg_seq,match,file_out_var,segments_on_target_genome):
    # for each feature, get list of the segments where it is and the first and last segment of the feature on the new genome
    [first_seg,last_seg,walk,copy_id,feature_target_path]=get_featurePath_ends(match)

    # do it for the gene, then for its childs.
    if match[4]==True: # gene passed the filters
        print_variations(feature_id,first_seg,last_seg,copy_id,walk,seg_seq,feature_target_path,file_out_var,segments_on_target_genome)

        inversion=detect_feature_inversion(Features[feature_id].segments_list_source,feature_target_path)

        # print child feature var
        childs_list=Features[feature_id].get_child_list(Features)
        for child_id in childs_list:
            child=Features[child_id]
            # look for the first and last seg of the child in its parent path
            [child_first_seg,child_last_seg]=['','']
            for seg in child.segments_list_source: # get first seg in the parent path
                if seg in feature_target_path: # parent path
                    child_first_seg=seg
                    break
                elif invert_seg(seg) in feature_target_path:
                    child_first_seg=invert_seg(seg)
                    break

            if child_first_seg!='': # check that the child feature is not absent in this occurence of the parent feature
                for seg in reversed(child.segments_list_source): # get last seg in the parent path
                    if seg in feature_target_path: # parent path
                        child_last_seg=seg
                        break
                    elif invert_seg(seg) in feature_target_path:
                        child_last_seg=invert_seg(seg)
                        break
                
                if inversion:
                    temp=child_first_seg
                    child_first_seg=child_last_seg
                    child_last_seg=temp
                
                for match in child.segments_list_target: # look for the right occurence of the child feature
                    if (match[0]==walk) and (match[1]==copy_id):
                        child_target_path=match[2]
                        break

                # treat the child feature:
                print_variations(child_id,child_first_seg,child_last_seg,copy_id,walk,seg_seq,child_target_path,file_out_var,segments_on_target_genome)

def print_variations(feature_id,first_seg,last_seg,copy_id,walk,seg_seq,feature_target_path,file_out_var,segments_on_target_genome):

    if first_seg!='': # if the feature is not completly absent        # add the else, output absent features
        [variation,feature_path_source_genome,feature_path_target_genome]=create_var(feature_id,first_seg,last_seg,walk,copy_id,feature_target_path,segments_on_target_genome) # removes the strands in the segment lists
        feature=Features[feature_id]
        feat_start=feature.start
        # loop to go through both paths with i and j
        [i,j,var_count]=[0,0,0]

        # detect and print variations ignoring the strands
        start_feat_seg_target=feature_path_target_genome[0]
        while (i<len(feature_path_source_genome)) and (j<len(feature_path_target_genome)):
            if feature_path_source_genome[i] != feature_path_target_genome[j]: # if there is a difference between the two paths

                if feature_path_target_genome[j] not in feature_path_source_genome: # if the segment in target genome is absent in source genome
                    if feature_path_source_genome[i] not in feature_path_target_genome: # if the segment in source genome is absent is target genome

                        # if both segments are absent in the other genome, its a substitution
                        variation.last_seg_in_target=feature_path_target_genome[j]
                        if (variation.type=='insertion') or (variation.type=='deletion'): # print the current variation before treating the substitution
                            line=print_current_var(variation,feat_start,start_feat_seg_target,feature_id,walk,copy_id,segments_on_target_genome)
                            file_out_var.write(line)
                            reset_var(variation)
                            var_count+=1
                        if variation.type=='substitution':
                            continue_var(variation,seg_seq,feature_path_source_genome,feature_path_target_genome,i,j,0)
                        else: # initiate substitution
                            init_new_var(variation,"substitution",feature_path_source_genome,feature_path_target_genome,i,j,seg_seq,feature)
                        i+=1;j+=1

                    else: # target genome segment not in source_genome, but source_genome segment in target genome : insertion or continue substitution
                        if variation.type=='deletion': # print the current variation before treating the insertion
                            line=print_current_var(variation,feat_start,start_feat_seg_target,feature_id,walk,copy_id,segments_on_target_genome)
                            file_out_var.write(line)
                            reset_var(variation)
                            var_count+=1
                        variation.last_seg_in_target=feature_path_target_genome[j]
                        if variation.type=='insertion':
                            continue_var(variation,seg_seq,feature_path_source_genome,feature_path_target_genome,i,j,0)
                        elif variation.type=="substitution":
                            while feature_path_target_genome[j]!=feature_path_source_genome[i]:
                                continue_var(variation,seg_seq,feature_path_source_genome,feature_path_target_genome,i,j,2)
                                j+=1
                            line=print_current_var(variation,feat_start,start_feat_seg_target,feature_id,walk,copy_id,segments_on_target_genome)
                            file_out_var.write(line)
                            reset_var(variation)
                            var_count+=1
                            j-=1
                        else: # intiate insertion
                            init_new_var(variation,"insertion",feature_path_source_genome,feature_path_target_genome,i,j,seg_seq,feature)
                        j+=1

                elif feature_path_source_genome[i] not in feature_path_target_genome: # source_genome segment not in target genome, but target genome segment in source_genome : deletion or continue substitution
                    if variation.type=='insertion': # print the current variation before treating the deletion
                        line=print_current_var(variation,feat_start,start_feat_seg_target,feature_id,walk,copy_id,segments_on_target_genome)
                        file_out_var.write(line)
                        reset_var(variation)
                        var_count+=1
                    if variation.type=='deletion':
                        continue_var(variation,seg_seq,feature_path_source_genome,feature_path_target_genome,i,j,0)
                    elif variation.type=="substitution":
                        while feature_path_target_genome[j]!=feature_path_source_genome[i]:
                            continue_var(variation,seg_seq,feature_path_source_genome,feature_path_target_genome,i,j,1)
                            i+=1
                        line=print_current_var(variation,feat_start,start_feat_seg_target,feature_id,walk,copy_id,segments_on_target_genome)
                        file_out_var.write(line)
                        reset_var(variation)
                        var_count+=1
                        i-=1
                    else: # intiate deletion
                        init_new_var(variation,"deletion",feature_path_source_genome,feature_path_target_genome,i,j,seg_seq,feature)
                    i+=1

                else : # if both segments are present in the other genome but not at the same position. weird case never found yet
                    # can be a substitution. check later if its not an inversion
                    variation.last_seg_in_target=feature_path_target_genome[j]
                    if (variation.type=='insertion') or (variation.type=='deletion'): # print the current variation before treating the substitution
                        line=print_current_var(variation,feat_start,start_feat_seg_target,feature_id,walk,copy_id,segments_on_target_genome)
                        file_out_var.write(line)
                        reset_var(variation)
                        var_count+=1
                    if variation.type=='substitution':
                        continue_var(variation,seg_seq,feature_path_source_genome,feature_path_target_genome,i,j,0)
                    else: # initiate substitution
                        init_new_var(variation,"substitution",feature_path_source_genome,feature_path_target_genome,i,j,seg_seq,feature)
                    i+=1;j+=1

            else: # segment present in both, no variation. print the running indel if there is one
                if variation.type!='': # print the current variation if there is one
                    line=print_current_var(variation,feat_start,start_feat_seg_target,feature_id,walk,copy_id,segments_on_target_genome)
                    file_out_var.write(line)
                    var_count+=1
                    reset_var(variation)
                variation.last_seg_in_target=feature_path_target_genome[j]
                i+=1;j+=1

        if (variation.type!=''): # if there was a current variation when we reached the end, print it
            line=print_current_var(variation,feat_start,start_feat_seg_target,feature_id,walk,copy_id,file_out_var,segments_on_target_genome)
            file_out_var.write(line)
            var_count+=1
            reset_var(variation)

        if i<=len(feature_path_source_genome)-1: # if we didn't reach the length of the segment list for the first genome, the end is missing for the second genome
            line=print_last_deletion(variation,feature_path_source_genome,i,feat_start,feature,seg_seq)
            file_out_var.write(line)
            var_count+=1
            reset_var(variation)

        if var_count==0: # if no variation was encountered
            line=print_novar(variation)
            file_out_var.write(line)

# print a variation
def print_current_var(variation,feat_start,start_feat_seg_target,feat,walk,copy_id,segments_on_target_genome):
    warning=''
    if variation.type=='insertion':
        [pos_old,pos_new]=get_old_new_pos_insertion(variation,feat_start,start_feat_seg_target,feat,walk,copy_id,segments_on_target_genome)
        line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{print_inversion(variation.inversion)}\t{variation.size_diff}\tinsertion\t-\t{variation.alt}\t{len(variation.alt)}\t{pos_old}\t{pos_new}{warning}\n'
        return line
    elif variation.type=='deletion':
        [pos_old,pos_new]=get_old_new_pos_deletion(variation,feat_start,start_feat_seg_target,feat,walk,copy_id,segments_on_target_genome)
        line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{print_inversion(variation.inversion)}\t{variation.size_diff}\tdeletion\t{variation.ref}\t-\t{len(variation.ref)}\t{pos_old}\t{pos_new}{warning}\n'
        return line
    elif variation.type=='substitution':
        warning=detect_small_inversion(variation)
        [pos_old,pos_new]=get_old_new_pos_substitution(feat_start,variation,start_feat_seg_target,feat,walk,copy_id,segments_on_target_genome)

        size_subs=f'{len(variation.ref)}/{len(variation.alt)}'
        line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{print_inversion(variation.inversion)}\t{variation.size_diff}\tsubstitution\t{variation.ref}\t{variation.alt}\t{size_subs}\t{pos_old}\t{pos_new}{warning}\n'
        
        # print the substitutions of different size as deletion+insertion.
        #if len(variation.ref) == len(variation.alt): # if the substituion is between two segment of the same size, print it
        #    size_subs=len(variation.ref)
        #    line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{inversion}\t{variation.size_diff}\tsubstitution\t{variation.ref}\t{variation.alt}\t{size_subs}\t{pos_old}\t{pos_new}{warning}\n'
        #else :
        #    # if the segments of the substitution have a different size, print deletion then insertion at the same position.
        #    line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{inversion}\t{variation.size_diff}\tdeletion\t{variation.ref}\t-\t{len(variation.ref)}\t{pos_old}\t{pos_new}{warning}\n'
        #    line+=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{inversion}\t{variation.size_diff}\tinsertion\t-\t{variation.alt}\t{len(variation.alt)}\t{pos_old}\t{pos_new}{warning}\n'
        return line

# check if a substitution could be an inversion inside a feature
def detect_small_inversion(variation):
    [list_ref_common,list_alt_common]=[list(),list()]
    list_ref_unstrand=[segment_stranded[1:] for segment_stranded in variation.seg_ref]
    list_alt_unstrand=[segment_stranded[1:] for segment_stranded in variation.seg_alt]
    for seg in variation.seg_ref:
        if seg[1:] in list_alt_unstrand:
            list_ref_common.append(seg)
    for seg in variation.seg_alt:
        if seg[1:] in list_ref_unstrand:
            list_alt_common.append(seg)
    if (len(list_ref_common)>len(list_ref_unstrand)*0.5) and (len(list_alt_common)>len(list_alt_unstrand)*0.5):
        return f'\t# Suspected inversion within this substitution.'
    else:
        return ''

# print a deletion at the end of a feature
def print_last_deletion(variation,feature_path_source_genome,i,feat_start,feature,seg_seq):
    seg_del=search_segment(feature_path_source_genome[i])
    pos_old=int(Segments[seg_del].get_start(feature.id))-int(feat_start)+1

    del_sequence=get_sequence_list_seg(feature_path_source_genome,i,feature,seg_seq)
    length=len(del_sequence)
    pos_new=str(int(variation.size_new)+1) # the deletion is at the end of the feature on the new genome

    if variation.inversion:
        inversion='1'
    else:
        inversion='0'

    line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{inversion}\t{variation.size_diff}\tdeletion\t{del_sequence}\t-\t{length}\t{pos_old}\t{pos_new}\n'
    return line

# print a feature with no variation
def print_novar(variation):
    line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{print_inversion(variation.inversion)}\t{variation.size_diff}\tno_var\t-\t-\t-\t-\t-\n'
    return line

# get a digit for the inversion (convert bool in int)
def print_inversion(bool):
    if bool==True:
        return '1'
    else:
        return '0'

# initiate a Variation object with the information on the feature it is on
def create_var(feature_id,first_seg,last_seg,walk,copy_id,feature_target_path,segments_on_target_genome):
    feature=Features[feature_id]
    # get feature paths on the original genome and on the target genome
    feature_path_target_genome=feature_target_path
    feature_path_source_genome=feature.segments_list_source
    inversion=detect_feature_inversion(feature_path_source_genome,feature_path_target_genome)

    if inversion:
        feature_path_target_genome=invert_segment_list(feature_path_target_genome)
        stop_new_genome=get_feature_start_on_target_genome_inv(last_seg,feature_id,walk,copy_id,segments_on_target_genome)
        start_new_genome=get_feature_stop_on_target_genome_inv(first_seg,feature_id,walk,copy_id,segments_on_target_genome)
    else:
        start_new_genome=get_feature_start_on_target_genome(first_seg,feature_id,walk,copy_id,segments_on_target_genome)
        stop_new_genome=get_feature_stop_on_target_genome(last_seg,feature_id,walk,copy_id,segments_on_target_genome)
    size_new_genome=stop_new_genome-start_new_genome+1
    size_diff=str(size_new_genome-feature.size)
    sequence_name=get_seg_occ(first_seg,walk,feature_id,copy_id,segments_on_target_genome)[0]

    variation=Variation(feature.id,feature_id,feature.type,sequence_name,start_new_genome,stop_new_genome,inversion,size_diff,size_new_genome)
    return(variation,feature_path_source_genome,feature_path_target_genome)

# reset the informations of the variation, but keep the information about the feature
def reset_var(variation):
    variation.type='' # make type enumerate
    variation.size_var=0
    variation.start_var=''
    variation.start_var_index=0
    variation.ref=''
    variation.alt=''

# find the position of a substitution on the source and the target sequence
def get_old_new_pos_substitution(feat_start,variation,start_feat_seg_target,feat,walk,copy_id,segments_on_target_genome):
    seg_pos=search_segment(variation.start_var)
    pos_old=str(int(Segments[seg_pos].get_start(Features[feat].id))-int(feat_start))

    var_start_seg=variation.start_on_target
    if variation.inversion:
        start_feat_seg_target=invert_seg(start_feat_seg_target)
        var_start_seg=invert_seg(var_start_seg)
        end_var=get_seg_occ(var_start_seg,walk,feat,copy_id,segments_on_target_genome)[2]
        start_feat=get_feature_start_on_target_genome_inv(start_feat_seg_target,feat,walk,copy_id,segments_on_target_genome)
        pos_new=str(start_feat-end_var)
    else:
        start_var=get_seg_occ(var_start_seg,walk,feat,copy_id,segments_on_target_genome)[1]
        start_feat=get_feature_start_on_target_genome(start_feat_seg_target,feat,walk,copy_id,segments_on_target_genome)
        pos_new=str(start_var-start_feat)
    return [pos_old,pos_new] # pos_old and pos_new are the base before the change

# find the position of an insertion on the source and the target sequence
def get_old_new_pos_insertion(variation,feat_start,start_feat_seg_target,feat,walk,copy_id,segments_on_target_genome):
    seg_pos=search_segment(variation.start_var) # start_var is the segment AFTER the insertion
    pos_old=str(int(Segments[seg_pos].get_start(Features[feat].id))-int(feat_start))

    start_var_seg=variation.start_var
    if variation.inversion:
        start_feat_seg_target=invert_seg(start_feat_seg_target)
        start_var_seg=invert_seg(start_var_seg)
        end_var=get_seg_occ(start_var_seg,walk,feat,copy_id,segments_on_target_genome)[2]+len(variation.alt) # start_var_seg is the segment AFTER the insertion
        start_feat=get_feature_start_on_target_genome_inv(start_feat_seg_target,feat,walk,copy_id,segments_on_target_genome)
        pos_new=str(start_feat-end_var)
    else:
        start_var=get_seg_occ(start_var_seg,walk,feat,copy_id,segments_on_target_genome)[1]-len(variation.alt) # start_var_seg is the segment AFTER the insertion
        start_feat=get_feature_start_on_target_genome(start_feat_seg_target,feat,walk,copy_id,segments_on_target_genome)
        pos_new=str(start_var-start_feat)
    return [pos_old,pos_new] # pos_old and pos_new are the base before the change

# find the position of a deletion on the source and the target sequence
def get_old_new_pos_deletion(variation,feat_start,start_feat_seg_target,feat,walk,copy_id,segments_on_target_genome):
    i=variation.start_var_index
    seg_pos=search_segment(variation.start_var)
    if i==0:
        pos_old=int(Segments[seg_pos].get_start(Features[feat].id))-int(feat_start)+Features[feat].pos_start-1
    else:
        pos_old=int(Segments[seg_pos].get_start(Features[feat].id))-int(feat_start)
        if pos_old<0:
            pos_old=0
            print("error with variation position",variation.inversion,"***")

    if variation.last_seg_in_target=="": # deletion of the beggining of the feature, so no segment placed in the new genome yet. 
        pos_new=0
    else:
        start_var_seg=variation.last_seg_in_target
        if variation.inversion:
            start_feat_seg_target=invert_seg(start_feat_seg_target)
            start_var_seg=invert_seg(start_var_seg)
            start_var=get_seg_occ(start_var_seg,walk,feat,copy_id,segments_on_target_genome)[1]-1
            start_feat=get_feature_start_on_target_genome_inv(start_feat_seg_target,feat,walk,copy_id,segments_on_target_genome)
            pos_new=str(start_feat-start_var)
        else:
            start_var=get_seg_occ(start_var_seg,walk,feat,copy_id,segments_on_target_genome)[2]+1
            start_feat=get_feature_start_on_target_genome(start_feat_seg_target,feat,walk,copy_id,segments_on_target_genome)
            pos_new=str(start_var-start_feat)
    return [pos_old,pos_new] # pos_old and pos_new are the base before the change

# change the variation information, but keep the feature information (the variation is on the feature)
def init_new_var(variation,type,feature_path_source_genome,feature_path_target_genome,i,j,seg_seq,feature):
    variation.type=type
    variation.start_var=feature_path_source_genome[i]
    variation.start_var_index=i
    if type=="substitution":
        variation.start_on_target=feature_path_target_genome[j]
        variation.ref=get_segment_sequence(seg_seq,feature_path_source_genome[i])
        variation.alt=get_segment_sequence(seg_seq,feature_path_target_genome[j])
        variation.seg_ref.append(feature_path_source_genome[i])
        variation.seg_alt.append(feature_path_target_genome[j])
    elif type=="insertion":
        variation.ref="-"
        variation.alt=get_segment_sequence(seg_seq,feature_path_target_genome[j])
        variation.seg_alt.append(feature_path_target_genome[j])
    elif type=="deletion":
        if i==0: # if the deletion is at the start of the feature, the deletion doesnt always start at the start at the first segment : 
            #use pos_start, position of the feature on its first segment
            variation.ref=get_segment_sequence(seg_seq,feature_path_source_genome[i])[feature.pos_start-1:]
            variation.seg_ref.append(feature_path_source_genome[i])
        else: # else, the deletion will always start at the start of the first segment.
            variation.ref=get_segment_sequence(seg_seq,feature_path_source_genome[i])
            variation.seg_ref.append(feature_path_source_genome[i])
        variation.alt="-"

# update the variation
def continue_var(variation,seg_seq,feature_path_source_genome,feature_path_target_genome,i,j,genome_to_continue):
    if variation.type=="substitution":
        if genome_to_continue==0: # genome_to_continue allows to choose if the substitution continues for the original or the target genome, or both.
            variation.ref+=get_segment_sequence(seg_seq,feature_path_source_genome[i])
            variation.alt+=get_segment_sequence(seg_seq,feature_path_target_genome[j])
            variation.seg_ref.append(feature_path_source_genome[i])
            variation.seg_alt.append(feature_path_target_genome[j])
        elif genome_to_continue==1: # deletion
            variation.ref+=get_segment_sequence(seg_seq,feature_path_source_genome[i])
            variation.seg_ref.append(feature_path_source_genome[i])
        elif genome_to_continue==2: # insertion
            variation.alt+=get_segment_sequence(seg_seq,feature_path_target_genome[j])
            variation.seg_alt.append(feature_path_target_genome[j])
    elif variation.type=="insertion":
        variation.alt+=get_segment_sequence(seg_seq,feature_path_target_genome[j])
        variation.seg_alt.append(feature_path_target_genome[j])
    elif variation.type=="deletion":
        variation.ref+=get_segment_sequence(seg_seq,feature_path_source_genome[i])
        variation.seg_ref.append(feature_path_source_genome[i])

# stores information about a feature and its current variation
class Variation:
    def __init__(self,feature_id,feature_key,feature_type,chr,start_new,stop_new,inversion,size_diff,size_new):
        self.feature_id=feature_id # real id in the gff
        self.feature_key=feature_key # key to access the feature info in the dict Features
        self.feature_type=feature_type
        self.chr=chr
        self.start_new=start_new
        self.stop_new=stop_new
        self.inversion=inversion
        self.size_diff=size_diff
        self.size_new=size_new
        self.type=''
        self.last_seg_in_target=''
        self.seg_ref=list()
        self.seg_alt=list()

# outputs the nucleotide sequence of a list of segments, corresponding to the end of a feature
def get_sequence_list_seg(list_seg,i,feature,seg_seq):
    del_sequence=""
    for k in range(i,len(list_seg)):
        if k==len(list_seg)-1:
            del_sequence+=get_segment_sequence(seg_seq,list_seg[k])[0:feature.pos_stop]
        else:
            del_sequence+=get_segment_sequence(seg_seq,list_seg[k])
    return del_sequence