Skip to content
Snippets Groups Projects
Functions.py 47.3 KiB
Newer Older
from Graph_gff import Segments, Features, get_feature_start_on_segment, get_feature_stop_on_segment, write_line

### functions to generate a genome's gff from the graph's gff

# get the start position of the features on the linear genome, using their coordinates on the graph and the coordinantes of the segments on the genome
def get_feature_start_on_genome(start_seg,feat_id):
    seg_start_pos=segments_on_target_genome[start_seg][1]
    feat_start_pos=get_feature_start_on_segment(start_seg,feat_id)
    start=int(seg_start_pos)+int(feat_start_pos)-1
    return start

# get the stop position of the features on the linear genome, using their coordinates on the graph and the coordinantes of the segments on the genome
def get_feature_stop_on_genome(stop_seg,feat_id):
    seg_start_pos=segments_on_target_genome[stop_seg][1]
    feat_stop_pos=get_feature_stop_on_segment(stop_seg,feat_id)
    stop=int(seg_start_pos)+int(feat_stop_pos)-1
NMarthe's avatar
NMarthe committed


# functions to get the detailed gff with all the fragments of the features

NMarthe's avatar
NMarthe committed
# get the position of a part of a feature on the complete feature (on the original genome)
def get_position_on_feature(start_seg,stop_seg,feature_start_segment,feature):
    start_on_feature=get_segment_start_on_feature(feature_start_segment,start_seg,feature)
    stop_on_feature=get_segment_stop_on_feature(start_seg,stop_seg,feature,start_on_feature)
    position=";position="+str(start_on_feature)+"-"+str(stop_on_feature)
    #position=";start_position_on_feature="+str(start_on_gene)+":stop_position_on_feature="+str(stop_on_gene)
def get_segment_start_on_feature(feature_start_segment,start_seg,feature):
    feature_start_pos=Segments[feature_start_segment].start+get_feature_start_on_segment(feature_start_segment,feature.id)-1
    start_on_reference=Segments[start_seg].start+get_feature_start_on_segment(start_seg,feature.id)-1
    start_on_feature=int(start_on_reference)-int(feature_start_pos)+1
    return start_on_feature

def get_segment_stop_on_feature(start_seg,stop_seg,feature,start_on_feature):
    start_on_new_genome=get_feature_start_on_genome(start_seg,feature.id)
    stop_on_new_genome=get_feature_stop_on_genome(stop_seg,feature.id)
    # length on new genome is the same as length on reference, as we only get the positions of conserved parts of the feature
    stop_on_feature=start_on_feature+(stop_on_new_genome-start_on_new_genome) # stop position : start+length
    return stop_on_feature
NMarthe's avatar
NMarthe committed

# get the proportion of a part of the feature on the total length
def get_proportion_of_feature_part(start_seg,stop_seg,feature):
    start_on_new_genome=get_feature_start_on_genome(start_seg,feature.id)
    stop_on_new_genome=get_feature_stop_on_genome(stop_seg,feature.id)
    # length on new genome is the same as length on reference, as we only get the positions of conserved parts of the feature
    proportion=";proportion="+str(stop_on_new_genome-start_on_new_genome+1)+"/"+str(feature.size)
    #proportion=";number_bases="+str(stop_new_genome-start_new_genome+1)+";total_bases="+str(feature.size)
# returns the gff line to write in the output file for the function gff_detail
def create_line_detail(last_seg,feature_id,start_seg,feat_start):
    [stop_seg,feature,chr,strand]=[last_seg[1:],Features[feature_id],segments_on_target_genome[start_seg][0],last_seg[0:1]]
    # start and stop position of the feature on the genome we transfer on
    start_on_new_genome=get_feature_start_on_genome(start_seg,feature_id) 
    stop_on_new_genome=get_feature_stop_on_genome(stop_seg,feature_id)
    proportion=get_proportion_of_feature_part(start_seg,stop_seg,feature)
    position=get_position_on_feature(start_seg,stop_seg,feat_start,feature)
    annotation=feature.annot+proportion+position
    
    output_line=chr+"\tGrAnnot\t"+feature.type+"\t"+str(start_on_new_genome)+"\t"+str(stop_on_new_genome)+"\t.\t"+strand+"\t.\t"+annotation+"\n"
    return output_line
# outputs each fragment of the feature in a gff
def gff_detail(list_seg,feature_id):
    # loop that goes through all the segments that have the current feature
    # keeps the first segment present in the genome found, and when it finds a segment absent in the genome, prints the current part of the fragment, and resets the first segment present.
    # continues to go through the segments, keeps the next segment present in the genome, and when it finds a segment absent, prints the second part of the feature, etc.
    # at the end of the loop, prints the last part of the fragment.
    [feat_start,seg_start,last_seg]=["","",""] # first segment of the feature, first segment of the current part of the feature, last segment in the part of the feature, for loop below
    for segment in list_seg:
        if segment[1:] in segments_on_target_genome: 
            if feat_start=="":
                feat_start=segment[1:]
            if seg_start=="": # if we dont have a start, take the current segment for the start of the part of the feature
                seg_start=segment[1:]
            #else: if we already have a start, keep going though the segments until we find a stop (segment not in azucena)
        else:
            if seg_start!="": # found a stop and we have a start. print the line, reset seg_start, and keep going through the segments to find the next seg_start
                out_line=create_line_detail(last_seg,feature_id,seg_start,feat_start)
                write_line(out_line,output_detail_gff,False)
                seg_start=""
            #else: if the current segment is not in azucena but there is no start, keep looking for a start
        last_seg=segment    
    if last_seg[1:] in segments_on_target_genome:
        out_line=create_line_detail(list_seg[-1],feature_id,seg_start,feat_start)
        write_line(out_line,output_detail_gff,False)
# functions to get the gff with one line per feature

def right_size(size,max_diff,feat):
    if max_diff==0:
    return not ((size>Features[feat].size*max_diff) | (size<Features[feat].size/max_diff)) 
def create_line_gff_one(first_seg,last_seg,feature_id,size_diff,list_seg):
    [chr,strand,feature]=[segments_on_target_genome[first_seg][0],list_seg[0][0:1],Features[feature_id]]
    variant_count=0
    for segment in list_seg:
        if segment[1:] not in segments_on_target_genome: # if a segment is absent, it is either a deletion of a substitution. insertions are not considered here.
            variant_count+=1
    annotation=feature.annot+";Size_diff="+str(size_diff)+";Nb_variants="+str(variant_count)
    line=chr+"  GrAnnot   "+feature.type+" "+str(get_feature_start_on_genome(first_seg,feature_id))+" "+str(get_feature_stop_on_genome(last_seg,feature_id))+"   .   "+strand+"   .   "+annotation+"\n"
    return line
# outputs the feature once in a gff, from the first to the last segment present on the new genome (if the size is ok) :
def gff_one(first_seg,last_seg,feature_id,list_seg,max_diff):

    if (first_seg!=''): # feature present on the target genome
        size_on_new_genome=get_feature_stop_on_genome(last_seg,feature_id)-get_feature_start_on_genome(first_seg,feature_id)+1
        size_diff=abs(size_on_new_genome-Features[feature_id].size)
        if right_size(size_on_new_genome,max_diff,feature_id): 
            line=create_line_gff_one(first_seg,last_seg,feature_id,size_diff,list_seg)
            write_line(line,output_gff_one,False)
            return size_diff
        else:
            return "bad_size"
    else :
        return "absent"
NMarthe's avatar
NMarthe committed
# functions to output the stats on the transfer

def stats_feature_missing_segment(feature_missing_segments,first_seg,last_seg,list_seg,feature_id):
# [feature_missing_first,feature_missing_middle,feature_missing_last,feature_missing_all,feature_missing_total,feature_total,feature_ok]
    feature_missing_segments[5].append(feature_id)

    if (first_seg=='') : # no segment of the feature is in the genome, the feature is missing entirely
        feature_missing_segments[3].append(feature_id)
    elif first_seg != list_seg[0][1:]: # the first segment is missing 
        feature_missing_segments[0].append(feature_id)
    elif last_seg!=list_seg[-1][1:]: # the last segment is missing
        feature_missing_segments[2].append(feature_id)

    # go through all the segments, check if some are missing in the middle of the feature
    elif (len(list_seg)!=1) & (feature_id not in feature_missing_segments[3]): # to access the second to last element
        for segment in list_seg[1-(len(list_seg)-2)]:
            if segment[1:] not in segments_on_target_genome:
                feature_missing_segments[1].append(feature_id)
                break

    # go through the segments, to see if one is missing anywhere on the feature
    for segment in list_seg:
        if segment[1:] not in segments_on_target_genome:
            if feature_id not in feature_missing_segments[4]:
                feature_missing_segments[4].append(feature_id)
                break

    # if the feature doesnt have a missing segment, it is complete.         ADD THE PATH CHECK FOR INSERTIONS !!
    if feature_id not in feature_missing_segments[4]:
        feature_missing_segments[6].append(feature_id)

def get_annot_features(list_features):
    list_annot_features=[]
    for feature in list_features:
        list_annot_features.append(Features[feature].note)
    return list_annot_features

def count_hypput_total(list_annot_first):
    total=len(list_annot_first)
    count_hypput=0
    for annot in list_annot_first:
        if ("hypothetical" in annot) | ("putative" in annot):
            count_hypput+=1
    return [count_hypput,total]

# print stats on the transfer : number of feature that have segments in different positions missing. 
def stats_features(feature_missing_segments):
# [feature_missing_first,feature_missing_middle,feature_missing_last,feature_missing_all,feature_missing_total,feature_total,feature_ok]
    list_annot_first=get_annot_features(feature_missing_segments[0])
    [hyp_put,total]=count_hypput_total(list_annot_first)
    print("\nthe first segment is missing for", total,"features, including",round(100*(hyp_put)/total,2),"% hypothetical or putative.")

    list_annot_middle=get_annot_features(feature_missing_segments[1])
    [hyp_put,total]=count_hypput_total(list_annot_middle)
    print("a middle segment is missing for", total,"features, including",round(100*(hyp_put)/total,2),"% hypothetical or putative.")

    list_annot_last=get_annot_features(feature_missing_segments[2])
    [hyp_put,total]=count_hypput_total(list_annot_last)
    print("the last segment is missing for", total,"features, including",round(100*(hyp_put)/total,2),"% hypothetical or putative.")

    list_annot_all=get_annot_features(feature_missing_segments[3])
    [hyp_put,total]=count_hypput_total(list_annot_all)
    print(total,"features are entirely missing, including",round(100*(hyp_put)/total,2),"% hypothetical or putative.")

    list_annot_total=get_annot_features(feature_missing_segments[4])
    [hyp_put,total]=count_hypput_total(list_annot_total)
    print("there is at least one segment missing for", total,"features, including",round(100*(hyp_put)/total,2),"% hypothetical or putative.")

    list_annot_ok=get_annot_features(feature_missing_segments[6])
    [hyp_put,total]=count_hypput_total(list_annot_ok)
    print(total ,"features are entirely present in the new genome, including",round(100*(hyp_put)/total,2),"% hypothetical or putative.")

    list_annot_features=get_annot_features(feature_missing_segments[5])
    [hyp_put,total]=count_hypput_total(list_annot_features)
    print("there is", total,"features in total, including",round(100*(hyp_put)/total,2),"% hypothetical or putative.")



NMarthe's avatar
NMarthe committed
def get_segments_positions_on_genome(pos_seg):
    global segments_on_target_genome
    segments_on_target_genome={}
    bed=open(pos_seg,'r')
    lines=bed.readlines() # read line by line ?
    bed.close()
    for line in lines:
        line=line.split()
        [seg,chrom,start,stop]=[line[3][1:],line[0],line[1],line[2]]
        if line[3][0:1]=='>':
            strand='+'
        elif line[3][0:1]=='<':
            strand='-'
        else:
            strand=''
        segments_on_target_genome[seg]=[chrom,start,stop,strand]

def get_segments_sequence_and_paths(gfa):
    file_gfa=open(gfa,'r')
    lines_gfa=file_gfa.readlines()
    file_gfa.close()
    seg_seq={}
    paths={}
    for line in lines_gfa:
        line=line.split()
        if (line[0]=="S"): # get the sequence of the segment
            seg_id='s'+line[1]
            seg_seq[seg_id]=line[2]

        if (line[0]=="W") & (line[1]!="_MINIGRAPH_"): # get the walk of the genome
            
            path=line[6].replace(">",";>")
            path=path.replace("<",";<").split(';')
            list_path=[]
            for segment in path:
                if segment[0:1]=='>':
                    list_path.append('+s'+segment[1:])
                elif segment[0:1]=='<':
                    list_path.append('-s'+segment[1:])
                else:
                    list_path.append('s'+segment[1:])
 
            paths[line[3]]=list_path
    return [paths,seg_seq]

def get_first_seg(list_seg):
    first_seg_found=''
    for segment in list_seg:
        if segment[1:] in segments_on_target_genome:
            first_seg_found=segment[1:]
            break
    return first_seg_found

def get_all_features_in_gff(gff):
    gff=open(gff,'r')
    lines=gff.readlines()
    gff.close()
    list_feature=[]
    for line in lines:
        feature_id=(line.split()[8].split(";")[0].split("=")[1].replace(".","_").replace(":","_"))
        if feature_id not in list_feature:
            list_feature.append(feature_id)
    return list_feature



# writes the gff of azucena using the gff of the graph
def genome_gff(pos_seg, gff, out, gfa):
    print("generation of the genome's gff ")

    # create variables and open files
    [once,detail,var,stats]=[False,False,True,False]
NMarthe's avatar
NMarthe committed
    if var:
        [paths,seg_seq]=get_segments_sequence_and_paths(gfa)
        file_out_var = open("variations.txt", 'w')
        global output_variations
        output_variations=[0,"",file_out_var]
    if detail:
        file_out_detail = open("azucena_chr10_detail.gff", 'w')
        global output_detail_gff
        output_detail_gff=[0,"",file_out_detail]
    if once:
        file_out=open(out,'w')
        global output_gff_one
        output_gff_one=[0,"",file_out]
        bad_size_features=0
        absent_features=0
        diff_size_transfered_features=[0,0] # [count,sum], to get the average
    get_segments_positions_on_genome(pos_seg)
    list_feature_to_transfer=get_all_features_in_gff(gff) # get the list of all the features to transfer from the gff

    # create objects for stats on how many segments are absent in azucena, their average length, etc
    if stats==True:
        feature_missing_segments=[[],[],[],[],[],[],[]] # [feature_missing_first,feature_missing_middle,feature_missing_last,feature_missing_all,feature_missing_total,feature_total,feature_ok]
        # the fist segment of the feature is missing - feature_missing_first
        # the last segment of the feature is missing - feature_missing_last
        # at least one middle segment of the feature is missing - feature_missing_middle
        # the entire feature is missing - feature_missing_all
        # at least one segment is missing first, last, or middle) - feature_missing_total
        # no segment is missing, the feature is complete - feature_ok  
        # total number of features, with missing segments or not - feature_total

    for feat in list_feature_to_transfer:

        # for each feature, get list of the segments where it is and the first and last segment of the feature on the new genome
        list_seg=Features[feat].segments_list
        first_seg=get_first_seg(list_seg)
        last_seg=get_first_seg(reversed(list_seg))

        if stats==True:
            stats_feature_missing_segment(feature_missing_segments,first_seg,last_seg,list_seg,feat)

NMarthe's avatar
NMarthe committed
        # outputs the feature once in a gff, from the first to the last segment present on the new genome
        if once:
            max_diff=2 # maximum difference size (n times bigger of smaller)
            transfer_stat=gff_one(first_seg,last_seg,feat,list_seg,max_diff) # insertion not considered !!!
            if transfer_stat=="bad_size":
NMarthe's avatar
NMarthe committed
                bad_size_features+=1
            elif transfer_stat=="absent":
NMarthe's avatar
NMarthe committed
                absent_features+=1
            else:
                diff_size_transfered_features[0]+=1
                diff_size_transfered_features[1]+=transfer_stat
NMarthe's avatar
NMarthe committed
            write_line("",output_gff_one,True)

        # outputs each fragment of the feature in a gff
        if detail:
            gff_detail(list_seg,feat) # insertions !
            write_line("",output_detail_gff,True)
NMarthe's avatar
NMarthe committed
        # outputs the detail of variations of the feature :
        if var:
            print_variations_2(first_seg,last_seg,feat,paths,seg_seq)
NMarthe's avatar
NMarthe committed
            write_line("",output_variations,True)

NMarthe's avatar
NMarthe committed
    if detail:
        file_out_detail.close()
    if once:
        file_out.close()
    if var:
        file_out_var.close()


    # print stats
    from statistics import median, mean
    if stats==True:
        if once:
            print(len(Features)-(bad_size_features+absent_features),"out of",len(Features),"features are transfered.")
            print(bad_size_features,"out of",len(Features), "features are not transfered because they are too big or too small compared to the original genome.")
            print(absent_features,"out of",len(Features),"features are not transfered because they are absent in the new genome.")
            print("average length difference of the transfered genes : ",diff_size_transfered_features[1]/diff_size_transfered_features[0])
        stats_features(feature_missing_segments)
        





# functions to get the detail of the variations in the features
def compare_strand(list_1,list_2,list_1_unstrand,list_2_unstrand):
    # get the list of segments in common
    seg_common=[]
    for segment in list_1_unstrand:
        if segment in list_2_unstrand:
            seg_common.append(segment)
389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916
    # for each segment in common, check if the strand is the same. check index in list unstranded to get the segment in list stranded
    same_strand_count=0
    for segment in seg_common:
        index_1=list_1_unstrand.index(segment)
        index_2=list_2_unstrand.index(segment)
        if list_1[index_1]==list_2[index_2]:
            same_strand_count+=1
    return [seg_common,same_strand_count]



def print_variations(first_seg,last_seg,feat,paths,seg_seq):

    if (first_seg!=''): # if the feature is not completly absent        # add the else, output absent features

        var=0 # count variations, to see if there is any
        feature=Features[feat]
        feat_start=feature.start

        # get the lengths of the feature, on the original genome and on the new one
        start_new_genome=get_feature_start_on_genome(first_seg,feat)
        stop_new_genome=get_feature_stop_on_genome(last_seg,feat)
        size_new_genome=int(stop_new_genome)-int(start_new_genome)+1
        size_diff=str(size_new_genome-feature.size)

        # get feature paths on the original genome and on the target genome
        list_segfeat_azu=get_feature_path(paths,first_seg,last_seg)
        list_segfeat_nb=feature.segments_list

        # loop to go through both paths
        i=0
        j=0
        [last,last_taille,last_seq,last_start,last_in_azu]=['',0,'','',''] # rename last_taille 

        # check if there is an inversion and remove strands
        [list_segfeat_nb,list_segfeat_azu,inversion]=detect_inversion(list_segfeat_nb,list_segfeat_azu)

        # detect and print variations ignoring the strands
        while (i<len(list_segfeat_nb)) & (j<len(list_segfeat_azu)):
            if list_segfeat_nb[i] != list_segfeat_azu[j]: # if there is a difference between the two paths
                if list_segfeat_azu[j] not in list_segfeat_nb: # if the segment in azu is absent in nb
                    if list_segfeat_nb[i] not in list_segfeat_azu: # if the segment in nb is absent is azu
                        # is both segments are absent in the other genome, its a substitution
                        last_in_azu=list_segfeat_azu[j]

                        # print if we had an insertion or deletion running
                        if last=='insertion': # last : type énuméré.    # fct compare cas précedent à cas courant, si différent imprimer cas précédent. 
                            [pos_old,pos_new]=get_old_new_pos_insertion(last_start,feat_start,list_segfeat_azu,feat)
                            # line : formated line f"{feat}\t"
                            line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tinsertion\t-\t"+last_seq+"\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
                            write_line(line,output_variations,False)
                            var+=1
                        elif last=='deletion':
                            [pos_old,pos_new]=get_old_new_pos_deletion(last_start,feat_start,list_segfeat_azu,feat,last_in_azu,i)

                            line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tdeletion\t"+last_seq+"\t-\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
                            write_line(line,output_variations,False)                          
                            var+=1
                        last='';last_taille=0;last_seq='';last_start=''

                        # print the substitution        # what if plusieurs substitutions à la suite ? 
                        # substitution of segment list_segfeat_nb[i][1:] by segment list_segfeat_azu[j][1:]
                        [pos_old,pos_new]=get_old_new_pos_substitution(feat_start,list_segfeat_nb,list_segfeat_azu,feat,i,j)

                        if len(seg_seq[list_segfeat_nb[i]]) == len(seg_seq[list_segfeat_azu[j]]): # if the substituion is between two segment of the same size, print it
                            size_subs=len(seg_seq[list_segfeat_nb[i]])
                            line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tsubstitution\t"+seg_seq[list_segfeat_nb[i]]+"\t"+seg_seq[list_segfeat_azu[j]]+"\t"+str(size_subs)+"\t"+str(pos_old)+"\t"+pos_new+"\n"
                        else :
                            # if the segments of the substitution have a different size, print deletion then insertion at the same position.
                            line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tdeletion\t"+seg_seq[list_segfeat_nb[i]]+"\t-\t"+str(len(seg_seq[list_segfeat_nb[i]]))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
                            line+=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tinsertion\t-\t"+seg_seq[list_segfeat_azu[j]]+"\t"+str(len(seg_seq[list_segfeat_azu[j]]))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
                            var+=1
                        write_line(line,output_variations,False)
                        var+=1;i+=1;j+=1

                    else: # azu segment not in nb, but nb segment in azu : insertion
                        if last=='deletion':
                            [pos_old,pos_new]=get_old_new_pos_deletion(last_start,feat_start,list_segfeat_azu,feat,last_in_azu,i)

                            line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tdeletion\t"+last_seq+"\t-\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
                            write_line(line,output_variations,False)
                            var+=1;last='';last_taille=0;last_start='';last_seq=''

                        last_in_azu=list_segfeat_azu[j]

                        if last=='insertion':
                            last_seq=last_seq+seg_seq[list_segfeat_azu[j]]
                        else:
                            last='insertion'
                            last_seq=seg_seq[list_segfeat_azu[j]]
                            last_start=list_segfeat_nb[i]
                        j+=1
                        
                elif list_segfeat_nb[i] not in list_segfeat_azu: # nb segment not in azu, but azu segment in nb : deletion
                    if last=='insertion':
                        [pos_old,pos_new]=get_old_new_pos_insertion(last_start,feat_start,list_segfeat_azu,feat)
                        
                        line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tinsertion\t-\t"+last_seq+"\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
                        write_line(line,output_variations,False)
                        var+=1;last='';last_start='';last_taille=0;last_seq=''

                    if last=='deletion':
                        last_seq=last_seq+seg_seq[list_segfeat_nb[i]]
                    else:
                        last='deletion'
                        last_start=list_segfeat_nb[i]
                        if i==0: # if the deletion is at the start of the feature, the deletion doesnt start at the start at the first segment : 
                                    #use pos_start, position of the feature on its first segment
                            last_seq=seg_seq[list_segfeat_nb[i]][feature.pos_start-1:]
                        else: # else, the deletion will always start at the start of the first segment.
                            last_seq=seg_seq[list_segfeat_nb[i]]
                    i+=1
                    
                else : # idk yet. if both segments are present in the other genome but not at the same position. probably substitution then
                    line="weird order change\n"
                    write_line(line,output_variations,False)
                    var+=1;i+=1;j+=1

            else: # segment present in both. print the running indel if there is one
                if last=='insertion':
                    [pos_old,pos_new]=get_old_new_pos_insertion(last_start,feat_start,list_segfeat_azu,feat)
                    
                    line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tinsertion\t-\t"+last_seq+"\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
                    write_line(line,output_variations,False)
                    var+=1

                elif last=='deletion':
                    [pos_old,pos_new]=get_old_new_pos_deletion(last_start,feat_start,list_segfeat_azu,feat,last_in_azu,i)
                    
                    line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tdeletion\t"+last_seq+"\t-\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
                    write_line(line,output_variations,False)
                    var+=1

                last_in_azu=list_segfeat_azu[j]
                last='';last_taille=0;last_start='';last_seq='';i+=1;j+=1

        # finish printing the current indel
        if last=='insertion':
            [pos_old,pos_new]=get_old_new_pos_insertion(last_start,feat_start,list_segfeat_azu,feat)

            line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tinsertion\t-\t"+last_seq+"\t"+str(last_taille)+"\t"+str(pos_old)+"\t"+pos_new+"\n"
            write_line(line,output_variations,False)
            var+=1

        elif last=='deletion':
            [pos_old,pos_new]=get_old_new_pos_deletion(last_start,feat_start,list_segfeat_azu,feat,last_in_azu,i)

            line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tdeletion\t"+last_seq+"\t-\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
            write_line(line,output_variations,False)
            var+=1

        # see if the end is missing for one of the two genomes
        if not((i>=len(list_segfeat_nb)-1) & (j>=len(list_segfeat_azu)-1)):
            pos_old=int(Segments[list_segfeat_nb[i]].start)-int(feat_start)+1
            del_sequence=get_sequence_list_seg(list_segfeat_nb,i,feature,seg_seq)
            length=len(del_sequence)
            pos_new=str(size_new_genome+1) # the deletion is at the end of the feature on the new genome

            line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tdeletion\t"+del_sequence+"\t-\t"+str(length)+"\t"+str(pos_old)+"\t"+pos_new+"\n"
            write_line(line,output_variations,False)
            var+=1

        if var==0:
            line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tno_var\t-\t-\t-\t-\t-\n"
            write_line(line,output_variations,False)

def get_feature_path(paths,first_seg,last_seg):
    # find the path in azucena. 
    first_strand=segments_on_target_genome[first_seg][3]
    first_seg_stranded=first_strand+first_seg
    last_strand=segments_on_target_genome[last_seg][3]
    last_seg_stranded=last_strand+last_seg
    id_first_seg=int(paths["CM020642.1_Azucena_chromosome10"].index(first_seg_stranded))
    id_last_seg=int(paths["CM020642.1_Azucena_chromosome10"].index(last_seg_stranded))
    first_index=min(id_first_seg,id_last_seg)
    last_index=max(id_last_seg,id_first_seg)
    list_segfeat_azu=paths["CM020642.1_Azucena_chromosome10"][first_index:last_index+1]
    return list_segfeat_azu

def get_sequence_list_seg(list_segfeat_nb,i,feature,seg_seq):
    del_sequence=""
    for k in range(i,len(list_segfeat_nb)):
        if k==len(list_segfeat_nb):
            del_sequence+=seg_seq[list_segfeat_nb[k]][0:feature.pos_stop]
        else:
            del_sequence+=seg_seq[list_segfeat_nb[k]]
    return del_sequence


def get_old_new_pos_substitution(feat_start,list_segfeat_nb,list_segfeat_azu,feat,i,j):
    pos_old=int(Segments[list_segfeat_nb[i]].start)-int(feat_start)+1

    start_feat=get_feature_start_on_genome(list_segfeat_azu[0],feat)
    start_var=int(segments_on_target_genome[list_segfeat_azu[j-1]][2])+1
    pos_new=str(start_var-start_feat+1)
    return [pos_old,pos_new]

def get_old_new_pos_insertion(last_start,feat_start,list_segfeat_azu,feat):
    pos_old=str(int(Segments[last_start].start)-int(feat_start)+1)

    start_feat=get_feature_start_on_genome(list_segfeat_azu[0],feat) 
    start_var=int(segments_on_target_genome[last_start][1])-1
    pos_new=str(start_var-start_feat+1)
    return [pos_old,pos_new]

def get_old_new_pos_deletion(last_start,feat_start,list_segfeat_azu,feat,last_in_azu,i):
    if i==0:
        pos_old=int(Segments[last_start].start)-int(feat_start)+1+Features[feat].pos_start
    else:
        pos_old=int(Segments[last_start].start)-int(feat_start)+1
        if pos_old<0:
            pos_old=0

    if last_in_azu=="": # deletion of the beggining of the feature, so no segment placed in the new genome yet. 
        pos_new="1"
    else:
        start_feat=get_feature_start_on_genome(list_segfeat_azu[0],feat) 
        start_var=int(segments_on_target_genome[last_in_azu][2])+1 
        pos_new=str(start_var-start_feat+1)
    
    return [pos_old,pos_new]






class Variation:
    def __init__(self,feature_id,feature_type,chr,start_new,stop_new,inversion,size_diff):
        self.feature_id=feature_id
        self.feature_type=feature_type
        self.chr=chr
        self.start_new=start_new
        self.stop_new=stop_new
        self.inversion=str(inversion)
        self.size_diff=str(size_diff)
        self.size_new=str(self.stop_new-self.start_new+1)
        self.type=''
        self.last_seg_in_target=''
        
    # ajouter fct pour écrire la ligne. return line.    # line : formated line f"{feat}\t"

    #def __str__(self):
    #    return f"id={self.id}, position on the original genome={self.chr}:{self.start}-{self.stop}, size={self.size}, features={self.features}"

def create_var(feature_id,first_seg,last_seg,paths):
    feature=Features[feature_id]
    start_new_genome=get_feature_start_on_genome(first_seg,feature_id)
    stop_new_genome=get_feature_stop_on_genome(last_seg,feature_id)
    size_new_genome=int(stop_new_genome)-int(start_new_genome)+1
    size_diff=str(size_new_genome-feature.size)

    # get feature paths on the original genome and on the target genome
    list_segfeat_azu=get_feature_path(paths,first_seg,last_seg)
    list_segfeat_nb=feature.segments_list

    [list_segfeat_nb,list_segfeat_azu,inversion]=detect_inversion(list_segfeat_nb,list_segfeat_azu)

    variation=Variation(feature_id,feature.type,feature.chr,start_new_genome,stop_new_genome,inversion,size_diff)

    return(variation,list_segfeat_nb,list_segfeat_azu)

def reset_var(variation):
    variation.type='' # type énuméré.
    variation.size_var=0
    variation.start_var=''
    variation.ref=''
    variation.alt=''

def get_old_new_pos_substitution_2(feat_start,variation,list_segfeat_azu,feat,j):
    #pos_old=int(Segments[list_segfeat_nb[i]].start)-int(feat_start)+1
    pos_old=str(int(Segments[variation.start_var].start)-int(feat_start)+1)
    start_feat=get_feature_start_on_genome(list_segfeat_azu[0],feat)
    start_var=int(segments_on_target_genome[variation.start_on_target][1])
    pos_new=str(start_var-start_feat+1)
    return [pos_old,pos_new]

def get_old_new_pos_insertion_2(variation,feat_start,list_segfeat_azu,feat):
    pos_old=str(int(Segments[variation.start_var].start)-int(feat_start)+1)

    start_feat=get_feature_start_on_genome(list_segfeat_azu[0],feat) 
    start_var=int(segments_on_target_genome[variation.start_var][1])-1
    pos_new=str(start_var-start_feat+1)
    return [pos_old,pos_new]

def get_old_new_pos_deletion_2(variation,feat_start,list_segfeat_azu,feat,i):
    if i==0:
        pos_old=int(Segments[variation.start_var].start)-int(feat_start)+1+Features[feat].pos_start
    else:
        pos_old=int(Segments[variation.start_var].start)-int(feat_start)+1
        if pos_old<0:
            pos_old=0

    if variation.last_seg_in_target=="": # deletion of the beggining of the feature, so no segment placed in the new genome yet. 
        pos_new="1"
    else:
        start_feat=get_feature_start_on_genome(list_segfeat_azu[0],feat) 
        start_var=int(segments_on_target_genome[variation.last_seg_in_target][2])+1 
        pos_new=str(start_var-start_feat+1)
    
    return [pos_old,pos_new]



def print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j):
    if variation.type=='insertion':
        [pos_old,pos_new]=get_old_new_pos_insertion_2(variation,feat_start,list_segfeat_azu,feat)
        line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tinsertion\t-\t"+variation.alt+"\t"+str(len(variation.alt))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
        write_line(line,output_variations,False)
    elif variation.type=='deletion':
        [pos_old,pos_new]=get_old_new_pos_deletion_2(variation,feat_start,list_segfeat_azu,feat,i)
        line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tdeletion\t"+variation.ref+"\t-\t"+str(len(variation.ref))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
        write_line(line,output_variations,False)
    elif variation.type=='substitution':
        [pos_old,pos_new]=get_old_new_pos_substitution_2(feat_start,variation,list_segfeat_azu,feat,j)
        if len(variation.ref) == len(variation.alt): # if the substituion is between two segment of the same size, print it
            size_subs=len(variation.ref)
            line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tsubstitution\t"+variation.ref+"\t"+variation.alt+"\t"+str(size_subs)+"\t"+str(pos_old)+"\t"+pos_new+"\n"
        else :
            # if the segments of the substitution have a different size, print deletion then insertion at the same position.
            line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tdeletion\t"+variation.ref+"\t-\t"+str(len(variation.ref))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
            line+=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tinsertion\t-\t"+variation.alt+"\t"+str(len(variation.alt))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
        write_line(line,output_variations,False)

def print_last_deletion(variation,list_segfeat_nb,list_segfeat_azu,i,feat_start,feature,seg_seq):
    pos_old=int(Segments[list_segfeat_nb[i]].start)-int(feat_start)+1
    del_sequence=get_sequence_list_seg(list_segfeat_nb,i,feature,seg_seq)
    length=len(del_sequence)
    pos_new=str(int(variation.size_new)+1) # the deletion is at the end of the feature on the new genome

    line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tdeletion\t"+del_sequence+"\t-\t"+str(length)+"\t"+str(pos_old)+"\t"+pos_new+"\n"
    write_line(line,output_variations,False)



def print_novar(variation):
    line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tno_var\t-\t-\t-\t-\t-\n"
    write_line(line,output_variations,False)


def init_new_var(variation,type,list_segfeat_nb,list_segfeat_azu,i,j,seg_seq,feature):
    variation.type=type
    variation.start_var=list_segfeat_nb[i]
    if type=="substitution":
        variation.start_on_target=list_segfeat_azu[j]
        variation.ref=seg_seq[list_segfeat_nb[i]]
        variation.alt=seg_seq[list_segfeat_azu[j]]
    elif type=="insertion":
        variation.ref="-"
        variation.alt=seg_seq[list_segfeat_azu[j]]
    elif type=="deletion":
        if i==0: # if the deletion is at the start of the feature, the deletion doesnt always start at the start at the first segment : 
            #use pos_start, position of the feature on its first segment
            variation.ref=seg_seq[list_segfeat_nb[i]][feature.pos_start-1:]
        else: # else, the deletion will always start at the start of the first segment.
            variation.ref=seg_seq[list_segfeat_nb[i]]
        variation.alt="-"



def continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j):

    if variation.type=="substitution":
        variation.ref+=seg_seq[list_segfeat_nb[i]]
        variation.alt+=seg_seq[list_segfeat_azu[j]]
    elif variation.type=="insertion":
        variation.alt+=seg_seq[list_segfeat_azu[j]]
    elif variation.type=="deletion":
        variation.ref+=seg_seq[list_segfeat_nb[i]]

def get_common_segments(list1,list2):
    list_output=[]
    for elem in list1:
        if elem in list2:
            list_output.append(elem)
    return list_output

def detect_segment_order_inversion(list_1,list_2):
    cpt=0
    i=0
    list_1_common=get_common_segments(list_1,list_2)
    list_2_common=get_common_segments(list_2,list_1)
    list_2_common_reversed=list(reversed(list_2_common))
    while i<len(list_1_common):
        #if list_2_common[i]!=list_1_common[i]: pour l'option 1 qui semble fonctionner ??
        #    cpt+=1
        #i+=1
        if list_2_common_reversed[i]==list_1_common[i]: # pour l'option 3
            cpt+=1
        i+=1
    #if cpt>len(list_1_common)/2: # if more than half of the common segments are not at the same position ?? semble fonctionner
    #if list_1_common==reversed(list_2_common): fonctionne pas
    if cpt>len(list_1_common)*0.9:# if more than 90% of the segments are on the same position when the lists are reversed, there is an inversion. 
        #print("ee")
        return [True,list_1,list(reversed(list_2))]
    else:
        return [False,list_1,list_2]



# takes two lists of segments for two genes, check if the first list is an inversion of the second one (if the segments in common are on the opposite strand)
def detect_inversion(list_1,list_2):
    # removes strand for the lists of stranded segments
    list_1_unstrand=[segment_stranded[1:] for segment_stranded in list_1]
    list_2_unstrand=[segment_stranded[1:] for segment_stranded in list_2]
    [seg_common,same_strand_count]=compare_strand(list_1,list_2,list_1_unstrand,list_2_unstrand)

    # if there is less than 10% of strand difference among the common segments (ie more than 90% of same strand), return 0, no inversion 
    if len(seg_common)-same_strand_count<len(seg_common)/10:
        inversion=0
    else:
        inversion=1

    # check if we have an inversion for the order of the segmetns. double inversion = no inversion. 
    [segment_order_inversion,list_segfeat_nb,list_segfeat_azu]=detect_segment_order_inversion(list_1_unstrand,list_2_unstrand)
    if segment_order_inversion:
       inversion=abs(inversion-1)
    return [list_segfeat_nb,list_segfeat_azu,inversion]



def print_variations_2(first_seg,last_seg,feat,paths,seg_seq):

    if (first_seg!=''): # if the feature is not completly absent        # add the else, output absent features
        [variation,list_segfeat_nb,list_segfeat_azu]=create_var(feat,first_seg,last_seg,paths)
        
        var=0 # count variations, to see if there is any
        feature=Features[feat]
        feat_start=feature.start
        # loop to go through both paths
        i=0
        j=0

        # detect and print variations ignoring the strands
        while (i<len(list_segfeat_nb)) & (j<len(list_segfeat_azu)):
            if list_segfeat_nb[i] != list_segfeat_azu[j]: # if there is a difference between the two paths
                if list_segfeat_azu[j] not in list_segfeat_nb: # if the segment in azu is absent in nb
                    if list_segfeat_nb[i] not in list_segfeat_azu: # if the segment in nb is absent is azu

                        # if both segments are absent in the other genome, its a substitution
                        variation.last_seg_in_target=list_segfeat_azu[j]
                        if (variation.type=='insertion') | (variation.type=='deletion'):
                            # print the current variation 
                            print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
                            reset_var(variation)
                            var+=1
                        if variation.type=='substitution':
                            continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j)
                        else:
                            # initiate substitution
                            init_new_var(variation,"substitution",list_segfeat_nb,list_segfeat_azu,i,j,seg_seq,feature)
                        i+=1;j+=1


                    else: # azu segment not in nb, but nb segment in azu : insertion
                        if (variation.type=='substitution') | (variation.type=='deletion'):
                            # print the current variation 
                            print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
                            reset_var(variation)
                            var+=1
                        variation.last_seg_in_target=list_segfeat_azu[j]
                        if variation.type=='insertion':
                            continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j)
                        else:
                            # intiate insertion
                            init_new_var(variation,"insertion",list_segfeat_nb,list_segfeat_azu,i,j,seg_seq,feature)
                        j+=1


                elif list_segfeat_nb[i] not in list_segfeat_azu: # nb segment not in azu, but azu segment in nb : deletion
                    if (variation.type=='substitution') | (variation.type=='insertion'):
                        # print the current variation 
                        print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
                        reset_var(variation)
                        var+=1
                    if variation.type=='deletion':
                        continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j)
                    else:
                        # intiate deletion
                        init_new_var(variation,"deletion",list_segfeat_nb,list_segfeat_azu,i,j,seg_seq,feature)
                    i+=1

                else : # idk yet. if both segments are present in the other genome but not at the same position. probably substitution then
                    line="weird order change\n"
                    write_line(line,output_variations,False)

                    var+=1;i+=1;j+=1

            else: # segment present in both. print the running indel if there is one
                # print the current variation 
                if variation.type!='':
                    print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
                    var+=1
                    reset_var(variation)
                variation.last_seg_in_target=list_segfeat_azu[j]
                i+=1;j+=1

#        # finish printing the current indel
#        print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
#        if variation.type!='':
#            var+=1
#        reset_var(variation)

        if (variation.type!='') & (variation.type!="deletion"):
            print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
            var+=1

        # see if the end is missing for the first genome, ie i didn'r reach the length of the segment list for the first genome
        if i<len(list_segfeat_nb)-1:
            print_last_deletion(variation,list_segfeat_nb,list_segfeat_azu,i,feat_start,feature,seg_seq)
            var+=1
            reset_var(variation)



        if var==0:
            if list_segfeat_nb != list_segfeat_azu:
                print("??")
                print(feat,list_segfeat_nb,list_segfeat_azu)
            print_novar(variation)




# not used. 

def get_list_segments_missing(list_seg,segments_on_target_genome):
    segments_missing=[]
    for segment in list_seg:
            if segment[1:] not in segments_on_target_genome:
                segments_missing.append(Segments[segment[1:]])
    return segments_missing
# takes a feature and a feature type, returns a list of child features that have the wanted type. currently not used.
def get_child_list(feature,child_type):
    if type=="":
        return feature.childs
    list_childs=[]
    for child in feature.childs:
        if Features[child].type==child_type:
            list_childs.append(child)
    return list_childs