Skip to content
Snippets Groups Projects
Functions.py 47.4 KiB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990
from Graph_gff import Segments, Features, get_feature_start_on_segment, get_feature_stop_on_segment,invert_seg,search_segment
global segments_on_target_genome
segments_on_target_genome={}


def get_seg_occ(seg,walk,feat,copy):
    seg_in_walk=segments_on_target_genome[seg][walk]
    for seg_occurence in seg_in_walk:
        for feat_copy in seg_occurence[5:]:
            if (feat_copy[0]==feat) & (feat_copy[1]==copy):
                return seg_occurence # [chr,start,stop,strand,index,copies]
    print("no occ found ???",seg,walk,feat,copy)
    print(segments_on_target_genome[seg][walk])
    print(Features[feat].segments_list_source)
    print(Features[feat].segments_list_target)
    exit()

# get the start position of the features on the linear target genome, using their coordinates on the graph and the coordinantes of the segments on the genome
def get_feature_start_on_target_genome(start_seg,feat_id,walk,copy_id):
    seg_start_pos=get_seg_occ(start_seg,walk,feat_id,copy_id)[1]
    feat_start_pos=get_feature_start_on_segment(start_seg,feat_id)    
    return seg_start_pos+feat_start_pos-1

# get the stop position of the features on the linear target genome, using their coordinates on the graph and the coordinantes of the segments on the genome
def get_feature_stop_on_target_genome(stop_seg,feat_id,walk,copy_id):
    seg_start_pos=get_seg_occ(stop_seg,walk,feat_id,copy_id)[1]
    feat_stop_pos=get_feature_stop_on_segment(stop_seg,feat_id)
    return seg_start_pos+feat_stop_pos-1


# get the start position of the features on the linear target genome for inverted features
def get_feature_start_on_target_genome_inv(start_seg,feat_id,walk,copy_id):
    seg_end_pos=get_seg_occ(start_seg,walk,feat_id,copy_id)[2]
    feat_start_pos=get_feature_start_on_segment(start_seg,feat_id)
    return seg_end_pos-feat_start_pos+1

# get the stop position of the features on the linear target genome for inverted features
def get_feature_stop_on_target_genome_inv(stop_seg,feat_id,walk,copy_id):
    seg_end_pos=get_seg_occ(stop_seg,walk,feat_id,copy_id)[2]
    feat_stop_pos=get_feature_stop_on_segment(stop_seg,feat_id)
    return seg_end_pos-feat_stop_pos+1



# functions to get the gff with one line per feature

# check if the length of the feature on the target genome passes the filter max_diff
def right_size(size,max_diff,feat):
    if max_diff==0:
        return True
    return not ((size>Features[feat].size*max_diff) or (size<Features[feat].size/max_diff)) 

# generates the line for the gff of the target genome
def create_line_target_gff(first_seg,last_seg,feature_id,size_diff,inversion,walk,cov,id,copy_id):
    [chr,strand,feature]=[get_seg_occ(first_seg,walk,feature_id,copy_id)[0],Features[feature_id].strand,Features[feature_id]]

    annotation=f'{feature.annot};Size_diff={size_diff};coverage={cov};sequence_ID={id}' # Nb_variants={var_count};

    if inversion:
        start=get_feature_start_on_target_genome_inv(last_seg,feature_id,walk,copy_id)
        stop=get_feature_stop_on_target_genome_inv(first_seg,feature_id,walk,copy_id)
        strand=invert_strand(strand)
    else:
        start=get_feature_start_on_target_genome(first_seg,feature_id,walk,copy_id)
        stop=get_feature_stop_on_target_genome(last_seg,feature_id,walk,copy_id)

    if start>stop:
        temp=start
        start=stop
        stop=temp

    output_line=f'{chr}\tGrAnnoT\t{feature.type}\t{start}\t{stop}\t.\t{strand}\t.\t{annotation}\n'
    return output_line


# functions to get the alignment for the transfered genes

# creates an alignment for two segments
def segment_aln(type,seg_seq,seg_a,seg_b,first,feature_id,last):

    match type:
        case "identity":
            if first:
                feature=Features[feature_id]
                seq_aln=get_segment_sequence(seg_seq,seg_a)[feature.pos_start-1:]
            elif last:
                feature=Features[feature_id]
                seq_aln=get_segment_sequence(seg_seq,seg_a)[:feature.pos_stop]
            else:
                seq_aln=get_segment_sequence(seg_seq,seg_a)
            line_a=seq_aln
            line_b=seq_aln
            len_aln=len(seq_aln)
            line_c=len_aln*"*"
        case "substitution":
            seq_aln_a=get_segment_sequence(seg_seq,seg_a)
            seq_aln_b=get_segment_sequence(seg_seq,seg_b)
            len_a=len(seq_aln_a)
            len_b=len(seq_aln_b)
            if len_a>len_b:
                diff_len=len_a-len_b
                line_a=seq_aln_a
                line_b=seq_aln_b+diff_len*"-"
                line_c=len_a*" "
            else:
                diff_len=len_b-len_a
                line_a=seq_aln_a+diff_len*"-"
                line_b=seq_aln_b
                line_c=len_b*" "
        case "insertion":
            seq_aln_b=get_segment_sequence(seg_seq,seg_b)
            len_b=len(seq_aln_b)
            line_a=len_b*"-"
            line_b=seq_aln_b
            line_c=len_b*" "
        case "deletion":
            if first:
                feature=Features[feature_id]
                seq_aln_a=get_segment_sequence(seg_seq,seg_a)[feature.pos_start-1:]
            else:
                seq_aln_a=get_segment_sequence(seg_seq,seg_a)
            len_a=len(seq_aln_a)
            line_a=seq_aln_a
            line_b=len_a*"-"
            line_c=len_a*" "
        case "end_deletion":
            seq_aln_a=""
            for segment in seg_a[:-1]:
                seq_aln_a+=get_segment_sequence(seg_seq,segment)
            feature=Features[feature_id]
            seq_aln_a+=get_segment_sequence(seg_seq,seg_a[-1])[0:feature.pos_stop] # for the last segment, only take the part that the feature is on
            len_a=len(seq_aln_a)
            line_a=seq_aln_a
            line_b=len_a*"-"
            line_c=len_a*" "

    return [line_a,line_b,line_c,False] # check the orientation of the segment later

# formats the alignment lines
def parse_aln_lines(line_a,line_b,line_c,feature_id):
    if (len(line_a)!=len(line_b)) or (len(line_b)!=len(line_c)):
        print("line lengths differ in alignment")
    len_to_parse=len(line_a)
    len_parsed=0
    aln_line=""
    nb_res_a=0
    nb_res_b=0

    while len_parsed<len_to_parse:
        len_header=len(feature_id)+11
        headers=[feature_id+"_source    ",feature_id+"_target    ",len_header*" "]

        add_a=line_a[len_parsed:len_parsed+60]
        add_b=line_b[len_parsed:len_parsed+60]
        add_c=line_c[len_parsed:len_parsed+60]
        nb_res_a+=len(add_a)-add_a.count("-")
        nb_res_b+=len(add_b)-add_b.count("-")
        aln_line+=f'{headers[0]}{add_a}    {nb_res_a}\n'
        aln_line+=f'{headers[1]}{add_b}    {nb_res_b}\n'
        aln_line+=f'{headers[2]}{add_c}\n\n'
        len_parsed+=60
    aln_line+="\n"
    
    return aln_line

# creates the alignment for a feature
def create_line_aln(feature_path_source_genome,feature_path_target_genome,seg_seq,feature_id):
    line_a=""
    line_b=""
    line_c=""
    [i,j]=[0,0]
    first=True # when writing the first part of the feature, dont take the whole segment, only the part that the feature is on
    last=False # same for the last part of the feature

    while (i<len(feature_path_source_genome)) and (j<len(feature_path_target_genome)):
        if i==len(feature_path_source_genome)-1:
            last=True
        if feature_path_source_genome[i] != feature_path_target_genome[j]: # if there is a difference between the two paths
            if feature_path_target_genome[j] not in feature_path_source_genome: # if the segment in target genome is absent in source genome
                if feature_path_source_genome[i] not in feature_path_target_genome: # if the segment in source genome is absent is target genome : substitution
                    [add_a,add_b,add_c,first]=segment_aln("substitution",seg_seq,feature_path_source_genome[i],feature_path_target_genome[j],first,feature_id,last)
                    line_a+=add_a;line_b+=add_b;line_c+=add_c
                    i+=1;j+=1
                else: # target genome segment not in source_genome, but source_genome segment in target genome : insertion
                    [add_a,add_b,add_c,first]=segment_aln("insertion",seg_seq,"",feature_path_target_genome[j],first,feature_id,last)
                    line_a+=add_a;line_b+=add_b;line_c+=add_c
                    j+=1
            elif feature_path_source_genome[i] not in feature_path_target_genome: # source_genome segment not in target genome, but target genome segment in source_genome : deletion
                [add_a,add_b,add_c,first]=segment_aln("deletion",seg_seq,feature_path_source_genome[i],"",first,feature_id,last)
                line_a+=add_a;line_b+=add_b;line_c+=add_c
                i+=1
            else : # if both segments are present in the other genome but not at the same position. weird case never found yet
                [add_a,add_b,add_c,first]=segment_aln("substitution",seg_seq,feature_path_source_genome[i],feature_path_target_genome[j],first,feature_id,last)
                line_a+=add_a;line_b+=add_b;line_c+=add_c
                i+=1;j+=1

        else: # segment present in both, no variation. 
            [add_a,add_b,add_c,first]=segment_aln("identity",seg_seq,feature_path_source_genome[i],feature_path_target_genome[j],first,feature_id,last)
            line_a+=add_a;line_b+=add_b;line_c+=add_c
            i+=1;j+=1

    if i<=len(feature_path_source_genome)-1: # if we didn't reach the length of the segment list for the first genome, the end is missing for the second genome
        [add_a,add_b,add_c,first]=segment_aln("end_deletion",seg_seq,feature_path_source_genome[i:],"",first,feature_id,last)
        line_a+=add_a;line_b+=add_b;line_c+=add_c

    return parse_aln_lines(line_a,line_b,line_c,feature_id)


# functions to output the stats on the transfer

# stats about missing segments and feature type, not used, will change.
def stats_feature_missing_segment(feature_missing_segments,first_seg,last_seg,list_seg,feature_id,walk):
# [feature_missing_first,feature_missing_middle,feature_missing_last,feature_missing_all,feature_missing_total,feature_total,feature_ok]
    feature_missing_segments[5].append(feature_id)

    if first_seg=='' : # no segment of the feature is in the genome, the feature is missing entirely
        feature_missing_segments[3].append(feature_id)
    elif first_seg != list_seg[0]: # the first segment is missing 
        feature_missing_segments[0].append(feature_id)
    elif last_seg!=list_seg[-1]: # the last segment is missing
        feature_missing_segments[2].append(feature_id)

    # go through all the segments, check if some are missing in the middle of the feature
    elif (len(list_seg)!=1) and (feature_id not in feature_missing_segments[3]): # to access the second to last element
        for segment in list_seg[1-(len(list_seg)-2)]:
            if (segment not in segments_on_target_genome) or (walk not in segments_on_target_genome[segment]):
                feature_missing_segments[1].append(feature_id)
                break

    # go through the segments, to see if one is missing anywhere on the feature
    for segment in list_seg:
        if (segment not in segments_on_target_genome) or (walk not in segments_on_target_genome[segment]):
            if feature_id not in feature_missing_segments[4]:
                feature_missing_segments[4].append(feature_id)
                break

    # if the feature doesnt have a missing segment, it is complete.         ADD THE PATH CHECK FOR INSERTIONS !!
    if feature_id not in feature_missing_segments[4]:
        feature_missing_segments[6].append(feature_id)

def get_annot_features(list_features):
    list_annot_features=[]
    for feature in list_features:
        list_annot_features.append(Features[feature].note)
    return list_annot_features

def count_hypput_total(list_annot_first):
    total=len(list_annot_first)
    count_hypput=0
    for annot in list_annot_first:
        if ("hypothetical" in annot) or ("putative" in annot):
            count_hypput+=1
    return [count_hypput,total]

# print stats on the transfer : number of feature that have segments in different positions missing. 
def stats_features(feature_missing_segments):
# [feature_missing_first,feature_missing_middle,feature_missing_last,feature_missing_all,feature_missing_total,feature_total,feature_ok]
    list_annot_first=get_annot_features(feature_missing_segments[0])
    [hyp_put,total]=count_hypput_total(list_annot_first)
    print("\nthe first segment is missing for", total,"features, including",round(100*(hyp_put)/total,2),"% hypothetical or putative.")

    list_annot_middle=get_annot_features(feature_missing_segments[1])
    [hyp_put,total]=count_hypput_total(list_annot_middle)
    print("a middle segment is missing for", total,"features, including",round(100*(hyp_put)/total,2),"% hypothetical or putative.")

    list_annot_last=get_annot_features(feature_missing_segments[2])
    [hyp_put,total]=count_hypput_total(list_annot_last)
    print("the last segment is missing for", total,"features, including",round(100*(hyp_put)/total,2),"% hypothetical or putative.")

    list_annot_all=get_annot_features(feature_missing_segments[3])
    [hyp_put,total]=count_hypput_total(list_annot_all)
    print(total,"features are entirely missing, including",round(100*(hyp_put)/total,2),"% hypothetical or putative.")

    list_annot_total=get_annot_features(feature_missing_segments[4])
    [hyp_put,total]=count_hypput_total(list_annot_total)
    print("there is at least one segment missing for", total,"features, including",round(100*(hyp_put)/total,2),"% hypothetical or putative.")

    list_annot_ok=get_annot_features(feature_missing_segments[6])
    [hyp_put,total]=count_hypput_total(list_annot_ok)
    print(total ,"features are entirely present in the new genome, including",round(100*(hyp_put)/total,2),"% hypothetical or putative.")

    list_annot_features=get_annot_features(feature_missing_segments[5])
    [hyp_put,total]=count_hypput_total(list_annot_features)
    print("there is", total,"features in total, including",round(100*(hyp_put)/total,2),"% hypothetical or putative.")




# functions to generate the different gffs

# appends a dictionnary that associates a segments with its position on all the walks it's on (start stop and index in the segmnet list)
def get_segments_positions_on_genome(pos_seg): # add to the dict the info about the segments.
    with open(pos_seg,'r') as bed:
        line=bed.readline()
        seg_count=0
        file_name='.'.join(pos_seg.split('/')[-1].split('.')[0:-1]) # split by '.' to get the filename without the extention, then join by '.' in case there is a '.' in the filename
        while line:
            line=line.split()
            [seg,chrom,start,stop,strand,index]=[line[3],line[0],int(line[1])+1,int(line[2]),line[3][0:1],seg_count] # +1 in the start to convert the bed 0-based coordinate to a 1-based system
            # check if segment present twice on the same walk ???
            #segments_on_target_genome[seg]=[chrom,start,stop,strand,index,file_name]
            if seg not in segments_on_target_genome:
                segments_on_target_genome[seg]={} # dict of walks->segment_info; you get the info about the segment for each walk
            if file_name not in segments_on_target_genome[seg]:
                segments_on_target_genome[seg][file_name]=list()
            segments_on_target_genome[seg][file_name].append([chrom,start,stop,strand,index])
            seg_count+=1
            line=bed.readline()

# look for the segment on either strand of the target genome
def search_seg_on_target_genome(segment):
    inverted_segment=invert_seg(segment)
    if segment in segments_on_target_genome:
        #if inverted_segment in segments_on_target_genome:
        #    print(segment," found in both orientations")
        return segment
    elif inverted_segment in segments_on_target_genome:
        #print("inverted seg found *****")
        return inverted_segment
    else:
        return False

# look for a segment on a walk, in either orientations
def search_seg_on_walk(segment,walk): # for now just print the first found, look for several later...
    inverted_segment=invert_seg(segment)
    if segment in segments_on_target_genome:
        if walk in segments_on_target_genome[segment]:
            return segment
    elif inverted_segment in segments_on_target_genome:
        if walk in segments_on_target_genome[inverted_segment]:
            return inverted_segment
    else:
        return False

# generates a dictionnary that associaces the segments to their sequence : s5->AGGCTAA
def get_segments_sequence(segments_file,segments_list):
    with open(segments_file,'r') as file_segments:
        line=file_segments.readline()
        seg_seq={}
        while line:
            line=line.split()
            seg_id='s'+line[1]
            if seg_id in segments_list:
                seg_seq[seg_id]=line[2]
            line=file_segments.readline()
    return seg_seq

# generates a dictionnary that associates a walk_name to a list of segments : chr10->[>s1,>s2,>s4]
def get_paths(walks_file,target_genome):
    with open(walks_file,'r') as file_walks:
        paths={}
        line=file_walks.readline()
        while line:
            line=line.split()
            seq_name=line[1]+"_"+line[3]
            if target_genome in seq_name: # get the walk of the genome
                path=line[6].split(',')[1:]
                list_segments=[]
                for segment in path:
                    if segment[0:1]=='>':
                        list_segments.append('>s'+segment[1:])
                    elif segment[0:1]=='<':
                        list_segments.append('<s'+segment[1:])
                paths[seq_name]=list_segments
            line=file_walks.readline()
    return paths

# get the first and last segment of the list that is in the target genome (possibly several pairs)
def get_first_last_seg(list_seg):
    list_first_last_segs=[]
    [first_seg_found,last_seg_found,walk_found]=['','','']

    list_walks=get_walks_feature_cross(list_seg) # get all the walks where there is a segment of the feature
    for walk in list_walks: # find the first and last seg for each walk
        for segment in list_seg: # look for first_seg
            seg_found=search_seg_on_walk(segment,walk)
            if seg_found:
                first_seg_found=seg_found
                break
        if first_seg_found!='': # look for last_seg
            for segment in reversed(list_seg):
                last_seg_found=search_seg_on_walk(segment,walk)
                if last_seg_found:
                    walk_found=walk
                    break
        list_first_last_segs.append([first_seg_found,last_seg_found,walk_found])
        [first_seg_found,last_seg_found,walk_found]=['','','']
    # return all the match
    return list_first_last_segs

# functions to get the detail of the variations in the features

# find all the walks that contain a segment of the feature (list_seg is the walk of the feature on the source genome)
def get_walks_feature_cross(list_seg):
    list_walks=list()
    for segment in list_seg:
        seg_found=search_seg_on_target_genome(segment)
        if seg_found: # if the segment or the reverse complement is on the target genome
            for walk in segments_on_target_genome[seg_found]:
                if walk not in list_walks:
                    list_walks.append(walk)
    return list_walks

# returns a list of feature's child, and their childs' childs, etc. 
def get_child_list(feature_id):
    feature=Features[feature_id]
    list_childs=[]
    for child in feature.childs:
        list_childs.append(child) # add the child to the list
        list_childs+=get_child_list(child) # add the child's childs to the list
    return list_childs

# add the paths of the feature on the target genome in the object Feature
def add_target_genome_paths(feature_id,target_genome_paths):
    feature=Features[feature_id]
    list_seg=feature.segments_list_source
    list_first_last_segs=get_first_last_seg(list_seg)

    for match in list_first_last_segs: # for each walk that has the feature
        [first_seg,last_seg,walk_name]=match
        # get the first and last segments of all the copies on this walk
        first_last_segs_list=find_gene_copies(list_seg,walk_name,feature_id)
        copy_number=0
        for first_seg,last_seg in first_last_segs_list: # get the feature path for all the copies
            copy_number+=1
            copy_id="copy_"+str(copy_number) # get the copy that corresponds to this pair of first_seg,last_seg
            feature_target_path=get_feature_path(target_genome_paths[walk_name],first_seg,last_seg,walk_name,copy_id,feature_id)
            feature_path=[walk_name,copy_id,feature_target_path] # informations about the occurence of the feature
            feature.segments_list_target.append(feature_path)

            # add the feat_copy to all the segments !!!

            # get the pos start of the first and last segments (existing because done in find_gene_copies)
            # then look for segs that are between.
            walk_start=get_seg_occ(feature_target_path[0],walk_name,feature_id,copy_id)[1]
            walk_stop=get_seg_occ(feature_target_path[-1],walk_name,feature_id,copy_id)[2]
            feat_copy=(feature_id,copy_id)
            for seg in feature_target_path[1:-1]: # the first and last segs are already done in find_gene_copies
                for segment in segments_on_target_genome[seg][walk_name]: # find the right occurence
                    if walk_start <= segment[1] <= walk_stop: 
                        segment.append(feat_copy) # annotate the segment : feature_id, copy_id
                        break

            # add the paths of the gene's child features
            add_childs_paths(feature_id,feature_target_path,walk_name,copy_id,target_genome_paths)
     
    if len(list_first_last_segs)==0: # the latter steps expect this list to not be empty. ALSO DO IT FOR THE CHILDS ?
        feature.segments_list_target.append(['','',[]])

def add_childs_paths(feature_id,feature_target_path,walk_name,copy_id,target_genome_paths):
    childs_list=get_child_list(feature_id)
    for child_id in childs_list:
        child=Features[child_id]

        # find child path in parent's path
        [child_first_seg,child_last_seg]=['','']
        for seg in child.segments_list_source:
            if seg in feature_target_path: # parent path
                child_first_seg=seg
                break
            elif invert_seg(seg) in feature_target_path:
                child_first_seg=invert_seg(seg)
                break

        if child_first_seg!='': # the child feature may be absent in this occurence of the parent
            for seg in reversed(child.segments_list_source):
                if seg in feature_target_path: # parent path
                    child_last_seg=seg
                    break
                elif invert_seg(seg) in feature_target_path:
                    child_last_seg=invert_seg(seg)
                    break
            
            # get the path of the child feature
            child_path=get_feature_path(target_genome_paths[walk_name],child_first_seg,child_last_seg,walk_name,copy_id,feature_id)
            feat_copy=(child_id,copy_id)

            feature_path=[walk_name,copy_id]
            feature_path.append(child_path)
            child.segments_list_target.append(feature_path)

            for segment_id in child_path: # annotate the segments with info about the occurence of the child feature
                segment=get_seg_occ(segment_id,walk_name,feature_id,copy_id)
                segment.append(feat_copy)

# find all the copies of the segments from the source list in the target genome
def find_all_seg(list_seg_source,walk_name):
    index=0
    list_seg_target=[] # contains list of info for each seg : [seg_id,seg_strand,start_pos,index_on_source_walk]
    list_seg_source_unstranded=[] # contains the path in the source genome, but with the strand separated from the segment_id : [s24,>]
    for seg in list_seg_source:
        list_seg_source_unstranded.append([seg[1:],seg[0]]) # seg_id,seg_strand : [s24,>]
        seg_inverted=invert_seg(seg)
        # look for all the segment copies in the target genome walk, in both orientations
        if (seg in segments_on_target_genome) and (walk_name in segments_on_target_genome[seg]): # if the segment is in the target genome on the right walk (chr,ctg)
            for copy in segments_on_target_genome[seg][walk_name]: # take all its copies
                seg_info=[seg[1:],seg[0],int(copy[1]),index] # [s24,>,584425,4]
                list_seg_target.append(seg_info)
        if (seg_inverted in segments_on_target_genome) and (walk_name in segments_on_target_genome[seg_inverted]) : # same but with the segment inverted
            for copy in segments_on_target_genome[seg_inverted][walk_name]:
                seg_info=[seg_inverted[1:],seg_inverted[0],int(copy[1]),index]
                list_seg_target.append(seg_info)
        index+=1
    list_seg_target.sort(key=sort_seg_info) # order the list of segments by start position
    return [list_seg_target,list_seg_source_unstranded]

def find_gene_copies(list_seg_source,walk_name,feature_id):
    # find all copies of all segments from the gene in the target genome (in both orientations)
    [list_seg_target,list_seg_source_unstranded]=find_all_seg(list_seg_source,walk_name)
    # find each copy of the gene in the ordered list of segments
    [first_segs_list,last_segs_list]=detect_copies(list_seg_target,list_seg_source_unstranded,walk_name,feature_id)

    # join the first and last seg lists
    first_last_segs_list=[]
    index=0
    for first_seg in first_segs_list:
        last_seg=last_segs_list[index]
        pair=(first_seg,last_seg)
        first_last_segs_list.append(pair)
        index+=1

    return first_last_segs_list # return a list of pairs (first_seg,last_seg)

# called by find_gene_copies
def detect_copies(list_seg_target,list_seg_source_unstranded,walk_name,feature_id):
    # prepare the variables for the first iteration of the for loop
    [old_id,old_strand,old_start,old_index]=list_seg_target[0]
    [first_segs_list,last_segs_list]=[[],[]]
    old_seg_id=old_strand+old_id
    first_segs_list.append(old_seg_id)

    copy_number=1
    copy_id="copy_"+str(copy_number)
    feat_copy=(feature_id,copy_id)

    for segment in segments_on_target_genome[old_seg_id][walk_name]: # look for the first seg to add the occurence info
        if segment[1]==old_start:
            copy_id="copy_"+str(copy_number)
            feat_copy=(feature_id,copy_id)
            segment.append(feat_copy)
            break
    # adjust old_index for the first iteration of the loop
    first_inversion=(old_strand!=list_seg_source_unstranded[old_index][1])
    if first_inversion:
        old_index+=1
    else:
        old_index-=1

    for seg in list_seg_target: # find and annotate (with feat_copy) all the first and last segments of the feature copies
        [new_id,new_strand,new_start,new_index]=seg
        new_seg_id=new_strand+new_id
        inversion=(seg[1]!=list_seg_source_unstranded[new_index][1]) # inversion if this segment's strand is not the same as in the source walk

        if inversion : 
            if not((old_strand==new_strand) and (old_index>new_index)): # not (if the index decreases and the strand stays the same, it is the same gene copy)
                last_segs_list.append(old_seg_id)
                add_occurence_info_seg(old_seg_id,walk_name,old_start,feat_copy) # add info for the last seg of the previous copy

                copy_number+=1
                copy_id="copy_"+str(copy_number)
                feat_copy=(feature_id,copy_id) # new feature copy, change the info

                first_segs_list.append(new_seg_id)
                add_occurence_info_seg(new_seg_id,walk_name,new_start,feat_copy) # add info for the first seg of the new copy
        else: 
            if not((old_strand==new_strand) and (old_index<new_index)): # not (if the index increases and the strand stays the same, it is the same gene copy)
                last_segs_list.append(old_seg_id)
                add_occurence_info_seg(old_seg_id,walk_name,old_start,feat_copy) # add info for the last seg of the previous copy

                copy_number+=1
                copy_id="copy_"+str(copy_number)
                feat_copy=(feature_id,copy_id) # new feature copy, change the info

                first_segs_list.append(new_seg_id)
                add_occurence_info_seg(new_seg_id,walk_name,new_start,feat_copy) # add info for the first seg of the new copy

        [old_strand,old_index,old_seg_id,old_start]=[new_strand,new_index,new_seg_id,new_start]

    # add the last seg info
    last_segs_list.append(old_seg_id)
    add_occurence_info_seg(old_seg_id,walk_name,new_start,feat_copy) # add info for the last seg of the last copy

    return [first_segs_list,last_segs_list]

def add_occurence_info_seg(target_seg_id,walk_name,target_start,feat_copy):
    for segment in segments_on_target_genome[target_seg_id][walk_name]: # look for the right occurence of the segment
        if segment[1]==target_start:
            segment.append(feat_copy) # add seg info
            break

def sort_seg_info(seg_info):
    return seg_info[2]

# find the feature's path in target genome walk
def get_feature_path(target_genome_path,first_seg,last_seg,walk_name,copy_id,feature_id):
    # look for first_seg and last_seg that has the right copy_id for this feature
    first_seg_index=get_seg_occ(first_seg,walk_name,feature_id,copy_id)[4]
    last_seg_index=get_seg_occ(last_seg,walk_name,feature_id,copy_id)[4]
    first_index=min(first_seg_index,last_seg_index)
    last_index=max(first_seg_index,last_seg_index)
    feature_path_target_genome=target_genome_path[first_index:last_index+1]
    return feature_path_target_genome

# count the variations between two lists
def count_variations(feature_id,target_list):
    feature=Features[feature_id]
    if len(target_list)!=0:
        source_list=feature.segments_list_source
        inversion=detect_feature_inversion(source_list,target_list)
        if inversion:
            target_list=invert_segment_list(target_list)
        target_dict=dict.fromkeys(target_list,"")
        source_dict=dict.fromkeys(source_list,"") # convert list into dict to search segments in dict quicker.
        var_count=0
        for segment in source_dict:
            if segment not in target_dict:
                var_count+=1
        for segment in target_dict:
            if segment not in source_dict:
                var_count+=1
        # this counts the substitutions twice, as insertion+deletion.
    return var_count

# get the coverage and sequence id of a feature
def get_id_cov(feature_id,seg_size,target_list): # seg_size has unoriented segments : s25
    feature=Features[feature_id]
    source_list=feature.segments_list_source

    inversion=detect_feature_inversion(source_list,target_list)
    if inversion:
        target_list=invert_segment_list(target_list)

    [match,subs,inser,delet]=[0,0,0,0]

    [i,j]=[0,0]
    first=True # when writing the first part of the feature, dont take the whole segment, only the part that the feature is on
    last=False # same for the last part of the feature # for id and ins.

    while (i<len(source_list)) and (j<len(target_list)):
        if i==len(source_list)-1:
            last=True
        if source_list[i] != target_list[j]: # if there is a difference between the two paths
            if target_list[j] not in source_list: # if the segment in target genome is absent in source genome
                if source_list[i] not in target_list: # if the segment in source genome is absent is target genome : substitution
                    add=segment_id_cov("substitution",seg_size,source_list[i],target_list[j],first,feature,last)
                    match+=add[0];subs+=add[1];inser+=add[2];delet+=add[3];first=add[4]
                    i+=1;j+=1
                else: # target genome segment not in source_genome, but source_genome segment in target genome : insertion
                    add=segment_id_cov("insertion",seg_size,source_list[i],target_list[j],first,feature,last)
                    match+=add[0];subs+=add[1];inser+=add[2];delet+=add[3];first=add[4]
                    j+=1
            elif source_list[i] not in target_list: # source_genome segment not in target genome, but target genome segment in source_genome : deletion
                add=segment_id_cov("deletion",seg_size,source_list[i],target_list[j],first,feature,last)
                match+=add[0];subs+=add[1];inser+=add[2];delet+=add[3];first=add[4]
                i+=1
            else : # if both segments are present in the other genome but not at the same position. weird case never found yet
                add=segment_id_cov("substitution",seg_size,source_list[i],target_list[j],first,feature,last)
                match+=add[0];subs+=add[1];inser+=add[2];delet+=add[3];first=add[4]
                i+=1;j+=1
        else: # segment present in both, no variation. 
            add=segment_id_cov("identity",seg_size,source_list[i],target_list[j],first,feature,last)
            match+=add[0];subs+=add[1];inser+=add[2];delet+=add[3];first=add[4]
            i+=1;j+=1

    if i<=len(source_list)-1: # if we didn't reach the length of the segment list for the first genome, the end is missing for the second genome
        add=segment_id_cov("end_deletion",seg_size,source_list[i:],'',first,feature,last)
        match+=add[0];subs+=add[1];inser+=add[2];delet+=add[3];first=add[4]

    cov=round((match+subs)/(match+subs+delet),3)
    id=round(match/(match+subs+inser+delet),3)
    #var_count=count_variations(feature_id,target_list)
    return [cov,id]

# computes the cov/id calculation for a segment pair
def segment_id_cov(type,seg_size,seg_a,seg_b,first,feature,last):
    [match,subs,inser,delet]=[0,0,0,0]
    match type:
        case "identity":
            if first:
                match+=seg_size[seg_a[1:]]-feature.pos_start+1
            elif last:
                match+=feature.pos_stop
            else:
                match+=seg_size[seg_a[1:]]
        case "substitution":
            if seg_size[seg_b[1:]]!=seg_size[seg_a[1:]]: # substitution can be between segments of different size
                if seg_size[seg_b[1:]]>seg_size[seg_a[1:]]:
                    subs+=seg_size[seg_a[1:]]
                    inser+=seg_size[seg_b[1:]]-seg_size[seg_a[1:]]
                elif seg_size[seg_b[1:]]<seg_size[seg_a[1:]]:
                    subs+=seg_size[seg_b[1:]]
                    delet+=seg_size[seg_a[1:]]-seg_size[seg_b[1:]]
            else:
                subs+=seg_size[seg_a[1:]]

        case "insertion":
            inser+=seg_size[seg_b[1:]]
        case "deletion":
            if first:
                delet+=seg_size[seg_a[1:]]-feature.pos_start+1
            else:
                delet+=seg_size[seg_a[1:]]
        case "end_deletion":
            for seg in seg_a[:-1]:
                delet+=seg_size[seg[1:]]
            delet+=feature.pos_stop

    return [match,subs,inser,delet,False] # check the orientation of the segment later

# invert the given strand
def invert_strand(strand):
    match strand:
        case "+":
            return "-"
        case "-":
            return "+"
        case ">":
            return "<"
        case "<":
            return ">"
        case default:
            return ""

# outputs the nucleotide sequence of a list of segments, corresponding to the end of a feature
def get_sequence_list_seg(list_seg,i,feature,seg_seq):
    del_sequence=""
    for k in range(i,len(list_seg)):
        if k==len(list_seg)-1:
            del_sequence+=get_segment_sequence(seg_seq,list_seg[k])[0:feature.pos_stop]
        else:
            del_sequence+=get_segment_sequence(seg_seq,list_seg[k])
    return del_sequence

# outputs the sequence of an oriented segment
def get_segment_sequence(seg_seq,segment):
    if segment[0]==">":
        return seg_seq[segment[1:]]
    else:
        return reverse_complement(seg_seq[segment[1:]])

# outputs the reverse complement of a sequence
def reverse_complement(sequence):
    sequence_rc=""
    for char in sequence:
        sequence_rc+=complement(char)
    return sequence_rc[::-1]

# outputs the reverse complement of a nucleotide
def complement(nucl):
    match nucl:
        case "A":
            return "T"
        case "C":
            return "G"
        case "G":
            return "C"
        case "T":
            return "A"
    return nucl

# stores information about a feature and its current variation
class Variation:
    def __init__(self,feature_id,feature_type,chr,start_new,stop_new,inversion,size_diff,size_new):
        self.feature_id=feature_id
        self.feature_type=feature_type
        self.chr=chr
        self.start_new=start_new
        self.stop_new=stop_new
        self.inversion=inversion
        self.size_diff=size_diff
        self.size_new=size_new
        self.type=''
        self.last_seg_in_target=''
        self.seg_ref=list()
        self.seg_alt=list()

# initiate a Variation object with the information on the feature it is on
def create_var(feature_id,first_seg,last_seg,walk,copy_id,feature_target_path):
    feature=Features[feature_id]
    # get feature paths on the original genome and on the target genome
    feature_path_target_genome=feature_target_path
    feature_path_source_genome=feature.segments_list_source
    inversion=detect_feature_inversion(feature_path_source_genome,feature_path_target_genome)

    if inversion:
        feature_path_target_genome=invert_segment_list(feature_path_target_genome)
        stop_new_genome=get_feature_start_on_target_genome_inv(last_seg,feature_id,walk,copy_id)
        start_new_genome=get_feature_stop_on_target_genome_inv(first_seg,feature_id,walk,copy_id)
    else:
        start_new_genome=get_feature_start_on_target_genome(first_seg,feature_id,walk,copy_id)
        stop_new_genome=get_feature_stop_on_target_genome(last_seg,feature_id,walk,copy_id)
    size_new_genome=stop_new_genome-start_new_genome+1
    size_diff=str(size_new_genome-feature.size)
    sequence_name=get_seg_occ(first_seg,walk,feature_id,copy_id)[0]

    variation=Variation(feature_id,feature.type,sequence_name,start_new_genome,stop_new_genome,inversion,size_diff,size_new_genome)
    return(variation,feature_path_source_genome,feature_path_target_genome)

# reset the informations of the variation, but keep the information about the feature
def reset_var(variation):
    variation.type='' # make type enumerate
    variation.size_var=0
    variation.start_var=''
    variation.start_var_index=0
    variation.ref=''
    variation.alt=''

# find the position of a substitution on the source and the target sequence
def get_old_new_pos_substitution(feat_start,variation,start_feat_seg_target,feat,walk,copy_id):
    seg_pos=search_segment(variation.start_var)
    pos_old=str(int(Segments[seg_pos].start)-int(feat_start))

    var_start_seg=variation.start_on_target
    if variation.inversion:
        start_feat_seg_target=invert_seg(start_feat_seg_target)
        var_start_seg=invert_seg(var_start_seg)
        end_var=get_seg_occ(var_start_seg,walk,feat,copy_id)[2]
        start_feat=get_feature_start_on_target_genome_inv(start_feat_seg_target,feat,walk,copy_id)
        pos_new=str(start_feat-end_var)
    else:
        start_var=get_seg_occ(var_start_seg,walk,feat,copy_id)[1]
        start_feat=get_feature_start_on_target_genome(start_feat_seg_target,feat,walk,copy_id)
        pos_new=str(start_var-start_feat)
    return [pos_old,pos_new] # pos_old and pos_new are the base before the change

# find the position of an insertion on the source and the target sequence
def get_old_new_pos_insertion(variation,feat_start,start_feat_seg_target,feat,walk,copy_id):
    seg_pos=search_segment(variation.start_var) # start_var is the segment AFTER the insertion
    pos_old=str(int(Segments[seg_pos].start)-int(feat_start))

    start_var_seg=variation.start_var
    if variation.inversion:
        start_feat_seg_target=invert_seg(start_feat_seg_target)
        start_var_seg=invert_seg(start_var_seg)
        end_var=get_seg_occ(start_var_seg,walk,feat,copy_id)[2]+len(variation.alt) # start_var_seg is the segment AFTER the insertion
        start_feat=get_feature_start_on_target_genome_inv(start_feat_seg_target,feat,walk,copy_id)
        pos_new=str(start_feat-end_var)
    else:
        start_var=get_seg_occ(start_var_seg,walk,feat,copy_id)[1]-len(variation.alt) # start_var_seg is the segment AFTER the insertion
        start_feat=get_feature_start_on_target_genome(start_feat_seg_target,feat,walk,copy_id)
        pos_new=str(start_var-start_feat)
    return [pos_old,pos_new] # pos_old and pos_new are the base before the change

# find the position of a deletion on the source and the target sequence
def get_old_new_pos_deletion(variation,feat_start,start_feat_seg_target,feat,walk,copy_id):
    i=variation.start_var_index
    seg_pos=search_segment(variation.start_var)
    if i==0:
        pos_old=int(Segments[seg_pos].start)-int(feat_start)+Features[feat].pos_start-1
    else:
        pos_old=int(Segments[seg_pos].start)-int(feat_start)
        if pos_old<0:
            pos_old=0
            print("error with variation position",variation.inversion,"***")

    if variation.last_seg_in_target=="": # deletion of the beggining of the feature, so no segment placed in the new genome yet. 
        pos_new=0
    else:
        start_var_seg=variation.last_seg_in_target
        if variation.inversion:
            start_feat_seg_target=invert_seg(start_feat_seg_target)
            start_var_seg=invert_seg(start_var_seg)
            start_var=get_seg_occ(start_var_seg,walk,feat,copy_id)[1]-1
            start_feat=get_feature_start_on_target_genome_inv(start_feat_seg_target,feat,walk,copy_id)
            pos_new=str(start_feat-start_var)
        else:
            start_var=get_seg_occ(start_var_seg,walk,feat,copy_id)[2]+1
            start_feat=get_feature_start_on_target_genome(start_feat_seg_target,feat,walk,copy_id)
            pos_new=str(start_var-start_feat)
    return [pos_old,pos_new] # pos_old and pos_new are the base before the change

# change the variation information, but keep the feature information (the variation is on the feature)
def init_new_var(variation,type,feature_path_source_genome,feature_path_target_genome,i,j,seg_seq,feature):
    variation.type=type
    variation.start_var=feature_path_source_genome[i]
    variation.start_var_index=i
    if type=="substitution":
        variation.start_on_target=feature_path_target_genome[j]
        variation.ref=get_segment_sequence(seg_seq,feature_path_source_genome[i])
        variation.alt=get_segment_sequence(seg_seq,feature_path_target_genome[j])
        variation.seg_ref.append(feature_path_source_genome[i])
        variation.seg_alt.append(feature_path_target_genome[j])
    elif type=="insertion":
        variation.ref="-"
        variation.alt=get_segment_sequence(seg_seq,feature_path_target_genome[j])
        variation.seg_alt.append(feature_path_target_genome[j])
    elif type=="deletion":
        if i==0: # if the deletion is at the start of the feature, the deletion doesnt always start at the start at the first segment : 
            #use pos_start, position of the feature on its first segment
            variation.ref=get_segment_sequence(seg_seq,feature_path_source_genome[i])[feature.pos_start-1:]
            variation.seg_ref.append(feature_path_source_genome[i])
        else: # else, the deletion will always start at the start of the first segment.
            variation.ref=get_segment_sequence(seg_seq,feature_path_source_genome[i])
            variation.seg_ref.append(feature_path_source_genome[i])
        variation.alt="-"

# update the variation
def continue_var(variation,seg_seq,feature_path_source_genome,feature_path_target_genome,i,j,genome_to_continue):
    if variation.type=="substitution":
        if genome_to_continue==0: # genome_to_continue allows to choose if the substitution continues for the original or the target genome, or both.
            variation.ref+=get_segment_sequence(seg_seq,feature_path_source_genome[i])
            variation.alt+=get_segment_sequence(seg_seq,feature_path_target_genome[j])
            variation.seg_ref.append(feature_path_source_genome[i])
            variation.seg_alt.append(feature_path_target_genome[j])
        elif genome_to_continue==1: # deletion
            variation.ref+=get_segment_sequence(seg_seq,feature_path_source_genome[i])
            variation.seg_ref.append(feature_path_source_genome[i])
        elif genome_to_continue==2: # insertion
            variation.alt+=get_segment_sequence(seg_seq,feature_path_target_genome[j])
            variation.seg_alt.append(feature_path_target_genome[j])
    elif variation.type=="insertion":
        variation.alt+=get_segment_sequence(seg_seq,feature_path_target_genome[j])
        variation.seg_alt.append(feature_path_target_genome[j])
    elif variation.type=="deletion":
        variation.ref+=get_segment_sequence(seg_seq,feature_path_source_genome[i])
        variation.seg_ref.append(feature_path_source_genome[i])


# functions to detect inversions

# gives the list of segments from dict1 that are in dict2
def get_common_segments(dict1,dict2):
    list_common=[]
    for segment in dict1:
        if segment in dict2:
            list_common.append(segment)
    return list_common

# check if common segments in the two dict have the same strand
def compare_strand(dict1,dict2): # dict1 and dict2 : [seg_id]->[seg_strand]
    seg_common=get_common_segments(dict1,dict2)

    # for each segment in common, check if the strand is the same
    same_strand_count=0
    for segment in seg_common:
        if dict1[segment]==dict2[segment]:
            same_strand_count+=1
    return [len(seg_common),same_strand_count]

# check if the two dict have their segments in the inverted order
def detect_segment_order_inversion(dict1,dict2):
    list_1_common=get_common_segments(dict1,dict2)
    list_2_common=get_common_segments(dict2,dict1) # same segments, different orders
    list_2_common_reversed=list(reversed(list_2_common))
    [cpt,i]=[0,0]
    while i<len(list_1_common):
        if list_2_common_reversed[i]==list_1_common[i]:
            cpt+=1
        i+=1
    return (cpt>len(list_1_common)*0.9) # if more than 90% of the segments are on the same position when the lists are reversed, there is an inversion. 

# check if the two dict have the same segments but in different orientation
def detect_orient_inversion(dict1,dict2):
    [seg_common_count,same_strand_count]=compare_strand(dict1,dict2)

    if same_strand_count>=seg_common_count*0.9: # if more than 90% of segments shared have the same strand, no inversion
        return [False,dict1,dict2]
    else:
        return [True,dict1,dict2]


# takes two lists of segments for two genes, check if the first list is an inversion of the second one (if the segments in common are on the opposite strand)
def detect_feature_inversion(list_1,list_2):
    # convert list into dict with unstranded segment id as key and strand as value 
    strand1=[seg[0] for seg in list_1]
    id1=[seg[1:] for seg in list_1]
    dict1 = {id1[i]: strand1[i] for i in range(len(strand1))}
    strand2=[seg[0] for seg in list_2]
    id2=[seg[1:] for seg in list_2]
    dict2 = {id2[i]: strand2[i] for i in range(len(strand2))}
 
    # check if we have an inversion of the orientation of the segments
    [strand_inversion,dict1,dict2]=detect_orient_inversion(dict1,dict2)

    # check if we have an inversion of the order of the segments
    segment_order_inversion=detect_segment_order_inversion(dict1,dict2)

    # if there we have both inversions, the gene is in an inverted region
    if segment_order_inversion and strand_inversion:
        return True
    else :
        return False

# invert all the segments in a list (change the orientation)
def invert_segment_list(seg_list):
    list_inverted=list()
    for seg in seg_list:
        list_inverted.append(invert_seg(seg))
    return list(reversed(list_inverted))