Skip to content
Snippets Groups Projects
Functions_output.py 26.6 KiB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
from Graph_gff import Segments, Features, write_line
from Functions import *


### functions to generate a genome's gff from the graph's gff


# functions to get the detailed gff with all the fragments of the features
# outputs each fragment of the feature in a gff
def gff_detail(list_seg,feature_id):
    # loop that goes through all the segments that have the current feature
    # keeps the first segment present in the genome found, and when it finds a segment absent in the genome, prints the current part of the fragment, and resets the first segment present.
    # continues to go through the segments, keeps the next segment present in the genome, and when it finds a segment absent, prints the second part of the feature, etc.
    # at the end of the loop, prints the last part of the fragment.
    [feat_start,seg_start,last_seg]=["","",""] # first segment of the feature, first segment of the current part of the feature, last segment in the part of the feature, for loop below
    for segment in list_seg:
        if segment[1:] in segments_on_target_genome: 
            if feat_start=="":
                feat_start=segment[1:]
            if seg_start=="": # if we dont have a start, take the current segment for the start of the part of the feature
                seg_start=segment[1:]
            #else: if we already have a start, keep going though the segments until we find a stop (segment not in azucena)
        else:
            if seg_start!="": # found a stop and we have a start. print the line, reset seg_start, and keep going through the segments to find the next seg_start
                out_line=create_line_detail(last_seg,feature_id,seg_start,feat_start)
                write_line(out_line,output_detail_gff,False)
                seg_start=""
            #else: if the current segment is not in azucena but there is no start, keep looking for a start
        last_seg=segment    
    if last_seg[1:] in segments_on_target_genome:
        out_line=create_line_detail(list_seg[-1],feature_id,seg_start,feat_start)
        write_line(out_line,output_detail_gff,False)


# functions to get the gff with one line per feature
# outputs the feature once in a gff, from the first to the last segment present on the new genome (if the size is ok) :
def gff_one(first_seg,last_seg,feature_id,list_seg,max_diff):

    if (first_seg!=''): # feature present on the target genome
        size_on_new_genome=get_feature_stop_on_genome(last_seg,feature_id)-get_feature_start_on_genome(first_seg,feature_id)+1
        size_diff=abs(size_on_new_genome-Features[feature_id].size)
        if right_size(size_on_new_genome,max_diff,feature_id): 
            line=create_line_gff_one(first_seg,last_seg,feature_id,size_diff,list_seg)
            write_line(line,output_gff_one,False)
            return size_diff
        else:
            return "bad_size"
    else :
        return "absent"



# writes the gff of azucena using the gff of the graph
def genome_gff(pos_seg, gff, out, gfa):
    print("generation of the genome's gff ")

    # create variables and open files
    [once,detail,var,stats]=[False,False,True,False]
    if var:
        [paths,seg_seq]=get_segments_sequence_and_paths(gfa)
        file_out_var = open("variations.txt", 'w')
        global output_variations
        output_variations=[0,"",file_out_var]
    if detail:
        file_out_detail = open("azucena_chr10_detail.gff", 'w')
        global output_detail_gff
        output_detail_gff=[0,"",file_out_detail]
    if once:
        file_out=open(out,'w')
        global output_gff_one
        output_gff_one=[0,"",file_out]
        bad_size_features=0
        absent_features=0
        diff_size_transfered_features=[0,0] # [count,sum], to get the average
    if stats:
        # create objects for stats on how many segments are absent in azucena, their average length, etc
        feature_missing_segments=[[],[],[],[],[],[],[]] # [feature_missing_first,feature_missing_middle,feature_missing_last,feature_missing_all,feature_missing_total,feature_total,feature_ok]
        # the fist segment of the feature is missing - feature_missing_first
        # the last segment of the feature is missing - feature_missing_last
        # at least one middle segment of the feature is missing - feature_missing_middle
        # the entire feature is missing - feature_missing_all
        # at least one segment is missing first, last, or middle) - feature_missing_total
        # no segment is missing, the feature is complete - feature_ok  
        # total number of features, with missing segments or not - feature_total
    get_segments_positions_on_genome(pos_seg)
    list_feature_to_transfer=get_all_features_in_gff(gff) # get the list of all the features to transfer from the gff

    for feat in list_feature_to_transfer:

        # for each feature, get list of the segments where it is and the first and last segment of the feature on the new genome
        list_seg=Features[feat].segments_list
        first_seg=get_first_seg(list_seg)
        last_seg=get_first_seg(reversed(list_seg))

        # outputs the feature once in a gff, from the first to the last segment present on the new genome
        if once:
            max_diff=2 # maximum difference size (n times bigger of smaller)
            transfer_stat=gff_one(first_seg,last_seg,feat,list_seg,max_diff) # insertion not considered !!!
            if transfer_stat=="bad_size":
                bad_size_features+=1
            elif transfer_stat=="absent":
                absent_features+=1
            else:
                diff_size_transfered_features[0]+=1
                diff_size_transfered_features[1]+=transfer_stat
            write_line("",output_gff_one,True)

        # outputs each fragment of the feature in a gff
        if detail:
            gff_detail(list_seg,feat) # insertions !
            write_line("",output_detail_gff,True)

        # outputs the detail of variations of the feature :
        if var:
            print_variations_2(first_seg,last_seg,feat,paths,seg_seq)
            write_line("",output_variations,True)

        if stats==True:
            stats_feature_missing_segment(feature_missing_segments,first_seg,last_seg,list_seg,feat)

    # close output_files
    if detail:
        file_out_detail.close()
    if once:
        file_out.close()
    if var:
        file_out_var.close()


    # print stats
    if stats==True:
        if once:
            print(len(Features)-(bad_size_features+absent_features),"out of",len(Features),"features are transfered.")
            print(bad_size_features,"out of",len(Features), "features are not transfered because they are too big or too small compared to the original genome.")
            print(absent_features,"out of",len(Features),"features are not transfered because they are absent in the new genome.")
            print("average length difference of the transfered genes : ",diff_size_transfered_features[1]/diff_size_transfered_features[0])
        stats_features(feature_missing_segments)




# functions to get the detail of the variations in the features

def print_variations(first_seg,last_seg,feat,paths,seg_seq):

    if (first_seg!=''): # if the feature is not completly absent        # add the else, output absent features

        var=0 # count variations, to see if there is any
        feature=Features[feat]
        feat_start=feature.start

        # get the lengths of the feature, on the original genome and on the new one
        start_new_genome=get_feature_start_on_genome(first_seg,feat)
        stop_new_genome=get_feature_stop_on_genome(last_seg,feat)
        size_new_genome=int(stop_new_genome)-int(start_new_genome)+1
        size_diff=str(size_new_genome-feature.size)

        # get feature paths on the original genome and on the target genome
        list_segfeat_azu=get_feature_path(paths,first_seg,last_seg)
        list_segfeat_nb=feature.segments_list

        # loop to go through both paths
        i=0
        j=0
        [last,last_taille,last_seq,last_start,last_in_azu]=['',0,'','',''] # rename last_taille 

        # check if there is an inversion and remove strands
        [list_segfeat_nb,list_segfeat_azu,inversion]=detect_inversion(list_segfeat_nb,list_segfeat_azu)

        # detect and print variations ignoring the strands
        while (i<len(list_segfeat_nb)) & (j<len(list_segfeat_azu)):
            if list_segfeat_nb[i] != list_segfeat_azu[j]: # if there is a difference between the two paths
                if list_segfeat_azu[j] not in list_segfeat_nb: # if the segment in azu is absent in nb
                    if list_segfeat_nb[i] not in list_segfeat_azu: # if the segment in nb is absent is azu
                        # is both segments are absent in the other genome, its a substitution
                        last_in_azu=list_segfeat_azu[j]

                        # print if we had an insertion or deletion running
                        if last=='insertion': # last : type énuméré.    # fct compare cas précedent à cas courant, si différent imprimer cas précédent. 
                            [pos_old,pos_new]=get_old_new_pos_insertion(last_start,feat_start,list_segfeat_azu,feat)
                            # line : formated line f"{feat}\t"
                            line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tinsertion\t-\t"+last_seq+"\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
                            write_line(line,output_variations,False)
                            var+=1
                        elif last=='deletion':
                            [pos_old,pos_new]=get_old_new_pos_deletion(last_start,feat_start,list_segfeat_azu,feat,last_in_azu,i)

                            line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tdeletion\t"+last_seq+"\t-\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
                            write_line(line,output_variations,False)                          
                            var+=1
                        last='';last_taille=0;last_seq='';last_start=''

                        # print the substitution        # what if plusieurs substitutions à la suite ? 
                        # substitution of segment list_segfeat_nb[i][1:] by segment list_segfeat_azu[j][1:]
                        [pos_old,pos_new]=get_old_new_pos_substitution(feat_start,list_segfeat_nb,list_segfeat_azu,feat,i,j)

                        if len(seg_seq[list_segfeat_nb[i]]) == len(seg_seq[list_segfeat_azu[j]]): # if the substituion is between two segment of the same size, print it
                            size_subs=len(seg_seq[list_segfeat_nb[i]])
                            line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tsubstitution\t"+seg_seq[list_segfeat_nb[i]]+"\t"+seg_seq[list_segfeat_azu[j]]+"\t"+str(size_subs)+"\t"+str(pos_old)+"\t"+pos_new+"\n"
                        else :
                            # if the segments of the substitution have a different size, print deletion then insertion at the same position.
                            line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tdeletion\t"+seg_seq[list_segfeat_nb[i]]+"\t-\t"+str(len(seg_seq[list_segfeat_nb[i]]))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
                            line+=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tinsertion\t-\t"+seg_seq[list_segfeat_azu[j]]+"\t"+str(len(seg_seq[list_segfeat_azu[j]]))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
                            var+=1
                        write_line(line,output_variations,False)
                        var+=1;i+=1;j+=1

                    else: # azu segment not in nb, but nb segment in azu : insertion
                        if last=='deletion':
                            [pos_old,pos_new]=get_old_new_pos_deletion(last_start,feat_start,list_segfeat_azu,feat,last_in_azu,i)

                            line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tdeletion\t"+last_seq+"\t-\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
                            write_line(line,output_variations,False)
                            var+=1;last='';last_taille=0;last_start='';last_seq=''

                        last_in_azu=list_segfeat_azu[j]

                        if last=='insertion':
                            last_seq=last_seq+seg_seq[list_segfeat_azu[j]]
                        else:
                            last='insertion'
                            last_seq=seg_seq[list_segfeat_azu[j]]
                            last_start=list_segfeat_nb[i]
                        j+=1
                        
                elif list_segfeat_nb[i] not in list_segfeat_azu: # nb segment not in azu, but azu segment in nb : deletion
                    if last=='insertion':
                        [pos_old,pos_new]=get_old_new_pos_insertion(last_start,feat_start,list_segfeat_azu,feat)
                        
                        line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tinsertion\t-\t"+last_seq+"\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
                        write_line(line,output_variations,False)
                        var+=1;last='';last_start='';last_taille=0;last_seq=''

                    if last=='deletion':
                        last_seq=last_seq+seg_seq[list_segfeat_nb[i]]
                    else:
                        last='deletion'
                        last_start=list_segfeat_nb[i]
                        if i==0: # if the deletion is at the start of the feature, the deletion doesnt start at the start at the first segment : 
                                    #use pos_start, position of the feature on its first segment
                            last_seq=seg_seq[list_segfeat_nb[i]][feature.pos_start-1:]
                        else: # else, the deletion will always start at the start of the first segment.
                            last_seq=seg_seq[list_segfeat_nb[i]]
                    i+=1
                    
                else : # idk yet. if both segments are present in the other genome but not at the same position. probably substitution then
                    line="weird order change\n"
                    write_line(line,output_variations,False)
                    var+=1;i+=1;j+=1

            else: # segment present in both. print the running indel if there is one
                if last=='insertion':
                    [pos_old,pos_new]=get_old_new_pos_insertion(last_start,feat_start,list_segfeat_azu,feat)
                    
                    line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tinsertion\t-\t"+last_seq+"\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
                    write_line(line,output_variations,False)
                    var+=1

                elif last=='deletion':
                    [pos_old,pos_new]=get_old_new_pos_deletion(last_start,feat_start,list_segfeat_azu,feat,last_in_azu,i)
                    
                    line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tdeletion\t"+last_seq+"\t-\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
                    write_line(line,output_variations,False)
                    var+=1

                last_in_azu=list_segfeat_azu[j]
                last='';last_taille=0;last_start='';last_seq='';i+=1;j+=1

        # finish printing the current indel
        if last=='insertion':
            [pos_old,pos_new]=get_old_new_pos_insertion(last_start,feat_start,list_segfeat_azu,feat)

            line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tinsertion\t-\t"+last_seq+"\t"+str(last_taille)+"\t"+str(pos_old)+"\t"+pos_new+"\n"
            write_line(line,output_variations,False)
            var+=1

        elif last=='deletion':
            [pos_old,pos_new]=get_old_new_pos_deletion(last_start,feat_start,list_segfeat_azu,feat,last_in_azu,i)

            line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tdeletion\t"+last_seq+"\t-\t"+str(len(last_seq))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
            write_line(line,output_variations,False)
            var+=1

        # see if the end is missing for one of the two genomes
        if not((i>=len(list_segfeat_nb)-1) & (j>=len(list_segfeat_azu)-1)):
            pos_old=int(Segments[list_segfeat_nb[i]].start)-int(feat_start)+1
            del_sequence=get_sequence_list_seg(list_segfeat_nb,i,feature,seg_seq)
            length=len(del_sequence)
            pos_new=str(size_new_genome+1) # the deletion is at the end of the feature on the new genome

            line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tdeletion\t"+del_sequence+"\t-\t"+str(length)+"\t"+str(pos_old)+"\t"+pos_new+"\n"
            write_line(line,output_variations,False)
            var+=1

        if var==0:
            line=feat+"\t"+feature.type+"\t"+feature.chr+"\t"+str(start_new_genome)+"\t"+str(stop_new_genome)+"\t"+str(size_new_genome)+"\t"+str(inversion)+"\t"+size_diff+"\tno_var\t-\t-\t-\t-\t-\n"
            write_line(line,output_variations,False)


def print_variations_2(first_seg,last_seg,feat,paths,seg_seq):

    if (first_seg!=''): # if the feature is not completly absent        # add the else, output absent features
        [variation,list_segfeat_nb,list_segfeat_azu]=create_var(feat,first_seg,last_seg,paths)
        feature=Features[feat]
        feat_start=feature.start
        # loop to go through both paths
        [i,j,var_count]=[0,0,0]

        # detect and print variations ignoring the strands
        while (i<len(list_segfeat_nb)) & (j<len(list_segfeat_azu)):
            if list_segfeat_nb[i] != list_segfeat_azu[j]: # if there is a difference between the two paths
                if list_segfeat_azu[j] not in list_segfeat_nb: # if the segment in azu is absent in nb
                    if list_segfeat_nb[i] not in list_segfeat_azu: # if the segment in nb is absent is azu

                        # if both segments are absent in the other genome, its a substitution
                        variation.last_seg_in_target=list_segfeat_azu[j]
                        if (variation.type=='insertion') | (variation.type=='deletion'):
                            # print the current variation 
                            print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
                            reset_var(variation)
                            var_count+=1
                        if variation.type=='substitution':
                            continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j)
                        else:
                            # initiate substitution
                            init_new_var(variation,"substitution",list_segfeat_nb,list_segfeat_azu,i,j,seg_seq,feature)
                        i+=1;j+=1

                    else: # azu segment not in nb, but nb segment in azu : insertion
                        if (variation.type=='substitution') | (variation.type=='deletion'):
                            # print the current variation 
                            print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
                            reset_var(variation)
                            var_count+=1
                        variation.last_seg_in_target=list_segfeat_azu[j]
                        if variation.type=='insertion':
                            continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j)
                        else:
                            # intiate insertion
                            init_new_var(variation,"insertion",list_segfeat_nb,list_segfeat_azu,i,j,seg_seq,feature)
                        j+=1

                elif list_segfeat_nb[i] not in list_segfeat_azu: # nb segment not in azu, but azu segment in nb : deletion
                    if (variation.type=='substitution') | (variation.type=='insertion'):
                        # print the current variation 
                        print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
                        reset_var(variation)
                        var_count+=1
                    if variation.type=='deletion':
                        continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j)
                    else:
                        # intiate deletion
                        init_new_var(variation,"deletion",list_segfeat_nb,list_segfeat_azu,i,j,seg_seq,feature)
                    i+=1

                else : # idk yet. if both segments are present in the other genome but not at the same position. probably substitution then
                    line="weird order change\n"
                    write_line(line,output_variations,False)
                    var_count+=1;i+=1;j+=1

            else: # segment present in both. print the running indel if there is one
                # print the current variation 
                if variation.type!='':
                    print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
                    var_count+=1
                    reset_var(variation)
                variation.last_seg_in_target=list_segfeat_azu[j]
                i+=1;j+=1

        if (variation.type!=''):
            print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j)
            var_count+=1
            reset_var(variation)

        # see if the end is missing for the first genome, ie i didn'r reach the length of the segment list for the first genome
        if i<=len(list_segfeat_nb)-1:
            print_last_deletion(variation,list_segfeat_nb,list_segfeat_azu,i,feat_start,feature,seg_seq)
            var_count+=1
            reset_var(variation)

        if var_count==0:
            print_novar(variation)


def print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j):
    if variation.type=='insertion':
        [pos_old,pos_new]=get_old_new_pos_insertion_2(variation,feat_start,list_segfeat_azu,feat)
        line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tinsertion\t-\t"+variation.alt+"\t"+str(len(variation.alt))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
        write_line(line,output_variations,False)
    elif variation.type=='deletion':
        [pos_old,pos_new]=get_old_new_pos_deletion_2(variation,feat_start,list_segfeat_azu,feat,i)
        line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tdeletion\t"+variation.ref+"\t-\t"+str(len(variation.ref))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
        write_line(line,output_variations,False)
    elif variation.type=='substitution':
        [pos_old,pos_new]=get_old_new_pos_substitution_2(feat_start,variation,list_segfeat_azu,feat,j)
        if len(variation.ref) == len(variation.alt): # if the substituion is between two segment of the same size, print it
            size_subs=len(variation.ref)
            line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tsubstitution\t"+variation.ref+"\t"+variation.alt+"\t"+str(size_subs)+"\t"+str(pos_old)+"\t"+pos_new+"\n"
        else :
            # if the segments of the substitution have a different size, print deletion then insertion at the same position.
            line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tdeletion\t"+variation.ref+"\t-\t"+str(len(variation.ref))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
            line+=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tinsertion\t-\t"+variation.alt+"\t"+str(len(variation.alt))+"\t"+str(pos_old)+"\t"+pos_new+"\n"
        write_line(line,output_variations,False)

def print_last_deletion(variation,list_segfeat_nb,i,feat_start,feature,seg_seq):
    pos_old=int(Segments[list_segfeat_nb[i]].start)-int(feat_start)+1
    del_sequence=get_sequence_list_seg(list_segfeat_nb,i,feature,seg_seq)
    length=len(del_sequence)
    pos_new=str(int(variation.size_new)+1) # the deletion is at the end of the feature on the new genome

    line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tdeletion\t"+del_sequence+"\t-\t"+str(length)+"\t"+str(pos_old)+"\t"+pos_new+"\n"
    write_line(line,output_variations,False)

def print_novar(variation):
    line=variation.feature_id+"\t"+variation.feature_type+"\t"+variation.chr+"\t"+str(variation.start_new)+"\t"+str(variation.stop_new)+"\t"+variation.size_new+"\t"+variation.inversion+"\t"+variation.size_diff+"\tno_var\t-\t-\t-\t-\t-\n"
    write_line(line,output_variations,False)


# not used. 

def get_list_segments_missing(list_seg,segments_on_target_genome):
    segments_missing=[]
    for segment in list_seg:
            if segment[1:] not in segments_on_target_genome:
                segments_missing.append(Segments[segment[1:]])
    return segments_missing

# takes a feature and a feature type, returns a list of child features that have the wanted type. currently not used.
def get_child_list(feature,child_type):
    if type=="":
        return feature.childs
    list_childs=[]
    for child in feature.childs:
        if Features[child].type==child_type:
            list_childs.append(child)
    return list_childs