from .intersect import Features from .usefull_little_functions import get_segment_sequence from .detect_inversion import detect_feature_inversion,invert_segment_list # functions to get the alignment for the transfered genes # get the alignment between the features on the source genome and the features on the target genome def print_alignment(feat,match,seg_seq,file_out_aln): if match[0]!='': feature_target_path=match[2] if len(feature_target_path)>0: # if the feature is not completely absent feature_path_source_genome=Features[feat].segments_list_source inversion=detect_feature_inversion(feature_path_source_genome,feature_target_path) if inversion: feature_target_path=invert_segment_list(feature_target_path) line_aln=create_line_aln(feature_path_source_genome,feature_target_path,seg_seq,feat) file_out_aln.write(line_aln) # creates an alignment for two segments def segment_aln(type,seg_seq,seg_a,seg_b,first,feature_id,last): match type: case "identity": if first: feature=Features[feature_id] seq_aln=get_segment_sequence(seg_seq,seg_a)[feature.pos_start-1:] elif last: feature=Features[feature_id] seq_aln=get_segment_sequence(seg_seq,seg_a)[:feature.pos_stop] else: seq_aln=get_segment_sequence(seg_seq,seg_a) line_a=seq_aln line_b=seq_aln len_aln=len(seq_aln) line_c=len_aln*"*" case "substitution": seq_aln_a=get_segment_sequence(seg_seq,seg_a) seq_aln_b=get_segment_sequence(seg_seq,seg_b) len_a=len(seq_aln_a) len_b=len(seq_aln_b) if len_a>len_b: diff_len=len_a-len_b line_a=seq_aln_a line_b=seq_aln_b+diff_len*"-" line_c=len_a*" " else: diff_len=len_b-len_a line_a=seq_aln_a+diff_len*"-" line_b=seq_aln_b line_c=len_b*" " case "insertion": seq_aln_b=get_segment_sequence(seg_seq,seg_b) len_b=len(seq_aln_b) line_a=len_b*"-" line_b=seq_aln_b line_c=len_b*" " case "deletion": if first: feature=Features[feature_id] seq_aln_a=get_segment_sequence(seg_seq,seg_a)[feature.pos_start-1:] else: seq_aln_a=get_segment_sequence(seg_seq,seg_a) len_a=len(seq_aln_a) line_a=seq_aln_a line_b=len_a*"-" line_c=len_a*" " case "end_deletion": seq_aln_a="" for segment in seg_a[:-1]: seq_aln_a+=get_segment_sequence(seg_seq,segment) feature=Features[feature_id] seq_aln_a+=get_segment_sequence(seg_seq,seg_a[-1])[0:feature.pos_stop] # for the last segment, only take the part that the feature is on len_a=len(seq_aln_a) line_a=seq_aln_a line_b=len_a*"-" line_c=len_a*" " return [line_a,line_b,line_c,False] # check the orientation of the segment later # formats the alignment lines def parse_aln_lines(line_a,line_b,line_c,feature_id): if (len(line_a)!=len(line_b)) or (len(line_b)!=len(line_c)): print("line lengths differ in alignment") len_to_parse=len(line_a) len_parsed=0 aln_line="" nb_res_a=0 nb_res_b=0 while len_parsed<len_to_parse: len_header=len(feature_id)+11 headers=[feature_id+"_source ",feature_id+"_target ",len_header*" "] add_a=line_a[len_parsed:len_parsed+60] add_b=line_b[len_parsed:len_parsed+60] add_c=line_c[len_parsed:len_parsed+60] nb_res_a+=len(add_a)-add_a.count("-") nb_res_b+=len(add_b)-add_b.count("-") aln_line+=f'{headers[0]}{add_a} {nb_res_a}\n' aln_line+=f'{headers[1]}{add_b} {nb_res_b}\n' aln_line+=f'{headers[2]}{add_c}\n\n' len_parsed+=60 aln_line+="\n" return aln_line # creates the alignment for a feature def create_line_aln(feature_path_source_genome,feature_path_target_genome,seg_seq,feature_id): line_a="" line_b="" line_c="" [i,j]=[0,0] first=True # when writing the first part of the feature, dont take the whole segment, only the part that the feature is on last=False # same for the last part of the feature while (i<len(feature_path_source_genome)) and (j<len(feature_path_target_genome)): if i==len(feature_path_source_genome)-1: last=True if feature_path_source_genome[i] != feature_path_target_genome[j]: # if there is a difference between the two paths if feature_path_target_genome[j] not in feature_path_source_genome: # if the segment in target genome is absent in source genome if feature_path_source_genome[i] not in feature_path_target_genome: # if the segment in source genome is absent is target genome : substitution [add_a,add_b,add_c,first]=segment_aln("substitution",seg_seq,feature_path_source_genome[i],feature_path_target_genome[j],first,feature_id,last) line_a+=add_a;line_b+=add_b;line_c+=add_c i+=1;j+=1 else: # target genome segment not in source_genome, but source_genome segment in target genome : insertion [add_a,add_b,add_c,first]=segment_aln("insertion",seg_seq,"",feature_path_target_genome[j],first,feature_id,last) line_a+=add_a;line_b+=add_b;line_c+=add_c j+=1 elif feature_path_source_genome[i] not in feature_path_target_genome: # source_genome segment not in target genome, but target genome segment in source_genome : deletion [add_a,add_b,add_c,first]=segment_aln("deletion",seg_seq,feature_path_source_genome[i],"",first,feature_id,last) line_a+=add_a;line_b+=add_b;line_c+=add_c i+=1 else : # if both segments are present in the other genome but not at the same position. weird case never found yet [add_a,add_b,add_c,first]=segment_aln("substitution",seg_seq,feature_path_source_genome[i],feature_path_target_genome[j],first,feature_id,last) line_a+=add_a;line_b+=add_b;line_c+=add_c i+=1;j+=1 else: # segment present in both, no variation. [add_a,add_b,add_c,first]=segment_aln("identity",seg_seq,feature_path_source_genome[i],feature_path_target_genome[j],first,feature_id,last) line_a+=add_a;line_b+=add_b;line_c+=add_c i+=1;j+=1 if i<=len(feature_path_source_genome)-1: # if we didn't reach the length of the segment list for the first genome, the end is missing for the second genome [add_a,add_b,add_c,first]=segment_aln("end_deletion",seg_seq,feature_path_source_genome[i:],"",first,feature_id,last) line_a+=add_a;line_b+=add_b;line_c+=add_c return parse_aln_lines(line_a,line_b,line_c,feature_id)