from .usefull_little_functions import * from .intersect import invert_seg,Features from .detect_inversion import detect_feature_inversion from .Functions import get_id_cov # outputs the gff line of the target genome for a feature occurence (a feature can be transfered at several positions) def generate_target_gff(feature_id,seg_size,args,reason_features_not_transfered,match,file_out_gff,segments_on_target_genome): feature=Features[feature_id] # get list of the segments where it is and the first and last segment of the feature on the new genome [first_seg,last_seg,walk,copy_id,feature_target_path]=get_featurePath_ends(match) inversion=detect_feature_inversion(feature.segments_list_source,feature_target_path) if first_seg!='': # feature present on the target genome if inversion: size_on_new_genome=get_feature_start_on_target_genome_inv(last_seg,feature_id,walk,copy_id,segments_on_target_genome)-get_feature_stop_on_target_genome_inv(first_seg,feature_id,walk,copy_id,segments_on_target_genome)+1 else: size_on_new_genome=get_feature_stop_on_target_genome(last_seg,feature_id,walk,copy_id,segments_on_target_genome)-get_feature_start_on_target_genome(first_seg,feature_id,walk,copy_id,segments_on_target_genome)+1 size_diff=abs(size_on_new_genome-feature.size) [cov,id]=match[3] if (cov*100<args.coverage) or (id*100<args.identity): reason_features_not_transfered[1]+=1 match.append(False) # store the information that this gene copy didnt pass the filters return "no" else: line_gff=create_line_target_gff(first_seg,last_seg,feature_id,size_diff,inversion,walk,cov,id,copy_id,segments_on_target_genome) # transfer the gene file_out_gff.write(line_gff) # transfer all the gene's childs transfer_childs(feature_id,feature_target_path,walk,copy_id,inversion,seg_size,file_out_gff,segments_on_target_genome) match.append(True) # store the information that this gene copy was transfered return size_diff else : reason_features_not_transfered[0]+=1 match.append(False) # store the information that this gene copy didnt pass the filters return "no" def transfer_childs(feature_id,feature_target_path,walk,copy_id,inversion,seg_size,file_out_gff,segments_on_target_genome): childs_list=Features[feature_id].get_child_list(Features) for child_id in childs_list: child=Features[child_id] # look for the first and last seg of the child in its parent path [child_first_seg,child_last_seg]=['',''] for seg in child.segments_list_source: # get first seg in the parent path if seg in feature_target_path: # parent path child_first_seg=seg break elif invert_seg(seg) in feature_target_path: child_first_seg=invert_seg(seg) break if child_first_seg!='': # check that the child feature is not absent in this occurence of the parent feature for seg in reversed(child.segments_list_source): # get last seg in the parent path if seg in feature_target_path: # parent path child_last_seg=seg break elif invert_seg(seg) in feature_target_path: child_last_seg=invert_seg(seg) break if inversion: # calculate the child feature size child_size_on_new_genome=get_feature_start_on_target_genome_inv(child_last_seg,child_id,walk,copy_id,segments_on_target_genome)-get_feature_stop_on_target_genome_inv(child_first_seg,child_id,walk,copy_id,segments_on_target_genome)+1 else: child_size_on_new_genome=get_feature_stop_on_target_genome(child_last_seg,child_id,walk,copy_id,segments_on_target_genome)-get_feature_start_on_target_genome(child_first_seg,child_id,walk,copy_id,segments_on_target_genome)+1 child_size_diff=abs(child_size_on_new_genome-child.size) for match in child.segments_list_target: # look for the right occurence of the child feature if (match[0]==walk) and (match[1]==copy_id): seg_list=match[2] [cov,id]=get_id_cov(child_id,seg_size,seg_list) break if inversion: temp=child_first_seg child_first_seg=child_last_seg child_last_seg=temp # transfer the child feature: line_gff=create_line_target_gff(child_first_seg,child_last_seg,child_id,child_size_diff,inversion,walk,cov,id,copy_id,segments_on_target_genome) file_out_gff.write(line_gff) # generates the line for the gff of the target genome def create_line_target_gff(first_seg,last_seg,feature_id,size_diff,inversion,walk,cov,id,copy_id,segments_on_target_genome): [chr,strand,feature]=[get_seg_occ(first_seg,walk,feature_id,copy_id,segments_on_target_genome)[0],Features[feature_id].strand,Features[feature_id]] annotation=f'{feature.annot};Size_diff={size_diff};coverage={cov};sequence_ID={id}' # Nb_variants={var_count}; if inversion: start=get_feature_start_on_target_genome_inv(last_seg,feature_id,walk,copy_id,segments_on_target_genome) stop=get_feature_stop_on_target_genome_inv(first_seg,feature_id,walk,copy_id,segments_on_target_genome) strand=invert_strand(strand) else: start=get_feature_start_on_target_genome(first_seg,feature_id,walk,copy_id,segments_on_target_genome) stop=get_feature_stop_on_target_genome(last_seg,feature_id,walk,copy_id,segments_on_target_genome) if start>stop: temp=start start=stop stop=temp if strand=='': strand='.' output_line=f'{chr}\tGrAnnoT\t{feature.type}\t{start}\t{stop}\t.\t{strand}\t.\t{annotation}\n' return output_line