# created by Nina Marthe 2023 - nina.marthe@ird.fr # licensed under MIT import os import subprocess from Graph_gff import * from Functions_output import * from argparser import * #from inference import * args=arg() read_args(args) # intersect is in the current directory intersect="intersect" gfa=args.graph.name load_intersect(intersect) segments=args.segment_coordinates_path+"/segments.txt" # outputs the gff and gaf of the graph if args.graph_gff: out_graph_gff=gfa.split("/")[-1].split(".")[0:-1][0]+".gff" graph_gff(out_graph_gff) if args.graph_gaf: out_graph_gaf=gfa.split("/")[-1].split(".")[0:-1][0]+".gaf" graph_gaf(out_graph_gaf,segments) # get list of files in seg_coord segment_coord_files=os.listdir(args.segment_coordinates_path) # if a transfer on a target genome is asked if args.annotation or args.variation or args.alignment: # get the target genomes if there is none specified if len(args.target)==0: with open(args.segment_coordinates_path+"/walks.txt",'r') as walks: line=walks.readline() while line: genome_name=line.split()[1] if (args.source_genome not in genome_name) and (genome_name not in args.target) and ("MINIGRAPH" not in genome_name): args.target.append(genome_name) line=walks.readline() pav_dict={} # if a pav matrix is asked, create dictionnary to store the features, and for each feature the value for all the target genomes. if args.pav_matrix: pav_line=len(args.target)*[1] pav_dict["gene_id"]=list(args.target) for feat in Features.keys(): if Features[feat].type=='gene': pav_dict[feat]=pav_line.copy() seg_size={} # if the annotation transfer on target genome is asked, build a dictionnary with the segment sizes if args.annotation or args.variation or args.alignment or args.pav_matrix: with open(segments,'r') as segments_file: line=segments_file.readline() while line: line=line.split() seg_id='s'+line[1] segment_size=len(line[2]) seg_size[seg_id]=segment_size line=segments_file.readline() genome_index=0 for target_genome in args.target: print(f'\n{target_genome} transfer :') # create directory to store output files command="mkdir "+target_genome subprocess.run(command,shell=True,timeout=None) # create dictionnaries with paths and segments positions. for file in segment_coord_files: genome_name=get_genome_name(args.target,file) if genome_name==target_genome : print(f' loading the information from the file {file}') file_path=args.segment_coordinates_path+file get_segments_positions_on_genome(file_path) print(f' loading the walks for the genome {target_genome}') walks_path=args.segment_coordinates_path+"/walks.txt" target_genome_paths=get_paths(walks_path,target_genome) # create output files names out_target_gff=target_genome+"/"+target_genome+".gff" out_target_var=target_genome+"/"+target_genome+"_var.txt" out_aln=target_genome+"/"+target_genome+"_aln.txt" list_feat_absent=[] # do the annotation transfer (or var/aln) transfer_on_target(segments,out_target_gff,out_target_var,out_aln,target_genome,target_genome_paths,list_feat_absent,seg_size,args) # if pav matrix is asked, add the information of this transfer on the matrix if args.pav_matrix: for feat in list_feat_absent: pav_dict[feat][genome_index]=0 genome_index+=1 # print the pav matrix if args.pav_matrix: print('\ngeneration of the presence-absence matrix for the transfered genes') pav_output='' for line in pav_dict: pav_output+=line for field in pav_dict[line]: pav_output+="\t"+str(field) pav_output+="\n" out_pav="PAV_matrix.txt" with open(out_pav,'r') as file_out_pav: file_out_pav.write(pav_output)