#!/home/nina/annotpangenome/.venv/bin/python # created by Nina Marthe 2023 - nina.marthe@ird.fr # licensed under MIT from Graph_gff import * from Functions_output import * #from inference import * run="command_line" if run=="command_line": import sys if not(len(sys.argv)>=4) :# intersect, gfa, pos_seg (target_genome_name) print("expected input : intersect, gfa file with walks, bed file with positions of the segments on the target genome") #print("output : graph gff, graph gaf, target genome gff*2+variations") sys.exit(1) elif (sys.argv[1]=="-h") : print("expected input : intersect, gfa file with walks, bed file with positions of the segments on the target genome") print("output : graph gff, graph gaf, target genome gff*2+variations") sys.exit(1) intersect=sys.argv[1] gfa=sys.argv[2] pos_seg=sys.argv[3] out_gff=gfa.split("/")[-1].split(".")[0:-1][0]+".gff" out_gaf=gfa.split("/")[-1].split(".")[0:-1][0]+".gaf" out_once=pos_seg.split("/")[-1].split(".")[0:-1][0]+".gff" out_detail=pos_seg.split("/")[-1].split(".")[0:-1][0]+"_detail.gff" out_var=pos_seg.split("/")[-1].split(".")[0:-1][0]+"_variations.txt" if len(sys.argv)==5: target_genome_name=sys.argv[4] else: target_genome_name=pos_seg.split("/")[-1].split(".")[0:-1][0] load_intersect(intersect) # outputs the gff and gaf of the graph for chr10 graph_gff(out_gff) graph_gaf(out_gaf,gfa) # outputs the gff of a genome for the chr10 max_diff=2 # maximum size difference (n times bigger or smaller) between the gene on the reference genome and the gene on the target genome for the gene to be transfered. genome_gff(pos_seg,out_gff,gfa,out_once,out_detail,out_var,target_genome_name,max_diff) if run=="test": intersect_path='intersect.bed' load_intersect(intersect_path) # outputs the gff of the graph for chr10 output_gff='graph.gff' output_gaf='graph.gaf' gfa="graph.gfa" graph_gff(output_gff) graph_gaf(output_gaf,gfa) out_once="target_genome.gff" out_var="variations.txt" out_detail="target_genome_detail.gff" pos_seg="genome2_chr10.bed" gff=output_gff target_genome_name="genome2_chr10" # outputs the gff of a genome for the chr10 genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var,target_genome_name) if run=="reel": # creates segments and features for the intersect between the graph for chr10 and the gff of IRGSP intersect_path='/home/nina/grannot/wd/align_genes/intersect_segments-genes_chr10.bed' load_intersect(intersect_path) # outputs the gff of the graph for chr10 output_gff='graph_chr10.gff' output_gaf='graph_chr10.gaf' gfa="data/graphs/RiceGraphChr10_cactus.gfa" graph_gff(output_gff) graph_gaf(output_gaf,gfa) genome = 'ac' if genome=='ac': # transfer from graph to azucena pos_seg="seg_coord/AzucenaRS1_chromosome10.bed" out_once="azucena_chr10.gff" out_detail="azucena_detail_chr10.gff" out_var="variations_chr10.gff" if genome=='nb': # transfer from graph to nipponbare pos_seg="seg_coord/IRGSP-1_0_Chr10.bed" out_once="nb_chr10_all.gff" out_detail="nb_chr10_all_detail.gff" out_var="variations_chr10.gff" gff=output_gff target_genome_name="CM020642.1_Azucena_chromosome10" # outputs the gff of a genome for the chr10 genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var,target_genome_name)