#!/home/nina/annotpangenome/.venv/bin/python

# created by Nina Marthe 2023 - nina.marthe@ird.fr
# licensed under MIT

import sys

from Graph_gff import *
from Functions_output import *
#from inference import *

# Scenario selector: "command_line" reads the input paths from sys.argv,
# "test" runs on the bundled test data, "reel" runs on the real chr10 data.
run="command_line"

_USAGE_INPUT = "expected input : intersect, gfa file with walks, bed file with positions of the segments on the target genome"
_USAGE_OUTPUT = "output : graph gff, graph gaf, target genome gff*2+variations"


def _file_stem(path):
    """Return the base name of *path* cut at its first dot.

    ``"data/graph.v2.gfa"`` gives ``"graph"``.  A name without any dot is
    returned whole; the former ``split(".")[0:-1][0]`` expression raised
    IndexError in that case.
    """
    return path.split("/")[-1].split(".")[0]


def _run_command_line(argv):
    """Run the pipeline with paths taken from the command line.

    argv layout: script, intersect, gfa, pos_seg, [target_genome_name].
    Output file names are derived from the gfa and pos_seg file names and are
    written relative to the current directory.  Exits with status 1 when
    fewer than three positional arguments are given, and with status 0 after
    printing the help for ``-h``.
    """
    # Help is tested before the argument count so that "script -h" on its own
    # prints the full usage instead of the short error message.
    if len(argv) >= 2 and argv[1] == "-h":
        print(_USAGE_INPUT)
        print(_USAGE_OUTPUT)
        sys.exit(0)
    if len(argv) < 4:  # intersect, gfa, pos_seg
        print(_USAGE_INPUT)
        sys.exit(1)

    intersect, gfa, pos_seg = argv[1:4]

    # input : intersect, gfa, pos_seg.
    # outputs : out_gff, out_gaf, out_once, out_var, out_detail.
    # out_gff/out_gaf are named after the gfa file.
    # out_once/out_detail: take pos_seg, strip the .bed, add _detail when
    # needed. Same for out_var; rename it azu_var_chr10.txt ?
    graph_stem = _file_stem(gfa)
    target_stem = _file_stem(pos_seg)
    out_gff = graph_stem + ".gff"
    out_gaf = graph_stem + ".gaf"
    out_once = target_stem + ".gff"
    out_detail = target_stem + "_detail.gff"
    out_var = target_stem + "_variations.txt"

    # The optional 4th positional argument is argv[4]; the previous code read
    # argv[5], which is always out of range when len(argv) == 5.
    if len(argv) >= 5:
        target_genome_name = argv[4]
    else:
        target_genome_name = target_stem
    print(target_genome_name)

    load_intersect(intersect)

    # outputs the gff and gaf of the graph
    graph_gff(out_gff)
    graph_gaf(out_gaf, gfa)

    # outputs the gff of the target genome
    genome_gff(pos_seg, out_gff, gfa, out_once, out_detail, out_var, target_genome_name)


def _run_test():
    """Run the pipeline on the files under test_data/."""
    intersect_path = '/home/nina/annotpangenome/test_data/input_data_inf/intersect.bed'
    load_intersect(intersect_path)

    # outputs the gff and gaf of the graph
    output_gff = 'test_data/graph.gff'
    output_gaf = 'test_data/graph.gaf'
    gfa = "test_data/input_data_inf/graph_test.gfa"
    graph_gff(output_gff)
    graph_gaf(output_gaf, gfa)

    out_once = "test_data/target_genome.gff"
    out_var = "test_data/variations.txt"
    out_detail = "test_data/target_genome_detail.gff"
    pos_seg = "test_data/input_data_inf/genome4_chr10.bed"
    gff = "test_data/graph.gff"
    target_genome_name = "genome4_chr10"

    # outputs the gff of the target genome
    genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var, target_genome_name)


def _run_reel():
    """Run the pipeline on the real chr10 graph and segment coordinates."""
    # creates segments and features for the intersect between the graph for
    # chr10 and the gff of IRGSP
    intersect_path = '/home/nina/annotpangenome/align_genes/intersect_segments-genes_chr10.bed'
    load_intersect(intersect_path)

    # outputs the gff and gaf of the graph for chr10
    output_gff = 'graph_chr10.gff'
    output_gaf = 'graph_chr10.gaf'
    gfa = "data/graphs/RiceGraphChr10_cactus.gfa"
    graph_gff(output_gff)
    graph_gaf(output_gaf, gfa)

    genome = 'ac'
    if genome == 'ac':
        # transfer from graph to azucena
        pos_seg = "seg_coord/AzucenaRS1_chromosome10.bed"
        out_once = "azucena_chr10.gff"
        out_detail = "azucena_detail_chr10.gff"
        out_var = "variations_chr10.gff"
    if genome == 'nb':
        # transfer from graph to nipponbare
        pos_seg = "seg_coord/IRGSP-1_0_Chr10.bed"
        out_once = "nb_chr10_all.gff"
        out_detail = "nb_chr10_all_detail.gff"
        out_var = "variations_chr10.gff"

    gff = "graph_chr10.gff"
    # NOTE(review): the target name is the Azucena one even when
    # genome == 'nb' - confirm this is intended before switching genomes.
    target_genome_name = "CM020642.1_Azucena_chromosome10"

    # outputs the gff of the target genome for chr10
    genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var, target_genome_name)


if run=="command_line":
    _run_command_line(sys.argv)

if run=="test":
    _run_test()

if run=="reel":
    _run_reel()