#!/home/nina/annotpangenome/.venv/bin/python
# created by Nina Marthe 2023 - nina.marthe@ird.fr
# licensed under MIT
from Graph_gff import *

#from inference import *
# Execution mode selector: "command_line" reads the inputs from sys.argv,
# "test" uses the hard-coded paths of the development section of this script.
run = "command_line"
if run == "command_line":
    import sys

    # Usage:
    #   <script> intersect gfa pos_seg [target_genome_name]
    #   <script> -h
    usage_input = "expected input : intersect, gfa file with walks, bed file with positions of the segments on the target genome"
    usage_output = "output : graph gff, graph gaf, target genome gff*2+variations"

    # The help flag has to be tested before the argument count: "-h" alone is
    # fewer than the three required arguments, so checking the count first
    # would make the help branch unreachable in normal use.
    if len(sys.argv) >= 2 and sys.argv[1] == "-h":
        print(usage_input)
        print(usage_output)
        sys.exit(0)  # asking for help is not an error
    if len(sys.argv) < 4:  # intersect, gfa, pos_seg (target_genome_name)
        print(usage_input)
        sys.exit(1)

    intersect = sys.argv[1]
    gfa = sys.argv[2]
    pos_seg = sys.argv[3]

    # Output names are built from the input file name cut at its first dot.
    # Taking element 0 of the dot-split also works for names without any
    # extension, where the former "[0:-1][0]" slice raised IndexError.
    gfa_stem = gfa.split("/")[-1].split(".")[0]
    pos_seg_stem = pos_seg.split("/")[-1].split(".")[0]

    out_gff = gfa_stem + ".gff"
    out_gaf = gfa_stem + ".gaf"
    out_once = pos_seg_stem + ".gff"
    out_detail = pos_seg_stem + "_detail.gff"
    out_var = pos_seg_stem + "_variations.txt"

    # The optional 4th argument is sys.argv[4]; the previous code read
    # sys.argv[5], which is out of range when exactly five entries are present.
    if len(sys.argv) >= 5:
        target_genome_name = sys.argv[4]
    else:
        target_genome_name = pos_seg_stem

    load_intersect(intersect)
    # outputs the gff and gaf of the graph
    graph_gff(out_gff)
    graph_gaf(out_gaf, gfa)
    # outputs the gff of the target genome
    genome_gff(pos_seg, out_gff, gfa, out_once, out_detail, out_var, target_genome_name)

# Development mode below: runs on hard-coded paths when run is set to "test".
if run == "test":
    # NOTE(review): the indentation of this section was lost in the source
    # this was recovered from. Everything down to the final genome_gff call is
    # assumed to belong to the "test" branch - confirm against the repository.

    # ---- part 1: small test dataset ----
    intersect_path = '/home/nina/annotpangenome/test_data/input_data_inf/intersect.bed'
    load_intersect(intersect_path)

    # graph-level outputs (gff and gaf) for the test graph
    output_gff, output_gaf = 'test_data/graph.gff', 'test_data/graph.gaf'
    gfa = "test_data/input_data_inf/graph_test.gfa"
    graph_gff(output_gff)
    graph_gaf(output_gaf, gfa)

    # target-genome outputs for the test dataset
    pos_seg = "test_data/input_data_inf/genome4_chr10.bed"
    gff = "test_data/graph.gff"
    out_once = "test_data/target_genome.gff"
    out_detail = "test_data/target_genome_detail.gff"
    out_var = "test_data/variations.txt"
    target_genome_name = "genome4_chr10"
    genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var, target_genome_name)

    # ---- part 2: chr10 graph, intersect with the IRGSP gff ----
    intersect_path = '/home/nina/annotpangenome/align_genes/intersect_segments-genes_chr10.bed'
    load_intersect(intersect_path)

    # graph-level outputs (gff and gaf) for chr10
    output_gff, output_gaf = 'graph_chr10.gff', 'graph_chr10.gaf'
    gfa = "data/graphs/RiceGraphChr10_cactus.gfa"
    graph_gff(output_gff)
    graph_gaf(output_gaf, gfa)

    # Choice of the target genome: 'ac' = azucena, 'nb' = nipponbare.
    genome = 'ac'
    if genome == 'ac':  # transfer from graph to azucena
        pos_seg = "seg_coord/AzucenaRS1_chromosome10.bed"
        out_once, out_detail = "azucena_chr10.gff", "azucena_detail_chr10.gff"
        out_var = "variations_chr10.gff"
    elif genome == 'nb':  # transfer from graph to nipponbare
        pos_seg = "seg_coord/IRGSP-1_0_Chr10.bed"
        out_once, out_detail = "nb_chr10_all.gff", "nb_chr10_all_detail.gff"
        out_var = "variations_chr10.gff"

    # target-genome outputs for chr10
    gff = "graph_chr10.gff"
    target_genome_name = "CM020642.1_Azucena_chromosome10"
    genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var, target_genome_name)