-
nina.marthe_ird.fr authorednina.marthe_ird.fr authored
main.py 3.48 KiB
#!/home/nina/annotpangenome/.venv/bin/python
# created by Nina Marthe 2023 - nina.marthe@ird.fr
# licensed under MIT
from Graph_gff import *
from Functions_output import *
#from inference import *
run="command_line"
if run=="command_line":
import sys
if not(len(sys.argv)>=4) :# intersect, gfa, pos_seg (target_genome_name)
print("expected input : intersect, gfa file with walks, bed file with positions of the segments on the target genome")
#print("output : graph gff, graph gaf, target genome gff*2+variations")
sys.exit(1)
elif (sys.argv[1]=="-h") :
print("expected input : intersect, gfa file with walks, bed file with positions of the segments on the target genome")
print("output : graph gff, graph gaf, target genome gff*2+variations")
sys.exit(1)
intersect=sys.argv[1]
gfa=sys.argv[2]
pos_seg=sys.argv[3]
out_gff=gfa.split("/")[-1].split(".")[0:-1][0]+".gff"
out_gaf=gfa.split("/")[-1].split(".")[0:-1][0]+".gaf"
out_once=pos_seg.split("/")[-1].split(".")[0:-1][0]+".gff"
out_detail=pos_seg.split("/")[-1].split(".")[0:-1][0]+"_detail.gff"
out_var=pos_seg.split("/")[-1].split(".")[0:-1][0]+"_variations.txt"
if len(sys.argv)==5:
target_genome_name=sys.argv[4]
else:
target_genome_name=pos_seg.split("/")[-1].split(".")[0:-1][0]
load_intersect(intersect)
# outputs the gff and gaf of the graph for chr10
graph_gff(out_gff)
graph_gaf(out_gaf,gfa)
# outputs the gff of a genome for the chr10
max_diff=2 # maximum size difference (n times bigger or smaller) between the gene on the reference genome and the gene on the target genome for the gene to be transfered.
genome_gff(pos_seg,out_gff,gfa,out_once,out_detail,out_var,target_genome_name,max_diff)
if run=="test":
intersect_path='intersect.bed'
load_intersect(intersect_path)
# outputs the gff of the graph for chr10
output_gff='graph.gff'
output_gaf='graph.gaf'
gfa="graph.gfa"
graph_gff(output_gff)
graph_gaf(output_gaf,gfa)
out_once="target_genome.gff"
out_var="variations.txt"
out_detail="target_genome_detail.gff"
pos_seg="genome2_chr10.bed"
gff=output_gff
target_genome_name="genome2_chr10"
# outputs the gff of a genome for the chr10
genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var,target_genome_name)
if run=="reel":
# creates segments and features for the intersect between the graph for chr10 and the gff of IRGSP
intersect_path='/home/nina/grannot/wd/align_genes/intersect_segments-genes_chr10.bed'
load_intersect(intersect_path)
# outputs the gff of the graph for chr10
output_gff='graph_chr10.gff'
output_gaf='graph_chr10.gaf'
gfa="data/graphs/RiceGraphChr10_cactus.gfa"
graph_gff(output_gff)
graph_gaf(output_gaf,gfa)
genome = 'ac'
if genome=='ac': # transfer from graph to azucena
pos_seg="seg_coord/AzucenaRS1_chromosome10.bed"
out_once="azucena_chr10.gff"
out_detail="azucena_detail_chr10.gff"
out_var="variations_chr10.gff"
if genome=='nb': # transfer from graph to nipponbare
pos_seg="seg_coord/IRGSP-1_0_Chr10.bed"
out_once="nb_chr10_all.gff"
out_detail="nb_chr10_all_detail.gff"
out_var="variations_chr10.gff"
gff=output_gff
target_genome_name="CM020642.1_Azucena_chromosome10"
# outputs the gff of a genome for the chr10
genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var,target_genome_name)