# created by Nina Marthe 2023 - nina.marthe@ird.fr
# licensed under MIT
from Graph_gff import *

#from inference import *

import sys

# Messages printed for usage errors and for -h (text kept from the original script).
USAGE = "expected input : intersect, gfa file with walks, bed file with positions of the segments on the target genome, and the name of the walk of the target genome"
OUTPUTS = "output : graph gff, graph gaf, target genome gff+variations"

# maximum size difference (n times bigger or smaller) between the gene on the
# source genome and the gene on the target genome for the gene to be transfered.
max_diff = 2


def file_prefix(path):
    """Return the file name of *path* up to (not including) its first dot.

    The directory part (everything up to the last "/") is dropped first.
    A file name without any dot is returned whole instead of raising
    IndexError, which is what the previous `split(".")[0:-1][0]` did.
    """
    return path.split("/")[-1].split(".")[0]


def parse_args(argv):
    """Turn the command line into the paths and names used by the pipeline.

    argv is expected to be laid out like sys.argv:
    [script, intersect, gfa, pos_seg, (target_genome_name)].

    Returns a dict with the keys intersect, gfa, pos_seg, out_graph_gff,
    out_graph_gaf, out_target_gff, out_target_var, out_clustal and
    target_genomes (a one-element list).

    Exits with status 0 after printing the help when the first argument is
    -h, and with status 1 after printing the usage when fewer than three
    positional arguments are given.
    """
    # -h is tested before the argument count so that `script -h` alone
    # reaches the help text instead of the usage error.
    if len(argv) > 1 and argv[1] == "-h":
        print(USAGE)
        print(OUTPUTS)
        sys.exit(0)
    if len(argv) < 4:
        print(USAGE)
        sys.exit(1)

    intersect, gfa, pos_seg = argv[1], argv[2], argv[3]
    graph_prefix = file_prefix(gfa)
    target_prefix = file_prefix(pos_seg)

    if len(argv) >= 5:
        # Extra trailing arguments no longer discard the given genome name.
        target_genomes = [argv[4]]  # todo : take several genome names
    else:
        target_genomes = [target_prefix]  # todo : delete this option

    return {
        "intersect": intersect,
        "gfa": gfa,
        "pos_seg": pos_seg,
        "out_graph_gff": graph_prefix + ".gff",
        "out_graph_gaf": graph_prefix + ".gaf",
        "out_target_gff": target_prefix + ".gff",
        "out_target_var": target_prefix + "_variations.txt",
        "out_clustal": target_prefix + "_clustal.txt",
        "target_genomes": target_genomes,
    }


def main(argv):
    """Run the annotation transfer for the files named on the command line.

    load_intersect, graph_gff, graph_gaf and transfer_on_target are not
    defined in this file; presumably they come from the star import of
    Graph_gff at the top of the file (confirm against that module).
    """
    opts = parse_args(argv)

    load_intersect(opts["intersect"])

    # outputs the gff and gaf of the graph
    graph_gff(opts["out_graph_gff"])
    graph_gaf(opts["out_graph_gaf"], opts["gfa"])

    # outputs the gff of the target genome, with the variations and clustal files
    transfer_on_target(
        opts["pos_seg"],
        opts["gfa"],
        opts["out_target_gff"],
        opts["out_target_var"],
        opts["out_clustal"],
        opts["target_genomes"],
        max_diff,
    )


if __name__ == "__main__":
    main(sys.argv)