main.py

#!/home/nina/annotpangenome/.venv/bin/python

# created by Nina Marthe 2023 - nina.marthe@ird.fr
# licensed under MIT

from Graph_gff import *
from Functions_output import *
#from inference import *

run="command_line"


def _file_stem(path):
    """Return the file name of *path* truncated at its first dot.

    Only "/" is treated as a directory separator. A name that contains no
    dot is returned unchanged (e.g. "dir/graph.v2.gfa" -> "graph",
    "dir/graph" -> "graph").
    """
    return path.split("/")[-1].split(".")[0]


if run=="command_line":

    import sys

    _usage="expected input : intersect, gfa file  with walks, bed file with positions of the segments on the target genome"

    # The help flag is tested before the argument count so that
    # `main.py -h` on its own prints the full help text.
    # The exit status stays 1 in both cases, as callers may rely on it.
    if len(sys.argv)>=2 and sys.argv[1]=="-h":
        print(_usage)
        print("output : graph gff, graph gaf, target genome gff*2+variations")
        sys.exit(1)
    if len(sys.argv)<4: # intersect, gfa, pos_seg (target_genome_name)
        print(_usage)
        sys.exit(1)

    intersect=sys.argv[1]
    gfa=sys.argv[2]
    pos_seg=sys.argv[3]

    # Output names are bare file names (no directory component) derived
    # from the stems of the gfa file and of the segment-position bed file.
    gfa_stem=_file_stem(gfa)
    pos_seg_stem=_file_stem(pos_seg)

    out_gff=gfa_stem+".gff"
    out_gaf=gfa_stem+".gaf"
    out_once=pos_seg_stem+".gff"
    out_detail=pos_seg_stem+"_detail.gff"
    out_var=pos_seg_stem+"_variations.txt"

    # The optional 4th positional argument names the target genome;
    # without it the name falls back to the stem of the bed file.
    if len(sys.argv)>=5:
        target_genome_name=sys.argv[4]
    else:
        target_genome_name=pos_seg_stem

    load_intersect(intersect)

    # outputs the gff and gaf of the graph
    graph_gff(out_gff)
    graph_gaf(out_gaf,gfa)

    # outputs the gff of the target genome
    genome_gff(pos_seg,out_gff,gfa,out_once,out_detail,out_var,target_genome_name)


if run=="test":
    # Input files of the test data set.
    intersect_path = '/home/nina/annotpangenome/test_data/input_data_inf/intersect.bed'
    gfa = "test_data/input_data_inf/graph_test.gfa"
    pos_seg = "test_data/input_data_inf/genome4_chr10.bed"
    target_genome_name = "genome4_chr10"

    # Output files for the graph annotation.
    output_gff = 'test_data/graph.gff'
    output_gaf = 'test_data/graph.gaf'

    # Same path as output_gff: the graph gff written by graph_gff() is the
    # one handed to genome_gff() below.
    gff = "test_data/graph.gff"

    # Output files for the target genome.
    out_once = "test_data/target_genome.gff"
    out_detail = "test_data/target_genome_detail.gff"
    out_var = "test_data/variations.txt"

    load_intersect(intersect_path)

    # outputs the gff and gaf of the graph
    graph_gff(output_gff)
    graph_gaf(output_gaf, gfa)

    # outputs the gff of the target genome
    genome_gff(
        pos_seg,
        gff,
        gfa,
        out_once,
        out_detail,
        out_var,
        target_genome_name,
    )

if run=="reel":

    # Inputs: intersect between the chr10 graph segments and the genes,
    # and the chr10 graph itself.
    intersect_path = '/home/nina/annotpangenome/align_genes/intersect_segments-genes_chr10.bed'
    gfa = "data/graphs/RiceGraphChr10_cactus.gfa"

    # Graph-level outputs.
    output_gff = 'graph_chr10.gff'
    output_gaf = 'graph_chr10.gaf'

    # creates segments and features from the intersect file
    load_intersect(intersect_path)

    # outputs the gff and gaf of the graph for chr10
    graph_gff(output_gff)
    graph_gaf(output_gaf, gfa)

    # Selects which genome the annotation is transferred to.
    genome = 'ac'
    if genome == 'ac': # transfer from graph to azucena
        pos_seg, out_once, out_detail, out_var = (
            "seg_coord/AzucenaRS1_chromosome10.bed",
            "azucena_chr10.gff",
            "azucena_detail_chr10.gff",
            "variations_chr10.gff",
        )
    elif genome == 'nb': # transfer from graph to nipponbare
        pos_seg, out_once, out_detail, out_var = (
            "seg_coord/IRGSP-1_0_Chr10.bed",
            "nb_chr10_all.gff",
            "nb_chr10_all_detail.gff",
            "variations_chr10.gff",
        )

    # Same path as output_gff above.
    gff = "graph_chr10.gff"
    # NOTE(review): this is the Azucena name whichever genome is selected
    # above - confirm whether the 'nb' case needs its own value.
    target_genome_name = "CM020642.1_Azucena_chromosome10"

    # outputs the gff of a genome for the chr10
    genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var, target_genome_name)