Skip to content
Snippets Groups Projects
main.py 3.47 KiB
Newer Older
#!/home/nina/annotpangenome/.venv/bin/python
# created by Nina Marthe 2023 - nina.marthe@ird.fr
from Functions_output import *
    # Command-line mode.
    # usage : main.py <intersect> <gfa> <pos_seg> [target_genome_name]
    # NOTE(review): this section is indented, but the statement that opens it is not visible here - confirm the header is present in the real file.
    usage="expected input : intersect, gfa file  with walks, bed file with positions of the segments on the target genome"

    # the help flag is tested before the argument count, otherwise "main.py -h" alone
    # is caught by the count check and the full help is never printed
    if len(sys.argv)>1 and sys.argv[1]=="-h" :
        print(usage)
        print("output : graph gff, graph gaf, target genome gff*2+variations")
        sys.exit(1)
    if len(sys.argv)<4 : # intersect, gfa, pos_seg (target_genome_name)
        print(usage)
        #print("output : graph gff, graph gaf, target genome gff*2+variations")
        sys.exit(1)

    intersect=sys.argv[1]
    gfa=sys.argv[2]
    pos_seg=sys.argv[3]

    # the output names are built from the input file names, cut at the first dot.
    # split(".")[0] also works when the file name has no extension (the former [0:-1][0] raised an IndexError)
    gfa_stem=gfa.split("/")[-1].split(".")[0]
    pos_seg_stem=pos_seg.split("/")[-1].split(".")[0]

    out_gff=gfa_stem+".gff"
    out_gaf=gfa_stem+".gaf"
    out_once=pos_seg_stem+".gff"
    out_detail=pos_seg_stem+"_detail.gff"
    out_var=pos_seg_stem+"_variations.txt"

    # optional 4th argument : name of the target genome. it is sys.argv[4] (sys.argv[5] does not exist when there are 5 items)
    if len(sys.argv)>=5:
        target_genome_name=sys.argv[4]
    else:
        # default : name of the bed file without its extension
        target_genome_name=pos_seg_stem

    load_intersect(intersect)

    # outputs the gff and gaf of the graph
    graph_gff(out_gff)
    graph_gaf(out_gaf,gfa)

    # outputs the gff of the target genome (the gff of the graph written above is given as input)
    genome_gff(pos_seg,out_gff,gfa,out_once,out_detail,out_var,target_genome_name)



if run=="test":
    # Test mode : runs the whole pipeline on the small dataset stored in test_data/
    intersect_path='/home/nina/annotpangenome/test_data/input_data_inf/intersect.bed'
    load_intersect(intersect_path)

    # outputs the gff and gaf of the test graph
    output_gff='test_data/graph.gff'
    output_gaf='test_data/graph.gaf'
    gfa="test_data/input_data_inf/graph_test.gfa"
    graph_gff(output_gff)
    graph_gaf(output_gaf,gfa)

    out_once="test_data/target_genome.gff"
    out_var="test_data/variations.txt"
    out_detail="test_data/target_genome_detail.gff"
    pos_seg="test_data/input_data_inf/genome4_chr10.bed"

    # gff of the graph given to genome_gff : the file written by graph_gff above
    gff=output_gff

    # target_genome_name was used below without being assigned anywhere on this path (NameError).
    # it is derived from the bed file name, like the default of the command-line mode.
    # NOTE(review): confirm that this name matches the genome name used in the test gfa.
    target_genome_name=pos_seg.split("/")[-1].split(".")[0]

    # outputs the gff of the target genome
    genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var,target_genome_name)
    # Run on the chr10 data.
    # creates segments and features for the intersect between the graph for chr10 and the gff of IRGSP
    intersect_path='/home/nina/annotpangenome/align_genes/intersect_segments-genes_chr10.bed'
    load_intersect(intersect_path)

    # outputs the gff and gaf of the graph for chr10
    output_gff='graph_chr10.gff'
    output_gaf='graph_chr10.gaf'
    gfa="data/graphs/RiceGraphChr10_cactus.gfa"
    graph_gff(output_gff)
    graph_gaf(output_gaf,gfa)

    # choice of the target genome : 'ac' or 'nb'
    genome = 'ac'
    if genome=='ac': # transfer from graph to azucena
        pos_seg="seg_coord/AzucenaRS1_chromosome10.bed"
        out_once="azucena_chr10.gff"
        out_detail="azucena_detail_chr10.gff"
        out_var="variations_chr10.gff"
    elif genome=='nb': # transfer from graph to nipponbare
        pos_seg="seg_coord/IRGSP-1_0_Chr10.bed"
        out_once="nb_chr10_all.gff"
        out_detail="nb_chr10_all_detail.gff"
        out_var="variations_chr10.gff"
    # NOTE(review): this name is the azucena one whatever the value of genome - confirm the name to use for 'nb'
    target_genome_name="CM020642.1_Azucena_chromosome10"

    # outputs the gff of a genome for the chr10.
    # the gff of the graph is the one written just above (output_gff), and not the
    # leftover gff variable, which still pointed to test_data/graph.gff
    genome_gff(pos_seg,output_gff,gfa,out_once,out_detail,out_var,target_genome_name)