Skip to content
Snippets Groups Projects
main.py 4.54 KiB
Newer Older
# created by Nina Marthe 2023 - nina.marthe@ird.fr
# licensed under MIT

import os
import subprocess
nina.marthe_ird.fr's avatar
nina.marthe_ird.fr committed
from .Graph_gff import *
from .Functions_output import *
from .argparser import *
nina.marthe_ird.fr's avatar
nina.marthe_ird.fr committed
#from inference import *
nina.marthe_ird.fr's avatar
nina.marthe_ird.fr committed
#from GrAnnoT import *
nina.marthe_ird.fr's avatar
nina.marthe_ird.fr committed
def main():

    args=arg()
    read_args(args) 

    # intersect is in the current directory
    intersect="intersect"
    gfa=args.graph.name
    load_intersect(intersect)

    segments=args.segment_coordinates_path+"/segments.txt"

    # outputs the gff and gaf of the graph
    if args.graph_gff:
        out_graph_gff=gfa.split("/")[-1].split(".")[0:-1][0]+".gff"
        graph_gff(out_graph_gff)
    if args.graph_gaf:
        out_graph_gaf=gfa.split("/")[-1].split(".")[0:-1][0]+".gaf"
        graph_gaf(out_graph_gaf,segments)

    # get list of files in seg_coord
    segment_coord_files=os.listdir(args.segment_coordinates_path)

    # if a transfer on a target genome is asked
    if args.annotation or args.variation or args.alignment:

        # get the target genomes if there is none specified
        if len(args.target)==0:
            with open(args.segment_coordinates_path+"/walks.txt",'r') as walks:
                line=walks.readline()
                while line:
                    genome_name=line.split()[1]
                    if (args.source_genome not in genome_name) and (genome_name not in args.target) and ("MINIGRAPH" not in genome_name):
                        args.target.append(genome_name)
                    line=walks.readline()

        pav_dict={}
        # if a pav matrix is asked, create dictionnary to store the features, and for each feature the value for all the target genomes.
        if args.pav_matrix:
            pav_line=len(args.target)*[1]
            pav_dict["gene_id"]=list(args.target)
            for feat in Features.keys():
                if Features[feat].type=='gene':
                    pav_dict[feat]=pav_line.copy()

        seg_size={}
        # if the annotation transfer on target genome is asked, build a dictionnary with the segment sizes
        if args.annotation or args.variation or args.alignment or args.pav_matrix:
            with open(segments,'r') as segments_file:
                line=segments_file.readline()
                while line:
                    line=line.split()
                    seg_id='s'+line[1]
                    segment_size=len(line[2])
                    seg_size[seg_id]=segment_size
                    line=segments_file.readline()

        genome_index=0
        for target_genome in args.target:
            print(f'\n{target_genome} transfer :')
            # create directory to store output files
            command="mkdir "+target_genome
            subprocess.run(command,shell=True,timeout=None)

            # create dictionnaries with paths and segments positions.
            for file in segment_coord_files:
                genome_name=get_genome_name(args.target,file)
                if genome_name==target_genome :
                    print(f'    loading the information from the file {file}')
                    file_path=args.segment_coordinates_path+file
                    get_segments_positions_on_genome(file_path)
            print(f'    loading the walks for the genome {target_genome}')
            walks_path=args.segment_coordinates_path+"/walks.txt"
            target_genome_paths=get_paths(walks_path,target_genome)

            # create output files names
            out_target_gff=target_genome+"/"+target_genome+".gff"
            out_target_var=target_genome+"/"+target_genome+"_var.txt"
            out_aln=target_genome+"/"+target_genome+"_aln.txt"

            list_feat_absent=[]
            # do the annotation transfer (or var/aln)
            transfer_on_target(segments,out_target_gff,out_target_var,out_aln,target_genome,target_genome_paths,list_feat_absent,seg_size,args)
            
            # if pav matrix is asked, add the information of this transfer on the matrix
            if args.pav_matrix:
                for feat in list_feat_absent:
                    pav_dict[feat][genome_index]=0
                genome_index+=1
nina.marthe_ird.fr's avatar
nina.marthe_ird.fr committed
        # print the pav matrix
        if args.pav_matrix:
nina.marthe_ird.fr's avatar
nina.marthe_ird.fr committed
            print('\ngeneration of the presence-absence matrix for the transfered genes')
            pav_output=''
            for line in pav_dict:
                pav_output+=line
                for field in pav_dict[line]:
                    pav_output+="\t"+str(field)
                pav_output+="\n"

            out_pav="PAV_matrix.txt"
            with open(out_pav,'r') as file_out_pav:
                file_out_pav.write(pav_output)