# created by Nina Marthe 2023 - nina.marthe@ird.fr
# licensed under MIT

nina.marthe_ird.fr
committed
import os
import subprocess
from Graph_gff import *

nina.marthe_ird.fr
committed
from argparser import *

nina.marthe_ird.fr
committed
#from inference import *

nina.marthe_ird.fr
committed

nina.marthe_ird.fr
committed
# Parse the command line. arg() and read_args() are not defined in this
# script; they are expected to come from the star imports above.
args = arg()
read_args(args)

# The intersect file is looked up by this bare name, i.e. relative to the
# current working directory.
intersect = "intersect"
gfa = args.graph.name
load_intersect(intersect)

# Name the graph-level GFF and GAF outputs after the graph file: take the
# part of its base name that precedes the first dot. Splitting once and
# taking element 0 also works for a name without any extension, where the
# previous `[0:-1][0]` slicing raised IndexError.
graph_stem = os.path.basename(gfa).split(".")[0]
out_graph_gff = graph_stem + ".gff"
out_graph_gaf = graph_stem + ".gaf"

# Segment list inside the segment-coordinates directory.
segments = os.path.join(args.segment_coordinates_path, "segments.txt")

# Write the annotation on the graph, as GFF then as GAF.
graph_gff(out_graph_gff)
graph_gaf(out_graph_gaf, segments)
# Names of all entries found in the segment-coordinates directory.
segment_coord_files = os.listdir(args.segment_coordinates_path)

# If no target genome was specified, collect them from walks.txt: the second
# whitespace-separated field of each line is used as the genome name.
if not args.target:
    # `with` guarantees the file is closed; it used to be left open.
    with open(os.path.join(args.segment_coordinates_path, "walks.txt"), "r") as walks:
        walk_lines = walks.readlines()

    for line in walk_lines:
        fields = line.split()
        if len(fields) < 2:
            # Blank or truncated line: no genome name to read.
            continue
        genome_name = fields[1]
        # Keep each genome once, skipping the source genome and any name
        # containing "MINIGRAPH". Logical `and` replaces the bitwise `&`
        # that was applied to these booleans.
        if (
            args.source_genome not in genome_name
            and genome_name not in args.target
            and "MINIGRAPH" not in genome_name
        ):
            args.target.append(genome_name)

nina.marthe_ird.fr
committed
# Transfer the annotation onto every target genome, one output directory each.
for target_genome in args.target:
    # NOTE(review): not read anywhere in the visible part of this script;
    # kept in case other code relies on it - confirm and remove if unused.
    segments_on_target_genome = {}

    print(f'{target_genome} transfer :')

    # Create the directory that stores the output files. os.makedirs avoids
    # passing the genome name through a shell and tolerates an existing
    # directory.
    os.makedirs(target_genome, exist_ok=True)

    # Load the segment positions from every file of the coordinates directory.
    # NOTE(review): nothing in this loop depends on target_genome, so the same
    # files are reloaded for each target - confirm whether that is intended.
    for file in segment_coord_files:
        # NOTE(review): the returned name is not used below; the call is kept
        # because its side effects, if any, are not visible from here.
        genome_name = get_genome_name(args.target, file)

        print(f' loading the information from the file {file}')

        # Join with a path separator: plain concatenation produced a wrong
        # path whenever the directory argument had no trailing slash.
        file_path = os.path.join(args.segment_coordinates_path, file)
        get_segments_positions_on_genome(file_path)

    print(f' loading the walks for the genome {target_genome}')
    walks_path = os.path.join(args.segment_coordinates_path, "walks.txt")
    target_genome_paths = get_paths(walks_path, target_genome)

    # Output files, all placed inside the per-genome directory.
    out_target_gff = f"{target_genome}/{target_genome}.gff"
    out_target_var = f"{target_genome}/{target_genome}_var.txt"
    out_clustal = f"{target_genome}/{target_genome}_aln.txt"

    transfer_on_target(
        segments,
        out_target_gff,
        out_target_var,
        out_clustal,
        target_genome,
        target_genome_paths,
        args,
    )