Newer
Older
# created by Nina Marthe 2023 - nina.marthe@ird.fr
# licensed under MIT

nina.marthe_ird.fr
committed
import os
import subprocess
from Graph_gff import *

nina.marthe_ird.fr
committed
from argparser import *

nina.marthe_ird.fr
committed
#from inference import *

nina.marthe_ird.fr
committed

nina.marthe_ird.fr
committed
args=arg()
read_args(args) # intersect is in the current directory

nina.marthe_ird.fr
committed

nina.marthe_ird.fr
committed
intersect="intersect"
gfa=args.graph.name
load_intersect(intersect)

nina.marthe_ird.fr
committed

nina.marthe_ird.fr
committed
# outputs the gff and gaf of the graph for chr10

nina.marthe_ird.fr
committed
out_graph_gff=gfa.split("/")[-1].split(".")[0:-1][0]+".gff"
out_graph_gaf=gfa.split("/")[-1].split(".")[0:-1][0]+".gaf"

nina.marthe_ird.fr
committed
segments=args.segment_coordinates_path+"/segments.txt"

nina.marthe_ird.fr
committed

nina.marthe_ird.fr
committed
if args.graph_gff:
graph_gff(out_graph_gff)
if args.graph_gaf:
graph_gaf(out_graph_gaf,segments)

nina.marthe_ird.fr
committed
segment_coord_files=os.listdir(args.segment_coordinates_path)
# get list of files in seg_coord.

nina.marthe_ird.fr
committed
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
if args.annotation | args.variation | args.alignment:
# get the target genomes if there is none specified
if len(args.target)==0:
walks=open(args.segment_coordinates_path+"/walks.txt",'r')
walk_lines=walks.readlines()
for line in walk_lines:
genome_name=line.split()[1]
if (args.source_genome not in genome_name) & (genome_name not in args.target) & ("MINIGRAPH" not in genome_name):
args.target.append(genome_name)
for target_genome in args.target:
segments_on_target_genome={}
print(f'\n{target_genome} transfer :')
# create directory to store output files
command="mkdir "+target_genome
subprocess.run(command,shell=True,timeout=None)
# create dictonaries with paths and segments positions.
for file in segment_coord_files:
genome_name=get_genome_name(args.target,file)
if genome_name==target_genome :
print(f' loading the information from the file {file}')
file_path=args.segment_coordinates_path+file
get_segments_positions_on_genome(file_path)
print(f' loading the walks for the genome {target_genome}')
walks_path=args.segment_coordinates_path+"/walks.txt"
target_genome_paths=get_paths(walks_path,target_genome)
out_target_gff=target_genome+"/"+target_genome+".gff"
out_target_var=target_genome+"/"+target_genome+"_var.txt"
out_aln=target_genome+"/"+target_genome+"_aln.txt"
transfer_on_target(segments,out_target_gff,out_target_var,out_aln,target_genome,target_genome_paths,args)
segments_on_target_genome={} # empty dict