Skip to content
Snippets Groups Projects
Commit ff851411 authored by nina.marthe_ird.fr's avatar nina.marthe_ird.fr
Browse files

reorganized the output of target genome files, made the options independant

parent 91d95fba
No related branches found
No related tags found
No related merge requests found
......@@ -7,7 +7,7 @@ from Functions import *
# functions to get the gff with one line per feature
# outputs the feature once in a gff, from the first to the last segment present on the new genome (if the size is ok) :
def generate_target_gff(first_seg,last_seg,feature_id,list_seg,max_diff):
def generate_target_gff(first_seg,last_seg,feature_id,max_diff):
if (first_seg!=''): # feature present on the target genome
size_on_new_genome=get_feature_stop_on_target_genome(last_seg,feature_id)-get_feature_start_on_target_genome(first_seg,feature_id)+1
......@@ -26,15 +26,13 @@ def generate_target_gff(first_seg,last_seg,feature_id,list_seg,max_diff):
# writes the gff of target genome using the gff of the graph
def transfer_on_target(segments_file, out_target_gff, out_var,out_aln,target_genome,target_genome_paths,args):
print(f'generation of {target_genome} output')
stats=False
list_feature_to_transfer= Features.keys()
print(f'generation of {target_genome} output')
segments_list={}
if args.annotation | args.variation | args.alignment:
for feat in list_feature_to_transfer:
add_target_genome_path(feat,target_genome_paths)
for feat in list_feature_to_transfer:
add_target_genome_path(feat,target_genome_paths)
if args.annotation:
print(f' generation of {target_genome} gff')
......@@ -47,17 +45,10 @@ def transfer_on_target(segments_file, out_target_gff, out_var,out_aln,target_gen
for feat in list_feature_to_transfer:
# for each feature, get list of the segments where it is and the first and last segment of the feature on the new genome
list_seg=Features[feat].segments_list_source
feature_target_path=Features[feat].segments_list_target
[first_seg,last_seg]=get_first_last_seg(feature_target_path)
if args.variation: # append dict of segments for which we may need the sequence
for segment in list_seg:
segments_list[segment[1:]]=''
for segment in feature_target_path:
segments_list[segment[1:]]=''
transfer_stat=generate_target_gff(first_seg,last_seg,feat,list_seg,args.max_difference) # insertions not considered !!!
transfer_stat=generate_target_gff(first_seg,last_seg,feat,args.max_difference) # insertions not considered !!!
if transfer_stat=="bad_size":
bad_size_features+=1
elif transfer_stat=="absent":
......@@ -69,32 +60,52 @@ def transfer_on_target(segments_file, out_target_gff, out_var,out_aln,target_gen
write_line("",output_target_gff,True)
file_out.close()
if args.variation | args.alignment : # append dict of segments for which we may need the sequence
for feat in list_feature_to_transfer:
list_seg=Features[feat].segments_list_source
feature_target_path=Features[feat].segments_list_target
for segment in list_seg:
segments_list[segment[1:]]=''
for segment in feature_target_path:
segments_list[segment[1:]]=''
if args.variation:
print(f' generation of {target_genome} gene variation details')
seg_seq=get_segments_sequence(segments_file,segments_list) # if var and not ann, segments list doesnt exist ! change getsegseq.
print(f' generation of {target_genome} genes variation details')
seg_seq=get_segments_sequence(segments_file,segments_list)
file_out_var = open(out_var, 'w')
global output_variations
output_variations=[0,"",file_out_var]
if args.alignment:
file_aln=open(out_aln,'w')
global output_target_aln
output_target_aln=[0,"",file_aln]
write_line("Sequence alignment generated from feature path comparison in pangenome graph. Made with GrAnnoT v1.\n\n",output_target_aln,False)
for feat in list_feature_to_transfer:
# for each feature, get list of the segments where it is and the first and last segment of the feature on the new genome
list_seg=Features[feat].segments_list_source
feature_target_path=Features[feat].segments_list_target
[first_seg,last_seg]=get_first_last_seg(feature_target_path)
print_variations(first_seg,last_seg,feat,seg_seq,args.alignment)
print_variations(first_seg,last_seg,feat,seg_seq)
write_line("",output_variations,True)
file_out_var.close()
if args.alignment:
write_line("",output_target_aln,True)
file_aln.close()
if args.alignment:
print(f' generation of {args.source_genome} genes alignment with {target_genome} genes')
if not args.variation:
seg_seq=get_segments_sequence(segments_file,segments_list)
file_aln=open(out_aln,'w')
global output_target_aln
output_target_aln=[0,"",file_aln]
write_line("Sequence alignment generated from feature path comparison in pangenome graph. Made with GrAnnoT v1.\n\n",output_target_aln,False)
for feat in list_feature_to_transfer:
list_seg=Features[feat].segments_list_source
feature_target_path=Features[feat].segments_list_target
[first_seg,last_seg]=get_first_last_seg(feature_target_path)
print_alignment(first_seg,feat,seg_seq)
write_line("",output_target_aln,True)
file_aln.close()
if stats:
# create objects for stats on how many segments are absent in target genome, their average length, etc
......@@ -131,9 +142,22 @@ def get_first_last_seg(seg_list):
first_seg=''
last_seg=''
return [first_seg,last_seg]
# fct to get the alignment between the features on the source genome and the features on the target genome
def print_alignment(first_seg,feat,seg_seq):
if first_seg!='': # if the feature is not completly absent
feature=Features[feat]
feature_path_target_genome=feature.segments_list_target
feature_path_source_genome=feature.segments_list_source
[feature_path_source_genome,feature_path_target_genome,inversion]=detect_feature_inversion(feature_path_source_genome,feature_path_target_genome)
line_aln=create_line_aln(feature_path_source_genome,feature_path_target_genome,seg_seq,feat)
write_line(line_aln,output_target_aln,False)
# functions to get the detail of the variations in the features
def print_variations(first_seg,last_seg,feat,seg_seq,alignment):
def print_variations(first_seg,last_seg,feat,seg_seq):
if first_seg!='': # if the feature is not completly absent # add the else, output absent features
[variation,feature_path_source_genome,feature_path_target_genome]=create_var(feat,first_seg,last_seg) # removes the strands in the segment lists
......@@ -142,10 +166,6 @@ def print_variations(first_seg,last_seg,feat,seg_seq,alignment):
# loop to go through both paths with i and j
[i,j,var_count]=[0,0,0]
if alignment:
line_aln=create_line_aln(feature_path_source_genome,feature_path_target_genome,seg_seq,feat)
write_line(line_aln,output_target_aln,False)
# detect and print variations ignoring the strands
while (i<len(feature_path_source_genome)) & (j<len(feature_path_target_genome)):
if feature_path_source_genome[i] != feature_path_target_genome[j]: # if there is a difference between the two paths
......
......@@ -32,36 +32,38 @@ segment_coord_files=os.listdir(args.segment_coordinates_path)
# get list of files in seg_coord.
# get the target genomes if there is none specified
if len(args.target)==0:
walks=open(args.segment_coordinates_path+"/walks.txt",'r')
walk_lines=walks.readlines()
for line in walk_lines:
genome_name=line.split()[1]
if (args.source_genome not in genome_name) & (genome_name not in args.target) & ("MINIGRAPH" not in genome_name):
args.target.append(genome_name)
for target_genome in args.target:
segments_on_target_genome={}
print(f'\n{target_genome} transfer :')
# create directory to store output files
command="mkdir "+target_genome
subprocess.run(command,shell=True,timeout=None)
# create dictonaries with paths and segments positions.
for file in segment_coord_files:
genome_name=get_genome_name(args.target,file)
if genome_name==target_genome :
print(f' loading the information from the file {file}')
file_path=args.segment_coordinates_path+file
get_segments_positions_on_genome(file_path)
print(f' loading the walks for the genome {target_genome}')
walks_path=args.segment_coordinates_path+"/walks.txt"
target_genome_paths=get_paths(walks_path,target_genome)
out_target_gff=target_genome+"/"+target_genome+".gff"
out_target_var=target_genome+"/"+target_genome+"_var.txt"
out_aln=target_genome+"/"+target_genome+"_aln.txt"
transfer_on_target(segments,out_target_gff,out_target_var,out_aln,target_genome,target_genome_paths,args)
segments_on_target_genome={} # empty dict
if args.annotation | args.variation | args.alignment:
# get the target genomes if there is none specified
if len(args.target)==0:
walks=open(args.segment_coordinates_path+"/walks.txt",'r')
walk_lines=walks.readlines()
for line in walk_lines:
genome_name=line.split()[1]
if (args.source_genome not in genome_name) & (genome_name not in args.target) & ("MINIGRAPH" not in genome_name):
args.target.append(genome_name)
for target_genome in args.target:
segments_on_target_genome={}
print(f'\n{target_genome} transfer :')
# create directory to store output files
command="mkdir "+target_genome
subprocess.run(command,shell=True,timeout=None)
# create dictonaries with paths and segments positions.
for file in segment_coord_files:
genome_name=get_genome_name(args.target,file)
if genome_name==target_genome :
print(f' loading the information from the file {file}')
file_path=args.segment_coordinates_path+file
get_segments_positions_on_genome(file_path)
print(f' loading the walks for the genome {target_genome}')
walks_path=args.segment_coordinates_path+"/walks.txt"
target_genome_paths=get_paths(walks_path,target_genome)
out_target_gff=target_genome+"/"+target_genome+".gff"
out_target_var=target_genome+"/"+target_genome+"_var.txt"
out_aln=target_genome+"/"+target_genome+"_aln.txt"
transfer_on_target(segments,out_target_gff,out_target_var,out_aln,target_genome,target_genome_paths,args)
segments_on_target_genome={} # empty dict
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment