Skip to content
Snippets Groups Projects
Commit ad2fb3b5 authored by nina.marthe_ird.fr's avatar nina.marthe_ird.fr
Browse files

adapted the code to use the options for the output files. changed every...

adapted the code to use the options for the output files. changed every instance of 'clustal' to 'aln' in the code
parent b89cd323
No related branches found
No related tags found
No related merge requests found
......@@ -38,9 +38,9 @@ def create_line_target_gff(first_seg,last_seg,feature_id,size_diff,list_seg):
return output_line
# functions to get the clustal alignment for the transfered genes
# functions to get the alignment for the transfered genes
# create clustal aln for a feature
# create alignment for a feature
def segment_aln(type,seg_seq,seg_a,seg_b):
match type:
......@@ -89,12 +89,12 @@ def segment_aln(type,seg_seq,seg_a,seg_b):
return [line_a,line_b,line_c] # check the orientation of the segment later
def parse_clustal_lines(line_a,line_b,line_c,feature_id):
def parse_aln_lines(line_a,line_b,line_c,feature_id):
if (len(line_a)!=len(line_b)) | (len(line_b)!=len(line_c)):
print("line length differ in clustal alignment")
print("line length differ in alignment")
len_to_parse=len(line_a)
len_parsed=0
clustal_line=""
aln_line=""
nb_res_a=0
nb_res_b=0
......@@ -107,16 +107,16 @@ def parse_clustal_lines(line_a,line_b,line_c,feature_id):
add_c=line_c[len_parsed:len_parsed+60]
nb_res_a+=len(add_a)-add_a.count("-")
nb_res_b+=len(add_b)-add_b.count("-")
clustal_line+=f'{headers[0]}{add_a} {nb_res_a}\n'
clustal_line+=f'{headers[1]}{add_b} {nb_res_b}\n'
clustal_line+=f'{headers[2]}{add_c}\n\n'
aln_line+=f'{headers[0]}{add_a} {nb_res_a}\n'
aln_line+=f'{headers[1]}{add_b} {nb_res_b}\n'
aln_line+=f'{headers[2]}{add_c}\n\n'
len_parsed+=60
clustal_line+="\n"
aln_line+="\n"
return clustal_line
return aln_line
def create_line_clustal(feature_path_source_genome,feature_path_target_genome,seg_seq,feature_id):
def create_line_aln(feature_path_source_genome,feature_path_target_genome,seg_seq,feature_id):
line_a=""
line_b=""
line_c=""
......@@ -152,7 +152,7 @@ def create_line_clustal(feature_path_source_genome,feature_path_target_genome,se
[add_a,add_b,add_c]=segment_aln("end_deletion",seg_seq,feature_path_source_genome[i:],"")
line_a+=add_a;line_b+=add_b;line_c+=add_c
return parse_clustal_lines(line_a,line_b,line_c,feature_id)
return parse_aln_lines(line_a,line_b,line_c,feature_id)
# functions to output the stats on the transfer
......
......@@ -24,15 +24,15 @@ def generate_target_gff(first_seg,last_seg,feature_id,list_seg,max_diff):
# writes the gff of target genome using the gff of the graph
def transfer_on_target(segments_file, out_target_gff, out_var,out_clustal,target_genome,target_genome_paths,args):
def transfer_on_target(segments_file, out_target_gff, out_var,out_aln,target_genome,target_genome_paths,args):
[target_gff,var,clustal,stats]=[True,True,True,False]
[target_gff,var,aln,stats]=[True,True,True,False]
list_feature_to_transfer= Features.keys()
print(f'generation of {target_genome} output')
segments_list={}
if target_gff:
if args.annotation:
print(f' generation of {target_genome} gff')
file_out=open(out_target_gff,'w')
global output_target_gff
......@@ -47,7 +47,7 @@ def transfer_on_target(segments_file, out_target_gff, out_var,out_clustal,target
first_seg=get_first_seg(list_seg)[1:]
last_seg=get_first_seg(reversed(list_seg))[1:]
if var: # append dict of segments for which we may need the sequence
if args.variation: # append dict of segments for which we may need the sequence
add_target_genome_path(feat,target_genome_paths)
for segment in list_seg:
segments_list[segment[1:]]=''
......@@ -63,21 +63,21 @@ def transfer_on_target(segments_file, out_target_gff, out_var,out_clustal,target
diff_size_transfered_features[0]+=1
diff_size_transfered_features[1]+=transfer_stat
write_line("",output_target_gff,True)
file_out.close()
write_line("",output_target_gff,True)
file_out.close()
if var:
if args.variation:
print(f' generation of {target_genome} gene variation details')
seg_seq=get_segments_sequence(segments_file,segments_list)
file_out_var = open(out_var, 'w')
global output_variations
output_variations=[0,"",file_out_var]
if clustal:
file_clustal=open(out_clustal,'w')
global output_target_clustal
output_target_clustal=[0,"",file_clustal]
write_line("Sequence alignment generated from feature path comparison in pangenome graph. Made with GrAnnoT v1.\n\n",output_target_clustal,False)
if args.alignment:
file_aln=open(out_aln,'w')
global output_target_aln
output_target_aln=[0,"",file_aln]
write_line("Sequence alignment generated from feature path comparison in pangenome graph. Made with GrAnnoT v1.\n\n",output_target_aln,False)
for feat in list_feature_to_transfer:
# for each feature, get list of the segments where it is and the first and last segment of the feature on the new genome
......@@ -87,10 +87,10 @@ def transfer_on_target(segments_file, out_target_gff, out_var,out_clustal,target
print_variations(first_seg,last_seg,feat,seg_seq)
write_line("",output_variations,True)
write_line("",output_target_clustal,True)
file_out_var.close()
file_clustal.close()
write_line("",output_variations,True)
write_line("",output_target_aln,True)
file_out_var.close()
file_aln.close()
if stats:
# create objects for stats on how many segments are absent in target genome, their average length, etc
......@@ -111,7 +111,7 @@ def transfer_on_target(segments_file, out_target_gff, out_var,out_clustal,target
for feat in list_feature_to_transfer:
stats_feature_missing_segment(feature_missing_segments,first_seg,last_seg,list_seg,feat)
if target_gff:
if args.annotation:
print(len(Features)-(bad_size_features+absent_features),"out of",len(Features),"features are transfered.")
print(bad_size_features,"out of",len(Features), "features are not transfered because they are too big or too small compared to the original genome.")
print(absent_features,"out of",len(Features),"features are not transfered because they are absent in the new genome.")
......@@ -132,8 +132,8 @@ def print_variations(first_seg,last_seg,feat,seg_seq):
# loop to go through both paths with i and j
[i,j,var_count]=[0,0,0]
line_clustal=create_line_clustal(feature_path_source_genome,feature_path_target_genome,seg_seq,feat)
write_line(line_clustal,output_target_clustal,False)
line_aln=create_line_aln(feature_path_source_genome,feature_path_target_genome,seg_seq,feat)
write_line(line_aln,output_target_aln,False)
# detect and print variations ignoring the strands
while (i<len(feature_path_source_genome)) & (j<len(feature_path_target_genome)):
......
......@@ -22,8 +22,10 @@ out_graph_gaf=gfa.split("/")[-1].split(".")[0:-1][0]+".gaf"
segments=args.segment_coordinates_path+"/segments.txt"
graph_gff(out_graph_gff)
graph_gaf(out_graph_gaf,segments)
if args.graph_gff:
graph_gff(out_graph_gff)
if args.graph_gaf:
graph_gaf(out_graph_gaf,segments)
segment_coord_files=os.listdir(args.segment_coordinates_path)
......@@ -59,6 +61,6 @@ for target_genome in args.target:
out_target_gff=target_genome+"/"+target_genome+".gff"
out_target_var=target_genome+"/"+target_genome+"_var.txt"
out_clustal=target_genome+"/"+target_genome+"_aln.txt"
out_aln=target_genome+"/"+target_genome+"_aln.txt"
transfer_on_target(segments,out_target_gff,out_target_var,out_clustal,target_genome,target_genome_paths,args)
transfer_on_target(segments,out_target_gff,out_target_var,out_aln,target_genome,target_genome_paths,args)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment