Skip to content
Snippets Groups Projects
Commit 2ce45f90 authored by nina.marthe_ird.fr's avatar nina.marthe_ird.fr
Browse files

changed the order in which the output files are generated, to prepare for...

changed the order in which the output files are generated, to prepare for changes to reduce memory usage
parent 5a555dc2
No related branches found
No related tags found
No related merge requests found
...@@ -24,86 +24,92 @@ def target_gff(first_seg,last_seg,feature_id,list_seg,max_diff): ...@@ -24,86 +24,92 @@ def target_gff(first_seg,last_seg,feature_id,list_seg,max_diff):
# writes the gff of target genome using the gff of the graph # writes the gff of target genome using the gff of the graph
def transfer_on_target(pos_seg, gfa, out_once, out_var,out_clustal,target_genome_name,max_diff): def transfer_on_target(pos_seg, gfa, out_once, out_var,out_clustal,target_genomes,max_diff):
print("generation of the target genome's outputs")
[once,var,clustal,stats]=[True,True,True,False]
# create variables and open files
[once,var,stats,clustal]=[True,True,False,True]
genomes_to_index=[target_genome_name]
if var:
[paths,seg_seq]=get_segments_sequence_and_paths(gfa,genomes_to_index)
target_genome_path=paths[target_genome_name]
file_out_var = open(out_var, 'w')
global output_variations
output_variations=[0,"",file_out_var]
if once:
file_out=open(out_once,'w')
global output_target_gff
output_target_gff=[0,"",file_out]
bad_size_features=0
absent_features=0
diff_size_transfered_features=[0,0] # [count,sum], to get the average
if clustal:
file_clustal=open(out_clustal,'w')
global output_target_clustal
output_target_clustal=[0,"",file_clustal]
write_line("CLUSTALW alignment from feature path comparison in pangenome graph. Made with GrAnnoT v1.\n\n",output_target_clustal,False)
if stats:
# create objects for stats on how many segments are absent in target genome, their average length, etc
feature_missing_segments=[[],[],[],[],[],[],[]] # [feature_missing_first,feature_missing_middle,feature_missing_last,feature_missing_all,feature_missing_total,feature_total,feature_ok]
# the fist segment of the feature is missing - feature_missing_first
# the last segment of the feature is missing - feature_missing_last
# at least one middle segment of the feature is missing - feature_missing_middle
# the entire feature is missing - feature_missing_all
# at least one segment is missing first, last, or middle) - feature_missing_total
# no segment is missing, the feature is complete - feature_ok
# total number of features, with missing segments or not - feature_total
get_segments_positions_on_genome(pos_seg) get_segments_positions_on_genome(pos_seg)
list_feature_to_transfer= Features.keys() list_feature_to_transfer= Features.keys()
for feat in list_feature_to_transfer: for genome in target_genomes:
print(f'generation of {genome} output')
# for each feature, get list of the segments where it is and the first and last segment of the feature on the new genome
list_seg=Features[feat].segments_list
first_seg=get_first_seg(list_seg)
last_seg=get_first_seg(reversed(list_seg))
# outputs the feature once in a gff, from the first to the last segment present on the new genome
if once: if once:
transfer_stat=target_gff(first_seg,last_seg,feat,list_seg,max_diff) # insertion not considered !!! file_out=open(out_once,'w')
if transfer_stat=="bad_size": global output_target_gff
bad_size_features+=1 output_target_gff=[0,"",file_out]
elif transfer_stat=="absent": bad_size_features=0
absent_features+=1 absent_features=0
else: diff_size_transfered_features=[0,0] # [count,sum], to get the average
diff_size_transfered_features[0]+=1
diff_size_transfered_features[1]+=transfer_stat for feat in list_feature_to_transfer:
write_line("",output_target_gff,True) # for each feature, get list of the segments where it is and the first and last segment of the feature on the new genome
write_line("",output_target_clustal,True) list_seg=Features[feat].segments_list
first_seg=get_first_seg(list_seg)
# outputs the detail of variations of the feature : last_seg=get_first_seg(reversed(list_seg))
if var:
print_variations(first_seg,last_seg,feat,target_genome_path,seg_seq) transfer_stat=target_gff(first_seg,last_seg,feat,list_seg,max_diff) # insertion not considered !!!
write_line("",output_variations,True) if transfer_stat=="bad_size":
bad_size_features+=1
if stats==True: elif transfer_stat=="absent":
stats_feature_missing_segment(feature_missing_segments,first_seg,last_seg,list_seg,feat) absent_features+=1
else:
diff_size_transfered_features[0]+=1
diff_size_transfered_features[1]+=transfer_stat
write_line("",output_target_gff,True)
file_out.close()
# close output_files if var:
if once: [paths,seg_seq]=get_segments_sequence_and_paths(gfa,target_genomes)
file_out.close() target_genome_path=paths[genome]
if var: file_out_var = open(out_var, 'w')
file_out_var.close() global output_variations
output_variations=[0,"",file_out_var]
# print stats if clustal:
if stats==True: file_clustal=open(out_clustal,'w')
if once: global output_target_clustal
print(len(Features)-(bad_size_features+absent_features),"out of",len(Features),"features are transfered.") output_target_clustal=[0,"",file_clustal]
print(bad_size_features,"out of",len(Features), "features are not transfered because they are too big or too small compared to the original genome.") write_line("CLUSTALW alignment from feature path comparison in pangenome graph. Made with GrAnnoT v1.\n\n",output_target_clustal,False)
print(absent_features,"out of",len(Features),"features are not transfered because they are absent in the new genome.")
print("average length difference of the transfered genes : ",diff_size_transfered_features[1]/diff_size_transfered_features[0]) for feat in list_feature_to_transfer:
stats_features(feature_missing_segments) # for each feature, get list of the segments where it is and the first and last segment of the feature on the new genome
list_seg=Features[feat].segments_list
first_seg=get_first_seg(list_seg)
last_seg=get_first_seg(reversed(list_seg))
print_variations(first_seg,last_seg,feat,target_genome_path,seg_seq)
write_line("",output_variations,True)
write_line("",output_target_clustal,True)
file_out_var.close()
if stats:
# create objects for stats on how many segments are absent in target genome, their average length, etc
feature_missing_segments=[[],[],[],[],[],[],[]] # [feature_missing_first,feature_missing_middle,feature_missing_last,feature_missing_all,feature_missing_total,feature_total,feature_ok]
# the fist segment of the feature is missing - feature_missing_first
# the last segment of the feature is missing - feature_missing_last
# at least one middle segment of the feature is missing - feature_missing_middle
# the entire feature is missing - feature_missing_all
# at least one segment is missing first, last, or middle) - feature_missing_total
# no segment is missing, the feature is complete - feature_ok
# total number of features, with missing segments or not - feature_total
# for each feature, get list of the segments where it is and the first and last segment of the feature on the new genome
list_seg=Features[feat].segments_list
first_seg=get_first_seg(list_seg)
last_seg=get_first_seg(reversed(list_seg))
for feat in list_feature_to_transfer:
stats_feature_missing_segment(feature_missing_segments,first_seg,last_seg,list_seg,feat)
if once:
print(len(Features)-(bad_size_features+absent_features),"out of",len(Features),"features are transfered.")
print(bad_size_features,"out of",len(Features), "features are not transfered because they are too big or too small compared to the original genome.")
print(absent_features,"out of",len(Features),"features are not transfered because they are absent in the new genome.")
print("average length difference of the transfered genes : ",diff_size_transfered_features[1]/diff_size_transfered_features[0])
stats_features(feature_missing_segments)
......
...@@ -25,9 +25,9 @@ out_target_gff=pos_seg.split("/")[-1].split(".")[0:-1][0]+".gff" ...@@ -25,9 +25,9 @@ out_target_gff=pos_seg.split("/")[-1].split(".")[0:-1][0]+".gff"
out_target_var=pos_seg.split("/")[-1].split(".")[0:-1][0]+"_variations.txt" out_target_var=pos_seg.split("/")[-1].split(".")[0:-1][0]+"_variations.txt"
out_clustal=pos_seg.split("/")[-1].split(".")[0:-1][0]+"_clustal.txt" out_clustal=pos_seg.split("/")[-1].split(".")[0:-1][0]+"_clustal.txt"
if len(sys.argv)==5: if len(sys.argv)==5:
target_genome_name=sys.argv[4] target_genomes=[sys.argv[4]] # todo : take several genome names
else: else:
target_genome_name=pos_seg.split("/")[-1].split(".")[0:-1][0] target_genomes=[pos_seg.split("/")[-1].split(".")[0:-1][0]] # todo : delete this option
load_intersect(intersect) load_intersect(intersect)
...@@ -37,4 +37,4 @@ graph_gaf(out_graph_gaf,gfa) ...@@ -37,4 +37,4 @@ graph_gaf(out_graph_gaf,gfa)
# outputs the gff of a genome for the chr10 # outputs the gff of a genome for the chr10
max_diff=2 # maximum size difference (n times bigger or smaller) between the gene on the source genome and the gene on the target genome for the gene to be transfered. max_diff=2 # maximum size difference (n times bigger or smaller) between the gene on the source genome and the gene on the target genome for the gene to be transfered.
transfer_on_target(pos_seg,gfa,out_target_gff,out_target_var,out_clustal,target_genome_name,max_diff) transfer_on_target(pos_seg,gfa,out_target_gff,out_target_var,out_clustal,target_genomes,max_diff)
\ No newline at end of file \ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment