changed the order in which the output files are generated, to prepare for...

changed the order in which the output files are generated, to prepare for changes to reduce memory usage

changed the order in which the output files are generated, to prepare for...
2ce45f90 · nina.marthe_ird.fr · 5a555dc2 · 2ce45f90 · 2ce45f90
Commit 2ce45f90 authored 1 year ago by nina.marthe_ird.fr
--- a/Functions_output.py
+++ b/Functions_output.py
@@ -24,86 +24,92 @@ def target_gff(first_seg,last_seg,feature_id,list_seg,max_diff):
 # writes the gff of target genome using the gff of the graph
-def transfer_on_target(pos_seg, gfa, out_once, out_var,out_clustal,target_genome_name,max_diff):
+def transfer_on_target(pos_seg, gfa, out_once, out_var,out_clustal,target_genomes,max_diff):
-    print("generation of the target genome's outputs")
+    [once,var,clustal,stats]=[True,True,True,False]
-    # create variables and open files
-    [once,var,stats,clustal]=[True,True,False,True]
-    genomes_to_index=[target_genome_name]
-    if var:
-        [paths,seg_seq]=get_segments_sequence_and_paths(gfa,genomes_to_index)
-        target_genome_path=paths[target_genome_name]
-        file_out_var = open(out_var, 'w')
-        global output_variations
-        output_variations=[0,"",file_out_var]
-    if once:
-        file_out=open(out_once,'w')
-        global output_target_gff
-        output_target_gff=[0,"",file_out]
-        bad_size_features=0
-        absent_features=0
-        diff_size_transfered_features=[0,0] # [count,sum], to get the average
-    if clustal:
-        file_clustal=open(out_clustal,'w')
-        global output_target_clustal
-        output_target_clustal=[0,"",file_clustal]
-        write_line("CLUSTALW alignment from feature path comparison in pangenome graph. Made with GrAnnoT v1.\n\n",output_target_clustal,False)
-    if stats:
-        # create objects for stats on how many segments are absent in target genome, their average length, etc
-        feature_missing_segments=[[],[],[],[],[],[],[]] # [feature_missing_first,feature_missing_middle,feature_missing_last,feature_missing_all,feature_missing_total,feature_total,feature_ok]
-        # the fist segment of the feature is missing - feature_missing_first
-        # the last segment of the feature is missing - feature_missing_last
-        # at least one middle segment of the feature is missing - feature_missing_middle
-        # the entire feature is missing - feature_missing_all
-        # at least one segment is missing first, last, or middle) - feature_missing_total
-        # no segment is missing, the feature is complete - feature_ok  
-        # total number of features, with missing segments or not - feature_total
    get_segments_positions_on_genome(pos_seg)
    list_feature_to_transfer= Features.keys()
-    for feat in list_feature_to_transfer:
+    for genome in target_genomes:
+        print(f'generation of {genome} output')
-        # for each feature, get list of the segments where it is and the first and last segment of the feature on the new genome
-        list_seg=Features[feat].segments_list
-        first_seg=get_first_seg(list_seg)
-        last_seg=get_first_seg(reversed(list_seg))
-        # outputs the feature once in a gff, from the first to the last segment present on the new genome
        if once:
-            transfer_stat=target_gff(first_seg,last_seg,feat,list_seg,max_diff) # insertion not considered !!!
+            file_out=open(out_once,'w')
-            if transfer_stat=="bad_size":
+            global output_target_gff
-                bad_size_features+=1
+            output_target_gff=[0,"",file_out]
-            elif transfer_stat=="absent":
+            bad_size_features=0
-                absent_features+=1
+            absent_features=0
-            else:
+            diff_size_transfered_features=[0,0] # [count,sum], to get the average
-                diff_size_transfered_features[0]+=1
-                diff_size_transfered_features[1]+=transfer_stat
+            for feat in list_feature_to_transfer:
-            write_line("",output_target_gff,True)
+                # for each feature, get list of the segments where it is and the first and last segment of the feature on the new genome
-            write_line("",output_target_clustal,True)
+                list_seg=Features[feat].segments_list
+                first_seg=get_first_seg(list_seg)
-        # outputs the detail of variations of the feature :
+                last_seg=get_first_seg(reversed(list_seg))
-        if var:
-            print_variations(first_seg,last_seg,feat,target_genome_path,seg_seq)
+                transfer_stat=target_gff(first_seg,last_seg,feat,list_seg,max_diff) # insertion not considered !!!
-            write_line("",output_variations,True)
+                if transfer_stat=="bad_size":
+                    bad_size_features+=1
-        if stats==True:
+                elif transfer_stat=="absent":
-            stats_feature_missing_segment(feature_missing_segments,first_seg,last_seg,list_seg,feat)
+                    absent_features+=1
+                else:
+                    diff_size_transfered_features[0]+=1
+                    diff_size_transfered_features[1]+=transfer_stat
+                write_line("",output_target_gff,True)
+            file_out.close()
-    # close output_files
+        if var:
-    if once:
+            [paths,seg_seq]=get_segments_sequence_and_paths(gfa,target_genomes)
-        file_out.close()
+            target_genome_path=paths[genome]
-    if var:
+            file_out_var = open(out_var, 'w')
-        file_out_var.close()
+            global output_variations
+            output_variations=[0,"",file_out_var]
-    # print stats
+            if clustal:
-    if stats==True:
+                file_clustal=open(out_clustal,'w')
-        if once:
+                global output_target_clustal
-            print(len(Features)-(bad_size_features+absent_features),"out of",len(Features),"features are transfered.")
+                output_target_clustal=[0,"",file_clustal]
-            print(bad_size_features,"out of",len(Features), "features are not transfered because they are too big or too small compared to the original genome.")
+                write_line("CLUSTALW alignment from feature path comparison in pangenome graph. Made with GrAnnoT v1.\n\n",output_target_clustal,False)
-            print(absent_features,"out of",len(Features),"features are not transfered because they are absent in the new genome.")
-            print("average length difference of the transfered genes : ",diff_size_transfered_features[1]/diff_size_transfered_features[0])
+            for feat in list_feature_to_transfer:
-        stats_features(feature_missing_segments)
+                # for each feature, get list of the segments where it is and the first and last segment of the feature on the new genome
+                list_seg=Features[feat].segments_list
+                first_seg=get_first_seg(list_seg)
+                last_seg=get_first_seg(reversed(list_seg))
+                print_variations(first_seg,last_seg,feat,target_genome_path,seg_seq)
+                write_line("",output_variations,True)
+                write_line("",output_target_clustal,True)
+            file_out_var.close()
+        if stats:
+            # create objects for stats on how many segments are absent in target genome, their average length, etc
+            feature_missing_segments=[[],[],[],[],[],[],[]] # [feature_missing_first,feature_missing_middle,feature_missing_last,feature_missing_all,feature_missing_total,feature_total,feature_ok]
+            # the fist segment of the feature is missing - feature_missing_first
+            # the last segment of the feature is missing - feature_missing_last
+            # at least one middle segment of the feature is missing - feature_missing_middle
+            # the entire feature is missing - feature_missing_all
+            # at least one segment is missing first, last, or middle) - feature_missing_total
+            # no segment is missing, the feature is complete - feature_ok  
+            # total number of features, with missing segments or not - feature_total
+            # for each feature, get list of the segments where it is and the first and last segment of the feature on the new genome
+            list_seg=Features[feat].segments_list
+            first_seg=get_first_seg(list_seg)
+            last_seg=get_first_seg(reversed(list_seg))
+            for feat in list_feature_to_transfer:
+                stats_feature_missing_segment(feature_missing_segments,first_seg,last_seg,list_seg,feat)
+            if once:
+                print(len(Features)-(bad_size_features+absent_features),"out of",len(Features),"features are transfered.")
+                print(bad_size_features,"out of",len(Features), "features are not transfered because they are too big or too small compared to the original genome.")
+                print(absent_features,"out of",len(Features),"features are not transfered because they are absent in the new genome.")
+                print("average length difference of the transfered genes : ",diff_size_transfered_features[1]/diff_size_transfered_features[0])
+            stats_features(feature_missing_segments)

--- a/main.py
+++ b/main.py
@@ -25,9 +25,9 @@ out_target_gff=pos_seg.split("/")[-1].split(".")[0:-1][0]+".gff"
 out_target_var=pos_seg.split("/")[-1].split(".")[0:-1][0]+"_variations.txt"
 out_clustal=pos_seg.split("/")[-1].split(".")[0:-1][0]+"_clustal.txt"
 if len(sys.argv)==5:
-    target_genome_name=sys.argv[4]
+    target_genomes=[sys.argv[4]] # todo : take several genome names
 else:
-    target_genome_name=pos_seg.split("/")[-1].split(".")[0:-1][0]
+    target_genomes=[pos_seg.split("/")[-1].split(".")[0:-1][0]] # todo : delete this option
 load_intersect(intersect)
@@ -37,4 +37,4 @@ graph_gaf(out_graph_gaf,gfa)
 # outputs the gff of a genome for the chr10
 max_diff=2 # maximum size difference (n times bigger or smaller) between the gene on the source genome and the gene on the target genome for the gene to be transfered.
-transfer_on_target(pos_seg,gfa,out_target_gff,out_target_var,out_clustal,target_genome_name,max_diff)
+transfer_on_target(pos_seg,gfa,out_target_gff,out_target_var,out_clustal,target_genomes,max_diff)
\ No newline at end of file