From 117ec7265335c193e8e6853323d96f7113da473b Mon Sep 17 00:00:00 2001
From: "nina.marthe_ird.fr" <nina.marthe@ird.fr>
Date: Thu, 15 Feb 2024 15:28:54 +0100
Subject: [PATCH] adapted the code to handle paths without the assembly name,
 and to write output in batches to reduce the number of file accesses

---
 getSegmentsCoordinates.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/getSegmentsCoordinates.py b/getSegmentsCoordinates.py
index f6f7f81..8e683c1 100644
--- a/getSegmentsCoordinates.py
+++ b/getSegmentsCoordinates.py
@@ -1,4 +1,5 @@
 import subprocess
+from Graph_gff import write_line
 
 def has_numbers(inputString):
     return any(char.isdigit() for char in inputString)
@@ -65,9 +66,9 @@ def seg_coord(gfa,walk_names):
     file_names=list()
     for line in lines :
         line=line.split()
-        name=line[3]
+        name=line[1]+"_"+line[3]
 
-        if check_walk_name(walk_names,name) | (len(walk_names)==1): # len=1 if there is only the source genome.
+        if (check_walk_name(walk_names,name)) | ((len(walk_names)==1) & ("MINIGRAPH" not in name)): # len=1 if there is only the source genome.
 
             path_start=int(line[4])
             seq_name=name.split('_')[-1]
@@ -83,25 +84,24 @@ def seg_coord(gfa,walk_names):
                 out_bed = open(file_name, 'w')
             else :
                 out_bed = open(file_name, 'a')
-                
+            output_bed=[0,"",out_bed]
+
             path=line[6].split(',')
             position=path_start
             
             for i in range(1, len(path)): # for each segment in the path, write the position of the segment in the output bed file
                 # coordinates calculation : start=position, stop=position+segment_size-1, then position+=segment_size
-                
+
                 seg_start=position
                 seg_name='s'+path[i][1:]
                 seg_stop=position+segments_size[seg_name]
-            
+
                 out_line=seq_name+'\t'+str(seg_start)+'\t'+str(seg_stop)+'\t'+path[i][0:1]+seg_name+'\n'
-                out_bed.write(out_line)
-                
+                write_line(out_line,output_bed,False)
+
                 position+=segments_size[seg_name]
+            write_line("",output_bed,True)
             out_bed.close()
     
     command="rm seg_coord/segments.txt && rm seg_coord/walks.txt"
-    subprocess.run(command,shell=True,timeout=None)
-
-    command="if ls test/*_MINIGRAPH_* 1> /dev/null 2>&1; then mkdir seg_coord/minigraph_segments && mv *_MINIGRAPH_* seg_coord/minigraph_segments/; fi"
     subprocess.run(command,shell=True,timeout=None)
\ No newline at end of file
-- 
GitLab