From 117ec7265335c193e8e6853323d96f7113da473b Mon Sep 17 00:00:00 2001 From: "nina.marthe_ird.fr" <nina.marthe@ird.fr> Date: Thu, 15 Feb 2024 15:28:54 +0100 Subject: [PATCH] adapted the code to handle paths without the assembly name, and to output in batch to reduce the number of file access --- getSegmentsCoordinates.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/getSegmentsCoordinates.py b/getSegmentsCoordinates.py index f6f7f81..8e683c1 100644 --- a/getSegmentsCoordinates.py +++ b/getSegmentsCoordinates.py @@ -1,4 +1,5 @@ import subprocess +from Graph_gff import write_line def has_numbers(inputString): return any(char.isdigit() for char in inputString) @@ -65,9 +66,9 @@ def seg_coord(gfa,walk_names): file_names=list() for line in lines : line=line.split() - name=line[3] + name=line[1]+"_"+line[3] - if check_walk_name(walk_names,name) | (len(walk_names)==1): # len=1 if there is only the source genome. + if (check_walk_name(walk_names,name)) | ((len(walk_names)==1) & ("MINIGRAPH" not in name)): # len=1 if there is only the source genome. path_start=int(line[4]) seq_name=name.split('_')[-1] @@ -83,25 +84,24 @@ def seg_coord(gfa,walk_names): out_bed = open(file_name, 'w') else : out_bed = open(file_name, 'a') - + output_bed=[0,"",out_bed] + path=line[6].split(',') position=path_start for i in range(1, len(path)): # for each segment in the path, write the position of the segment in the output bed file # coordinates calculation : start=position, stop=position+segment_size-1, then position+=segment_size - + seg_start=position seg_name='s'+path[i][1:] seg_stop=position+segments_size[seg_name] - + out_line=seq_name+'\t'+str(seg_start)+'\t'+str(seg_stop)+'\t'+path[i][0:1]+seg_name+'\n' - out_bed.write(out_line) - + write_line(out_line,output_bed,False) + position+=segments_size[seg_name] + write_line("",output_bed,True) out_bed.close() command="rm seg_coord/segments.txt && rm seg_coord/walks.txt" - subprocess.run(command,shell=True,timeout=None) - - command="if ls test/*_MINIGRAPH_* 1> /dev/null 2>&1; then mkdir seg_coord/minigraph_segments && mv *_MINIGRAPH_* seg_coord/minigraph_segments/; fi" subprocess.run(command,shell=True,timeout=None) \ No newline at end of file -- GitLab