changé le mode d'execution, supprime les fichiers temporaires, traduit les commentaires en anglais

a750b3ed · NMarthe · b40fd71a · a750b3ed
Commit a750b3ed authored 2 years ago by NMarthe
--- a/seg_coord/getSegmentsCoordinates.py
+++ b/seg_coord/getSegmentsCoordinates.py
-# input : gfa file with paths.
-gfa="platGB2_r426_chr10_cactus.gfa"
-# output : bed files giving the coordinates of the segments on the genomes (/minigraph segments)
 import subprocess
+import sys
+if not(len(sys.argv)==2) :
+        print("expected input : gfa file with walks.")
+        print("output : bed files giving the coordinates of the segments on the genomes (or on minigraph segments).")
+        sys.exit(1)
+elif (sys.argv[1]=="-h") :
+    print("expected input : gfa file with walks.")
+    print("output : bed files giving the coordinates of the segments on the genomes (or on minigraph segments).")
+    sys.exit(1)
+gfa=sys.argv[1]
-# récupérer les lignes qui commencent par 'S'
+# get the lines that start with "S"
 command="grep ^S "+gfa+" > segments.txt"
 subprocess.run(command,shell=True)
 segments = open('segments.txt','r')
 lines=segments.readlines()
 segments.close()
-# faire un dictionnaire avec les tailles des segments
+# build a dictionnary with the segment sizes
 segments_size={}
 for line in lines:
    line=line.split()
@@ -20,16 +27,14 @@ for line in lines:
    seg_size=len(line[2])
    segments_size[seg_id]=seg_size
+# get the lines that start with "W"
-#print(segments_size['1'])
-# récupérer les lignes qui commencent par 'W'
 command="grep ^W "+gfa+" | sed 's/>/,>/g' | sed 's/</,</g' > walks.txt"
 subprocess.run(command,shell=True)
 walks=open('walks.txt','r')
 lines=walks.readlines()
-walks.close()   
+walks.close()
-# sur ces lignes, récupérer le nom du génome pour nommer le bed
+# on these lines, get the name of the genome to name the output bed file
 file_names=list()
 for line in lines :
    line=line.split()
@@ -40,8 +45,8 @@ for line in lines :
    file_name=name+'_'+chromosome+'.bed'
-    # si on écrit dans le fichier pour la première fois, l'écraser. sinon l'append.
+    # if we are writing in the file for the first time, overwrite it. else, append it
-    # certains chr d'un même individu sont fragmentés. les coordonnées des segments de tous les fragments sont inscrites dans le même bed. 
+    # this is because some chromosomes are fragmented. the coordinates of all the fragments from the same chromosome are written in the same bed file.
    if file_name not in file_names:
        file_names.append(file_name)
        out_bed = open(file_name, 'w')
@@ -51,8 +56,8 @@ for line in lines :
    path=line[6].split(',')
    position=path_start
-    for i in range(1, len(path)): # pour chaque segment du path, print dans le bed la position du segment.
+    for i in range(1, len(path)): # for each segment in the path, write the position of the segment in the output bed file
-        # calcul coordonnées : start=position, stop=position+taille_segment-1, ensuite position+=taille_segment
+        # coordinates calculation : start=position, stop=position+segment_size-1, then position+=segment_size
        chr='Chr'+chromosome[len(chromosome)-2:]
@@ -73,5 +78,6 @@ for line in lines :
        position+=segments_size[seg_name]
    out_bed.close()
+command="rm segments.txt && rm walks.txt"
+subprocess.run(command,shell=True)