Skip to content
Snippets Groups Projects
Commit a750b3ed authored by NMarthe's avatar NMarthe
Browse files

changé le mode d'execution, supprime les fichiers temporaires, traduit les commentaires en anglais

parent b40fd71a
No related branches found
No related tags found
No related merge requests found
# input : gfa file with paths.
gfa="platGB2_r426_chr10_cactus.gfa"
# output : bed files giving the coordinates of the segments on the genomes (/minigraph segments)
import subprocess import subprocess
import sys
if not(len(sys.argv)==2) :
print("expected input : gfa file with walks.")
print("output : bed files giving the coordinates of the segments on the genomes (or on minigraph segments).")
sys.exit(1)
elif (sys.argv[1]=="-h") :
print("expected input : gfa file with walks.")
print("output : bed files giving the coordinates of the segments on the genomes (or on minigraph segments).")
sys.exit(1)
gfa=sys.argv[1]
# récupérer les lignes qui commencent par 'S' # get the lines that start with "S"
command="grep ^S "+gfa+" > segments.txt" command="grep ^S "+gfa+" > segments.txt"
subprocess.run(command,shell=True) subprocess.run(command,shell=True)
segments = open('segments.txt','r') segments = open('segments.txt','r')
lines=segments.readlines() lines=segments.readlines()
segments.close() segments.close()
# faire un dictionnaire avec les tailles des segments # build a dictionnary with the segment sizes
segments_size={} segments_size={}
for line in lines: for line in lines:
line=line.split() line=line.split()
...@@ -20,16 +27,14 @@ for line in lines: ...@@ -20,16 +27,14 @@ for line in lines:
seg_size=len(line[2]) seg_size=len(line[2])
segments_size[seg_id]=seg_size segments_size[seg_id]=seg_size
# get the lines that start with "W"
#print(segments_size['1'])
# récupérer les lignes qui commencent par 'W'
command="grep ^W "+gfa+" | sed 's/>/,>/g' | sed 's/</,</g' > walks.txt" command="grep ^W "+gfa+" | sed 's/>/,>/g' | sed 's/</,</g' > walks.txt"
subprocess.run(command,shell=True) subprocess.run(command,shell=True)
walks=open('walks.txt','r') walks=open('walks.txt','r')
lines=walks.readlines() lines=walks.readlines()
walks.close() walks.close()
# sur ces lignes, récupérer le nom du génome pour nommer le bed
# on these lines, get the name of the genome to name the output bed file
file_names=list() file_names=list()
for line in lines : for line in lines :
line=line.split() line=line.split()
...@@ -40,8 +45,8 @@ for line in lines : ...@@ -40,8 +45,8 @@ for line in lines :
file_name=name+'_'+chromosome+'.bed' file_name=name+'_'+chromosome+'.bed'
# si on écrit dans le fichier pour la première fois, l'écraser. sinon l'append. # if we are writing in the file for the first time, overwrite it. else, append it
# certains chr d'un même individu sont fragmentés. les coordonnées des segments de tous les fragments sont inscrites dans le même bed. # this is because some chromosomes are fragmented. the coordinates of all the fragments from the same chromosome are written in the same bed file.
if file_name not in file_names: if file_name not in file_names:
file_names.append(file_name) file_names.append(file_name)
out_bed = open(file_name, 'w') out_bed = open(file_name, 'w')
...@@ -51,8 +56,8 @@ for line in lines : ...@@ -51,8 +56,8 @@ for line in lines :
path=line[6].split(',') path=line[6].split(',')
position=path_start position=path_start
for i in range(1, len(path)): # pour chaque segment du path, print dans le bed la position du segment. for i in range(1, len(path)): # for each segment in the path, write the position of the segment in the output bed file
# calcul coordonnées : start=position, stop=position+taille_segment-1, ensuite position+=taille_segment # coordinates calculation : start=position, stop=position+segment_size-1, then position+=segment_size
chr='Chr'+chromosome[len(chromosome)-2:] chr='Chr'+chromosome[len(chromosome)-2:]
...@@ -73,5 +78,6 @@ for line in lines : ...@@ -73,5 +78,6 @@ for line in lines :
position+=segments_size[seg_name] position+=segments_size[seg_name]
out_bed.close() out_bed.close()
command="rm segments.txt && rm walks.txt"
subprocess.run(command,shell=True)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment