Skip to content
Snippets Groups Projects
Commit 5ff25ba1 authored by alice.boizet_cirad.fr's avatar alice.boizet_cirad.fr
Browse files

fix errors writing vcf file

parent 108f7668
No related branches found
No related tags found
No related merge requests found
......@@ -11,7 +11,7 @@ import math
# Returns:
# str: A random segment from the given genotype.
# '''
def generate_chr (chromosome_size, marker_density, err_rate, mean_depth, sd_depth):
def generate_chr_for_one_ind (chromosome_size, marker_density, err_rate, mean_depth, sd_depth):
# Create a list of all possible genotypes
#genotypes = ["A", "B", "H", "H"]
......@@ -22,11 +22,7 @@ def generate_chr (chromosome_size, marker_density, err_rate, mean_depth, sd_dept
# random marker positions
size = round(chromosome_size * marker_density)
print(size)
print(chromosome_size)
print(marker_density)
positions = sorted(random.sample(range(chromosome_size),size)) #sorted list of markers positions
for i in range(0, size):
if i > 0:
#TODO
......@@ -58,7 +54,10 @@ def generate_chr (chromosome_size, marker_density, err_rate, mean_depth, sd_dept
#TODO : ajouter erreur de séquençage
# sd : standard deviation
site_depth = round(random.gauss(mean_depth, sd_depth))
g = random.gauss(mean_depth, sd_depth)
if g < 0:
g = 0
site_depth = round(g)
x = 0
y = 0
if site_depth == 0:
......@@ -74,8 +73,7 @@ def generate_chr (chromosome_size, marker_density, err_rate, mean_depth, sd_dept
genotype = "1/1"
else:
#x = random.gauss(site_depth, sd2)
print(site_depth)
x = round(random.random(range(0,site_depth)))
x = random.randint(0,site_depth)
y = site_depth - x
if x == 0:
genotype = "1/1"
......@@ -85,16 +83,20 @@ def generate_chr (chromosome_size, marker_density, err_rate, mean_depth, sd_dept
genotype = "0/1"
finalGenotype = str(genotype) + ":" + str(site_depth) + ":" + str(x) + "," + str(y)
finalGenotype = str(genotype) + ":" + str(site_depth) + ":" + str(x) + "," + str(y) + ":.:.:.:.:."
segment.append(finalGenotype)
# Return the random segment
return "".join(segment)
return segment
# Generate a list of individuals
def generate_individuals (nb_individuals, chromosome_size, err_rate, marker_density, mean_depth, sd_depth):
matrix = []
for i in range(nb_individuals):
print("individual " + str(i+1))
matrix.append(generate_chr_for_one_ind(chromosome_size, marker_density, err_rate, mean_depth, sd_depth))
return [generate_chr(chromosome_size, marker_density, err_rate, mean_depth, sd_depth) for i in range(nb_individuals)]
return matrix
# Check if a row is correct
def is_correct (row):
......@@ -115,23 +117,23 @@ mean_depth = 3
sd_depth = 1
pop = generate_individuals(nb_individuals, chromosome_size, err_rate, marker_density, mean_depth, sd_depth)
# Create an empty list to store the individuals
rows = []
# # Create an empty list to store the individuals
# rows = []
# Loop through the population
for i in range(chromosome_size):
# Create an empty list to store the individual
row = []
# # Loop through the population
# for i in range(chromosome_size):
# # Create an empty list to store the individual
# row = []
# Loop through the individuals in the population
for ind in pop:
# Append the individual's chromosome to the row
row.append(ind[i])
# # Loop through the individuals in the population
# for ind in pop:
# # Append the individual's chromosome to the row
# row.append(ind[i])
# Append the row to the list of rows
#row.append(is_correct(row))
# # Append the row to the list of rows
# #row.append(is_correct(row))
rows.append(row)
# rows.append(row)
header = [
......@@ -156,24 +158,32 @@ header = [
]
#with open('test.vcf', 'w') as file:
with open('test.tsv', 'w') as file:
# Write the header of the file
#file.writelines(header)
file.writelines(header)
# Write the header of the file
file.write("\t".join(["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT"] + [str(n) for n in range(nb_individuals)] + ["Parent1", "Parent2"] + ["Test"]) + "\n")
# Iterate over each row in the table
for i, row in enumerate(rows):
# Create an empty list to store the converted row
converted_row = []
#for i, row in enumerate(rows):
for m in range(0, len(pop[0])):
row = ["chr01", str(m + 1), ".", "A", "T", ".", ".", ".", "GT:DP:AD:RO:QR:AO:QA:GL"]
for i in range(0, len(pop)):
row.append(pop[i][m])
row.append("0/0:"+ str(mean_depth) + ":" + str(mean_depth) + ",0:.:.:.:.:.")
row.append("1/1:"+ str(mean_depth) + ":0," + str(mean_depth) + ":.:.:.:.:.")
file.write("\t".join(row) + "\n");
# # Create an empty list to store the converted row
# converted_row = []
# Iterate over each genotype in the row
for genotype in row:
converted_row.append(genotype + ":.:.:.:.:.")
# # Iterate over each genotype in the row
# for genotype in ind:
# converted_row.append(genotype + ":.:.:.:.:.")
file.write("\t".join(["chr01", str(i + 1), ".", "A", "T", ".", ".", ".", "GT:DP:AD:RO:QR:AO:QA:GL"] + converted_row + ["0/0:"+ mean_depth + ":" + mean_depth + ",0:.:.:.:.:.", "1/1:"+ mean_depth + ":0," + mean_depth + ":.:.:.:.:."]) + "\n" )
# file.write("\t".join(["chr01", str(i + 1), ".", "A", "T", ".", ".", ".", "GT:DP:AD:RO:QR:AO:QA:GL"] + converted_row + ["0/0:"+ mean_depth + ":" + mean_depth + ",0:.:.:.:.:.", "1/1:"+ mean_depth + ":0," + mean_depth + ":.:.:.:.:."]) + "\n" )
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment