Skip to content
Snippets Groups Projects
Commit 92a82851 authored by Cecile Triay's avatar Cecile Triay
Browse files

errorRates added to allele depth computation and reestimation of genotypes...

errorRates added to allele depth computation and reestimation of genotypes according to these values.
parent 6d8b8423
No related branches found
No related tags found
No related merge requests found
import random
import csv
import math
import numpy as np
#def generate_chr_for_one_ind (chromosome_size, marker_density, err_rate, mean_depth, max_depth, markers_positions, conversion_factor):
def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor):
def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor, errA, errB):
'''
Generate a chromosome for a given size, density of markers, and depth (follwing a gaussian distrib with a mean_depth and sd_depth).
Args:
chromosome_size (int): The size of the chromosome.
marker_density (float): The density of markers.
err_rate (float): The error rate of the chromosome.
mean_depth (float): The mean depth of the chromosome.
max_depth
markers_positions
markers_positions (lst) : liste of marker positions (picked on a grid)
err_rate (float): The error rate of the chromosome.
Returns:
list: The chromosome.
list: The chromosome with each position having a given genotype.
'''
segment = []
segment_error = []
# Iterate through the list of markers positions
for i in range(0, len(markers_positions)):
......@@ -78,56 +75,99 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor):
# Initialize x and y
x = 0
y = 0
x_error = 0
y_error = 0
# If the depth of the current marker is 0, genotype is Missing Data
if site_depth == 0:
genotype = "./."
genotype_error = "./."
else :
# If the current marker is A and the previous marker is A, genotype if homozygote A (ref, 0/0)
if genotype1 == "A" and genotype2 == "A":
x = site_depth
y = 0
genotype = "0/0"
x_error = 0
y_error = 0
for j in range(0,site_depth):
rnd = random.random()
# If the random number is greater than errB, we switch increase the wronge allele depth
if rnd > errA:
x_error = x_error + 0
y_error = y_error + 1
else :
x_error = x_error + 1
y_error = y_error + 0
if x_error > 0:
genotype_error = "0/1"
else :
genotype_error = "0/0"
# If the current marker is B and the previous marker is B, genotype if homozygote B (alt, 1/1)
elif genotype1 == "B" and genotype2 == "B":
x = 1
y = site_depth
genotype = "1/1"
x_error = 0
y_error = 0
for j in range(0,site_depth):
rnd = random.random()
# If the random number is greater than errB, we switch increase the wronge allele depth
if rnd > errB:
x_error = x_error + 1
y_error = y_error + 0
else :
x_error = x_error + 0
y_error = y_error + 1
if x_error > 0:
genotype_error = "0/1"
else :
genotype_error = "1/1"
# If the current marker is neither A nor B, genotype if heterozygous H (0/1)
else:
# Generate a random number between 0 and the depth of the current marker
x = random.randint(0,site_depth)
y = site_depth - x
x_error = x
y_error = y
# If the depth of x of the current marker is 0, seen as homozygous
if x == 0:
genotype = "1/1"
genotype_error = "1/1"
# If the depth of y of the current marker is not 0, seen as homozygous
elif y == 0:
genotype = "0/0"
genotype = "0/0"
genotype_error = "0/0"
# If the depth x and y of the current marker is not 0, seen as heterozygous
else :
genotype = "0/1"
genotype = "0/1"
genotype_error = "0/1"
#print("reads",genotype1, genotype2)
#print(genotype)
# Append the genotype and the depth of the current marker to the segment list
finalGenotype = str(genotype) + ":" + str(site_depth) + ":" + str(x) + "," + str(y) + ":.:.:.:.:."
print(finalGenotype)
segment.append(finalGenotype)
finalGenotype_error = str(genotype_error) + ":" + str(site_depth) + ":" + str(x_error) + "," + str(y_error) + ":.:.:.:.:."
print(finalGenotype_error)
segment_error.append(finalGenotype_error)
return segment
# Generate a list of individuals
def generate_individuals (nb_individuals, chromosome_size, marker_density, mean_depth, conversion_factor):
def generate_individuals (nb_individuals, chromosome_size, marker_density, mean_depth, conversion_factor, errA, errB):
matrix = []
# Calculate the number of marker requiered
markers_nb = round(chromosome_size * marker_density)
# Generate a sorted list of markers positions
# positions = sorted(random.sample(range(chromosome_size),size))
#markers_positions = sorted(random.sample(range(chromosome_size),size))
markers_positions = np.linspace(1, chromosome_size, markers_nb, dtype="int")
for i in range(nb_individuals):
print("individual " + str(i+1))
matrix.append(generate_chr_for_one_ind(mean_depth, markers_positions, conversion_factor))
matrix.append(generate_chr_for_one_ind(mean_depth, markers_positions, conversion_factor, errA, errB))
return matrix, markers_positions
......@@ -142,17 +182,19 @@ def generate_individuals (nb_individuals, chromosome_size, marker_density, mean_
# return [count_a, count_b, count_h, 0.45 <= count_h <= 0.55 and 0.2 <= count_b <= 0.3 and 0.2 <= count_a <= 0.3]
# Set the parameters for the script
nb_individuals = 3
chromosome_size = 44000000
cMsize = 180 ## Size of genetic map
nb_individuals = 1
chromosome_size = 1000
cMsize = 5 ## Size of genetic map
conversion_factor = chromosome_size/cMsize ## Corresponds to a bpPercM conversion ! Needs to be fixed... Does not produce the correct division!
#conversion_factor = 233000
print(conversion_factor)
marker_density = 255000/44000000
#print(conversion_factor)
marker_density = 0.006
mean_depth = 3
max_depth = 10 #TODO
err_rate = 0.02
pop = generate_individuals(nb_individuals, chromosome_size, marker_density, mean_depth, conversion_factor)
max_depth = 6 #TODO
errA = 0.005
errB = 0.005
# #size = 200000
pop = generate_individuals(nb_individuals, chromosome_size, marker_density, mean_depth, conversion_factor, errA, errB)
matrix = pop[0]
markers_positions = pop[1]
......@@ -180,7 +222,7 @@ header = [
]
with open('Newtest.vcf', 'w') as file:
with open('ABtest.vcf', 'w') as file:
# Write the header of the file
file.writelines(header)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment