Skip to content
Snippets Groups Projects
Commit ece4e192 authored by alice.boizet_cirad.fr's avatar alice.boizet_cirad.fr
Browse files

resolve merge

parents 361a8781 845547da
No related branches found
No related tags found
No related merge requests found
......@@ -9,19 +9,23 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor,
Generate a chromosome for a given size, density of markers, and depth (follwing a gaussian distrib with a mean_depth and sd_depth).
Args:
mean_depth (float): The mean depth of the chromosome.
mean_depth (float): The mean sequencing depth of the chromosome
markers_positions (lst) : liste of marker positions (picked on a grid)
err_rate (float): The error rate of the chromosome.
conversion_factor (float) : Value of the bp per cM conversion (bp chromosome size / cM chromosome size)
errA & errB (float): Respectively the error rate of observing a B whereas the genotype is truely a A and a A whereas the genotype is truely a B
Returns:
list: The chromosome with each position having a given genotype.
segment: A chromosome with each position having a given genotype with a given site depth and allele depth
segment_error : A chromosome with each position having a given genotype with a given site depth and allele depth considering the error rates errA and errB
'''
# Create an empty list to store the chromosome
segment = []
# Create an empty list to store the chromosome with error
segment_error = []
breakpoints = []
# Iterate through the list of markers positions
# Iterate through the list of markers positions (fixed)
previousMarkerGenotype = ""
for i in range(0, len(markers_positions)):
recomb1 = False
......@@ -29,21 +33,23 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor,
# If the current marker is not the first one
if i > 0:
# Calculate the interval between the current marker and the previous one
#TODO
#Kosambi inverse function
#using the Kosambi inverse function to estimate the chance of recombination
IntervalWithPreviousMarker = markers_positions[i] - markers_positions[i-1]
tempcM = 2 * (IntervalWithPreviousMarker / 100) / conversion_factor
tempcM = 2 * (IntervalWithPreviousMarker / 100) / conversion_factor ## Conversion factor is used to have the bp per cM
IntervalWithPreviousMarkerInRF = 0.5 * ((math.exp(tempcM) - math.exp(-tempcM)) / (math.exp(tempcM) + math.exp(-tempcM)))
rnd = random.random()
# If the random number is greater than the probability of a recombination
#print("before",genotype1, genotype2)
if rnd > 1 - IntervalWithPreviousMarkerInRF: # a recombination occurs
# If the random number is greater than the probability of a recombination a recombination occurs
# Done for genotype 1
if rnd > 1 - IntervalWithPreviousMarkerInRF:
recomb1 = True
# If the previous marker is A, change the genotype to B
if genotype1 == "A":
genotype1 = "B";
else :
genotype1 = "A";
# Done for genotype 2
rnd = random.random()
if rnd > 1 - IntervalWithPreviousMarkerInRF:
recomb2 = True
......@@ -52,8 +58,8 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor,
genotype2 = "B"
else :
genotype2 = "A"
#print("after",genotype1, genotype2)
# If the current marker is the first one, fix the two first genotypes.
# If the current marker is the first one, Set the two first genotypes (random).
else:
rnd = random.random()
# If the random number is greater than 0.5
......@@ -92,33 +98,35 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor,
breakpoints.append([i,transition])
# Calculate the depth of the current marker
# Calculate the site depth of the current marker
g = np.random.poisson(mean_depth)
# If the depth of the current marker is less than 0 (safeguard)
if g < 0:
g = 0
# Round the depth of the current marker
site_depth = round(g)
# Initialize x and y
# Initialize x and y (with and without error)
x = 0
y = 0
x_error = 0
y_error = 0
# If the depth of the current marker is 0, genotype is Missing Data
if site_depth == 0:
genotype = "./."
genotype_error = "./."
else :
# If the current marker is A and the previous marker is A, genotype if homozygote A (ref, 0/0)
# If the site is homozygous A (ref, 0/0)
if genotype1 == "A" and genotype2 == "A":
x = site_depth
y = 0
genotype = "0/0"
x_error = 0
y_error = 0
# Considering the sequencing and mapping error at each site for a given site depth
for j in range(0,site_depth):
rnd = random.random()
# If the random number is smaller than errA, we switch increase the wrong allele depth
# If the random number is smaller the error rate, we increase the wronge allele depth
if rnd < errA:
x_error = x_error + 0
y_error = y_error + 1
......@@ -139,9 +147,10 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor,
genotype = "1/1"
x_error = 0
y_error = 0
# Considering the sequencing and mapping error at each site for a given site depth
for j in range(0,site_depth):
rnd = random.random()
# If the random number is smaller than errB, we switch increase the wronge allele depth
# If the random number is smaller the error rate, we increase the wronge allele depth
if rnd < errB:
x_error = x_error + 1
y_error = y_error + 0
......@@ -154,22 +163,24 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor,
genotype_error = "0/1"
else :
genotype_error = "1/1"
# If the current marker is neither A nor B, genotype if heterozygous H (0/1)
else:
# Generate a random number between 0 and the depth of the current marker
x = random.randint(0,site_depth)
y = site_depth - x
# Error of A and B compensate themselves in heterozygous site so no need to change the x_error and y_error
x_error = x
y_error = y
# If the depth of x of the current marker is 0, seen as homozygous
# If the depth of x of the current marker is 0, it's seen as homozygous site (alt, 1/1)
if x == 0:
genotype = "1/1"
genotype_error = "1/1"
# If the depth of y of the current marker is not 0, seen as homozygous
# If the depth of x of the current marker is 0, it's seen as homozygous site (ref, 0/0)
elif y == 0:
genotype = "0/0"
genotype_error = "0/0"
# If the depth x and y of the current marker is not 0, seen as heterozygous
# If the depth x and y of the current marker is not 0, it's seen as heterozygous (0/1)
else :
genotype = "0/1"
genotype_error = "0/1"
......@@ -180,6 +191,7 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor,
finalGenotype = str(genotype) + ":" + str(site_depth) + ":" + str(x) + "," + str(y) + ":.:.:.:.:."
segment.append(finalGenotype)
# Append the genotype and the depth of the current marker to the segment_error list
finalGenotype_error = str(genotype_error) + ":" + str(site_depth) + ":" + str(x_error) + "," + str(y_error) + ":.:.:.:.:."
segment_error.append(finalGenotype_error)
......@@ -193,6 +205,7 @@ def generate_individuals (nb_individuals, chromosome_size, marker_density, mean_
# Calculate the number of marker requiered
markers_nb = round(chromosome_size * marker_density)
# Generate a sorted list of markers positions
#markers_positions = sorted(random.sample(range(chromosome_size),size))
markers_positions = np.linspace(1, chromosome_size, markers_nb, dtype="int")
......@@ -228,14 +241,13 @@ nb_individuals = 3
chromosome_size = 44000000
cMsize = 180 ## Size of genetic map
conversion_factor = chromosome_size/cMsize ## Corresponds to a bpPercM conversion ! Needs to be fixed... Does not produce the correct division!
#print(conversion_factor)
marker_density = 0.0055
mean_depth = 3
max_depth = 6 #TODO
max_depth = 6 #TODO (better estimate of max?)
errA = 0.005
errB = 0.005
# #size = 200000
## Generate the pop and associate the results to the matrixes with and without errors
pop = generate_individuals(nb_individuals, chromosome_size, marker_density, mean_depth, conversion_factor, errA, errB)
matrix = pop[0]
matrix_error = pop[1]
......@@ -264,7 +276,8 @@ header = [
]
with open('ABtest.vcf', 'w') as file:
## Write the VCF files (with and without errors)
with open('test.vcf', 'w') as file:
# Write the header of the file
file.writelines(header)
......@@ -280,7 +293,7 @@ with open('ABtest.vcf', 'w') as file:
row.append("1/1:"+ str(mean_depth) + ":0," + str(mean_depth) + ":.:.:.:.")
file.write("\t".join(row) + "\n");
with open('ABtest_error.vcf', 'w') as file:
with open('test_error.vcf', 'w') as file:
# Write the header of the file
file.writelines(header)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment