Skip to content
Snippets Groups Projects
Commit 361a8781 authored by alice.boizet_cirad.fr's avatar alice.boizet_cirad.fr
Browse files

generate output file with breakpoints

parent c10927e4
No related branches found
No related tags found
No related merge requests found
...@@ -19,9 +19,13 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor, ...@@ -19,9 +19,13 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor,
segment = [] segment = []
segment_error = [] segment_error = []
breakpoints = []
# Iterate through the list of markers positions # Iterate through the list of markers positions
previousMarkerGenotype = ""
for i in range(0, len(markers_positions)): for i in range(0, len(markers_positions)):
recomb1 = False
recomb2 = False
# If the current marker is not the first one # If the current marker is not the first one
if i > 0: if i > 0:
# Calculate the interval between the current marker and the previous one # Calculate the interval between the current marker and the previous one
...@@ -34,6 +38,7 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor, ...@@ -34,6 +38,7 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor,
# If the random number is greater than the probability of a recombination # If the random number is greater than the probability of a recombination
#print("before",genotype1, genotype2) #print("before",genotype1, genotype2)
if rnd > 1 - IntervalWithPreviousMarkerInRF: # a recombination occurs if rnd > 1 - IntervalWithPreviousMarkerInRF: # a recombination occurs
recomb1 = True
# If the previous marker is A, change the genotype to B # If the previous marker is A, change the genotype to B
if genotype1 == "A": if genotype1 == "A":
genotype1 = "B"; genotype1 = "B";
...@@ -41,11 +46,12 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor, ...@@ -41,11 +46,12 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor,
genotype1 = "A"; genotype1 = "A";
rnd = random.random() rnd = random.random()
if rnd > 1 - IntervalWithPreviousMarkerInRF: if rnd > 1 - IntervalWithPreviousMarkerInRF:
recomb2 = True
# If the previous marker is B, change the genotype to A # If the previous marker is B, change the genotype to A
if genotype2 == "A": if genotype2 == "A":
genotype2 = "B"; genotype2 = "B"
else : else :
genotype2 = "A"; genotype2 = "A"
#print("after",genotype1, genotype2) #print("after",genotype1, genotype2)
# If the current marker is the first one, fix the two first genotypes. # If the current marker is the first one, fix the two first genotypes.
else: else:
...@@ -61,10 +67,31 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor, ...@@ -61,10 +67,31 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor,
genotype2 = "B" genotype2 = "B"
else : else :
genotype2 = "A" genotype2 = "A"
# initialize previousMarkerGenotype
if genotype1 == "A" and genotype2 == "A":
previousMarkerGenotype = "A"
elif genotype1 == "B" and genotype2 == "B":
previousMarkerGenotype = "B"
else :
previousMarkerGenotype = "H"
#Breakpoints
if recomb1 or recomb2:
#this is a breakpoint [beforeBkpAfter, transition]
transition = previousMarkerGenotype + " => "
#TODO : ajouter erreur de séquençage if genotype1 == "A" and genotype2 == "A":
previousMarkerGenotype = "A"
elif genotype1 == "B" and genotype2 == "B":
previousMarkerGenotype = "B"
else :
previousMarkerGenotype = "H"
transition = transition + previousMarkerGenotype
breakpoints.append([i,transition])
# Calculate the depth of the current marker # Calculate the depth of the current marker
g = np.random.poisson(mean_depth) g = np.random.poisson(mean_depth)
# If the depth of the current marker is less than 0 (safeguard) # If the depth of the current marker is less than 0 (safeguard)
...@@ -91,7 +118,7 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor, ...@@ -91,7 +118,7 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor,
y_error = 0 y_error = 0
for j in range(0,site_depth): for j in range(0,site_depth):
rnd = random.random() rnd = random.random()
# If the random number is greater than errB, we switch increase the wronge allele depth # If the random number is smaller than errA, we switch increase the wrong allele depth
if rnd < errA: if rnd < errA:
x_error = x_error + 0 x_error = x_error + 0
y_error = y_error + 1 y_error = y_error + 1
...@@ -114,7 +141,7 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor, ...@@ -114,7 +141,7 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor,
y_error = 0 y_error = 0
for j in range(0,site_depth): for j in range(0,site_depth):
rnd = random.random() rnd = random.random()
# If the random number is greater than errB, we switch increase the wronge allele depth # If the random number is smaller than errB, we switch increase the wronge allele depth
if rnd < errB: if rnd < errB:
x_error = x_error + 1 x_error = x_error + 1
y_error = y_error + 0 y_error = y_error + 0
...@@ -145,7 +172,8 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor, ...@@ -145,7 +172,8 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor,
# If the depth x and y of the current marker is not 0, seen as heterozygous # If the depth x and y of the current marker is not 0, seen as heterozygous
else : else :
genotype = "0/1" genotype = "0/1"
genotype_error = "0/1" genotype_error = "0/1"
#print("reads",genotype1, genotype2) #print("reads",genotype1, genotype2)
#print(genotype) #print(genotype)
# Append the genotype and the depth of the current marker to the segment list # Append the genotype and the depth of the current marker to the segment list
...@@ -155,12 +183,13 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor, ...@@ -155,12 +183,13 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor,
finalGenotype_error = str(genotype_error) + ":" + str(site_depth) + ":" + str(x_error) + "," + str(y_error) + ":.:.:.:.:." finalGenotype_error = str(genotype_error) + ":" + str(site_depth) + ":" + str(x_error) + "," + str(y_error) + ":.:.:.:.:."
segment_error.append(finalGenotype_error) segment_error.append(finalGenotype_error)
return segment, segment_error return segment, segment_error, breakpoints
# Generate a list of individuals # Generate a list of individuals
def generate_individuals (nb_individuals, chromosome_size, marker_density, mean_depth, conversion_factor, errA, errB): def generate_individuals (nb_individuals, chromosome_size, marker_density, mean_depth, conversion_factor, errA, errB):
matrix = [] matrix = []
matrix_error = [] matrix_error = []
breakpoints = []
# Calculate the number of marker requiered # Calculate the number of marker requiered
markers_nb = round(chromosome_size * marker_density) markers_nb = round(chromosome_size * marker_density)
...@@ -168,11 +197,19 @@ def generate_individuals (nb_individuals, chromosome_size, marker_density, mean_ ...@@ -168,11 +197,19 @@ def generate_individuals (nb_individuals, chromosome_size, marker_density, mean_
#markers_positions = sorted(random.sample(range(chromosome_size),size)) #markers_positions = sorted(random.sample(range(chromosome_size),size))
markers_positions = np.linspace(1, chromosome_size, markers_nb, dtype="int") markers_positions = np.linspace(1, chromosome_size, markers_nb, dtype="int")
for i in range(nb_individuals): with open('Breakpoints.csv', 'w') as breakpointFile:
print("individual " + str(i+1)) breakpointFile.write(",".join(["sample","average_bkp_position", "bkp_start_position", "bkp_stop_position", "transition"]) + "\n");
result = generate_chr_for_one_ind(mean_depth, markers_positions, conversion_factor, errA, errB) for i in range(nb_individuals):
matrix.append(result[0]) print("individual " + str(i+1))
matrix_error.append(result[1]) result = generate_chr_for_one_ind(mean_depth, markers_positions, conversion_factor, errA, errB)
matrix.append(result[0])
matrix_error.append(result[1])
for bkp in result[2]:
startBkp = markers_positions[bkp[0] - 1]
stopBkp = markers_positions[bkp[0]]
averageBkpPosition = startBkp + round((stopBkp - startBkp)/2)
bkp_row = [str(i), str(averageBkpPosition), str(startBkp), str(stopBkp), bkp[1]]
breakpointFile.write(",".join(bkp_row) + "\n");
return matrix, matrix_error, markers_positions return matrix, matrix_error, markers_positions
...@@ -187,7 +224,7 @@ def generate_individuals (nb_individuals, chromosome_size, marker_density, mean_ ...@@ -187,7 +224,7 @@ def generate_individuals (nb_individuals, chromosome_size, marker_density, mean_
# return [count_a, count_b, count_h, 0.45 <= count_h <= 0.55 and 0.2 <= count_b <= 0.3 and 0.2 <= count_a <= 0.3] # return [count_a, count_b, count_h, 0.45 <= count_h <= 0.55 and 0.2 <= count_b <= 0.3 and 0.2 <= count_a <= 0.3]
# Set the parameters for the script # Set the parameters for the script
nb_individuals = 100 nb_individuals = 3
chromosome_size = 44000000 chromosome_size = 44000000
cMsize = 180 ## Size of genetic map cMsize = 180 ## Size of genetic map
conversion_factor = chromosome_size/cMsize ## Corresponds to a bpPercM conversion ! Needs to be fixed... Does not produce the correct division! conversion_factor = chromosome_size/cMsize ## Corresponds to a bpPercM conversion ! Needs to be fixed... Does not produce the correct division!
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment