diff --git a/noisyplotype.py b/noisyplotype.py index dd88d4b5d1ba8152396621058cbfa64b29e0c975..14f6ee26ad2fc6f90f39f9482497a5b093d9fd39 100644 --- a/noisyplotype.py +++ b/noisyplotype.py @@ -19,9 +19,13 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor, segment = [] segment_error = [] + breakpoints = [] # Iterate through the list of markers positions + previousMarkerGenotype = "" for i in range(0, len(markers_positions)): + recomb1 = False + recomb2 = False # If the current marker is not the first one if i > 0: # Calculate the interval between the current marker and the previous one @@ -34,6 +38,7 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor, # If the random number is greater than the probability of a recombination #print("before",genotype1, genotype2) if rnd > 1 - IntervalWithPreviousMarkerInRF: # a recombination occurs + recomb1 = True # If the previous marker is A, change the genotype to B if genotype1 == "A": genotype1 = "B"; @@ -41,11 +46,12 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor, genotype1 = "A"; rnd = random.random() if rnd > 1 - IntervalWithPreviousMarkerInRF: + recomb2 = True # If the previous marker is B, change the genotype to A if genotype2 == "A": - genotype2 = "B"; + genotype2 = "B" else : - genotype2 = "A"; + genotype2 = "A" #print("after",genotype1, genotype2) # If the current marker is the first one, fix the two first genotypes. else: @@ -61,10 +67,31 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor, genotype2 = "B" else : genotype2 = "A" + + # initialize previousMarkerGenotype + if genotype1 == "A" and genotype2 == "A": + previousMarkerGenotype = "A" + elif genotype1 == "B" and genotype2 == "B": + previousMarkerGenotype = "B" + else : + previousMarkerGenotype = "H" + #Breakpoints + if recomb1 or recomb2: + #this is a breakpoint [beforeBkpAfter, transition] + transition = previousMarkerGenotype + " => " - #TODO : ajouter erreur de séquençage + if genotype1 == "A" and genotype2 == "A": + previousMarkerGenotype = "A" + elif genotype1 == "B" and genotype2 == "B": + previousMarkerGenotype = "B" + else : + previousMarkerGenotype = "H" + + transition = transition + previousMarkerGenotype + breakpoints.append([i,transition]) + # Calculate the depth of the current marker g = np.random.poisson(mean_depth) # If the depth of the current marker is less than 0 (safeguard) @@ -91,7 +118,7 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor, y_error = 0 for j in range(0,site_depth): rnd = random.random() - # If the random number is greater than errB, we switch increase the wronge allele depth + # If the random number is smaller than errA, we switch increase the wrong allele depth if rnd < errA: x_error = x_error + 0 y_error = y_error + 1 @@ -114,7 +141,7 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor, y_error = 0 for j in range(0,site_depth): rnd = random.random() - # If the random number is greater than errB, we switch increase the wronge allele depth + # If the random number is smaller than errB, we switch increase the wronge allele depth if rnd < errB: x_error = x_error + 1 y_error = y_error + 0 @@ -145,7 +172,8 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor, # If the depth x and y of the current marker is not 0, seen as heterozygous else : genotype = "0/1" - genotype_error = "0/1" + genotype_error = "0/1" + #print("reads",genotype1, genotype2) #print(genotype) # Append the genotype and the depth of the current marker to the segment list @@ -155,12 +183,13 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor, finalGenotype_error = str(genotype_error) + ":" + str(site_depth) + ":" + str(x_error) + "," + str(y_error) + ":.:.:.:.:." segment_error.append(finalGenotype_error) - return segment, segment_error + return segment, segment_error, breakpoints # Generate a list of individuals def generate_individuals (nb_individuals, chromosome_size, marker_density, mean_depth, conversion_factor, errA, errB): matrix = [] matrix_error = [] + breakpoints = [] # Calculate the number of marker requiered markers_nb = round(chromosome_size * marker_density) @@ -168,11 +197,19 @@ def generate_individuals (nb_individuals, chromosome_size, marker_density, mean_ #markers_positions = sorted(random.sample(range(chromosome_size),size)) markers_positions = np.linspace(1, chromosome_size, markers_nb, dtype="int") - for i in range(nb_individuals): - print("individual " + str(i+1)) - result = generate_chr_for_one_ind(mean_depth, markers_positions, conversion_factor, errA, errB) - matrix.append(result[0]) - matrix_error.append(result[1]) + with open('Breakpoints.csv', 'w') as breakpointFile: + breakpointFile.write(",".join(["sample","average_bkp_position", "bkp_start_position", "bkp_stop_position", "transition"]) + "\n"); + for i in range(nb_individuals): + print("individual " + str(i+1)) + result = generate_chr_for_one_ind(mean_depth, markers_positions, conversion_factor, errA, errB) + matrix.append(result[0]) + matrix_error.append(result[1]) + for bkp in result[2]: + startBkp = markers_positions[bkp[0] - 1] + stopBkp = markers_positions[bkp[0]] + averageBkpPosition = startBkp + round((stopBkp - startBkp)/2) + bkp_row = [str(i), str(averageBkpPosition), str(startBkp), str(stopBkp), bkp[1]] + breakpointFile.write(",".join(bkp_row) + "\n"); return matrix, matrix_error, markers_positions @@ -187,7 +224,7 @@ def generate_individuals (nb_individuals, chromosome_size, marker_density, mean_ # return [count_a, count_b, count_h, 0.45 <= count_h <= 0.55 and 0.2 <= count_b <= 0.3 and 0.2 <= count_a <= 0.3] # Set the parameters for the script -nb_individuals = 100 +nb_individuals = 3 chromosome_size = 44000000 cMsize = 180 ## Size of genetic map conversion_factor = chromosome_size/cMsize ## Corresponds to a bpPercM conversion ! Needs to be fixed... Does not produce the correct division!