diff --git a/noisyplotype.py b/noisyplotype.py index a6bb220c134d9524859347eee6d046753891abbf..a151fefc8f53588cd11b9f6a39cabe4c4f486ec5 100644 --- a/noisyplotype.py +++ b/noisyplotype.py @@ -4,8 +4,8 @@ import math import numpy as np -def generate_chr_for_one_ind (chromosome_size, marker_density, err_rate, mean_depth, max_depth, markers_positions): - +#def generate_chr_for_one_ind (chromosome_size, marker_density, err_rate, mean_depth, max_depth, markers_positions, conversion_factor): +def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor): ''' Generate a chromosome for a given size, density of markers, and depth (follwing a gaussian distrib with a mean_depth and sd_depth). @@ -31,7 +31,8 @@ def generate_chr_for_one_ind (chromosome_size, marker_density, err_rate, mean_de #TODO #Kosambi inverse function IntervalWithPreviousMarker = markers_positions[i] - markers_positions[i-1] - tempcM = 2 * (IntervalWithPreviousMarker / 100) / len(markers_positions) # markers_nb (= len(markers_positions)) corresponds to bpPercM + tempcM = 2 * (IntervalWithPreviousMarker / 100) / 233000 + #tempcM = 2 * (IntervalWithPreviousMarker / 100) / conversion_factor ## Needs to be fixed! Use of variable does not work... IntervalWithPreviousMarkerInRF = 0.5 * ((math.exp(tempcM) - math.exp(-tempcM)) / (math.exp(tempcM) + math.exp(-tempcM))) rnd = random.random() # If the random number is greater than the probability of a recombination @@ -109,14 +110,14 @@ def generate_chr_for_one_ind (chromosome_size, marker_density, err_rate, mean_de #print("reads",genotype1, genotype2) #print(genotype) # Append the genotype and the depth of the current marker to the segment list - finalGenotype = str(genotype) + ":" + str(site_depth) + ":" + str(x) + "," + str(y) + ":.:.:.:.:.:." + finalGenotype = str(genotype) + ":" + str(site_depth) + ":" + str(x) + "," + str(y) + ":.:.:.:.:." segment.append(finalGenotype) return segment # Generate a list of individuals -def generate_individuals (nb_individuals, chromosome_size, err_rate, marker_density, mean_depth, max_depth): +def generate_individuals (nb_individuals, chromosome_size, marker_density, mean_depth, conversion_factor): matrix = [] # Calculate the number of marker requiered @@ -127,7 +128,7 @@ def generate_individuals (nb_individuals, chromosome_size, err_rate, marker_dens for i in range(nb_individuals): print("individual " + str(i+1)) - matrix.append(generate_chr_for_one_ind(chromosome_size, marker_density, err_rate, mean_depth, max_depth, markers_positions)) + matrix.append(generate_chr_for_one_ind(mean_depth, markers_positions, conversion_factor)) return matrix, markers_positions @@ -143,15 +144,20 @@ def generate_individuals (nb_individuals, chromosome_size, err_rate, marker_dens # Set the parameters for the script nb_individuals = 3 -err_rate = 0.02 chromosome_size = 44000000 +cMsize = 180 ## Size of genetic map +#conversion_factor = chromosome_size/cMsize ## Corresponds to a bpPercM conversion ! Needs to be fixed... Does not produce the correct division! +conversion_factor = 233000 marker_density = 255000/44000000 mean_depth = 3 max_depth = 10 #TODO -pop = generate_individuals(nb_individuals, chromosome_size, err_rate, marker_density, mean_depth, max_depth) +err_rate = 0.02 + +pop = generate_individuals(nb_individuals, chromosome_size, marker_density, mean_depth, max_depth) matrix = pop[0] -markers_positions = pop[1] - +markers_positions = pop[1] + + # Header to add to the VCF for it to be recognized as such file. header = [ "##fileformat=VCFv4.1\n", @@ -175,18 +181,18 @@ header = [ ] -with open('test.vcf', 'w') as file: +with open('Newtest.vcf', 'w') as file: # Write the header of the file file.writelines(header) # Write the header of the file - file.write("\t".join(["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT"] + [str(n) for n in range(nb_individuals)] + ["Parent1", "Parent2"] + ["Test"]) + "\n") + file.write("\t".join(["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT"] + [str(n) for n in range(nb_individuals)] + ["Parent1", "Parent2"]) + "\n") # Iterate over each row in the table for m in range(0, len(matrix[0])): row = ["chr01", str(markers_positions[m]), ".", "A", "T", ".", ".", ".", "GT:DP:AD:RO:QR:AO:QA:GL"] for i in range(0, len(matrix)): row.append(matrix[i][m]) - row.append("0/0:"+ str(mean_depth) + ":" + str(mean_depth) + ",0:.:.:.:.:.") - row.append("1/1:"+ str(mean_depth) + ":0," + str(mean_depth) + ":.:.:.:.:.") + row.append("0/0:"+ str(mean_depth) + ":" + str(mean_depth) + ",0:.:.:.:.") + row.append("1/1:"+ str(mean_depth) + ":0," + str(mean_depth) + ":.:.:.:.") file.write("\t".join(row) + "\n"); \ No newline at end of file