diff --git a/noisyplotype.py b/noisyplotype.py index 14b5251c4ee506b9e53b586b18d31887974b8709..e363a1d847cefd8ff0bdcd10bcc3bed82db596a9 100644 --- a/noisyplotype.py +++ b/noisyplotype.py @@ -11,7 +11,7 @@ import math # Returns: # str: A random segment from the given genotype. # ''' -def generate_chr (chromosome_size, marker_density, err_rate, mean_depth, sd_depth): +def generate_chr_for_one_ind (chromosome_size, marker_density, err_rate, mean_depth, sd_depth): # Create a list of all possible genotypes #genotypes = ["A", "B", "H", "H"] @@ -22,11 +22,7 @@ def generate_chr (chromosome_size, marker_density, err_rate, mean_depth, sd_dept # random marker positions size = round(chromosome_size * marker_density) - print(size) - print(chromosome_size) - print(marker_density) positions = sorted(random.sample(range(chromosome_size),size)) #sorted list of markers positions - for i in range(0, size): if i > 0: #TODO @@ -58,7 +54,10 @@ def generate_chr (chromosome_size, marker_density, err_rate, mean_depth, sd_dept #TODO : ajouter erreur de séquençage # sd : standard deviation - site_depth = round(random.gauss(mean_depth, sd_depth)) + g = random.gauss(mean_depth, sd_depth) + if g < 0: + g = 0 + site_depth = round(g) x = 0 y = 0 if site_depth == 0: @@ -74,8 +73,7 @@ def generate_chr (chromosome_size, marker_density, err_rate, mean_depth, sd_dept genotype = "1/1" else: #x = random.gauss(site_depth, sd2) - print(site_depth) - x = round(random.random(range(0,site_depth))) + x = random.randint(0,site_depth) y = site_depth - x if x == 0: genotype = "1/1" @@ -85,16 +83,20 @@ def generate_chr (chromosome_size, marker_density, err_rate, mean_depth, sd_dept genotype = "0/1" - finalGenotype = str(genotype) + ":" + str(site_depth) + ":" + str(x) + "," + str(y) + finalGenotype = str(genotype) + ":" + str(site_depth) + ":" + str(x) + "," + str(y) + ":.:.:.:.:." segment.append(finalGenotype) # Return the random segment - return "".join(segment) + return segment # Generate a list of individuals def generate_individuals (nb_individuals, chromosome_size, err_rate, marker_density, mean_depth, sd_depth): + matrix = [] + for i in range(nb_individuals): + print("individual " + str(i+1)) + matrix.append(generate_chr_for_one_ind(chromosome_size, marker_density, err_rate, mean_depth, sd_depth)) - return [generate_chr(chromosome_size, marker_density, err_rate, mean_depth, sd_depth) for i in range(nb_individuals)] + return matrix # Check if a row is correct def is_correct (row): @@ -115,23 +117,23 @@ mean_depth = 3 sd_depth = 1 pop = generate_individuals(nb_individuals, chromosome_size, err_rate, marker_density, mean_depth, sd_depth) -# Create an empty list to store the individuals -rows = [] +# # Create an empty list to store the individuals +# rows = [] -# Loop through the population -for i in range(chromosome_size): - # Create an empty list to store the individual - row = [] +# # Loop through the population +# for i in range(chromosome_size): +# # Create an empty list to store the individual +# row = [] - # Loop through the individuals in the population - for ind in pop: - # Append the individual's chromosome to the row - row.append(ind[i]) +# # Loop through the individuals in the population +# for ind in pop: +# # Append the individual's chromosome to the row +# row.append(ind[i]) - # Append the row to the list of rows - #row.append(is_correct(row)) +# # Append the row to the list of rows +# #row.append(is_correct(row)) - rows.append(row) +# rows.append(row) header = [ @@ -156,24 +158,32 @@ header = [ ] #with open('test.vcf', 'w') as file: + with open('test.tsv', 'w') as file: # Write the header of the file - #file.writelines(header) + file.writelines(header) # Write the header of the file file.write("\t".join(["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT"] + [str(n) for n in range(nb_individuals)] + ["Parent1", "Parent2"] + ["Test"]) + "\n") # Iterate over each row in the table - for i, row in enumerate(rows): - # Create an empty list to store the converted row - converted_row = [] + #for i, row in enumerate(rows): + for m in range(0, len(pop[0])): + row = ["chr01", str(m + 1), ".", "A", "T", ".", ".", ".", "GT:DP:AD:RO:QR:AO:QA:GL"] + for i in range(0, len(pop)): + row.append(pop[i][m]) + row.append("0/0:"+ str(mean_depth) + ":" + str(mean_depth) + ",0:.:.:.:.:.") + row.append("1/1:"+ str(mean_depth) + ":0," + str(mean_depth) + ":.:.:.:.:.") + file.write("\t".join(row) + "\n"); + + # # Create an empty list to store the converted row + # converted_row = [] - # Iterate over each genotype in the row - for genotype in row: - - converted_row.append(genotype + ":.:.:.:.:.") + # # Iterate over each genotype in the row + # for genotype in ind: + # converted_row.append(genotype + ":.:.:.:.:.") - file.write("\t".join(["chr01", str(i + 1), ".", "A", "T", ".", ".", ".", "GT:DP:AD:RO:QR:AO:QA:GL"] + converted_row + ["0/0:"+ mean_depth + ":" + mean_depth + ",0:.:.:.:.:.", "1/1:"+ mean_depth + ":0," + mean_depth + ":.:.:.:.:."]) + "\n" ) + # file.write("\t".join(["chr01", str(i + 1), ".", "A", "T", ".", ".", ".", "GT:DP:AD:RO:QR:AO:QA:GL"] + converted_row + ["0/0:"+ mean_depth + ":" + mean_depth + ",0:.:.:.:.:.", "1/1:"+ mean_depth + ":0," + mean_depth + ":.:.:.:.:."]) + "\n" )