Skip to content
Snippets Groups Projects
Commit d1091cac authored by Cecile Triay's avatar Cecile Triay
Browse files

Correct vcf output and correct kosambi formula

parent ea5a11f7
No related branches found
No related tags found
No related merge requests found
......@@ -4,8 +4,8 @@ import math
import numpy as np
def generate_chr_for_one_ind (chromosome_size, marker_density, err_rate, mean_depth, max_depth, markers_positions):
#def generate_chr_for_one_ind (chromosome_size, marker_density, err_rate, mean_depth, max_depth, markers_positions, conversion_factor):
def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor):
'''
Generate a chromosome for a given size, density of markers, and depth (follwing a gaussian distrib with a mean_depth and sd_depth).
......@@ -31,7 +31,8 @@ def generate_chr_for_one_ind (chromosome_size, marker_density, err_rate, mean_de
#TODO
#Kosambi inverse function
IntervalWithPreviousMarker = markers_positions[i] - markers_positions[i-1]
tempcM = 2 * (IntervalWithPreviousMarker / 100) / len(markers_positions) # markers_nb (= len(markers_positions)) corresponds to bpPercM
tempcM = 2 * (IntervalWithPreviousMarker / 100) / 233000
#tempcM = 2 * (IntervalWithPreviousMarker / 100) / conversion_factor ## Needs to be fixed! Use of variable does not work...
IntervalWithPreviousMarkerInRF = 0.5 * ((math.exp(tempcM) - math.exp(-tempcM)) / (math.exp(tempcM) + math.exp(-tempcM)))
rnd = random.random()
# If the random number is greater than the probability of a recombination
......@@ -109,14 +110,14 @@ def generate_chr_for_one_ind (chromosome_size, marker_density, err_rate, mean_de
#print("reads",genotype1, genotype2)
#print(genotype)
# Append the genotype and the depth of the current marker to the segment list
finalGenotype = str(genotype) + ":" + str(site_depth) + ":" + str(x) + "," + str(y) + ":.:.:.:.:.:."
finalGenotype = str(genotype) + ":" + str(site_depth) + ":" + str(x) + "," + str(y) + ":.:.:.:.:."
segment.append(finalGenotype)
return segment
# Generate a list of individuals
def generate_individuals (nb_individuals, chromosome_size, err_rate, marker_density, mean_depth, max_depth):
def generate_individuals (nb_individuals, chromosome_size, marker_density, mean_depth, conversion_factor):
matrix = []
# Calculate the number of marker requiered
......@@ -127,7 +128,7 @@ def generate_individuals (nb_individuals, chromosome_size, err_rate, marker_dens
for i in range(nb_individuals):
print("individual " + str(i+1))
matrix.append(generate_chr_for_one_ind(chromosome_size, marker_density, err_rate, mean_depth, max_depth, markers_positions))
matrix.append(generate_chr_for_one_ind(mean_depth, markers_positions, conversion_factor))
return matrix, markers_positions
......@@ -143,15 +144,20 @@ def generate_individuals (nb_individuals, chromosome_size, err_rate, marker_dens
# Set the parameters for the script
nb_individuals = 3
err_rate = 0.02
chromosome_size = 44000000
cMsize = 180 ## Size of genetic map
#conversion_factor = chromosome_size/cMsize ## Corresponds to a bpPercM conversion ! Needs to be fixed... Does not produce the correct division!
conversion_factor = 233000
marker_density = 255000/44000000
mean_depth = 3
max_depth = 10 #TODO
pop = generate_individuals(nb_individuals, chromosome_size, err_rate, marker_density, mean_depth, max_depth)
err_rate = 0.02
pop = generate_individuals(nb_individuals, chromosome_size, marker_density, mean_depth, max_depth)
matrix = pop[0]
markers_positions = pop[1]
markers_positions = pop[1]
# Header to add to the VCF for it to be recognized as such file.
header = [
"##fileformat=VCFv4.1\n",
......@@ -175,18 +181,18 @@ header = [
]
with open('test.vcf', 'w') as file:
with open('Newtest.vcf', 'w') as file:
# Write the header of the file
file.writelines(header)
# Write the header of the file
file.write("\t".join(["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT"] + [str(n) for n in range(nb_individuals)] + ["Parent1", "Parent2"] + ["Test"]) + "\n")
file.write("\t".join(["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT"] + [str(n) for n in range(nb_individuals)] + ["Parent1", "Parent2"]) + "\n")
# Iterate over each row in the table
for m in range(0, len(matrix[0])):
row = ["chr01", str(markers_positions[m]), ".", "A", "T", ".", ".", ".", "GT:DP:AD:RO:QR:AO:QA:GL"]
for i in range(0, len(matrix)):
row.append(matrix[i][m])
row.append("0/0:"+ str(mean_depth) + ":" + str(mean_depth) + ",0:.:.:.:.:.")
row.append("1/1:"+ str(mean_depth) + ":0," + str(mean_depth) + ":.:.:.:.:.")
row.append("0/0:"+ str(mean_depth) + ":" + str(mean_depth) + ",0:.:.:.:.")
row.append("1/1:"+ str(mean_depth) + ":0," + str(mean_depth) + ":.:.:.:.")
file.write("\t".join(row) + "\n");
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment