Skip to content
Snippets Groups Projects
Commit 108f7668 authored by alice.boizet_cirad.fr's avatar alice.boizet_cirad.fr
Browse files

fusion of simulation.vba and cecile method

parent ddade1d2
No related branches found
No related tags found
No related merge requests found
import random
import csv
import math
# '''Generate a random segment from a given genotype.
......@@ -10,67 +11,90 @@ import csv
# Returns:
# str: A random segment from the given genotype.
# '''
def generate_segment (genotype, size, err_rate):
def generate_chr (chromosome_size, marker_density, err_rate, mean_depth, sd_depth):
# Create a list of all possible genotypes
genotypes = ["A", "B", "H", "H"]
#genotypes = ["A", "B", "H", "H"]
# Create a list of all other genotypes that are not the given genotype
other_genotypes = [g for g in genotypes if g!= genotype]
#other_genotypes = [g for g in genotypes if g!= genotype]
segment = []
# Generate a random segment from the given genotype
for i in range(size):
# If the probability of replacing a segment is less than or equal to the error rate
if random.random() <= err_rate:
# Replace the segment with a random other genotype
segment.append(random.choice(other_genotypes))
# random marker positions
size = round(chromosome_size * marker_density)
print(size)
print(chromosome_size)
print(marker_density)
positions = sorted(random.sample(range(chromosome_size),size)) #sorted list of markers positions
for i in range(0, size):
if i > 0:
#TODO
#Kosambi inverse function
IntervalWithPreviousMarker = positions[i] - positions[i-1]
temp = 2 * (IntervalWithPreviousMarker / 100)
IntervalWithPreviousMarkerInRF = 0.5 * ((math.exp(temp) - math.exp(-temp)) / (math.exp(temp) + math.exp(-temp)))
rnd = random.random()
if rnd > 1 - IntervalWithPreviousMarkerInRF: # a recombination occurs
if genotype1 == "A":
genotype1 = "B";
else :
genotype1 = "A";
if genotype2 == "A":
genotype2 = "B";
else :
genotype1 = "A";
else:
# Otherwise, append the given genotype to the segment
segment.append(genotype)
# first marker
rnd = random.random()
if rnd > 0.5:
genotype1 = "A"
genotype2 = "B"
else :
genotype2 = "A"
genotype1 = "B"
#TODO : ajouter erreur de séquençage
# sd : standard deviation
site_depth = round(random.gauss(mean_depth, sd_depth))
x = 0
y = 0
if site_depth == 0:
genotype = "./."
else :
if genotype1 == "A" and genotype2 == "A":
x = site_depth
y = 0
genotype = "0/0"
elif genotype1 == "B" and genotype2 == "B":
x = 1
y = site_depth
genotype = "1/1"
else:
#x = random.gauss(site_depth, sd2)
print(site_depth)
x = round(random.random(range(0,site_depth)))
y = site_depth - x
if x == 0:
genotype = "1/1"
elif y == 0:
genotype = "0/0"
else :
genotype = "0/1"
finalGenotype = str(genotype) + ":" + str(site_depth) + ":" + str(x) + "," + str(y)
segment.append(finalGenotype)
# Return the random segment
return "".join(segment)
# Generate a chromosome with a given total size and error rate
def generate_chromosome (total_size, err_rate):
min_transition = 1
max_transition = 2
min_segment_size = 1000
nb_transition = random.randint(min_transition, max_transition)
# Generate a list of probabilities for each transition
genotype_probability = ["A", "B", "H", "H"]
chromosome = []
genotypes = []
current_size = 0
for i in range(nb_transition + 1): # nb of segments is nb transition + 1
if i == nb_transition:
segment_size = total_size - current_size
else:
segment_size = random.randint(min_segment_size, (total_size - current_size) - ((nb_transition - i) * min_segment_size))
# Generate a genotype with the given probability
if len(genotypes) > 0:
genotype = "H" if (genotypes[-1] == "A" or genotypes[-1] == "B") else random.choice(["A", "B"]) #random.choice([g for g in genotype_probability if g!= genotypes[-1]])
else:
genotype = random.choice(genotype_probability) # twice as many chances to be H
# Generate a segment with the given genotype and size
segment = generate_segment(genotype, segment_size, err_rate)
chromosome.append(segment)
genotypes.append(genotype)
current_size += segment_size
# Join the chromosome with a '-'
return "".join(chromosome)
# Generate a list of individuals
def generate_individuals (nb_individuals, chromosome_size, err_rate):
return [generate_chromosome(chromosome_size, err_rate) for i in range(nb_individuals)]
def generate_individuals (nb_individuals, chromosome_size, err_rate, marker_density, mean_depth, sd_depth):
return [generate_chr(chromosome_size, marker_density, err_rate, mean_depth, sd_depth) for i in range(nb_individuals)]
# Check if a row is correct
def is_correct (row):
......@@ -85,7 +109,11 @@ def is_correct (row):
chromosome_size = 100000
nb_individuals = 100
err_rate = 0.02
pop = generate_individuals(nb_individuals, chromosome_size, err_rate)
chromosome_size = 44000000
marker_density = 255000/44000000
mean_depth = 3
sd_depth = 1
pop = generate_individuals(nb_individuals, chromosome_size, err_rate, marker_density, mean_depth, sd_depth)
# Create an empty list to store the individuals
rows = []
......@@ -142,22 +170,10 @@ with open('test.tsv', 'w') as file:
# Iterate over each genotype in the row
for genotype in row:
# Set the genotype to '.' if it is not present
g = "."
# If the genotype is 'H', set the genotype to 0/1
if genotype == "H":
g = "0/1"
# If the genotype is 'A', set the genotype to 0/0
elif genotype == "A":
g = "0/0"
# If the genotype is 'B', set the genotype to 1/1
elif genotype == "B":
g = "1/1"
converted_row.append(g + ":.:.:.:.:.:.:.")
file.write("\t".join(["chr01", str(i + 1), ".", "A", "T", ".", ".", ".", "GT:DP:AD:RO:QR:AO:QA:GL"] + converted_row + ["0/0:.:.:.:.:.:.:.", "1/1:.:.:.:.:.:.:."]) + "\n" )
converted_row.append(genotype + ":.:.:.:.:.")
file.write("\t".join(["chr01", str(i + 1), ".", "A", "T", ".", ".", ".", "GT:DP:AD:RO:QR:AO:QA:GL"] + converted_row + ["0/0:"+ mean_depth + ":" + mean_depth + ",0:.:.:.:.:.", "1/1:"+ mean_depth + ":0," + mean_depth + ":.:.:.:.:."]) + "\n" )
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment