Correct vcf output and correct kosambi formula

d1091cac · Cecile Triay · ea5a11f7 · d1091cac
Commit d1091cac authored 1 year ago by Cecile Triay
--- a/noisyplotype.py
+++ b/noisyplotype.py
@@ -4,8 +4,8 @@ import math
 import numpy as np
 

-def generate_chr_for_one_ind (chromosome_size, marker_density, err_rate, mean_depth, max_depth, markers_positions):
-
+#def generate_chr_for_one_ind (chromosome_size, marker_density, err_rate, mean_depth, max_depth, markers_positions, conversion_factor):
+def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor):
    '''
    Generate a chromosome for a given size, density of markers, and depth (follwing a gaussian distrib with a mean_depth and sd_depth).
    
@@ -31,7 +31,8 @@ def generate_chr_for_one_ind (chromosome_size, marker_density, err_rate, mean_de
            #TODO 
            #Kosambi inverse function
            IntervalWithPreviousMarker = markers_positions[i] - markers_positions[i-1]
-            tempcM = 2 * (IntervalWithPreviousMarker / 100) / len(markers_positions) # markers_nb (= len(markers_positions)) corresponds to bpPercM
+            tempcM = 2 * (IntervalWithPreviousMarker / 100) / 233000
+            #tempcM = 2 * (IntervalWithPreviousMarker / 100) / conversion_factor    ## Needs to be fixed! Use of variable does not work...
            IntervalWithPreviousMarkerInRF = 0.5 * ((math.exp(tempcM) - math.exp(-tempcM)) / (math.exp(tempcM) + math.exp(-tempcM)))
            rnd = random.random()
            # If the random number is greater than the probability of a recombination
@@ -109,14 +110,14 @@ def generate_chr_for_one_ind (chromosome_size, marker_density, err_rate, mean_de
        #print("reads",genotype1, genotype2)
        #print(genotype)
        # Append the genotype and the depth of the current marker to the segment list
-        finalGenotype = str(genotype) + ":" + str(site_depth) + ":" + str(x) + "," + str(y) + ":.:.:.:.:.:."
+        finalGenotype = str(genotype) + ":" + str(site_depth) + ":" + str(x) + "," + str(y) + ":.:.:.:.:."
        segment.append(finalGenotype)

    return segment


 # Generate a list of individuals
-def generate_individuals (nb_individuals, chromosome_size, err_rate, marker_density, mean_depth, max_depth):
+def generate_individuals (nb_individuals, chromosome_size, marker_density, mean_depth, conversion_factor):
    matrix = []

    # Calculate the number of marker requiered
@@ -127,7 +128,7 @@ def generate_individuals (nb_individuals, chromosome_size, err_rate, marker_dens

    for i in range(nb_individuals):
        print("individual " + str(i+1))
-        matrix.append(generate_chr_for_one_ind(chromosome_size, marker_density, err_rate, mean_depth, max_depth, markers_positions))
+        matrix.append(generate_chr_for_one_ind(mean_depth, markers_positions, conversion_factor))
    
    return matrix, markers_positions

@@ -143,15 +144,20 @@ def generate_individuals (nb_individuals, chromosome_size, err_rate, marker_dens

 # Set the parameters for the script
 nb_individuals = 3
-err_rate = 0.02
 chromosome_size = 44000000
+cMsize = 180    ## Size of genetic map
+#conversion_factor = chromosome_size/cMsize   ## Corresponds to a bpPercM conversion ! Needs to be fixed... Does not produce the correct division!
+conversion_factor = 233000
 marker_density = 255000/44000000
 mean_depth = 3
 max_depth = 10 #TODO
-pop = generate_individuals(nb_individuals, chromosome_size, err_rate, marker_density, mean_depth, max_depth)
+err_rate = 0.02
+
+pop = generate_individuals(nb_individuals, chromosome_size, marker_density, mean_depth, max_depth)
 matrix = pop[0]
-markers_positions = pop[1]
-    
+markers_positions = pop[1]    
+
+
 # Header to add to the VCF for it to be recognized as such file.    
 header = [
 "##fileformat=VCFv4.1\n",
@@ -175,18 +181,18 @@ header = [
 ]


-with open('test.vcf', 'w') as file:
+with open('Newtest.vcf', 'w') as file:
    # Write the header of the file
    file.writelines(header)
    
    # Write the header of the file
-    file.write("\t".join(["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT"] + [str(n) for n in range(nb_individuals)] + ["Parent1", "Parent2"] + ["Test"]) + "\n")
+    file.write("\t".join(["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT"] + [str(n) for n in range(nb_individuals)] + ["Parent1", "Parent2"]) + "\n")
    
    # Iterate over each row in the table
    for m in range(0, len(matrix[0])):
        row = ["chr01", str(markers_positions[m]), ".", "A", "T", ".", ".", ".", "GT:DP:AD:RO:QR:AO:QA:GL"]
        for i in range(0, len(matrix)):
            row.append(matrix[i][m])
-        row.append("0/0:"+ str(mean_depth) + ":" + str(mean_depth) + ",0:.:.:.:.:.")
-        row.append("1/1:"+ str(mean_depth) + ":0," + str(mean_depth) + ":.:.:.:.:.")
+        row.append("0/0:"+ str(mean_depth) + ":" + str(mean_depth) + ",0:.:.:.:.")
+        row.append("1/1:"+ str(mean_depth) + ":0," + str(mean_depth) + ":.:.:.:.")
        file.write("\t".join(row) + "\n");
\ No newline at end of file