fix errors writing vcf file

5ff25ba1 · alice.boizet_cirad.fr · 108f7668 · 5ff25ba1
Commit 5ff25ba1 authored 1 year ago by alice.boizet_cirad.fr
--- a/noisyplotype.py
+++ b/noisyplotype.py
@@ -11,7 +11,7 @@ import math
 # Returns:
    # str: A random segment from the given genotype.
 # '''
-def generate_chr (chromosome_size, marker_density, err_rate, mean_depth, sd_depth):
+def generate_chr_for_one_ind (chromosome_size, marker_density, err_rate, mean_depth, sd_depth):

    # Create a list of all possible genotypes
    #genotypes = ["A", "B", "H", "H"]
@@ -22,11 +22,7 @@ def generate_chr (chromosome_size, marker_density, err_rate, mean_depth, sd_dept

    # random marker positions
    size = round(chromosome_size * marker_density)
-    print(size)
-    print(chromosome_size)
-    print(marker_density)
    positions = sorted(random.sample(range(chromosome_size),size)) #sorted list of markers positions
-
    for i in range(0, size):
        if i > 0:
            #TODO 
@@ -58,7 +54,10 @@ def generate_chr (chromosome_size, marker_density, err_rate, mean_depth, sd_dept
        #TODO : ajouter erreur de séquençage

        # sd : standard deviation
-        site_depth = round(random.gauss(mean_depth, sd_depth))
+        g = random.gauss(mean_depth, sd_depth)
+        if g < 0:
+            g = 0
+        site_depth = round(g)
        x = 0
        y = 0
        if site_depth == 0:            
@@ -74,8 +73,7 @@ def generate_chr (chromosome_size, marker_density, err_rate, mean_depth, sd_dept
                genotype = "1/1"
            else:
                #x = random.gauss(site_depth, sd2)
-                print(site_depth)
-                x = round(random.random(range(0,site_depth)))
+                x = random.randint(0,site_depth)
                y = site_depth - x
                if x == 0:
                    genotype = "1/1"
@@ -85,16 +83,20 @@ def generate_chr (chromosome_size, marker_density, err_rate, mean_depth, sd_dept
                    genotype = "0/1"                

        
-        finalGenotype = str(genotype) + ":" + str(site_depth) + ":" + str(x) + "," + str(y)
+        finalGenotype = str(genotype) + ":" + str(site_depth) + ":" + str(x) + "," + str(y) + ":.:.:.:.:."
        segment.append(finalGenotype)

    # Return the random segment
-    return "".join(segment)
+    return segment

 # Generate a list of individuals
 def generate_individuals (nb_individuals, chromosome_size, err_rate, marker_density, mean_depth, sd_depth):
+    matrix = []
+    for i in range(nb_individuals):
+        print("individual " + str(i+1))
+        matrix.append(generate_chr_for_one_ind(chromosome_size, marker_density, err_rate, mean_depth, sd_depth))
    
-    return [generate_chr(chromosome_size, marker_density, err_rate, mean_depth, sd_depth) for i in range(nb_individuals)]
+    return matrix

 # Check if a row is correct
 def is_correct (row):
@@ -115,23 +117,23 @@ mean_depth = 3
 sd_depth = 1
 pop = generate_individuals(nb_individuals, chromosome_size, err_rate, marker_density, mean_depth, sd_depth)

-# Create an empty list to store the individuals
-rows = []
+# # Create an empty list to store the individuals
+# rows = []

-# Loop through the population
-for i in range(chromosome_size):
-    # Create an empty list to store the individual
-    row = []
+# # Loop through the population
+# for i in range(chromosome_size):
+#     # Create an empty list to store the individual
+#     row = []
    
-    # Loop through the individuals in the population
-    for ind in pop:
-        # Append the individual's chromosome to the row
-        row.append(ind[i])
+#     # Loop through the individuals in the population
+#     for ind in pop:
+#         # Append the individual's chromosome to the row
+#         row.append(ind[i])
        
-    # Append the row to the list of rows
-    #row.append(is_correct(row))
+#     # Append the row to the list of rows
+#     #row.append(is_correct(row))
    
-    rows.append(row)
+#     rows.append(row)
    
    
 header = [
@@ -156,24 +158,32 @@ header = [
 ]

 #with open('test.vcf', 'w') as file:
+
 with open('test.tsv', 'w') as file:
    # Write the header of the file
-    #file.writelines(header)
+    file.writelines(header)
    
    # Write the header of the file
    file.write("\t".join(["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT"] + [str(n) for n in range(nb_individuals)] + ["Parent1", "Parent2"] + ["Test"]) + "\n")
    
    # Iterate over each row in the table
-    for i, row in enumerate(rows):
-        # Create an empty list to store the converted row
-        converted_row = []
+    #for i, row in enumerate(rows):
+    for m in range(0, len(pop[0])):
+        row = ["chr01", str(m + 1), ".", "A", "T", ".", ".", ".", "GT:DP:AD:RO:QR:AO:QA:GL"]
+        for i in range(0, len(pop)):
+            row.append(pop[i][m])
+        row.append("0/0:"+ str(mean_depth) + ":" + str(mean_depth) + ",0:.:.:.:.:.")
+        row.append("1/1:"+ str(mean_depth) + ":0," + str(mean_depth) + ":.:.:.:.:.")
+        file.write("\t".join(row) + "\n");
+
+        # # Create an empty list to store the converted row
+        # converted_row = []
        
-        # Iterate over each genotype in the row
-        for genotype in row:
-
-            converted_row.append(genotype + ":.:.:.:.:.")
+        # # Iterate over each genotype in the row
+        # for genotype in ind:
+        #     converted_row.append(genotype + ":.:.:.:.:.")

-        file.write("\t".join(["chr01", str(i + 1), ".", "A", "T", ".", ".", ".", "GT:DP:AD:RO:QR:AO:QA:GL"] + converted_row + ["0/0:"+ mean_depth + ":" + mean_depth + ",0:.:.:.:.:.", "1/1:"+ mean_depth + ":0," + mean_depth + ":.:.:.:.:."]) + "\n" )
+        # file.write("\t".join(["chr01", str(i + 1), ".", "A", "T", ".", ".", ".", "GT:DP:AD:RO:QR:AO:QA:GL"] + converted_row + ["0/0:"+ mean_depth + ":" + mean_depth + ",0:.:.:.:.:.", "1/1:"+ mean_depth + ":0," + mean_depth + ":.:.:.:.:."]) + "\n" )