diff --git a/README.md b/README.md index 7221a51805b3b3195eddf1b6cb196f5aa49ba1e5..69618325c07d41916cdd5f914e55b3f3f59561f6 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Popsimul +# PopSimul ## Description @@ -17,10 +17,10 @@ You can see all parameters by running this command | Short | Long | Description | Default value | |---|---|---|---| | -o | --output | Name for the 3 generated files : [output].vcf, [output]_errors.vcf, [output]_Breakpoints.csv | popsimul | -| -n | --nb_individuals | number of individuals | 10 | -| -s | --chr_size | chromosome size | 4000000| +| -ni | --nb_individuals | number of individuals | 10 | +| -bp | --chr_bp | chromosome size | 4000000| | -cM | --cM_size | size of genetic map | 180 | -| -d | --marker_density | marker density| 0.005 | +| -nm | --nb_markers | number of markers| 220000 | | -a | --average_depth | average depth | 3 | | -m | --max_depth | max depth | 10 | | -eA | --error_rate_A | error rate for A | 0.05 | diff --git a/noisyplotype.py b/noisyplotype.py index 92093badeeeadd99b4185ab568da5179e9f2629b..c8255c46301a46b633f23768fe4182f6afeeca16 100644 --- a/noisyplotype.py +++ b/noisyplotype.py @@ -196,16 +196,13 @@ def generate_chr_for_one_ind (mean_depth, markers_positions, conversion_factor, return segment, segment_error, breakpoints # Generate a list of individuals -def generate_individuals (nb_individuals, chromosome_size, marker_density, mean_depth, conversion_factor, errA, errB): +def generate_individuals (nb_individuals, chromosome_size, nb_markers, mean_depth, conversion_factor, errA, errB): matrix = [] matrix_error = [] - - # Calculate the number of marker requiered - markers_nb = round(chromosome_size * marker_density) # Generate a sorted list of markers positions #markers_positions = sorted(random.sample(range(chromosome_size),size)) - markers_positions = np.linspace(1, chromosome_size, markers_nb, dtype="int") + markers_positions = np.linspace(1, chromosome_size, nb_markers, dtype="int") with open(args.output + '_Breakpoints.csv', 'w') as breakpointFile: breakpointFile.write(",".join(["sample","average_bkp_position", "bkp_start_position", "bkp_stop_position", "transitionType"]) + "\n"); @@ -235,13 +232,13 @@ def generate_individuals (nb_individuals, chromosome_size, marker_density, mean_ # define commandline arguments -parser = argparse.ArgumentParser(description='Description of your program') +parser = argparse.ArgumentParser(description='PopSimul') parser.add_argument('-o', '--output', help='Output files name', default="popsimul") -parser.add_argument('-n', '--nb_individuals', help='Number of individuals', default=10) -parser.add_argument('-s', '--chr_size', help='Chromosome size in bp', default=44000000) +parser.add_argument('-ni', '--nb_individuals', help='Number of individuals', default=10) +parser.add_argument('-bp', '--chr_bp', help='Chromosome size in bp', default=44000000) parser.add_argument('-cM', '--cM_size', help='Size of genetic map', default=180) -parser.add_argument('-d', '--marker_density', help='Number of markers per bp', default=0.0058) +parser.add_argument('-nm', '--nb_markers', help='Number of markers', default=220000) parser.add_argument('-a', '--average_depth', help='Depth average', default=3) parser.add_argument('-m', '--max_depth', help='Maximum possible depth', default=10) parser.add_argument('-eA', '--error_rate_A', help='Error rate', default=0.05) @@ -251,12 +248,12 @@ args = parser.parse_args() # Set the parameters for the script nb_individuals = int(args.nb_individuals) -chromosome_size = int(args.chr_size) +chromosome_size = int(args.chr_bp) cMsize = int(args.cM_size) ## Size of genetic map conversion_factor = chromosome_size/cMsize ## Corresponds to a bpPercM conversion ! Needs to be fixed... Does not produce the correct division! #conversion_factor = 233000 #marker_density = 255000/44000000 -marker_density = float(args.marker_density) +nb_markers = int(args.nb_markers) mean_depth = float(args.average_depth) max_depth = float(args.max_depth) #TODO better estimate for max ? errA = float(args.error_rate_A) @@ -264,9 +261,9 @@ errB = float(args.error_rate_B) print("-----USED PARAMETERS-----") print("nb_individuals =",str(nb_individuals)) -print("chromosome_size =",str(chromosome_size)) +print("chr_bp =",str(chromosome_size)) print("cMsize =",str(cMsize)) -print("marker_density =",str(nb_individuals)) +print("nb_markers =",str(nb_markers)) print("average_depth =",str(mean_depth)) print("max_depth =",str(max_depth)) print("error_rate_A =",str(errA)) @@ -274,7 +271,7 @@ print("error_rate_B =",str(errB)) print("------------------------") ## Generate the pop and associate the results to the matrixes with and without errors -pop = generate_individuals(nb_individuals, chromosome_size, marker_density, mean_depth, conversion_factor, errA, errB) +pop = generate_individuals(nb_individuals, chromosome_size, nb_markers, mean_depth, conversion_factor, errA, errB) matrix = pop[0] matrix_error = pop[1] markers_positions = pop[2] @@ -333,4 +330,4 @@ with open(args.output + '_errors.vcf', 'w') as file: row.append("0/0:"+ str(round(mean_depth)) + ":" + str(round(mean_depth)) + ",0:.:.:.:.") row.append("1/1:"+ str(round(mean_depth)) + ":0," + str(round(mean_depth)) + ":.:.:.:.") file.write("\t".join(row) + "\n"); - print("The file", args.output, "has been generated") \ No newline at end of file + print("The files", args.output + ".vcf, " + args.output + "_errors.vcf and " + args.output + "_Breakpoint.csv" , "have been generated") \ No newline at end of file