Skip to content
Snippets Groups Projects
Commit be3ee463 authored by fadwael.khaddar_ird.fr's avatar fadwael.khaddar_ird.fr
Browse files

generate boxplot for methylation frequency

parent ec175022
No related branches found
No related tags found
No related merge requests found
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the four tab-separated BED files (one per tool shown in the plot)
# and keep a handle on each file's methylation-frequency column.
def _load_bed(path):
    """Read one tab-separated BED file with columns chro/start/end/freq."""
    return pd.read_csv(path, sep='\t', names=['chro', 'start', 'end', 'freq'])


bed1 = _load_bed("C:\\Users\\SCD UM\\Desktop\\methylation_frequency\\MET1_DSP.bed")
bed2 = _load_bed("C:\\Users\\SCD UM\\Desktop\\methylation_frequency\\MET1_Tombo.bed")
bed3 = _load_bed("C:\\Users\\SCD UM\\Desktop\\methylation_frequency\\MET1_Megalodon.bed")
bed4 = _load_bed("C:\\Users\\SCD UM\\Desktop\\methylation_frequency\\MET1_DeepMP.bed")

# Only the frequency column is used by the rest of the script.
freq1, freq2, freq3, freq4 = (bed["freq"] for bed in (bed1, bed2, bed3, bed4))
# Split each tool's frequencies into three bands:
#   low    : 0.25 <= f < 0.5
#   medium : 0.5  <= f < 0.75
#   high   : f >= 0.75
# Values below 0.25 match none of the masks and are therefore left out.
def _split_into_bands(values):
    """Return the (low, medium, high) sub-arrays of a NumPy array."""
    in_low = (values >= 0.25) & (values < 0.5)
    in_medium = (values >= 0.5) & (values < 0.75)
    in_high = values >= 0.75
    return values[in_low], values[in_medium], values[in_high]


# np.sort returns a new ascending array for each frequency column.
freq1_sorted, freq2_sorted, freq3_sorted, freq4_sorted = (
    np.sort(column) for column in (freq1, freq2, freq3, freq4)
)

freq1_low, freq1_medium, freq1_high = _split_into_bands(freq1_sorted)  # Bed 1
freq2_low, freq2_medium, freq2_high = _split_into_bands(freq2_sorted)  # Bed 2
freq3_low, freq3_medium, freq3_high = _split_into_bands(freq3_sorted)  # Bed 3
freq4_low, freq4_medium, freq4_high = _split_into_bands(freq4_sorted)  # Bed 4
# One panel per frequency band, laid out side by side.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))

# Fill colour for each tool's box, in plotting order.
color_bed1 = 'lightblue'
color_bed2 = 'lightgreen'
color_bed3 = 'red'
color_bed4 = 'pink'

# Tick labels, in the same order as the arrays passed to boxplot().
tool_labels = ["DeepSignalPlant", "Tombo", "Megalodon", "DeepMP"]

# Plotting boxplots for each frequency range.
# patch_artist=True makes the boxes filled patches, so a face colour can be
# applied to them afterwards.
# The tick labels are set through set_xticklabels() only: boxplot()'s
# `labels` keyword is deprecated since Matplotlib 3.9 (renamed `tick_labels`),
# and set_xticklabels() already applied the same labels (plus the rotation)
# right after each call, so the resulting figure is unchanged.
boxes_low = axes[0].boxplot([freq1_low, freq2_low, freq3_low, freq4_low], patch_artist=True)
axes[0].set_xticklabels(tool_labels, rotation=45)

boxes_medium = axes[1].boxplot([freq1_medium, freq2_medium, freq3_medium, freq4_medium], patch_artist=True)
axes[1].set_xticklabels(tool_labels, rotation=45)

boxes_high = axes[2].boxplot([freq1_high, freq2_high, freq3_high, freq4_high], patch_artist=True)
axes[2].set_xticklabels(tool_labels, rotation=45)
# Setting colors for the boxes
# Every panel uses the same colour sequence: the box at position i gets the
# colour of the i-th tool.
box_colors = (color_bed1, color_bed2, color_bed3, color_bed4)
for panel_boxes in (boxes_low, boxes_medium, boxes_high):
    # zip() stops after the fourth colour, so any additional box would keep
    # its default face colour.
    for box, face_color in zip(panel_boxes['boxes'], box_colors):
        box.set(facecolor=face_color)
# Give each panel the title of its frequency band; all panels share the
# same y-axis label.
panel_titles = (
    "Faible méthylation (0.25 - 0.5)",
    "Moyenne méthylation (0.5 - 0.75)",
    "Forte méthylation (0.75-1.0)",
)
for panel, panel_title in zip(axes, panel_titles):
    panel.set_title(panel_title)
    panel.set_ylabel("Fréquence de méthylation")
# Setting the same y-axis limits for all subplots, with a little head-room,
# so the three panels can be compared directly.
buffer = 0.05  # Fraction of the data span added below and above the data

# Take the overall minimum and maximum over the band arrays that actually
# contain data. np.min/np.max raise ValueError on an empty array, which
# would happen as soon as one tool has no value in a band. When every low
# and high band is populated this yields the same limits as taking the
# minimum of the low bands and the maximum of the high bands.
band_arrays = [
    freq1_low, freq2_low, freq3_low, freq4_low,
    freq1_medium, freq2_medium, freq3_medium, freq4_medium,
    freq1_high, freq2_high, freq3_high, freq4_high,
]
populated_bands = [band for band in band_arrays if len(band) > 0]

if populated_bands:
    min_value = min(np.min(band) for band in populated_bands)
    max_value = max(np.max(band) for band in populated_bands)
    span = max_value - min_value
    # A zero span (a single distinct value) would give identical lower and
    # upper limits, so fall back to an absolute padding in that case.
    padding = buffer * span if span > 0 else buffer
    y_min = min_value - padding
    y_max = max_value + padding
    for ax in axes:
        ax.set_ylim(y_min, y_max)
# With no data in any band the default autoscaled limits are left in place.

# Adjusting the layout
plt.tight_layout()
# Display the plot
plt.show()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment