Skip to content
Snippets Groups Projects
Commit c3c81c1b authored by fadwael.khaddar_ird.fr's avatar fadwael.khaddar_ird.fr
Browse files

VennDiagramm

parent 10cdec7d
No related branches found
No related tags found
No related merge requests found
import os
import subprocess
import numpy as np
import pandas as pd
import glob
import sys
import matplotlib
import matplotlib.pyplot as plt
#from matplotlib_venn import venn3
import matplotlib.patches as mpatches
from matplotlib_venn import venn3_unweighted
import argparse
import re
from tkinter import *
import tkinter as tk
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
matplotlib.use('TkAgg')
# Command-line arguments: the three BED files to compare.
# All three are required: the rest of the script uses every path
# unconditionally, and a missing option would otherwise surface later as a
# TypeError from os.path.basename(None) instead of a clear usage message.
parser = argparse.ArgumentParser(description="Venn Diagramm of gff and bed files")
parser.add_argument('-bed1', type=str, required=True, help="Path to the first bed file")
parser.add_argument('-bed2', type=str, required=True, help="Path to the second bed file")
parser.add_argument('-bed3', type=str, required=True, help="Path to the third bed file")
args = parser.parse_args()
bed1 = args.bed1
bed2 = args.bed2
bed3 = args.bed3
# Base name of each input file, with the directory and the last extension removed.
filename1 = os.path.splitext(os.path.basename(bed1))[0]
filename2 = os.path.splitext(os.path.basename(bed2))[0]
filename3 = os.path.splitext(os.path.basename(bed3))[0]
# Intersect the three BED files with multiIntersectBed (bedtools must be
# loaded / available on PATH).
# The command is given as an argument list rather than a shell string so that
# paths containing spaces or shell metacharacters reach the tool unchanged.
command2 = subprocess.Popen(
    ["multiIntersectBed", "-i", bed1, bed2, bed3],
    stdout=subprocess.PIPE,
)
# stderr is not captured, so `error` is always None here.
output, error = command2.communicate()
# Fail loudly if the tool reported an error instead of silently continuing
# with an empty or partial table.
if command2.returncode != 0:
    raise subprocess.CalledProcessError(command2.returncode, command2.args)
results = output.decode('utf-8').strip()
# Build a DataFrame from the tab-separated output, one row per output line.
# The last three columns are not used by this script
# (presumably per-file presence flags -- confirm against the bedtools docs).
columns = ["Chro", "Start", "End", "nb_overlap_file", "label_overlap_file","None1", "None2", "None3"]
rows = [line.split('\t') for line in results.split('\n') if line]
raw_dataframe = pd.DataFrame(rows, columns=columns)
# Keep only the columns used below; copy so the conversions that follow
# modify an independent frame rather than a selection of raw_dataframe.
df = raw_dataframe.loc[:, ["Chro", "Start", "End", "nb_overlap_file", "label_overlap_file"]].copy()
# Every field is parsed as text; convert the overlap count to int so it can
# be compared with numbers, and make sure the label stays a string.
df['nb_overlap_file'] = df['nb_overlap_file'].astype(int)
df['label_overlap_file'] = df['label_overlap_file'].astype(str)
# Row selections keyed on the overlap count and on the label naming the
# files involved in each row.
overlap_count = df['nb_overlap_file']
overlap_label = df['label_overlap_file']

# Rows whose overlap count is 3.
filtre1 = df[overlap_count == 3]
count1 = len(filtre1)

# Rows with an overlap count of 2, for the labels '1,3' and '1,2'.
filtre2 = df[(overlap_count == 2) & (overlap_label == '1,3')]
count2 = len(filtre2)
filtre3 = df[(overlap_count == 2) & (overlap_label == '1,2')]
print(filtre3)

# Rows with an overlap count of 1, one selection per single-file label.
filtre4 = df[(overlap_count == 1) & (overlap_label == '1')]
filtre5 = df[(overlap_count == 1) & (overlap_label == '2')]
print(filtre5)
filtre6 = df[(overlap_count == 1) & (overlap_label == '3')]
# Remaining region counts for the Venn diagram. count1 (label count 3) and
# count2 (label '1,3') are computed earlier in the script.
# Rows with an overlap count of 2, for the labels '1,2' and '2,3'.
filtre3 = df.loc[(df['nb_overlap_file'] == 2) & (df['label_overlap_file'] == '1,2')]
count3 = filtre3.shape[0]
filtre4 = df.loc[(df['nb_overlap_file'] == 2) & (df['label_overlap_file'] == '2,3')]
count4 = filtre4.shape[0]
# Rows with an overlap count of 1, one count per single-file label.
# NOTE: filtre4, filtre5 and filtre6 are rebound here to different
# selections than the ones made earlier in the script.
filtre5 = df[(df['nb_overlap_file'] == 1) & (df['label_overlap_file'] == '1')]
count5 = filtre5.shape[0]
filtre6 = df[(df['nb_overlap_file'] == 1) & (df['label_overlap_file'] == '2')]
count6 = filtre6.shape[0]
filtre7 = df[(df['nb_overlap_file'] == 1) & (df['label_overlap_file'] == '3')]
count7 = filtre7.shape[0]

# Venn diagram construction.
# matplotlib-venn takes the seven region sizes in the order
# (Abc, aBc, ABc, abC, AbC, aBC, ABC), i.e. here:
# only 1, only 2, 1&2, only 3, 1&3, 2&3, 1&2&3.
subset = [count5, count6, count3, count7, count2, count4, count1]
fig, ax = plt.subplots()
venn3_unweighted(subset, set_labels=(filename1, filename2, filename3), ax=ax)

# Show the figure inside a Tk window; mainloop() blocks until it is closed.
root = tk.Tk()
root.title("Venn Diagram")
canvas = FigureCanvasTkAgg(fig, master=root)
canvas.draw()
canvas.get_tk_widget().pack()
root.mainloop()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment