Skip to content
Snippets Groups Projects
Commit 30d512e5 authored by fadwael.khaddar_ird.fr's avatar fadwael.khaddar_ird.fr
Browse files

ça manque l'automatisation

parent 5913c6fc
No related branches found
No related tags found
No related merge requests found
####-------- Intersection of bed files --------####
# Author: Fadwa EL KHADDAR
# Lab : DIADE - IRD
# University : Montpellier - France
####-------- Intersection of bed files --------####
# Author: Fadwa EL KHADDAR
# Lab : DIADE - IRD
# University : Montpellier - France
import os
import subprocess
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib_venn import venn2_unweighted
import argparse
import tkinter as tk
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
matplotlib.use('TkAgg')
# Arguments
# Paths that used to be hard-coded in the script body. They are kept only as
# fall-back defaults so that running the script without arguments still targets
# the same files; values given with -bed1 / -bed2 now take precedence.
DEFAULT_BED1 = "C:\\Users\\SCD UM\\Documents\\UpSetPlot\\BS-seq.bed"
DEFAULT_BED2 = "C:\\Users\\SCD UM\\Documents\\UpSetPlot\\Tombo.bed"

# Names given to the first three BED columns.
BED_COLUMNS = ["Chro", "Start", "End"]


def parse_args(argv=None):
    """Parse the command line and return the argparse namespace.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Namespace with the attributes ``bed1`` and ``bed2`` (file paths).
    """
    parser = argparse.ArgumentParser(description="Venn Diagram of gff and bed files")
    parser.add_argument('-bed1', type=str, default=DEFAULT_BED1,
                        help="Path to the first bed file")
    parser.add_argument('-bed2', type=str, default=DEFAULT_BED2,
                        help="Path to the second bed file")
    return parser.parse_args(argv)


def read_bed(path) -> pd.DataFrame:
    """Read the first three columns of a tab-separated BED file.

    ``header=None`` keeps the first record as data (the script treats the
    files as header-less), ``usecols`` keeps the column names aligned with the
    first three columns even when the file has extra columns, and the
    chromosome column is forced to ``str`` so that both files compare with
    the same type.

    Args:
        path: Path (or file-like object) of the BED file.

    Returns:
        DataFrame with the columns ``Chro``, ``Start`` and ``End``.
    """
    return pd.read_csv(
        path,
        sep='\t',
        header=None,
        usecols=[0, 1, 2],
        names=BED_COLUMNS,
        dtype={"Chro": str},
    )


def count_overlapping(bed_a: str, bed_b: str) -> int:
    """Count the records of ``bed_a`` reported by ``bedtools intersect -u``.

    The command is passed as an argument list (no shell), so paths containing
    spaces are handled correctly.

    Args:
        bed_a: Path given to bedtools as ``-a``.
        bed_b: Path given to bedtools as ``-b``.

    Returns:
        Number of non-empty lines printed by bedtools.

    Raises:
        FileNotFoundError: If the ``bedtools`` executable cannot be found.
        subprocess.CalledProcessError: If bedtools exits with a non-zero
            status (previously this silently produced a count of 0).
    """
    completed = subprocess.run(
        ["bedtools", "intersect", "-u", "-a", bed_a, "-b", bed_b],
        capture_output=True,
        text=True,
        check=True,
    )
    return sum(1 for line in completed.stdout.splitlines() if line.strip())


def overlap_subsets(total_a: int, total_b: int, shared: int) -> tuple:
    """Turn file totals into the ``(A only, B only, A and B)`` Venn sizes.

    The Venn functions expect the size of each exclusive region, not the
    total size of each set, so the shared count is subtracted from both totals.

    NOTE(review): ``shared`` is counted on the ``-a`` side only; the number of
    ``-b`` records involved in an overlap may differ, so the "B only" value is
    an approximation. It is clamped at 0 to never hand a negative size to the
    plotting function.

    Args:
        total_a: Number of records in the first file.
        total_b: Number of records in the second file.
        shared: Number of records of the first file overlapping the second.

    Returns:
        Tuple ``(only_a, only_b, shared)``.
    """
    return (max(total_a - shared, 0), max(total_b - shared, 0), shared)


def exact_match_subsets(bed_a: pd.DataFrame, bed_b: pd.DataFrame) -> tuple:
    """Compare two BED tables on identical ``(Chro, Start, End)`` records.

    The chromosome is part of the key, so equal coordinates located on
    different chromosomes are not counted as shared.

    Args:
        bed_a: Table returned by :func:`read_bed` for the first file.
        bed_b: Table returned by :func:`read_bed` for the second file.

    Returns:
        Tuple ``(only_a, only_b, shared)`` of distinct-record counts.
    """
    intervals_a = set(zip(bed_a["Chro"], bed_a["Start"], bed_a["End"]))
    intervals_b = set(zip(bed_b["Chro"], bed_b["Start"], bed_b["End"]))
    return (
        len(intervals_a - intervals_b),
        len(intervals_b - intervals_a),
        len(intervals_a & intervals_b),
    )


def show_in_tk_window(fig, title: str) -> None:
    """Embed ``fig`` in a new Tk window and block until it is closed."""
    root = tk.Tk()
    root.title(title)
    canvas = FigureCanvasTkAgg(fig, master=root)
    canvas.draw()
    canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1)
    root.mainloop()


def main(argv=None) -> None:
    """Draw the two Venn diagrams comparing the two BED files.

    1. An unweighted diagram based on ``bedtools intersect -u`` (overlapping
       records), shown in a Tk window.
    2. A weighted diagram based on strictly identical records, shown with
       ``plt.show()`` once the first window has been closed.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    # Function-scope imports: only needed for plotting.
    from matplotlib.figure import Figure
    from matplotlib_venn import venn2

    args = parse_args(argv)
    bed1 = args.bed1
    bed2 = args.bed2

    # Extract file names (used as set labels)
    filename1 = os.path.splitext(os.path.basename(bed1))[0]
    filename2 = os.path.splitext(os.path.basename(bed2))[0]
    labels = (filename1, filename2)

    # Read the BED files once; the tables serve both diagrams.
    bed1_data = read_bed(bed1)
    bed2_data = read_bed(bed2)

    # Intersection of bed files using bedtools
    shared = count_overlapping(bed1, bed2)
    subset = overlap_subsets(len(bed1_data), len(bed2_data), shared)

    # Venn Diagram Construction
    # A plain Figure (not a pyplot-managed one) is used for the Tk embedding:
    # pyplot would create its own hidden Tk root, which can keep mainloop()
    # from returning after the window is closed.
    fig = Figure()
    ax = fig.add_subplot(111)
    venn2_unweighted(subset, set_labels=labels, ax=ax)
    show_in_tk_window(fig, "Venn Diagram")

    # Second diagram: identical intervals, drawn on its own axes so that it
    # is not overlaid on the first diagram.
    fig2, ax2 = plt.subplots()
    venn2(subsets=exact_match_subsets(bed1_data, bed2_data),
          set_labels=labels, ax=ax2)
    ax2.set_title("")
    # Display the diagram
    plt.show()


if __name__ == "__main__":
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment