####-------- Intersection of bed files --------####
# Author: Fadwa EL KHADDAR
# Lab : DIADE - IRD
# University : Montpellier - France
"""Draw a three-set Venn diagram of the overlaps between three BED files.

The script runs ``multiIntersectBed`` (bedtools must be loaded / on PATH) on
the three files, counts how many output intervals belong to each combination
of files, and shows the counts as an unweighted Venn diagram in a Tk window.

Usage:
    python <script> -bed1 a.bed -bed2 b.bed -bed3 c.bed
"""

import argparse
import os
import subprocess

import pandas as pd

# The first five columns written by multiIntersectBed; any per-file columns
# that follow them are not used by this script.
COLUMNS = ["Chro", "Start", "End", "nb_overlap_file", "label_overlap_file"]


def build_parser() -> argparse.ArgumentParser:
    """Return the command-line parser (three mandatory BED file paths)."""
    parser = argparse.ArgumentParser(
        description="Venn Diagramm of gff and bed files"
    )
    # The options are required: a missing path previously surfaced as an
    # unrelated TypeError when the file name was extracted.
    parser.add_argument('-bed1', type=str, required=True,
                        help="Path to the first bed file")
    parser.add_argument('-bed2', type=str, required=True,
                        help="Path to the second bed file")
    parser.add_argument('-bed3', type=str, required=True,
                        help="Path to the third bed file")
    return parser


def run_multi_intersect(bed1: str, bed2: str, bed3: str) -> str:
    """Run ``multiIntersectBed`` on the three files and return its stdout.

    The command is passed as an argument list (no shell), so paths that
    contain spaces or shell metacharacters are handed over verbatim.

    Raises:
        SystemExit: if the executable cannot be found or exits with a
            non-zero status.
    """
    command = ["multiIntersectBed", "-i", bed1, bed2, bed3]
    try:
        completed = subprocess.run(
            command, stdout=subprocess.PIPE, check=True
        )
    except FileNotFoundError:
        raise SystemExit(
            "multiIntersectBed was not found: load bedtools before "
            "running this script."
        )
    except subprocess.CalledProcessError as err:
        raise SystemExit(
            f"multiIntersectBed failed with exit status {err.returncode}."
        )
    return completed.stdout.decode('utf-8')


def parse_multiinter_output(text: str) -> pd.DataFrame:
    """Convert ``multiIntersectBed`` text output into a DataFrame.

    Only the first five tab-separated fields of every non-blank line are
    kept (see ``COLUMNS``). ``nb_overlap_file`` is converted to ``int`` and
    ``label_overlap_file`` to ``str``.

    Raises:
        ValueError: if a line has fewer than five fields or its overlap
            count is not an integer.
    """
    rows = []
    for line in text.splitlines():
        if not line.strip():
            continue
        fields = line.split('\t')
        if len(fields) < len(COLUMNS):
            raise ValueError(
                f"Unexpected multiIntersectBed output line: {line!r}"
            )
        rows.append(fields[:len(COLUMNS)])

    df = pd.DataFrame(rows, columns=COLUMNS)
    df['nb_overlap_file'] = df['nb_overlap_file'].astype(int)
    df['label_overlap_file'] = df['label_overlap_file'].astype(str)
    return df


def count_venn_subsets(df: pd.DataFrame) -> list:
    """Count the intervals falling in each of the seven Venn regions.

    The returned list is ordered as passed to ``venn3_unweighted``:
    only file 1, only file 2, files 1 and 2, only file 3, files 1 and 3,
    files 2 and 3, all three files.
    """
    nb_files = df['nb_overlap_file']
    labels = df['label_overlap_file']

    def count(expected_nb, expected_label):
        # An interval is counted when both the number of files and the
        # comma-separated list of file indices match.
        selected = (nb_files == expected_nb) & (labels == expected_label)
        return int(selected.sum())

    return [
        count(1, '1'),
        count(1, '2'),
        count(2, '1,2'),
        count(1, '3'),
        count(2, '1,3'),
        count(2, '2,3'),
        # Intervals present in all three files are selected on the count
        # alone, whatever the label text is.
        int((nb_files == 3).sum()),
    ]


def show_venn(subsets, set_labels) -> None:
    """Display the Venn diagram in a Tk window and block until it is closed.

    The plotting libraries are imported here rather than at module level so
    that the parsing and counting helpers can be imported on machines
    without a display or without Tk.
    """
    import tkinter as tk

    import matplotlib
    matplotlib.use('TkAgg')
    from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
    from matplotlib.figure import Figure
    from matplotlib_venn import venn3_unweighted

    # A plain Figure is embedded in our own window; the diagram is drawn
    # once, instead of also being drawn on extra pyplot figures that were
    # never displayed.
    fig = Figure()
    ax = fig.add_subplot()
    venn3_unweighted(subsets, set_labels=set_labels, ax=ax)

    root = tk.Tk()
    root.title("Venn Diagram")
    canvas = FigureCanvasTkAgg(fig, master=root)
    canvas.draw()
    canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1)
    root.mainloop()


def main(argv=None) -> None:
    """Parse the arguments, intersect the BED files and show the diagram."""
    args = build_parser().parse_args(argv)
    bed_paths = (args.bed1, args.bed2, args.bed3)

    # Each circle is labelled with its file name, without directory or
    # extension.
    set_labels = tuple(
        os.path.splitext(os.path.basename(path))[0] for path in bed_paths
    )

    df = parse_multiinter_output(run_multi_intersect(*bed_paths))
    show_venn(count_venn_subsets(df), set_labels)


if __name__ == "__main__":
    main()