####-------- Intersection of bed files --------####
# Author: Fadwa EL KHADDAR
# Lab : DIADE - IRD
# University : Montpellier - France
import argparse
import os
import subprocess
import tkinter as tk

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
# Leading columns of every `multiIntersectBed` output line that this script
# uses.  The tool appends one extra presence column per input file after
# these; those trailing columns are ignored.
INTERSECT_COLUMNS = ["Chro", "Start", "End", "nb_overlap_file", "label_overlap_file"]

# Value of the "label_overlap_file" column for each Venn region, in the order
# the original script passed the counts to `venn3_unweighted`:
# only 1, only 2, 1&2, only 3, 1&3, 2&3, 1&2&3.
VENN_REGION_LABELS = ("1", "2", "1,2", "3", "1,3", "2,3", "1,2,3")


def parse_args(argv=None):
    """Parse the command line and return the argparse namespace.

    All three bed files are mandatory: the rest of the script cannot work
    with fewer, so a missing flag is reported by argparse instead of
    surfacing later as a ``TypeError`` on a ``None`` path.

    :param argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="Venn Diagramm of gff and bed files")
    parser.add_argument('-bed1', type=str, required=True, help="Path to the first bed file")
    parser.add_argument('-bed2', type=str, required=True, help="Path to the second bed file")
    parser.add_argument('-bed3', type=str, required=True, help="Path to the third bed file")
    return parser.parse_args(argv)


def run_multi_intersect(bed_paths):
    """Run ``multiIntersectBed -i <bed_paths...>`` and return its stdout as text.

    The command is passed as an argument list (no shell), so paths that
    contain spaces or shell metacharacters are handed over verbatim.
    bedtools must be available on PATH.

    :param bed_paths: iterable of paths to the bed files to intersect.
    :raises SystemExit: if the executable is missing or exits with an error.
    """
    command = ["multiIntersectBed", "-i", *bed_paths]
    try:
        # stderr is left attached to the terminal so bedtools' own
        # diagnostics stay visible to the user.
        completed = subprocess.run(command, stdout=subprocess.PIPE, check=True)
    except FileNotFoundError as exc:
        raise SystemExit("multiIntersectBed not found: load bedtools first") from exc
    except subprocess.CalledProcessError as exc:
        raise SystemExit(f"multiIntersectBed failed with exit status {exc.returncode}") from exc
    return completed.stdout.decode('utf-8')


def parse_intersections(text):
    """Turn tab-separated ``multiIntersectBed`` output into a DataFrame.

    Only the first five fields of each non-empty line are kept (see
    ``INTERSECT_COLUMNS``).  ``nb_overlap_file`` is converted to ``int`` and
    ``label_overlap_file`` to ``str``; the other columns stay as parsed
    strings.  Empty input gives an empty frame with the same columns.

    :param text: raw stdout of the intersection command.
    """
    width = len(INTERSECT_COLUMNS)
    rows = [line.split('\t')[:width] for line in text.splitlines() if line]
    df = pd.DataFrame(rows, columns=INTERSECT_COLUMNS)
    df['nb_overlap_file'] = df['nb_overlap_file'].astype(int)
    df['label_overlap_file'] = df['label_overlap_file'].astype(str)
    return df


def venn_subsets(df):
    """Count the intervals that fall in each of the seven Venn regions.

    The region is identified by the ``label_overlap_file`` value; the number
    of overlapping files is implied by that label, so it is not tested
    separately.

    :param df: frame produced by ``parse_intersections``.
    :return: list of seven ints ordered as ``VENN_REGION_LABELS``.
    """
    per_label = df['label_overlap_file'].value_counts()
    return [int(per_label.get(label, 0)) for label in VENN_REGION_LABELS]


def show_venn(subset, labels):
    """Display the three-set Venn diagram in a Tk window until it is closed.

    :param subset: the seven region counts from ``venn_subsets``.
    :param labels: the three set labels, in bed-file order.
    """
    root = tk.Tk()
    # A plain Figure (not one made through pyplot) is used because the figure
    # is embedded in our own Tk window; the diagram is drawn exactly once.
    fig = Figure()
    ax = fig.add_subplot(111)
    # NOTE(review): `venn3_unweighted` is not imported anywhere in the visible
    # file; it is provided by the matplotlib-venn package
    # (`from matplotlib_venn import venn3_unweighted`) - that import has to be
    # present for this call to work.
    venn3_unweighted(subset, set_labels=labels, ax=ax)
    canvas = FigureCanvasTkAgg(fig, master=root)
    canvas.draw()
    canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1)
    # Without the event loop the window is torn down as soon as the script ends.
    root.mainloop()


def main(argv=None):
    """Intersect the three bed files given on the command line and plot them."""
    args = parse_args(argv)
    bed_paths = [args.bed1, args.bed2, args.bed3]
    # Each set is labelled with its file name stripped of directory and extension.
    labels = tuple(os.path.splitext(os.path.basename(path))[0] for path in bed_paths)
    df = parse_intersections(run_multi_intersect(bed_paths))
    show_venn(venn_subsets(df), labels)


if __name__ == "__main__":
    main()