"""Classify the intervals shared between three BED files.

The script runs ``multiIntersectBed`` on the three BED files given on the
command line, loads its tab-separated output into a pandas DataFrame and
selects the rows belonging to each overlap category (present in all three
files, in files 1 and 3, in files 1 and 2, or in a single file only).

Usage:
    python <script> -bed1 A.bed -bed2 B.bed -bed3 C.bed
"""
import argparse
import glob
import os
import re
import subprocess
import sys
from tkinter import *  # noqa: F401,F403 - wildcard import kept as in the original file

import matplotlib
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# from matplotlib_venn import venn3

matplotlib.use('TkAgg')

# ---------------------------------------------------------------------------
# Command-line interface
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(description="Venn Diagramm of gff and bed files")
# All three files are mandatory: the rest of the script dereferences every
# path, so a missing one must be reported as a usage error rather than fail
# later with a TypeError in os.path.basename(None).
parser.add_argument('-bed1', type=str, required=True, help="Path to the first bed file")
parser.add_argument('-bed2', type=str, required=True, help="Path to the second bed file")
parser.add_argument('-bed3', type=str, required=True, help="Path to the third bed file")
args = parser.parse_args()

bed1 = args.bed1
bed2 = args.bed2
bed3 = args.bed3

# Base names without extension (e.g. "/data/a.bed" -> "a").
filename1 = os.path.splitext(os.path.basename(bed1))[0]
filename2 = os.path.splitext(os.path.basename(bed2))[0]
filename3 = os.path.splitext(os.path.basename(bed3))[0]

# ---------------------------------------------------------------------------
# Run multiIntersectBed on the three files
# ---------------------------------------------------------------------------
# The command is passed as an argument list (no shell) so that paths holding
# spaces or shell metacharacters reach the tool unchanged and cannot inject
# commands. check=True raises CalledProcessError on a non-zero exit status
# instead of silently continuing with an empty table; stderr is left attached
# to the terminal so the tool's own error message stays visible.
completed = subprocess.run(
    ["multiIntersectBed", "-i", bed1, bed2, bed3],
    stdout=subprocess.PIPE,
    check=True,
)
output = completed.stdout
results = output.decode('utf-8').strip()

# ---------------------------------------------------------------------------
# Load the output into a DataFrame
# ---------------------------------------------------------------------------
# Eight tab-separated fields are expected per line for three input files.
# Only the first five are used below; the last three are given placeholder
# names.
columns = ["Chro", "Start", "End", "nb_overlap_file", "label_overlap_file",
           "None1", "None2", "None3"]
rows = [line.split('\t') for line in results.split('\n') if line]
raw_dataframe = pd.DataFrame(rows, columns=columns)

# Explicit copy: the columns are re-typed right after, and assigning into a
# slice of raw_dataframe could otherwise trigger SettingWithCopyWarning.
df = raw_dataframe.loc[
    :, ["Chro", "Start", "End", "nb_overlap_file", "label_overlap_file"]
].copy()
df['nb_overlap_file'] = df['nb_overlap_file'].astype(int)
df['label_overlap_file'] = df['label_overlap_file'].astype(str)

# ---------------------------------------------------------------------------
# Select the rows of each overlap category
# ---------------------------------------------------------------------------
overlap_count = df['nb_overlap_file']
overlap_label = df['label_overlap_file']

# Intervals covered by all three files.
filtre1 = df[overlap_count == 3]
count1 = filtre1.shape[0]

# Intervals covered by exactly two files.
filtre2 = df.loc[(overlap_count == 2) & (overlap_label == '1,3')]
filtre3 = df.loc[(overlap_count == 2) & (overlap_label == '1,2')]
print(filtre3)
# NOTE(review): no selection exists for the '2,3' pair; a three-set Venn
# diagram would need it - confirm whether that omission is intentional.

# Intervals covered by a single file only.
filtre4 = df[(overlap_count == 1) & (overlap_label == '1')]
filtre5 = df[(overlap_count == 1) & (overlap_label == '2')]
print(filtre5)
filtre6 = df[(overlap_count == 1) & (overlap_label == '3')]

# ---------------------------------------------------------------------------
# Disabled draft (previously parked inside a bare triple-quoted string).
# NOTE(review): it refers to `intersection`, `nb_features` and `feature`,
# none of which is defined in the visible part of this script.
# ---------------------------------------------------------------------------
# # Column names for the DataFrame:
# columns = ["chromosome_f1", "start_f1", "end_f1", "feature", "chromosome_f2", "start_f2", "end_f2", "methylation", "overlap_length"]
#
# # Row list for the DataFrame: split the `intersection` string into a list
# # of lists, one sub-list per line of the intersection table, skipping
# # empty lines.
# rows = [line.split('\t') for line in intersection.split('\n') if line]
#
# dataframe = pd.DataFrame(rows, columns=columns)
# methylated_counts = dataframe.groupby(['start_f1', 'end_f1']).size().reset_index(name='Methylated_cytosine')
#
# ## Plotting pie chart:
# # Proportion of genes with and without methylation
# num_feature_methylated = len(set(dataframe['start_f1']))
# num_feature_unmethylated = nb_features - num_feature_methylated
#
# # Create the figure
# fig, ax = plt.subplots(figsize=(6, 6))
#
# # Data to plot
# data = [num_feature_methylated, num_feature_unmethylated]
#
# # Labels for the pie chart
# labels = ['Methylated', 'Unmethylated']
#
# # Colours for each slice of the pie chart
# colors = ['#ffc107', '#007bff']
#
# # Draw the pie chart
# ax.pie(data, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
#
# # Add a title
# ax.set_title(f'Proportion of methylated and unmethylated {feature}')
#
# # Show the plot
# plt.show()