Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import os
import subprocess
import numpy as np
import pandas as pd
import glob
import sys
import matplotlib
import matplotlib.pyplot as plt
#from matplotlib_venn import venn3
import matplotlib.patches as mpatches
import argparse
import re
from tkinter import *
matplotlib.use('TkAgg')
# ---------------------------------------------------------------------------
# Command-line interface: three BED files whose intervals are intersected.
# All three paths are needed below, so they are declared as required; this
# turns a missing option into a clear usage error instead of a TypeError
# raised later by os.path.basename(None).
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(description="Venn Diagramm of gff and bed files")
parser.add_argument('-bed1', type=str, required=True, help="Path to the first bed file")
parser.add_argument('-bed2', type=str, required=True, help="Path to the second bed file")
parser.add_argument('-bed3', type=str, required=True, help="Path to the third bed file")
args = parser.parse_args()
bed1 = args.bed1
bed2 = args.bed2
bed3 = args.bed3

# File names without directory and extension.
filename1 = os.path.splitext(os.path.basename(bed1))[0]
filename2 = os.path.splitext(os.path.basename(bed2))[0]
filename3 = os.path.splitext(os.path.basename(bed3))[0]

# Run multiIntersectBed on the three files and capture its standard output.
# The command is passed as an argument list (no shell), so paths containing
# spaces or shell metacharacters are handed to the tool unchanged and cannot
# be interpreted as shell syntax.
try:
    command2 = subprocess.Popen(
        ["multiIntersectBed", "-i", bed1, bed2, bed3],
        stdout=subprocess.PIPE,
    )
except FileNotFoundError:
    sys.exit("multiIntersectBed was not found on PATH")
# stderr is not piped, so `error` is always None and the tool's own messages
# go straight to the terminal.
output, error = command2.communicate()
if command2.returncode != 0:
    # Do not build a table from the partial or empty output of a failed run.
    sys.exit(f"multiIntersectBed failed with exit code {command2.returncode}")
results = output.decode('utf-8').strip()

# Parse the tab-separated output into a DataFrame. Eight columns are expected:
# interval coordinates, the number of overlapping files, the comma-separated
# label of those files, and three trailing columns that are discarded below
# (presumably one presence flag per input file; see the bedtools multiinter
# documentation).
columns = ["Chro", "Start", "End", "nb_overlap_file", "label_overlap_file", "None1", "None2", "None3"]
rows = [line.split('\t') for line in results.split('\n') if line]
raw_dataframe = pd.DataFrame(rows, columns=columns)

# Keep only the informative columns. The explicit copy makes `df` independent
# of `raw_dataframe`, so the dtype conversions below never write into a view.
df = raw_dataframe.loc[:, ["Chro", "Start", "End", "nb_overlap_file", "label_overlap_file"]].copy()
df['nb_overlap_file'] = df['nb_overlap_file'].astype(int)
df['label_overlap_file'] = df['label_overlap_file'].astype(str)

# Intervals present in all three files.
filtre1 = df[df['nb_overlap_file'] == 3]
count1 = filtre1.shape[0]

# Intervals shared by exactly two files.
# NOTE(review): the '2,3' combination is not selected here; confirm whether
# it is handled elsewhere or is missing.
filtre2 = df.loc[(df['nb_overlap_file'] == 2) & (df['label_overlap_file'] == '1,3')]
filtre3 = df.loc[(df['nb_overlap_file'] == 2) & (df['label_overlap_file'] == '1,2')]
print(filtre3)

# Intervals found in a single file only.
filtre4 = df[(df['nb_overlap_file'] == 1) & (df['label_overlap_file'] == '1')]
filtre5 = df[(df['nb_overlap_file'] == 1) & (df['label_overlap_file'] == '2')]
print(filtre5)
filtre6 = df[(df['nb_overlap_file'] == 1) & (df['label_overlap_file'] == '3')]
"""
# NOTE: disabled code kept inside a string for reference only. It uses names
# (intersection, nb_features, feature) that are not defined in the visible
# part of this script.
# Build the list of column names for the DataFrame:
columns = ["chromosome_f1", "start_f1", "end_f1", "feature", "chromosome_f2", "start_f2", "end_f2", "methylation", "overlap_length"]
# Build the list of rows for the DataFrame:
rows= [line.split('\t') for line in intersection.split('\n') if line]
# The line above splits the `intersection` string into a list of lists,
# where each sub-list is one row of the intersection table,
# skipping empty lines.
dataframe = pd.DataFrame(rows, columns=columns)
methylated_counts = dataframe.groupby(['start_f1', 'end_f1']).size().reset_index(name='Methylated_cytosine')
## Plotting pie chart:
# Proportion of genes with and without methylation
num_feature_methylated = len(set(dataframe['start_f1']))
num_feature_unmethylated = nb_features - num_feature_methylated
# Create the figure
fig, ax = plt.subplots(figsize=(6, 6))
# Data to plot
data = [num_feature_methylated, num_feature_unmethylated]
# Labels for the pie chart
labels = ['Methylated', 'Unmethylated']
# Colors for each slice of the pie chart
colors = ['#ffc107', '#007bff']
# Draw the pie chart
ax.pie(data, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
# Add a title
ax.set_title(f'Proportion of methylated and unmethylated {feature}')
# Show the plot
plt.show()
"""