VennDiagramm

a213e549 · fadwael.khaddar_ird.fr · 0347f483 · a213e549
Commit a213e549 authored 1 year ago by fadwael.khaddar_ird.fr
--- a/VennDiagramm.py
+++ b/VennDiagramm.py
+
+import os
+import subprocess
+import numpy as np
+import pandas as pd
+import  glob
+import sys
+import matplotlib
+import matplotlib.pyplot as plt
+from matplotlib_venn import venn3
+import matplotlib.patches as mpatches
+import argparse
+import re
+from tkinter import *
+matplotlib.use('TkAgg')
+
+parser = argparse.ArgumentParser(description="Venn Diagramm of gff and bed files")
+parser.add_argument('-gff', type=str, help="Path to gff file")
+parser.add_argument('-bed', type=str, help="Path to bed file")
+parser.add_argument('-feature', type=str, default= all, help="chosen feature : CDS, gene, exon")
+parser.add_argument('-o', type=str, help="Path to output fold")
+
+args = parser.parse_args()
+
+gff = args.gff
+bed = args.bed
+output = args.o
+feature= args.feature
+
+# Running awk for filtring gff file.
+
+process = subprocess.Popen(f'wc -l {bed}', shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+output, error = process.communicate()
+count_C = output.decode('utf-8').strip()
+count_bed = int(re.findall('\d+', count_C)[0])
+
+
+if feature != "all":
+    # Exécution de la première commande pour extraire les lignes qui ne contiennent pas "mRNA"
+    command1 = subprocess.Popen(f'awk -v OFS="\t" \'$3 != "mRNA" {{print $1 , $4 , $5 , $3}}\' {gff} | grep {feature}', shell=True, stdout=subprocess.PIPE)  #stdout=subprocess.PIPE, qui permet de récupérer la sortie de la commande
+    output1, errors1 = command1.communicate()
+    count_features = output1.decode('utf-8').strip().split('\n')
+
+    # Utilisation de la sortie de la première commande comme entrée pour la deuxième commande
+    command2 = subprocess.Popen(f"bedtools intersect -wao -a stdin -b {bed}", shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+    output2, errors2 = command2.communicate(input=output1)
+    intersection = output2.decode('utf-8').strip()
+
+else:
+    command1 = subprocess.Popen(f'awk -v OFS="\t" \'$3 != "mRNA" {{print $1 , $4 , $5 , $3}}\' {gff} ', shell=True, stdout=subprocess.PIPE)  #stdout=subprocess.PIPE, qui permet de récupérer la sortie de la commande
+    command2 = subprocess.Popen(f"bedtools intersect -a stdin -b {bed}", shell=True, stdin=command1.stdout, stdout=subprocess.PIPE) #stdin est utilisée pour lire les résultats de la première commande, et la sortie est également redirigée vers un tube, en utilisant subprocess.PIPE
+    output, errors = command2.communicate()
+
+    intersection = output.decode('utf-8').strip()
+    print(intersection)
+
+
+# Création de la liste des colonnes pour le DataFrame:
+columns = ["chromosome_f1", "start_f1", "end_f1", "feature", "chromosome_f2", "start_f2", "end_f2", "methylation", "overlap_length"]
+
+# Création de liste de ligne pout le DataFrame:
+rows= [line.split('\t') for line in intersection.split('\n') if line]
+
+#Cette ligne de code divise la chaîne intersection en une liste de listes,
+# où chaque sous-liste représente une ligne du tableau de l'intersection,
+# en filtrant les lignes vides.
+
+dataframe = pd.DataFrame(rows, columns=columns)
+cds_counts = dataframe.groupby(['start_f1', 'end_f1']).size().reset_index(name='Methylated_cytosine')
+
+
+
+
+# Afficher le pie chart
+plt.axis('equal')
+plt.title('Nombre de start_f1')
+plt.show()
\ No newline at end of file