Skip to content
Snippets Groups Projects
Commit a213e549 authored by fadwael.khaddar_ird.fr's avatar fadwael.khaddar_ird.fr
Browse files

VennDiagramm

parent 0347f483
No related branches found
No related tags found
No related merge requests found
import os
import subprocess
import numpy as np
import pandas as pd
import glob
import sys
import matplotlib
import matplotlib.pyplot as plt
from matplotlib_venn import venn3
import matplotlib.patches as mpatches
import argparse
import re
from tkinter import *
matplotlib.use('TkAgg')
parser = argparse.ArgumentParser(description="Venn Diagramm of gff and bed files")
parser.add_argument('-gff', type=str, help="Path to gff file")
parser.add_argument('-bed', type=str, help="Path to bed file")
parser.add_argument('-feature', type=str, default= all, help="chosen feature : CDS, gene, exon")
parser.add_argument('-o', type=str, help="Path to output fold")
args = parser.parse_args()
gff = args.gff
bed = args.bed
output = args.o
feature= args.feature
# Running awk for filtring gff file.
process = subprocess.Popen(f'wc -l {bed}', shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output, error = process.communicate()
count_C = output.decode('utf-8').strip()
count_bed = int(re.findall('\d+', count_C)[0])
if feature != "all":
# Exécution de la première commande pour extraire les lignes qui ne contiennent pas "mRNA"
command1 = subprocess.Popen(f'awk -v OFS="\t" \'$3 != "mRNA" {{print $1 , $4 , $5 , $3}}\' {gff} | grep {feature}', shell=True, stdout=subprocess.PIPE) #stdout=subprocess.PIPE, qui permet de récupérer la sortie de la commande
output1, errors1 = command1.communicate()
count_features = output1.decode('utf-8').strip().split('\n')
# Utilisation de la sortie de la première commande comme entrée pour la deuxième commande
command2 = subprocess.Popen(f"bedtools intersect -wao -a stdin -b {bed}", shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
output2, errors2 = command2.communicate(input=output1)
intersection = output2.decode('utf-8').strip()
else:
command1 = subprocess.Popen(f'awk -v OFS="\t" \'$3 != "mRNA" {{print $1 , $4 , $5 , $3}}\' {gff} ', shell=True, stdout=subprocess.PIPE) #stdout=subprocess.PIPE, qui permet de récupérer la sortie de la commande
command2 = subprocess.Popen(f"bedtools intersect -a stdin -b {bed}", shell=True, stdin=command1.stdout, stdout=subprocess.PIPE) #stdin est utilisée pour lire les résultats de la première commande, et la sortie est également redirigée vers un tube, en utilisant subprocess.PIPE
output, errors = command2.communicate()
intersection = output.decode('utf-8').strip()
print(intersection)
# Création de la liste des colonnes pour le DataFrame:
columns = ["chromosome_f1", "start_f1", "end_f1", "feature", "chromosome_f2", "start_f2", "end_f2", "methylation", "overlap_length"]
# Création de liste de ligne pout le DataFrame:
rows= [line.split('\t') for line in intersection.split('\n') if line]
#Cette ligne de code divise la chaîne intersection en une liste de listes,
# où chaque sous-liste représente une ligne du tableau de l'intersection,
# en filtrant les lignes vides.
dataframe = pd.DataFrame(rows, columns=columns)
cds_counts = dataframe.groupby(['start_f1', 'end_f1']).size().reset_index(name='Methylated_cytosine')
# Afficher le pie chart
plt.axis('equal')
plt.title('Nombre de start_f1')
plt.show()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment