Skip to content
Snippets Groups Projects
Commit 5bad4c14 authored by nina.marthe_ird.fr's avatar nina.marthe_ird.fr
Browse files

nom du génome cible maintenant spécifié dans le main, plus simple pour le changer

parent 85ed4724
No related branches found
No related tags found
No related merge requests found
...@@ -2,9 +2,6 @@ from Graph_gff import Segments, Features, get_feature_start_on_segment, get_feat ...@@ -2,9 +2,6 @@ from Graph_gff import Segments, Features, get_feature_start_on_segment, get_feat
global segments_on_target_genome global segments_on_target_genome
segments_on_target_genome={} segments_on_target_genome={}
global target_genome_name
target_genome_name="CM020642.1_Azucena_chromosome10"
target_genome_name="genome4_chr10"
# get the start position of the features on the linear genome, using their coordinates on the graph and the coordinates of the segments on the genome # get the start position of the features on the linear genome, using their coordinates on the graph and the coordinates of the segments on the genome
def get_feature_start_on_genome(start_seg,feat_id): def get_feature_start_on_genome(start_seg,feat_id):
...@@ -245,7 +242,7 @@ def compare_strand(list_1,list_2,list_1_unstrand,list_2_unstrand): ...@@ -245,7 +242,7 @@ def compare_strand(list_1,list_2,list_1_unstrand,list_2_unstrand):
same_strand_count+=1 same_strand_count+=1
return [seg_common,same_strand_count] return [seg_common,same_strand_count]
def get_feature_path(paths,first_seg,last_seg): def get_feature_path(paths,first_seg,last_seg,target_genome_name):
# find the path in azucena. # find the path in azucena.
first_strand=convert_strand(segments_on_target_genome[first_seg][3]) first_strand=convert_strand(segments_on_target_genome[first_seg][3])
first_seg_stranded=first_strand+first_seg first_seg_stranded=first_strand+first_seg
...@@ -325,7 +322,7 @@ class Variation: ...@@ -325,7 +322,7 @@ class Variation:
#def __str__(self): #def __str__(self):
# return f"id={self.id}, position on the original genome={self.chr}:{self.start}-{self.stop}, size={self.size}, features={self.features}" # return f"id={self.id}, position on the original genome={self.chr}:{self.start}-{self.stop}, size={self.size}, features={self.features}"
def create_var(feature_id,first_seg,last_seg,paths): def create_var(feature_id,first_seg,last_seg,paths,target_genome_name):
feature=Features[feature_id] feature=Features[feature_id]
start_new_genome=get_feature_start_on_genome(first_seg,feature_id) start_new_genome=get_feature_start_on_genome(first_seg,feature_id)
stop_new_genome=get_feature_stop_on_genome(last_seg,feature_id) stop_new_genome=get_feature_stop_on_genome(last_seg,feature_id)
...@@ -333,7 +330,7 @@ def create_var(feature_id,first_seg,last_seg,paths): ...@@ -333,7 +330,7 @@ def create_var(feature_id,first_seg,last_seg,paths):
size_diff=str(size_new_genome-feature.size) size_diff=str(size_new_genome-feature.size)
# get feature paths on the original genome and on the target genome # get feature paths on the original genome and on the target genome
list_segfeat_azu=get_feature_path(paths,first_seg,last_seg) list_segfeat_azu=get_feature_path(paths,first_seg,last_seg,target_genome_name)
list_segfeat_nb=feature.segments_list list_segfeat_nb=feature.segments_list
[list_segfeat_nb,list_segfeat_azu,inversion]=detect_gene_inversion(list_segfeat_nb,list_segfeat_azu) [list_segfeat_nb,list_segfeat_azu,inversion]=detect_gene_inversion(list_segfeat_nb,list_segfeat_azu)
......
...@@ -51,7 +51,7 @@ def gff_one(first_seg,last_seg,feature_id,list_seg,max_diff): ...@@ -51,7 +51,7 @@ def gff_one(first_seg,last_seg,feature_id,list_seg,max_diff):
# writes the gff of azucena using the gff of the graph # writes the gff of azucena using the gff of the graph
def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var): def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var,target_genome_name):
print("generation of the genome's gff ") print("generation of the genome's gff ")
# create variables and open files # create variables and open files
...@@ -112,7 +112,7 @@ def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var): ...@@ -112,7 +112,7 @@ def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var):
# outputs the detail of variations of the feature : # outputs the detail of variations of the feature :
if var: if var:
print_variations(first_seg,last_seg,feat,paths,seg_seq) print_variations(first_seg,last_seg,feat,paths,seg_seq,target_genome_name)
write_line("",output_variations,True) write_line("",output_variations,True)
if stats==True: if stats==True:
...@@ -141,10 +141,10 @@ def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var): ...@@ -141,10 +141,10 @@ def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var):
# functions to get the detail of the variations in the features # functions to get the detail of the variations in the features
def print_variations(first_seg,last_seg,feat,paths,seg_seq): def print_variations(first_seg,last_seg,feat,paths,seg_seq,target_genome_name):
if (first_seg!=''): # if the feature is not completly absent # add the else, output absent features if (first_seg!=''): # if the feature is not completly absent # add the else, output absent features
[variation,list_segfeat_nb,list_segfeat_azu]=create_var(feat,first_seg,last_seg,paths) # removes the strands in the segment lists [variation,list_segfeat_nb,list_segfeat_azu]=create_var(feat,first_seg,last_seg,paths,target_genome_name) # removes the strands in the segment lists
feature=Features[feat] feature=Features[feat]
feat_start=feature.start feat_start=feature.start
# loop to go through both paths with i and j # loop to go through both paths with i and j
......
...@@ -7,7 +7,53 @@ from Graph_gff import * ...@@ -7,7 +7,53 @@ from Graph_gff import *
from Functions_output import * from Functions_output import *
#from inference import * #from inference import *
run="test" run="command_line"
if run=="command_line":
    # Command-line mode.
    # usage : <script> intersect gfa pos_seg [target_genome_name]
    #   intersect          : intersect file, passed to load_intersect
    #   gfa                : gfa file with walks
    #   pos_seg            : bed file with positions of the segments on the target genome
    #   target_genome_name : optional, defaults to the prefix of the pos_seg file name
    import sys

    usage_input="expected input : intersect, gfa file with walks, bed file with positions of the segments on the target genome"
    usage_output="output : graph gff, graph gaf, target genome gff*2+variations"

    def _file_prefix(path):
        # File name without its directory, cut at the first ".".
        # A name without any "." is returned unchanged instead of raising an IndexError.
        return path.split("/")[-1].split(".")[0]

    # The help flag is tested before the argument count, otherwise "<script> -h" never reaches it.
    if len(sys.argv)>=2 and sys.argv[1]=="-h":
        print(usage_input)
        print(usage_output)
        sys.exit(1)  # exit status kept at 1, as before
    if len(sys.argv)<4:  # intersect, gfa, pos_seg
        print(usage_input)
        sys.exit(1)

    intersect=sys.argv[1]
    gfa=sys.argv[2]
    pos_seg=sys.argv[3]

    # Output names:
    # out_gff / out_gaf : prefix of the gfa file name + ".gff" / ".gaf"
    # out_once / out_detail / out_var : prefix of the pos_seg file name (the .bed is dropped)
    #   + ".gff" / "_detail.gff" / "_variations.txt"
    # (open question from the author: rename the variations file to azu_var_chr10.txt ?)
    gfa_prefix=_file_prefix(gfa)
    pos_seg_prefix=_file_prefix(pos_seg)
    out_gff=gfa_prefix+".gff"
    out_gaf=gfa_prefix+".gaf"
    out_once=pos_seg_prefix+".gff"
    out_detail=pos_seg_prefix+"_detail.gff"
    out_var=pos_seg_prefix+"_variations.txt"

    # The optional 4th argument is sys.argv[4]; sys.argv[5] does not exist when len(sys.argv)==5.
    if len(sys.argv)>=5:
        target_genome_name=sys.argv[4]
    else:
        target_genome_name=pos_seg_prefix
    print(target_genome_name)

    load_intersect(intersect)

    # outputs the gff and gaf of the graph for chr10
    graph_gff(out_gff)
    graph_gaf(out_gaf,gfa)

    # outputs the gff of a genome for the chr10
    genome_gff(pos_seg,out_gff,gfa,out_once,out_detail,out_var,target_genome_name)
if run=="test": if run=="test":
intersect_path='/home/nina/annotpangenome/test_data/input_data_inf/intersect.bed' intersect_path='/home/nina/annotpangenome/test_data/input_data_inf/intersect.bed'
...@@ -27,29 +73,10 @@ if run=="test": ...@@ -27,29 +73,10 @@ if run=="test":
gff="test_data/graph.gff" gff="test_data/graph.gff"
# outputs the gff of a genome for the chr10 target_genome_name="genome4_chr10"
genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var)
if run=="chr3":
intersect_path='/home/nina/annotpangenome/chr3/intersect_segments_genes_irgsp_chr3.bed'
load_intersect(intersect_path)
# outputs the gff of the graph for chr10
output_gff='graph_chr3.gff'
gfa="test_graph"
graph_gff(output_gff)
pos_seg="seg_coord/AzucenaRS1_chromosome3_corrected.bed"
out_once="azucena_chr3.gff"
out_var="variations_chr3.txt"
out_detail="azucena_detail_chr3.gff"
gff="graph_chr3.gff"
# outputs the gff of a genome for the chr10 # outputs the gff of a genome for the chr10
genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var) genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var,target_genome_name)
if run=="reel": if run=="reel":
...@@ -79,8 +106,9 @@ if run=="reel": ...@@ -79,8 +106,9 @@ if run=="reel":
out_var="variations_chr10.gff" out_var="variations_chr10.gff"
gff="graph_chr10.gff" gff="graph_chr10.gff"
target_genome_name="CM020642.1_Azucena_chromosome10"
# outputs the gff of a genome for the chr10 # outputs the gff of a genome for the chr10
genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var) genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var,target_genome_name)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment