diff --git a/Functions.py b/Functions.py index 923adebfc90eea1f5bd16c3476c2e4aaf2f2dd66..eed43ab2d32ce082c06fd6b65fd9e8e30805a611 100644 --- a/Functions.py +++ b/Functions.py @@ -2,9 +2,6 @@ from Graph_gff import Segments, Features, get_feature_start_on_segment, get_feat global segments_on_target_genome segments_on_target_genome={} -global target_genome_name -target_genome_name="CM020642.1_Azucena_chromosome10" -target_genome_name="genome4_chr10" # get the start position of the features on the linear genome, using their coordinates on the graph and the coordinantes of the segments on the genome def get_feature_start_on_genome(start_seg,feat_id): @@ -245,7 +242,7 @@ def compare_strand(list_1,list_2,list_1_unstrand,list_2_unstrand): same_strand_count+=1 return [seg_common,same_strand_count] -def get_feature_path(paths,first_seg,last_seg): +def get_feature_path(paths,first_seg,last_seg,target_genome_name): # find the path in azucena. first_strand=convert_strand(segments_on_target_genome[first_seg][3]) first_seg_stranded=first_strand+first_seg @@ -325,7 +322,7 @@ class Variation: #def __str__(self): # return f"id={self.id}, position on the original genome={self.chr}:{self.start}-{self.stop}, size={self.size}, features={self.features}" -def create_var(feature_id,first_seg,last_seg,paths): +def create_var(feature_id,first_seg,last_seg,paths,target_genome_name): feature=Features[feature_id] start_new_genome=get_feature_start_on_genome(first_seg,feature_id) stop_new_genome=get_feature_stop_on_genome(last_seg,feature_id) @@ -333,7 +330,7 @@ def create_var(feature_id,first_seg,last_seg,paths): size_diff=str(size_new_genome-feature.size) # get feature paths on the original genome and on the target genome - list_segfeat_azu=get_feature_path(paths,first_seg,last_seg) + list_segfeat_azu=get_feature_path(paths,first_seg,last_seg,target_genome_name) list_segfeat_nb=feature.segments_list 
[list_segfeat_nb,list_segfeat_azu,inversion]=detect_gene_inversion(list_segfeat_nb,list_segfeat_azu) diff --git a/Functions_output.py b/Functions_output.py index 7fdfe540d0e1aaadbe305c41c2ee2b020823835d..82c6edcfa8d9bb498f0a99199eab26a80be23b79 100644 --- a/Functions_output.py +++ b/Functions_output.py @@ -51,7 +51,7 @@ def gff_one(first_seg,last_seg,feature_id,list_seg,max_diff): # writes the gff of azucena using the gff of the graph -def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var): +def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var,target_genome_name): print("generation of the genome's gff ") # create variables and open files @@ -112,7 +112,7 @@ def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var): # outputs the detail of variations of the feature : if var: - print_variations(first_seg,last_seg,feat,paths,seg_seq) + print_variations(first_seg,last_seg,feat,paths,seg_seq,target_genome_name) write_line("",output_variations,True) if stats==True: @@ -141,10 +141,10 @@ def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var): # functions to get the detail of the variations in the features -def print_variations(first_seg,last_seg,feat,paths,seg_seq): +def print_variations(first_seg,last_seg,feat,paths,seg_seq,target_genome_name): if (first_seg!=''): # if the feature is not completly absent # add the else, output absent features - [variation,list_segfeat_nb,list_segfeat_azu]=create_var(feat,first_seg,last_seg,paths) # removes the strands in the segment lists + [variation,list_segfeat_nb,list_segfeat_azu]=create_var(feat,first_seg,last_seg,paths,target_genome_name) # removes the strands in the segment lists feature=Features[feat] feat_start=feature.start # loop to go through both paths with i and j diff --git a/main.py b/main.py index 64ee9b29a3f8990ded5325e37d7cfabd81997723..4f1f88eeba424aa85f69f60c8e8c5f0eaae01daf 100755 --- a/main.py +++ b/main.py @@ -7,7 +7,53 @@ from Graph_gff import * from Functions_output import * 
#from inference import * -run="test" +run="command_line" + +if run=="command_line": + + import sys + if len(sys.argv)>1 and sys.argv[1]=="-h" :# help is tested first so that "main.py -h" reaches the full help text + print("expected input : intersect, gfa file with walks, bed file with positions of the segments on the target genome") + print("output : graph gff, graph gaf, target genome gff*2+variations") + sys.exit(1) + elif not(len(sys.argv)>=4) :# intersect, gfa, pos_seg + print("expected input : intersect, gfa file with walks, bed file with positions of the segments on the target genome") + #print("output : graph gff, graph gaf, target genome gff*2+variations") + sys.exit(1) + + intersect=sys.argv[1] + gfa=sys.argv[2] + pos_seg=sys.argv[3] + + out_gff=gfa.split("/")[-1].split(".")[0:-1][0]+".gff" + out_gaf=gfa.split("/")[-1].split(".")[0:-1][0]+".gaf" + out_once=pos_seg.split("/")[-1].split(".")[0:-1][0]+".gff" + out_detail=pos_seg.split("/")[-1].split(".")[0:-1][0]+"_detail.gff" + out_var=pos_seg.split("/")[-1].split(".")[0:-1][0]+"_variations.txt" + if len(sys.argv)==5: # optional 4th argument : name of the target genome + target_genome_name=sys.argv[4] # index 4 is the last valid one when len(sys.argv)==5 + else: # default : first dot-separated part of the pos_seg file name + target_genome_name=pos_seg.split("/")[-1].split(".")[0:-1][0] + print(target_genome_name) + + + # input : intersect, gfa, pos_seg. + # out_gff, out_gaf, out_once, out_var, out_detail. + # out_gf/af = "graph.gf/af" + # out_once/detail: take pos_seg, strip the .bed, append _detail when needed. same for var, rename it azu_var_chr10.txt ? 
+ # pos_seg.split("/")[-1].split(".")[0:-1][0]+".gff" + + load_intersect(intersect) + + # outputs the gff and gaf of the graph for chr10 + graph_gff(out_gff) + graph_gaf(out_gaf,gfa) + + # outputs the gff of a genome for the chr10 + genome_gff(pos_seg,out_gff,gfa,out_once,out_detail,out_var,target_genome_name) + + + if run=="test": intersect_path='/home/nina/annotpangenome/test_data/input_data_inf/intersect.bed' @@ -27,29 +73,10 @@ if run=="test": gff="test_data/graph.gff" - # outputs the gff of a genome for the chr10 - genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var) - - -if run=="chr3": - intersect_path='/home/nina/annotpangenome/chr3/intersect_segments_genes_irgsp_chr3.bed' - load_intersect(intersect_path) - - # outputs the gff of the graph for chr10 - output_gff='graph_chr3.gff' - gfa="test_graph" - graph_gff(output_gff) - - pos_seg="seg_coord/AzucenaRS1_chromosome3_corrected.bed" - out_once="azucena_chr3.gff" - out_var="variations_chr3.txt" - out_detail="azucena_detail_chr3.gff" - - - gff="graph_chr3.gff" + target_genome_name="genome4_chr10" # outputs the gff of a genome for the chr10 - genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var) + genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var,target_genome_name) if run=="reel": @@ -79,8 +106,9 @@ if run=="reel": out_var="variations_chr10.gff" gff="graph_chr10.gff" + target_genome_name="CM020642.1_Azucena_chromosome10" # outputs the gff of a genome for the chr10 - genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var) + genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var,target_genome_name)