nom du génome cible maintenant spécifié dans le main, plus simple pour le changer

5bad4c14 · nina.marthe_ird.fr · 85ed4724 · 5bad4c14 · 5bad4c14 · 5bad4c14
Commit 5bad4c14 authored 1 year ago by nina.marthe_ird.fr
--- a/Functions.py
+++ b/Functions.py
@@ -2,9 +2,6 @@ from Graph_gff import Segments, Features, get_feature_start_on_segment, get_feat
 global segments_on_target_genome
 segments_on_target_genome={}
-global target_genome_name
-target_genome_name="CM020642.1_Azucena_chromosome10"
-target_genome_name="genome4_chr10"
 # get the start position of the features on the linear genome, using their coordinates on the graph and the coordinates of the segments on the genome
 def get_feature_start_on_genome(start_seg,feat_id):
@@ -245,7 +242,7 @@ def compare_strand(list_1,list_2,list_1_unstrand,list_2_unstrand):
            same_strand_count+=1
    return [seg_common,same_strand_count]
-def get_feature_path(paths,first_seg,last_seg):
+def get_feature_path(paths,first_seg,last_seg,target_genome_name):
    # find the path in azucena. 
    first_strand=convert_strand(segments_on_target_genome[first_seg][3])
    first_seg_stranded=first_strand+first_seg
@@ -325,7 +322,7 @@ class Variation:
    #def __str__(self):
    #    return f"id={self.id}, position on the original genome={self.chr}:{self.start}-{self.stop}, size={self.size}, features={self.features}"
-def create_var(feature_id,first_seg,last_seg,paths):
+def create_var(feature_id,first_seg,last_seg,paths,target_genome_name):
    feature=Features[feature_id]
    start_new_genome=get_feature_start_on_genome(first_seg,feature_id)
    stop_new_genome=get_feature_stop_on_genome(last_seg,feature_id)
@@ -333,7 +330,7 @@ def create_var(feature_id,first_seg,last_seg,paths):
    size_diff=str(size_new_genome-feature.size)
    # get feature paths on the original genome and on the target genome
-    list_segfeat_azu=get_feature_path(paths,first_seg,last_seg)
+    list_segfeat_azu=get_feature_path(paths,first_seg,last_seg,target_genome_name)
    list_segfeat_nb=feature.segments_list
    [list_segfeat_nb,list_segfeat_azu,inversion]=detect_gene_inversion(list_segfeat_nb,list_segfeat_azu)

--- a/Functions_output.py
+++ b/Functions_output.py
@@ -51,7 +51,7 @@ def gff_one(first_seg,last_seg,feature_id,list_seg,max_diff):
 # writes the gff of azucena using the gff of the graph
-def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var):
+def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var,target_genome_name):
    print("generation of the genome's gff ")
    # create variables and open files
@@ -112,7 +112,7 @@ def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var):
        # outputs the detail of variations of the feature :
        if var:
-            print_variations(first_seg,last_seg,feat,paths,seg_seq)
+            print_variations(first_seg,last_seg,feat,paths,seg_seq,target_genome_name)
            write_line("",output_variations,True)
        if stats==True:
@@ -141,10 +141,10 @@ def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var):
 # functions to get the detail of the variations in the features
-def print_variations(first_seg,last_seg,feat,paths,seg_seq):
+def print_variations(first_seg,last_seg,feat,paths,seg_seq,target_genome_name):
    if (first_seg!=''): # if the feature is not completly absent        # add the else, output absent features
-        [variation,list_segfeat_nb,list_segfeat_azu]=create_var(feat,first_seg,last_seg,paths) # removes the strands in the segment lists
+        [variation,list_segfeat_nb,list_segfeat_azu]=create_var(feat,first_seg,last_seg,paths,target_genome_name) # removes the strands in the segment lists
        feature=Features[feat]
        feat_start=feature.start
        # loop to go through both paths with i and j

--- a/main.py
+++ b/main.py
@@ -7,7 +7,53 @@ from Graph_gff import *
 from Functions_output import *
 #from inference import *
-run="test"
+run="command_line"
+if run=="command_line":
+    # command line mode : main.py <intersect> <gfa> <pos_seg> [target_genome_name]
+    # the names of the output files are derived from the names of the gfa and pos_seg files
+    import sys
+    if len(sys.argv)>=2 and sys.argv[1]=="-h" : # help is checked first, so that "-h" alone prints the full help
+        print("expected input : intersect, gfa file  with walks, bed file with positions of the segments on the target genome")
+        print("output : graph gff, graph gaf, target genome gff*2+variations")
+        sys.exit(1)
+    elif len(sys.argv)<4 : # intersect, gfa, pos_seg are mandatory
+        print("expected input : intersect, gfa file  with walks, bed file with positions of the segments on the target genome")
+        #print("output : graph gff, graph gaf, target genome gff*2+variations")
+        sys.exit(1)
+    intersect=sys.argv[1]
+    gfa=sys.argv[2]
+    pos_seg=sys.argv[3]
+    # file names without the directory and without the extension (everything before the first dot).
+    # taking the first element directly also works for a file name that has no extension.
+    gfa_prefix=gfa.split("/")[-1].split(".")[0]
+    pos_seg_prefix=pos_seg.split("/")[-1].split(".")[0]
+    out_gff=gfa_prefix+".gff"
+    out_gaf=gfa_prefix+".gaf"
+    out_once=pos_seg_prefix+".gff"
+    out_detail=pos_seg_prefix+"_detail.gff"
+    out_var=pos_seg_prefix+"_variations.txt"
+    if len(sys.argv)>=5: # optional 4th argument : name of the target genome (sys.argv[0] is the script itself)
+        target_genome_name=sys.argv[4]
+    else: # by default the name of the target genome is the prefix of the pos_seg file
+        target_genome_name=pos_seg_prefix
+    print(target_genome_name)
+        # input : intersect, gfa, pos_seg.
+        # out_gff, out_gaf, out_once, out_var, out_detail.
+        # out_gf/af = "graph.gf/af"
+        # out_once/detail: take pos_seg, remove the .bed, add _detail if needed. same for var, rename it azu_var_chr10.txt ?
+        # pos_seg.split("/")[-1].split(".")[0:-1][0]+".gff"
+    load_intersect(intersect)
+    # outputs the gff and gaf of the graph for chr10
+    graph_gff(out_gff)
+    graph_gaf(out_gaf,gfa)
+    # outputs the gff of a genome for the chr10
+    genome_gff(pos_seg,out_gff,gfa,out_once,out_detail,out_var,target_genome_name)
 if run=="test":
    intersect_path='/home/nina/annotpangenome/test_data/input_data_inf/intersect.bed'
@@ -27,29 +73,10 @@ if run=="test":
    gff="test_data/graph.gff"
-    # outputs the gff of a genome for the chr10
+    target_genome_name="genome4_chr10"
-    genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var)
-if run=="chr3":
-    intersect_path='/home/nina/annotpangenome/chr3/intersect_segments_genes_irgsp_chr3.bed'
-    load_intersect(intersect_path)
-    # outputs the gff of the graph for chr10
-    output_gff='graph_chr3.gff'
-    gfa="test_graph"
-    graph_gff(output_gff)
-    pos_seg="seg_coord/AzucenaRS1_chromosome3_corrected.bed"
-    out_once="azucena_chr3.gff"
-    out_var="variations_chr3.txt"
-    out_detail="azucena_detail_chr3.gff"
-    gff="graph_chr3.gff"
    # outputs the gff of a genome for the chr10
-    genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var)
+    genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var,target_genome_name)
 if run=="reel":
@@ -79,8 +106,9 @@ if run=="reel":
        out_var="variations_chr10.gff"
    gff="graph_chr10.gff"
+    target_genome_name="CM020642.1_Azucena_chromosome10"
    # outputs the gff of a genome for the chr10
-    genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var)
+    genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var,target_genome_name)