From 5bad4c1466ef615eeb549e4f5d039bd0237873a7 Mon Sep 17 00:00:00 2001
From: NMarthe <nina.marthe@ird.fr>
Date: Mon, 30 Oct 2023 14:45:32 +0100
Subject: [PATCH] =?UTF-8?q?nom=20du=20g=C3=A9nome=20cible=20maintenant=20s?=
 =?UTF-8?q?p=C3=A9cifi=C3=A9=20dans=20le=20main,=20plus=20simple=20pour=20?=
 =?UTF-8?q?le=20changer?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Functions.py        |  9 ++----
 Functions_output.py |  8 ++---
 main.py             | 74 +++++++++++++++++++++++++++++++--------------
 3 files changed, 58 insertions(+), 33 deletions(-)

diff --git a/Functions.py b/Functions.py
index 923adeb..eed43ab 100644
--- a/Functions.py
+++ b/Functions.py
@@ -2,9 +2,6 @@ from Graph_gff import Segments, Features, get_feature_start_on_segment, get_feat
 global segments_on_target_genome
 segments_on_target_genome={}
 
-global target_genome_name
-target_genome_name="CM020642.1_Azucena_chromosome10"
-target_genome_name="genome4_chr10"
 
 # get the start position of the features on the linear genome, using their coordinates on the graph and the coordinantes of the segments on the genome
 def get_feature_start_on_genome(start_seg,feat_id):
@@ -245,7 +242,7 @@ def compare_strand(list_1,list_2,list_1_unstrand,list_2_unstrand):
             same_strand_count+=1
     return [seg_common,same_strand_count]
 
-def get_feature_path(paths,first_seg,last_seg):
+def get_feature_path(paths,first_seg,last_seg,target_genome_name):
     # find the path in azucena. 
     first_strand=convert_strand(segments_on_target_genome[first_seg][3])
     first_seg_stranded=first_strand+first_seg
@@ -325,7 +322,7 @@ class Variation:
     #def __str__(self):
     #    return f"id={self.id}, position on the original genome={self.chr}:{self.start}-{self.stop}, size={self.size}, features={self.features}"
 
-def create_var(feature_id,first_seg,last_seg,paths):
+def create_var(feature_id,first_seg,last_seg,paths,target_genome_name):
     feature=Features[feature_id]
     start_new_genome=get_feature_start_on_genome(first_seg,feature_id)
     stop_new_genome=get_feature_stop_on_genome(last_seg,feature_id)
@@ -333,7 +330,7 @@ def create_var(feature_id,first_seg,last_seg,paths):
     size_diff=str(size_new_genome-feature.size)
 
     # get feature paths on the original genome and on the target genome
-    list_segfeat_azu=get_feature_path(paths,first_seg,last_seg)
+    list_segfeat_azu=get_feature_path(paths,first_seg,last_seg,target_genome_name)
     list_segfeat_nb=feature.segments_list
     [list_segfeat_nb,list_segfeat_azu,inversion]=detect_gene_inversion(list_segfeat_nb,list_segfeat_azu)
 
diff --git a/Functions_output.py b/Functions_output.py
index 7fdfe54..82c6edc 100644
--- a/Functions_output.py
+++ b/Functions_output.py
@@ -51,7 +51,7 @@ def gff_one(first_seg,last_seg,feature_id,list_seg,max_diff):
 
 
 # writes the gff of azucena using the gff of the graph
-def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var):
+def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var,target_genome_name):
     print("generation of the genome's gff ")
 
     # create variables and open files
@@ -112,7 +112,7 @@ def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var):
 
         # outputs the detail of variations of the feature :
         if var:
-            print_variations(first_seg,last_seg,feat,paths,seg_seq)
+            print_variations(first_seg,last_seg,feat,paths,seg_seq,target_genome_name)
             write_line("",output_variations,True)
 
         if stats==True:
@@ -141,10 +141,10 @@ def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var):
 
 # functions to get the detail of the variations in the features
 
-def print_variations(first_seg,last_seg,feat,paths,seg_seq):
+def print_variations(first_seg,last_seg,feat,paths,seg_seq,target_genome_name):
 
     if (first_seg!=''): # if the feature is not completly absent        # add the else, output absent features
-        [variation,list_segfeat_nb,list_segfeat_azu]=create_var(feat,first_seg,last_seg,paths) # removes the strands in the segment lists
+        [variation,list_segfeat_nb,list_segfeat_azu]=create_var(feat,first_seg,last_seg,paths,target_genome_name) # removes the strands in the segment lists
         feature=Features[feat]
         feat_start=feature.start
         # loop to go through both paths with i and j
diff --git a/main.py b/main.py
index 64ee9b2..4f1f88e 100755
--- a/main.py
+++ b/main.py
@@ -7,7 +7,53 @@ from Graph_gff import *
 from Functions_output import *
 #from inference import *
 
-run="test"
+run="command_line"
+
+if run=="command_line":
+
+    import sys
+    usage="expected input : intersect, gfa file  with walks, bed file with positions of the segments on the target genome"
+    if sys.argv[1:2]==["-h"]: # help is tested first (slice : no IndexError without arguments) so that "-h" alone works
+        print(usage)
+        print("output : graph gff, graph gaf, target genome gff*2+variations")
+        sys.exit(0)
+    if len(sys.argv)<4: # intersect, gfa and pos_seg are mandatory
+        print(usage)
+        sys.exit(1)
+
+    intersect=sys.argv[1]
+    gfa=sys.argv[2]
+    pos_seg=sys.argv[3]
+
+    # base names : file name without its directory, cut at the first "." (also works when the name has no extension)
+    gfa_name=gfa.split("/")[-1].split(".")[0]
+    pos_seg_name=pos_seg.split("/")[-1].split(".")[0]
+
+    # output files : the graph outputs are named after the gfa file, the genome outputs after the pos_seg file
+    out_gff=gfa_name+".gff"
+    out_gaf=gfa_name+".gaf"
+    out_once=pos_seg_name+".gff"
+    out_detail=pos_seg_name+"_detail.gff"
+    out_var=pos_seg_name+"_variations.txt"
+
+    # optional 4th argument : name of the target genome. default : base name of pos_seg (truncated at its first ".")
+    if len(sys.argv)>=5:
+        target_genome_name=sys.argv[4] # sys.argv[0] is the script, so the 4th argument is at index 4
+    else:
+        target_genome_name=pos_seg_name
+    print(target_genome_name)
+
+    load_intersect(intersect)
+
+    # outputs the gff and gaf of the graph
+    graph_gff(out_gff)
+    graph_gaf(out_gaf,gfa)
+
+    # outputs the gff of the target genome (out_gff, written just above, is its input)
+    genome_gff(pos_seg,out_gff,gfa,out_once,out_detail,out_var,target_genome_name)
+
+
+
 
 if run=="test":
     intersect_path='/home/nina/annotpangenome/test_data/input_data_inf/intersect.bed'
@@ -27,29 +73,10 @@ if run=="test":
 
     gff="test_data/graph.gff"
 
-    # outputs the gff of a genome for the chr10
-    genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var)
-
-
-if run=="chr3":
-    intersect_path='/home/nina/annotpangenome/chr3/intersect_segments_genes_irgsp_chr3.bed'
-    load_intersect(intersect_path)
-
-    # outputs the gff of the graph for chr10
-    output_gff='graph_chr3.gff'
-    gfa="test_graph"
-    graph_gff(output_gff)
-
-    pos_seg="seg_coord/AzucenaRS1_chromosome3_corrected.bed"
-    out_once="azucena_chr3.gff"
-    out_var="variations_chr3.txt"
-    out_detail="azucena_detail_chr3.gff"
-
-
-    gff="graph_chr3.gff"
+    target_genome_name="genome4_chr10"
 
     # outputs the gff of a genome for the chr10
-    genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var)
+    genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var,target_genome_name)
 
 if run=="reel":
 
@@ -79,8 +106,9 @@ if run=="reel":
         out_var="variations_chr10.gff"
 
     gff="graph_chr10.gff"
+    target_genome_name="CM020642.1_Azucena_chromosome10"
 
     # outputs the gff of a genome for the chr10
-    genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var)
+    genome_gff(pos_seg,gff,gfa,out_once,out_detail,out_var,target_genome_name)
 
 
-- 
GitLab