debug du calcul de coordonnees

02044675 · NMarthe · 3d07d9f4 · 02044675 · 02044675 · 02044675
Commit 02044675 authored 1 year ago by NMarthe
--- a/ClassSegFeat.py
+++ b/ClassSegFeat.py
@@ -12,8 +12,8 @@ class Segment:
        
    def __str__(self):
        return f"id={self.id}, position sur nipponbare={self.chr}:{self.start}-{self.stop}, size={self.size}, features={self.features}"
-    
-    
+
+
 class Feature:
    def __init__(self,id,type,chr,start,stop,annot,childs,parent,seg):
        self.id=id

--- a/Functions.py
+++ b/Functions.py
@@ -126,6 +126,7 @@ def get_stop(seg,feat):
        result=s.size
    else:
        result=f.stop-s.start
+
    return result

 # go through all the segments in Segments and prints the gff, with one line for each segment/feature intersection
@@ -143,7 +144,8 @@ def graph_gff(file_out):
            feature=feature_stranded[1:]
            segment_stranded=strand+segment
            type=Features[feature].type
-            start=max(1,get_start(segment,feature))
+            #start=max(1,get_start(segment,feature))
+            start=get_start(segment,feature)
            stop=get_stop(segment,feature)
                
            # get the rank and the total number of ocurrences for the feature
@@ -161,8 +163,6 @@ def graph_gff(file_out):
    file_out.close()


-    # get the start position of the features on the linear genome, using their coordinates on the graph and the coordinantes of the segments on the genome
-

 # functions to generate a genome's gff from the graph's gff

@@ -178,7 +178,7 @@ def get_start_2(seg_start, seg_ac, feat_id):
 def get_stop_2(seg_stop, seg_ac, feat_id):
    s_start=seg_ac[seg_stop][1]
    f_stop=get_stop(seg_stop,feat_id)
-    
+
    stop=int(s_start)+int(f_stop)
    return stop

@@ -234,9 +234,9 @@ def write_line(list_seg,i,feat,seg_start,gene_start,seg_ac):


 # writes the gff of azucena using the gff of the graph
-def azu(pos_seg, gff, out):
+def genome_gff(pos_seg, gff, out):
    
-    print("generation of azucena's gff ")
+    print("generation of the genome's gff ")
    
    # create a dictionnary with the positions of the segments on azucena
    seg_ac={}
@@ -410,4 +410,66 @@ def azu(pos_seg, gff, out):
            feat_all.write("\n")
        feat_all.close()

- 
\ No newline at end of file
+ 
+
+
+ # writes the gff of azucena using the gff of the graph
+def genome_gff_test(pos_seg, gff, out):
+    
+    print("generation of the genome's gff ")
+    
+    # create a dictionnary with the positions of the segments on azucena
+    seg_ac={}
+    bed=open(pos_seg,'r')
+    lines=bed.readlines()
+    for line in lines:
+        line=line.split()
+        s_id=line[3][1:]
+        ch=line[0]
+        start=line[1]
+        stop=line[2]
+        seg_ac[s_id]=list([ch,start,stop])
+    bed.close()
+    
+    gff=open(gff,'r')
+    file_out = open(out, 'w')
+    
+    lines=gff.readlines()
+
+    # get the list of all the features to transfer
+    list_feature=list()
+    for line in lines:
+        id=(line.split()[8].split(";")[0].split("=")[1].replace(".","_").replace(":","_"))
+        if id not in list_feature:
+            list_feature.append(id)
+    
+    
+    # for each feature, get list of the segments where it is
+    for feat in list_feature:
+        list_seg=Features[feat].segments_list
+        size_list=len(list_seg)
+        
+        gene_start="empty"
+        seg_start="empty"
+        seg_stop="empty"
+        
+        # prints all gene fragments. 
+        for i in range(0,size_list):
+            
+            if (list_seg[i][1:] in seg_ac) & (1==1):
+                feature=Features[feat]
+                seg_start=list_seg[i][1:]
+                seg_stop=list_seg[i][1:]
+                chr=seg_ac[seg_start][0]
+                start_azu=get_start_2(seg_start,seg_ac,feature.id) 
+                stop_azu=get_stop_2(seg_stop,seg_ac,feature.id)
+                out_line=chr+"  "+str(start_azu)+"  "+str(stop_azu)+"   "+seg_start+"   "+seg_stop+"   "+feature.id+"\n"
+                file_out.write(out_line)
+       
+
+    file_out.close()
+    gff.close() 
+    
+
+
+
--- a/main.py
+++ b/main.py
 import Functions as fct

 # creates segments and features for the intersect between the graph for chr10 and the gff of IRGSP
-intersect_path='/home/nina/annotpangenome/align_genes/intersect_segments-genes_chr10.bed'
+intersect_path='/home/nina/annotpangenome/align_genes/intersect_segments-features_chr10.bed'
 fct.create_seg_feat(intersect_path)

 # outputs the gff of the graph for chr10
 output_file='new_graph_chr10.gff'
 fct.graph_gff(output_file)

-pos_seg="seg_coord/AzucenaRS1_chromosome10.bed"
+
 gff="new_graph_chr10.gff"
-out="azucena_chr10.gff"
-fct.azu(pos_seg,gff,out)
+
+if 1==0:
+    pos_seg="seg_coord/AzucenaRS1_chromosome10.bed"
+    out="azucena_chr10_all.gff"
+
+if 1==1:
+    out="nb_chr10_all.gff"
+    pos_seg="seg_coord/IRGSP-1_0_Chr10.bed"
+
+fct.genome_gff(pos_seg,gff,out)