Skip to content
Snippets Groups Projects
Commit 02044675 authored by NMarthe's avatar NMarthe
Browse files

debug du calcul de coordonnees

parent 3d07d9f4
No related branches found
No related tags found
No related merge requests found
......@@ -12,8 +12,8 @@ class Segment:
def __str__(self):
return f"id={self.id}, position sur nipponbare={self.chr}:{self.start}-{self.stop}, size={self.size}, features={self.features}"
class Feature:
def __init__(self,id,type,chr,start,stop,annot,childs,parent,seg):
self.id=id
......
......@@ -126,6 +126,7 @@ def get_stop(seg,feat):
result=s.size
else:
result=f.stop-s.start
return result
# go through all the segments in Segments and prints the gff, with one line for each segment/feature intersection
......@@ -143,7 +144,8 @@ def graph_gff(file_out):
feature=feature_stranded[1:]
segment_stranded=strand+segment
type=Features[feature].type
start=max(1,get_start(segment,feature))
#start=max(1,get_start(segment,feature))
start=get_start(segment,feature)
stop=get_stop(segment,feature)
# get the rank and the total number of ocurrences for the feature
......@@ -161,8 +163,6 @@ def graph_gff(file_out):
file_out.close()
# get the start position of the features on the linear genome, using their coordinates on the graph and the coordinantes of the segments on the genome
# functions to generate a genome's gff from the graph's gff
......@@ -178,7 +178,7 @@ def get_start_2(seg_start, seg_ac, feat_id):
def get_stop_2(seg_stop, seg_ac, feat_id):
s_start=seg_ac[seg_stop][1]
f_stop=get_stop(seg_stop,feat_id)
stop=int(s_start)+int(f_stop)
return stop
......@@ -234,9 +234,9 @@ def write_line(list_seg,i,feat,seg_start,gene_start,seg_ac):
# writes the gff of azucena using the gff of the graph
def azu(pos_seg, gff, out):
def genome_gff(pos_seg, gff, out):
print("generation of azucena's gff ")
print("generation of the genome's gff ")
# create a dictionnary with the positions of the segments on azucena
seg_ac={}
......@@ -410,4 +410,66 @@ def azu(pos_seg, gff, out):
feat_all.write("\n")
feat_all.close()
\ No newline at end of file
# writes the gff of azucena using the gff of the graph
def genome_gff_test(pos_seg, gff, out):
print("generation of the genome's gff ")
# create a dictionnary with the positions of the segments on azucena
seg_ac={}
bed=open(pos_seg,'r')
lines=bed.readlines()
for line in lines:
line=line.split()
s_id=line[3][1:]
ch=line[0]
start=line[1]
stop=line[2]
seg_ac[s_id]=list([ch,start,stop])
bed.close()
gff=open(gff,'r')
file_out = open(out, 'w')
lines=gff.readlines()
# get the list of all the features to transfer
list_feature=list()
for line in lines:
id=(line.split()[8].split(";")[0].split("=")[1].replace(".","_").replace(":","_"))
if id not in list_feature:
list_feature.append(id)
# for each feature, get list of the segments where it is
for feat in list_feature:
list_seg=Features[feat].segments_list
size_list=len(list_seg)
gene_start="empty"
seg_start="empty"
seg_stop="empty"
# prints all gene fragments.
for i in range(0,size_list):
if (list_seg[i][1:] in seg_ac) & (1==1):
feature=Features[feat]
seg_start=list_seg[i][1:]
seg_stop=list_seg[i][1:]
chr=seg_ac[seg_start][0]
start_azu=get_start_2(seg_start,seg_ac,feature.id)
stop_azu=get_stop_2(seg_stop,seg_ac,feature.id)
out_line=chr+" "+str(start_azu)+" "+str(stop_azu)+" "+seg_start+" "+seg_stop+" "+feature.id+"\n"
file_out.write(out_line)
file_out.close()
gff.close()
import Functions as fct
# creates segments and features for the intersect between the graph for chr10 and the gff of IRGSP
intersect_path='/home/nina/annotpangenome/align_genes/intersect_segments-genes_chr10.bed'
intersect_path='/home/nina/annotpangenome/align_genes/intersect_segments-features_chr10.bed'
fct.create_seg_feat(intersect_path)
# outputs the gff of the graph for chr10
output_file='new_graph_chr10.gff'
fct.graph_gff(output_file)
pos_seg="seg_coord/AzucenaRS1_chromosome10.bed"
gff="new_graph_chr10.gff"
out="azucena_chr10.gff"
fct.azu(pos_seg,gff,out)
if 1==0:
pos_seg="seg_coord/AzucenaRS1_chromosome10.bed"
out="azucena_chr10_all.gff"
if 1==1:
out="nb_chr10_all.gff"
pos_seg="seg_coord/IRGSP-1_0_Chr10.bed"
fct.genome_gff(pos_seg,gff,out)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment