Skip to content
Snippets Groups Projects
Commit 4043b720 authored by NMarthe's avatar NMarthe
Browse files

séparé le code en plus de fonctions pour le rendre plus lisible

parent 875cc898
No related branches found
No related tags found
No related merge requests found
...@@ -6,113 +6,113 @@ global Segments ...@@ -6,113 +6,113 @@ global Segments
Segments={} Segments={}
Features={} Features={}
global print_graph_gff
print_graph_gff=[0,"",""]
# write in output file
def write_line(line,force):
print_graph_gff[1]+=line
print_graph_gff[0]+=1
if (print_graph_gff[0]>999) | (force==True):
print_graph_gff[2].write(print_graph_gff[1])
print_graph_gff[0]=0
print_graph_gff[1]=""
# functions to create the segments and the features # functions to create the segments and the features
# create a segment with its first feature # create a segment with its first feature
def init_seg(line,feature): def init_seg(line,feature_id,segment_id,strand):
line=line.split() [chr,start,stop]=[line[0],line[1],line[2]]
seg_id=line[3][1:]
chr=line[0]
start=line[1]
stop=line[2]
# add the current feature to the list of features that are on the segment # add the current feature to the list of features that are on the segment
feature_strand=line[10] feature_stranded=strand+feature_id
feature_stranded=feature_strand+feature feature_list=[feature_stranded]
feat=list() # create the segment, store it in the Segments dict
feat.append(feature_stranded) Segments[segment_id]=Class.Segment(segment_id,feature_list,chr,start,stop)
# create the segment
Segments[seg_id]=Class.Segment(seg_id,feat,chr,start,stop)
# create a feature with the first segment its on # create a feature with the first segment its on
def init_feature(line): def init_feature(line,feature_id,segment_id,strand):
line=line.split() [type,annot,chr,start,stop,childs]=[line[6],line[12],line[4],line[7],line[8],[]]
feature_id=line[12].split(';')[0].split("=")[1].replace(".","_").replace(":","_")
type=line[6]
annot=line[12]
chr=line[4]
start=line[7]
stop=line[8]
childs=list()
# add the current segment to the list of segments that have the feature
seg=line[3][1:]
strand=line[10]
seg_stranded=strand+seg
segments_list=list()
segments_list.append(seg_stranded)
parent=get_parent(annot,feature_id) parent=get_parent(annot,feature_id)
# add the current segment to the list of segments that have the feature
# create the feature segment_stranded=strand+segment_id
segments_list=[segment_stranded]
# create the feature, store it in the dict Features
Features[feature_id]=Class.Feature(feature_id,type,chr,start,stop,annot,childs,parent,segments_list) Features[feature_id]=Class.Feature(feature_id,type,chr,start,stop,annot,childs,parent,segments_list)
set_note(feature_id)
# if there is a parent, returns the id of the parent feature and add the child to the parent
# if there is a parent, get the id of the parent feature and add the child to the parent
def get_parent(annot,feature_id): def get_parent(annot,feature_id):
# if the current feature has a parent, add the current feature in the childs list of its parent
if annot.split(";")[1].split("=")[0]=="Parent": if annot.split(";")[1].split("=")[0]=="Parent":
# for annotations that look like : ID=LOC_Os01g01010.1:exon_7;Parent=LOC_Os01g01010.1, where the parent is in the field 1 # for annotations that look like : ID=LOC_Os01g01010.1:exon_7;Parent=LOC_Os01g01010.1, where the parent is in the field 1
parent=annot.split(";")[1].split("=")[1].replace(".","_").replace(":","_") parent=annot.split(";")[1].split("=")[1].replace(".","_").replace(":","_")
add_child(parent,feature_id) add_child(parent,feature_id)
elif annot.split(";")[2].split("=")[0]=="Parent": elif annot.split(";")[2].split("=")[0]=="Parent":
# for annotations that look like : ID=LOC_Os01g01010.1;Name=LOC_Os01g01010.1;Parent=LOC_Os01g01010, where the parent is in the field 2 # for annotations that look like : ID=LOC_Os01g01010.1;Name=LOC_Os01g01010.1;Parent=LOC_Os01g01010, where the parent is in the field 2
parent=annot.split(";")[2].split("=")[1].replace(".","_").replace(":","_") parent=annot.split(";")[2].split("=")[1].replace(".","_").replace(":","_")
add_child(parent,feature_id) add_child(parent,feature_id)
else: parent="" else: parent=""
return parent return parent
# add a feature to an existing segment # add a feature to an existing segment
def add_feature(seg,new_feature,strand): def add_feature(seg,new_feature,strand):
new_feature_stranded=strand+new_feature new_feature_stranded=strand+new_feature
if new_feature_stranded not in Segments[seg].features: if new_feature_stranded not in Segments[seg].features:
Segments[seg].features.append(new_feature_stranded) Segments[seg].features.append(new_feature_stranded)
# add a child feature to an existing feature # add a child feature to an existing feature
def add_child(feat,new_child): def add_child(feat,new_child):
if feat in Features.keys(): # if the parent feature exists if (feat in Features.keys()) & (new_child not in Features[feat].childs): # if the parent feature exists
if new_child not in Features[feat].childs: Features[feat].childs.append(new_child)
Features[feat].childs.append(new_child)
# add a segment to an existing feature # add a segment to an existing feature
def add_seg(feat,new_seg,strand): def add_seg(feat,new_seg,strand):
seg_stranded=strand+new_seg seg_stranded=strand+new_seg
if seg_stranded not in Features[feat].segments_list: if seg_stranded not in Features[feat].segments_list:
Features[feat].segments_list.append(seg_stranded) Features[feat].segments_list.append(seg_stranded)
# add the position of the feature on its first and last segment
def add_pos(feature):
feature.pos_start=get_start(feature.segments_list[0][1:],feature.id)
feature.pos_stop=get_stop(feature.segments_list[-1][1:],feature.id)
def load_feature_intersect(line,feature_id,segment_id,strand):
if feature_id not in Features: # if the feature doesn't exist, create it and add the current segment to its seg list
init_feature(line, feature_id, segment_id,strand)
else: # if it exists, add the current segment to the list of segments that have the existing feature
add_seg(feature_id,segment_id,strand)
def load_segment_intersect(line,feature_id,segment_id,strand):
if segment_id not in Segments: # if the segment doesn't exist, create it and add the current feature to its feat list
init_seg(line, feature_id,segment_id,strand)
else: # if it exists, add the current feature to the list of features on the existing segment
add_feature(segment_id,feature_id,strand)
# create a note for the child features that do not have annotation.
def set_note(id): # create a note for the child features that do not have annotation. not clean. fct for getting parent ?
def set_note(feature_id):
# the note contains information on the function of the feature and is used for statistics on hypothetical/putatives features. # the note contains information on the function of the feature and is used for statistics on hypothetical/putatives features.
# in the gff, the notes are only on the "gene" features. it's easier to have it for the childs than to check the parent's note (or the parent's parent). # in the gff, the notes are only on the "gene" features. it's easier to have it for the childs than to check the parent's note (or the parent's parent).
feat=Features[id] feature=Features[feature_id]
if feat.type=="gene": # if the feature is a gene, the note is the last field of its annotation. if feature.type=="gene": # if the feature is a gene, the note is the last field of its annotation.
feat.note=feat.annot.split(';')[-1] feature.note=feature.annot.split(';')[-1]
else: # else, the note will be the note of the gene that contains the feature. in my gff, only the genes have an annotation. else: # else, the note will be the note of the gene that contains the feature. in my gff, only the genes have an annotation.
# we go back to the parent of the feature, and its parent if necessary, etc, until we find the gene. # we go back to the parent of the feature, and its parent if necessary, etc, until we find the gene.
# this is because for example the parent of an exon is the mrna, not the gene itself, so we need to go up until we find the gene. # this is because for example the parent of an exon is the mrna, not the gene itself, so we need to go up until we find the gene.
curent=feat.parent curent=feature.parent
annot_found=False annot_found=False
while annot_found==False: while annot_found==False:
if Features[curent].type=="gene": # if/once we found the gene, we get its note to transfer it to the child feature if Features[curent].type=="gene": # if/once we found the gene, we get its note to transfer it to the child feature
note=Features[curent].annot.split(';')[-1] note=Features[curent].annot.split(';')[-1]
feat.note=note feature.note=note
annot_found=True annot_found=True
else: # if we didn't find the gene, we go up to the current feature's parent until we find it else: # if we didn't find the gene, we go up to the current feature's parent until we find it
curent=Features[Features[curent].parent].id curent=Features[Features[curent].parent].id
# create all the Segment and Feature objects in the dictionnaries Segments and Features # create all the Segment and Feature objects in the dictionnaries Segments and Features
def create_seg_feat(intersect_path): def load_intersect(intersect_path):
print("loading the intersect file")
# open the file with the intersect between the segments and the gff # open the file with the intersect between the segments and the gff
file = open(intersect_path, 'r') file = open(intersect_path, 'r')
...@@ -120,31 +120,19 @@ def create_seg_feat(intersect_path): ...@@ -120,31 +120,19 @@ def create_seg_feat(intersect_path):
file.close() file.close()
for line in lines: # for each line in the intersect file for line in lines: # for each line in the intersect file
line=line.split()
# get the ids for the dictionnaries' keys # get the ids for the dictionnaries' keys
feature_id=line.split()[12].split(';')[0].split("=")[1].replace(".","_").replace(":","_") feature_id=line[12].split(';')[0].split("=")[1].replace(".","_").replace(":","_")
segment_id=line.split()[3][1:] segment_id=line[3][1:]
strand=line[10]
if feature_id not in Features: # if the feature doesn't exist, create it and add the current segment to its seg list
init_feature(line)
else: # if it exists, add the current segment to the list of segments that have the existing feature
strand=line.split()[10]
add_seg(feature_id,segment_id,strand)
if segment_id not in Segments: # if the segment doesn't exist, create it and add the current feature to its feat list load_feature_intersect(line,feature_id,segment_id,strand)
init_seg(line, feature_id) load_segment_intersect(line,feature_id,segment_id,strand)
else: # if it exists, add the current feature to the list of features on the existing segment
strand=line.split()[10]
add_feature(segment_id,feature_id,strand)
# for all the features, add the note (information on the function of the feature), and the positions on the first and last seg.
# cant always do it before because for that i need to have all the parents in the dict Features, and all the segments in the list segments_list for each feature.
for feat_id in Features:
feat=Features[feat_id]
set_note(feat.id)
feat.pos_start=get_start(feat.segments_list[0][1:],feat.id)
feat.pos_stop=get_stop(feat.segments_list[-1][1:],feat.id)
# for all the features, add the position of the feature on its first and last segment.
# cant do it before because for that i need to have all the segments in the list segments_list for each feature.
for feat_id in Features:
add_pos(Features[feat_id])
...@@ -162,7 +150,6 @@ def get_start(seg_id,feat_id): ...@@ -162,7 +150,6 @@ def get_start(seg_id,feat_id):
result=f.start-s.start+1 result=f.start-s.start+1
return result return result
# get the feature's stop position on the segment # get the feature's stop position on the segment
def get_stop(seg_id,feat_id): def get_stop(seg_id,feat_id):
s=Segments[seg_id] s=Segments[seg_id]
...@@ -173,49 +160,39 @@ def get_stop(seg_id,feat_id): ...@@ -173,49 +160,39 @@ def get_stop(seg_id,feat_id):
result=f.stop-s.start+1 result=f.stop-s.start+1
return result return result
# generates the annotation for the gff of the graph, with the rank of the segment in the feature
def make_annot_graph(feature,segment,strand):
segment_stranded=strand+segment
# get the rank and the total number of ocurrences for the feature
rank=str(feature.segments_list.index(segment_stranded)+1)
total=str(len(feature.segments_list))
# create the annotation with the rank information
annotation=feature.annot+";Rank_occurrence="+rank+";Total_occurrences="+total
return annotation
def create_line_graph_gff(feature_stranded,segment):
[strand,feature_id]=[feature_stranded[0:1],feature_stranded[1:]]
feature=Features[feature_id]
annotation=make_annot_graph(feature,segment,strand)
type=feature.type
start=get_start(segment,feature_id)
stop=get_stop(segment,feature_id)
line=segment+"\tGrAnnot\t"+type+"\t"+str(start)+"\t"+str(stop)+"\t.\t"+strand+"\t.\t"+annotation+"\n"
return line
# go through all the segments in Segments and prints the gff, with one line for each segment/feature intersection # go through all the segments in Segments and prints the gff, with one line for each segment/feature intersection
def graph_gff(file_out): def graph_gff(file_out):
print("generation of the graph's gff") print("generation of the graph's gff")
file_out = open(file_out, 'w') file_out = open(file_out, 'w')
string_out="" print_graph_gff[2]=file_out
cpt_out=0
for segment in Segments: for segment in Segments:
# get the list of the features on the segment features_seg=Segments[segment].features # get the list of the features on the segment
features_seg=Segments[segment].features for feature_stranded in features_seg: # go through all the segment's features, and print the gff line for each
line = create_line_graph_gff(feature_stranded,segment)
# go through all the segment's features, and print the gff line for each write_line(line,False)
for feature_stranded in features_seg: write_line("",True)
strand=feature_stranded[0:1]
feature=feature_stranded[1:]
# feature[feature]
#[strand,feature]=[feature_stranded[0:1],feature_stranded[1:]]
segment_stranded=strand+segment
type=Features[feature].type
start=get_start(segment,feature)
stop=get_stop(segment,feature)
# get the rank and the total number of ocurrences for the feature
rank=str(Features[feature].segments_list.index(segment_stranded)+1)
total=str(len(Features[feature].segments_list))
# create the annotation with the rank information
annotation=Features[feature].annot+";Rank_occurrence="+rank+";Total_occurrences="+total
# write the gff line in the output file
line=segment+"\tGrAnnot\t"+type+"\t"+str(start)+"\t"+str(stop)+"\t.\t"+strand+"\t.\t"+annotation+"\n"
string_out+=line
cpt_out+=1
#file_out.write(line)
if cpt_out==1000:
file_out.write(string_out)
string_out=""
cpt_out=0
# faire un fct print
file_out.write(string_out)
file_out.close() file_out.close()
...@@ -223,4 +200,3 @@ def graph_gff(file_out): ...@@ -223,4 +200,3 @@ def graph_gff(file_out):
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment