ifsegment_idnotinSegments:# if the segment doesn't exist, create it and add the current feature to its feat list
init_seg(line,feature_id,segment_id,strand)
else:# if it exists, add the current feature to the list of features on the existing segment
add_feature(segment_id,feature_id,strand)
# create a note for the child features that do not have annotation.
def set_note(feature_id):
    """Set the ``note`` attribute of the feature stored under ``feature_id``.

    The note is the last ';'-separated field of a gene's annotation. It
    describes the function of the feature and is used for statistics on
    hypothetical/putative features.

    In the gff only the "gene" features carry an annotation, so a child
    feature takes the note of the gene that contains it. The direct parent of
    a child is not always the gene (e.g. the parent of an exon is the mRNA),
    so the parent chain is followed upwards until a feature of type "gene" is
    reached.

    Args:
        feature_id: key of the feature in the module-level ``Features`` dict.

    Raises:
        KeyError: if ``feature_id`` or one of the parents met on the way up
            is not a key of ``Features``.
        ValueError: if the parent chain loops back on itself before reaching
            a gene (the search would otherwise never terminate).
    """
    feature = Features[feature_id]

    # Walk up from the feature itself: a gene is its own annotation source,
    # any other type defers to its closest "gene" ancestor.
    # NOTE(review): assumes Features is keyed by each feature's own id, so
    # Features[x.parent] is the parent object itself -- confirm.
    ancestor = feature
    visited = {feature_id}
    while ancestor.type != "gene":
        parent_id = ancestor.parent
        if parent_id in visited:
            raise ValueError(
                f"cyclic parent chain while looking for the gene of feature {feature_id!r}"
            )
        visited.add(parent_id)
        ancestor = Features[parent_id]

    # The note is the last field of the gene's annotation.
    feature.note = ancestor.annot.split(';')[-1]
# create all the Segment and Feature objects in the dictionaries Segments and Features
# create all the Segment and Feature objects in the dictionaries Segments and Features
defcreate_seg_feat(intersect_path):
defload_intersect(intersect_path):
print("loading the intersect file")
# open the file with the intersect between the segments and the gff
# open the file with the intersect between the segments and the gff
else:# if it exists, add the current feature to the list of features on the existing segment
strand=line.split()[10]
add_feature(segment_id,feature_id,strand)
# for all the features, add the note (information on the function of the feature), and the positions on the first and last seg.
# cant always do it before because for that i need to have all the parents in the dict Features, and all the segments in the list segments_list for each feature.