Skip to content
Snippets Groups Projects
add_features_to_graph.py 1.52 KiB
Newer Older
gfa_path="data/graphs/RiceGraphChr10_cactus.gfa"
gfa_file=open(gfa_path,"r")
lines_gfa=gfa_file.readlines()
gfa_file.close()

gaf_path="graph_chr10.gaf"
gaf_file=open(gaf_path,"r")
lines_gaf=gaf_file.readlines()
gaf_file.close()

gff_path="data/genes/nb_genes_chr10.gff3"
gff_file=open(gff_path,'r')
lines_gff=gff_file.readlines()
gff_file.close()


# make dict for finding start and stop position on ref for the features
feature_pos={}
for line in lines_gff:
    line=line.split()
    feature_id=line[8].split(";")[0].split("=")[1]
    feature_start=line[3]
    feature_stop=line[4]
    feature_pos[feature_id]=[feature_start,feature_stop]

# create the new W lines for the features' walks in the graph
new_W_lines=""
for line in lines_gaf:
    line=line.split()
    feature_id=line[0]
    if feature_id in feature_pos:
        feature_start=feature_pos[feature_id][0]
        feature_stop=feature_pos[feature_id][1]
    else:
        feature_start="."
        feature_stop="."
    # new_W_lines = W   SampleId    HapIndex    SequenceId  start_on_ref    stop_on_ref walk
    new_W_lines+=f'W\tIRGSP-1_0\t0\t{feature_id}\t{feature_start}\t{feature_stop}\t{line[5]}\n'



gfa_file_out=open("data/RiceGraphChr10_cactus_annotated.gfa","w")
[W_found,W_passed,W_printed]=[False,False,False]

for line in lines_gfa:
    if line[0]=="W":
        W_found=True
    if (line[0]!="W") & W_found:
        W_passed=True
    if W_passed & (not W_printed):
        gfa_file_out.write(new_W_lines)
        W_printed=True
    gfa_file_out.write(line)
gfa_file_out.close()