gfa_path="data/graphs/RiceGraphChr10_cactus.gfa" gfa_file=open(gfa_path,"r") lines_gfa=gfa_file.readlines() gfa_file.close() gaf_path="graph_chr10.gaf" gaf_file=open(gaf_path,"r") lines_gaf=gaf_file.readlines() gaf_file.close() gff_path="data/genes/nb_genes_chr10.gff3" gff_file=open(gff_path,'r') lines_gff=gff_file.readlines() gff_file.close() # make dict for finding start and stop position on ref for the features feature_pos={} for line in lines_gff: line=line.split() feature_id=line[8].split(";")[0].split("=")[1] feature_start=line[3] feature_stop=line[4] feature_pos[feature_id]=[feature_start,feature_stop] # create the new W lines for the features' walks in the graph new_W_lines="" for line in lines_gaf: line=line.split() feature_id=line[0] if feature_id in feature_pos: feature_start=feature_pos[feature_id][0] feature_stop=feature_pos[feature_id][1] else: feature_start="." feature_stop="." # new_W_lines = W SampleId HapIndex SequenceId start_on_ref stop_on_ref walk new_W_lines+=f'W\tIRGSP-1_0\t0\t{feature_id}\t{feature_start}\t{feature_stop}\t{line[5]}\n' gfa_file_out=open("data/RiceGraphChr10_cactus_annotated.gfa","w") [W_found,W_passed,W_printed]=[False,False,False] for line in lines_gfa: if line[0]=="W": W_found=True if (line[0]!="W") & W_found: W_passed=True if W_passed & (not W_printed): gfa_file_out.write(new_W_lines) W_printed=True gfa_file_out.write(line) gfa_file_out.close()