Skip to content
Snippets Groups Projects
Commit fe97d997 authored by nina.marthe_ird.fr's avatar nina.marthe_ird.fr
Browse files

ajouté le code aa, et une fct pour avoir la séquence d'une feature

parent e295bd96
No related branches found
No related tags found
No related merge requests found
......@@ -37,6 +37,8 @@ class Feature:
self.note=""
self.sequence=""
def __str__(self):
if self.parent=="":
......
......@@ -3,7 +3,7 @@ global segments_on_target_genome
segments_on_target_genome={}
global target_genome_name
target_genome_name="CM020642.1_Azucena_chromosome10"
target_genome_name="CM020635.1_Azucena_chromosome10"
target_genome_name="genome4_chr10"
# get the start position of the features on the linear genome, using their coordinates on the graph and the coordinantes of the segments on the genome
......@@ -65,7 +65,7 @@ def create_line_detail(last_seg,feature_id,start_seg,feat_start):
position=get_position_on_feature(start_seg,stop_seg,feat_start,feature)
annotation=feature.annot+proportion+position
output_line=f'{chr}\tGrAnnot\t{feature.type}\t{start_on_new_genome}\t{stop_on_new_genome}\t.\t{strand}\t.\t{annotation}\n'
output_line=f'{chr}\tGrAnnoT\t{feature.type}\t{start_on_new_genome}\t{stop_on_new_genome}\t.\t{strand}\t.\t{annotation}\n'
return output_line
......@@ -84,7 +84,7 @@ def create_line_gff_one(first_seg,last_seg,feature_id,size_diff,list_seg):
variant_count+=1
annotation=f'{feature.annot};Size_diff={size_diff};Nb_variants={variant_count}'
output_line=f'{chr}\tGrAnnot\t{feature.type}\t{get_feature_start_on_genome(first_seg,feature_id)}\t{get_feature_stop_on_genome(last_seg,feature_id)}\t.\t{strand}\t.\t{annotation}\n'
output_line=f'{chr}\tGrAnnoT\t{feature.type}\t{get_feature_start_on_genome(first_seg,feature_id)}\t{get_feature_stop_on_genome(last_seg,feature_id)}\t.\t{strand}\t.\t{annotation}\n'
return output_line
......@@ -285,7 +285,25 @@ def get_segment_sequence(seg_seq,segment):
if (segment[0]==">") | (segment[0]=="+"):
return seg_seq[segment[1:]]
else:
return seg_seq[segment[1:]][::-1]
return reverse_complement(seg_seq[segment[1:]])
def reverse_complement(sequence):
sequence_rc=""
for char in sequence:
sequence_rc+=complement(char)
return sequence_rc[::-1]
def complement(nucl):
match nucl:
case "A":
return "T"
case "C":
return "G"
case "G":
return "C"
case "T":
return "A"
return nucl
class Variation:
def __init__(self,feature_id,feature_type,chr,start_new,stop_new,inversion,size_diff):
......@@ -317,7 +335,7 @@ def create_var(feature_id,first_seg,last_seg,paths):
# get feature paths on the original genome and on the target genome
list_segfeat_azu=get_feature_path(paths,first_seg,last_seg)
list_segfeat_nb=feature.segments_list
[list_segfeat_nb,list_segfeat_azu,inversion]=detect_inversion(list_segfeat_nb,list_segfeat_azu)
[list_segfeat_nb,list_segfeat_azu,inversion]=detect_gene_inversion(list_segfeat_nb,list_segfeat_azu)
variation=Variation(feature_id,feature.type,feature.chr,start_new_genome,stop_new_genome,inversion,size_diff)
......@@ -437,13 +455,11 @@ def detect_orient_inversion(list_1,list_2):
strand_inversion=False
else:
strand_inversion=True
if same_strand_count!=0:
print("ignored strand difference :/")
return [strand_inversion,list_1_unstrand,list_2_unstrand]
# takes two lists of segments for two genes, check if the first list is an inversion of the second one (if the segments in common are on the opposite strand)
def detect_inversion(list_1,list_2):
def detect_gene_inversion(list_1,list_2):
# check if we have an inversion of the orientation of the segments
[strand_inversion,list_1_unstrand,list_2_unstrand]=detect_orient_inversion(list_1,list_2)
......@@ -472,3 +488,80 @@ def invert_segment_list(seg_list):
return list(reversed(list_inverted))
# pour faire l'inférence, lire le fichier variations.txt et l'interpréter. pour chaque variation, regarder ce que ça fait.
# donc première étape quand on appelle la fct "inférence", c'est d'ajouter dans les features leur séquence avec seg_seq et la liste des segments.
def add_feature_sequence(feature,seg_seq):
feature_sequence=""
for segment in feature.segments_list:
if segment==feature.segments_list[0]:
feature_sequence+=get_segment_sequence(seg_seq,segment)[feature.pos_start:] # vérifier les +/- 1 pour la position
elif segment==feature.segments_list[-1]:
feature_sequence+=get_segment_sequence(seg_seq,segment)[0:feature.pos_stop] # vérifier les +/- 1 pour la position
else:
feature_sequence+=get_segment_sequence(seg_seq,segment)
feature.sequence=feature_sequence
# penser à transcrire la séquence codante du gène !!
def get_aa(codon):
match codon[0:2]:
case "UU":
if (codon[2]=="U") | (codon[2]=="C"):
return "Phe"
else:
return "Leu"
case "UC":
return "Ser"
case "UA":
if (codon[2]=="U") | (codon[2]=="C"):
return "Tyr"
else:
return "Stop"
case "UG":
if (codon[2]=="U") | (codon[2]=="C"):
return "Cys"
elif codon[2]=="A":
return "Stop"
else:
return "Trp"
case "CU":
return "Leu"
case "CC":
return "Pro"
case "CA":
if (codon[2]=="U") | (codon[2]=="C"):
return "His"
else:
return "Gln"
case "CG":
return "Arg"
case "AU":
if codon[2]=="G":
return "Met"
else:
return "Ile"
case "AC":
return "Thr"
case "AA":
if (codon[2]=="U") | (codon[2]=="C"):
return "Asn"
else:
return "Lys"
case "AG":
if (codon[2]=="U") | (codon[2]=="C"):
return "Ser"
else:
return "Arg"
case "GU":
return "Val"
case "GC":
return "Ala"
case "GA":
if (codon[2]=="U") | (codon[2]=="C"):
return "Asp"
else:
return "Glu"
case "GG":
return "Gly"
\ No newline at end of file
......@@ -208,10 +208,18 @@ def print_variations(first_seg,last_seg,feat,paths,seg_seq):
init_new_var(variation,"deletion",list_segfeat_nb,list_segfeat_azu,i,j,seg_seq,feature)
i+=1
else : # if both segments are present in the other genome but not at the same position. probably substitution ? weird case never found yet
line=f'weird order change\n'
write_line(line,output_variations,False)
var_count+=1;i+=1;j+=1
else : # if both segments are present in the other genome but not at the same position. weird case never found yet
# can be a substitution. check later if its not an inversion
variation.last_seg_in_target=list_segfeat_azu[j][1:]
if (variation.type=='insertion') | (variation.type=='deletion'): # print the current variation before treating the substitution
print_current_var(variation,feat_start,list_segfeat_azu,feat,i)
reset_var(variation)
var_count+=1
if variation.type=='substitution':
continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j,0)
else: # initiate substitution
init_new_var(variation,"substitution",list_segfeat_nb,list_segfeat_azu,i,j,seg_seq,feature)
i+=1;j+=1
else: # segment present in both, no variation. print the running indel if there is one
if variation.type!='': # print the current variation if there is one
......@@ -245,16 +253,35 @@ def print_current_var(variation,feat_start,list_segfeat_azu,feat,i):
line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tdeletion\t{variation.ref}\t-\t{len(variation.ref)}\t{pos_old}\t{pos_new}\n'
write_line(line,output_variations,False)
elif variation.type=='substitution':
warning=detect_small_inversion(variation)
[pos_old,pos_new]=get_old_new_pos_substitution(feat_start,variation,list_segfeat_azu,feat)
if len(variation.ref) == len(variation.alt): # if the substituion is between two segment of the same size, print it
size_subs=len(variation.ref)
line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tsubstitution\t{variation.ref}\t{variation.alt}\t{size_subs}\t{pos_old}\t{pos_new}\n'
line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tsubstitution\t{variation.ref}\t{variation.alt}\t{size_subs}\t{pos_old}\t{pos_new}{warning}\n'
else :
# if the segments of the substitution have a different size, print deletion then insertion at the same position.
line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tdeletion\t{variation.ref}\t-\t{len(variation.ref)}\t{pos_old}\t{pos_new}\n'
line+=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tinsertion\t-\t{variation.alt}\t{len(variation.alt)}\t{pos_old}\t{pos_new}\n'
line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tdeletion\t{variation.ref}\t-\t{len(variation.ref)}\t{pos_old}\t{pos_new}{warning}\n'
line+=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tinsertion\t-\t{variation.alt}\t{len(variation.alt)}\t{pos_old}\t{pos_new}{warning}\n'
write_line(line,output_variations,False)
def detect_small_inversion(variation):
[list_ref_common,list_alt_common]=[list(),list()]
list_ref_unstrand=[segment_stranded[1:] for segment_stranded in variation.seg_ref]
list_alt_unstrand=[segment_stranded[1:] for segment_stranded in variation.seg_alt]
for seg in variation.seg_ref:
if seg[1:] in list_alt_unstrand:
list_ref_common.append(seg)
for seg in variation.seg_alt:
if seg[1:] in list_ref_unstrand:
list_alt_common.append(seg)
if (len(list_ref_common)>len(list_ref_unstrand)*0.5) & (len(list_alt_common)>len(list_alt_unstrand)*0.5):
print("eee")
return f'\t# Suspected inversion within this substitution.'
else:
return ''
def print_last_deletion(variation,list_segfeat_nb,i,feat_start,feature,seg_seq):
pos_old=int(Segments[list_segfeat_nb[i][1:]].start)-int(feat_start)+1
del_sequence=get_sequence_list_seg(list_segfeat_nb,i,feature,seg_seq)
......
......@@ -8,6 +8,24 @@ from Functions_output import *
run="test"
if run=="chr3":
intersect_path='/home/nina/annotpangenome/chr3/intersect_segments_genes_irgsp_chr3.bed'
load_intersect(intersect_path)
# outputs the gff of the graph for chr10
output_gff='graph_chr3.gff'
gfa="test_graph"
graph_gff(output_gff)
pos_seg="seg_coord/AzucenaRS1_chromosome3_corrected.bed"
out="azucena_chr3.gff"
gff="graph_chr3.gff"
# outputs the gff of a genome for the chr10
genome_gff(pos_seg,gff,out,gfa)
if run=="reel":
# creates segments and features for the intersect between the graph for chr10 and the gff of IRGSP
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment