Skip to content
Snippets Groups Projects
Commit fe97d997 authored by nina.marthe_ird.fr's avatar nina.marthe_ird.fr
Browse files

ajouté le code aa, et une fct pour avoir la séquence d'une feature

parent e295bd96
No related branches found
No related tags found
No related merge requests found
...@@ -37,6 +37,8 @@ class Feature: ...@@ -37,6 +37,8 @@ class Feature:
self.note="" self.note=""
self.sequence=""
def __str__(self): def __str__(self):
if self.parent=="": if self.parent=="":
......
...@@ -3,7 +3,7 @@ global segments_on_target_genome ...@@ -3,7 +3,7 @@ global segments_on_target_genome
segments_on_target_genome={} segments_on_target_genome={}
global target_genome_name global target_genome_name
target_genome_name="CM020642.1_Azucena_chromosome10" target_genome_name="CM020635.1_Azucena_chromosome10"
target_genome_name="genome4_chr10" target_genome_name="genome4_chr10"
# get the start position of the features on the linear genome, using their coordinates on the graph and the coordinantes of the segments on the genome # get the start position of the features on the linear genome, using their coordinates on the graph and the coordinantes of the segments on the genome
...@@ -65,7 +65,7 @@ def create_line_detail(last_seg,feature_id,start_seg,feat_start): ...@@ -65,7 +65,7 @@ def create_line_detail(last_seg,feature_id,start_seg,feat_start):
position=get_position_on_feature(start_seg,stop_seg,feat_start,feature) position=get_position_on_feature(start_seg,stop_seg,feat_start,feature)
annotation=feature.annot+proportion+position annotation=feature.annot+proportion+position
output_line=f'{chr}\tGrAnnot\t{feature.type}\t{start_on_new_genome}\t{stop_on_new_genome}\t.\t{strand}\t.\t{annotation}\n' output_line=f'{chr}\tGrAnnoT\t{feature.type}\t{start_on_new_genome}\t{stop_on_new_genome}\t.\t{strand}\t.\t{annotation}\n'
return output_line return output_line
...@@ -84,7 +84,7 @@ def create_line_gff_one(first_seg,last_seg,feature_id,size_diff,list_seg): ...@@ -84,7 +84,7 @@ def create_line_gff_one(first_seg,last_seg,feature_id,size_diff,list_seg):
variant_count+=1 variant_count+=1
annotation=f'{feature.annot};Size_diff={size_diff};Nb_variants={variant_count}' annotation=f'{feature.annot};Size_diff={size_diff};Nb_variants={variant_count}'
output_line=f'{chr}\tGrAnnot\t{feature.type}\t{get_feature_start_on_genome(first_seg,feature_id)}\t{get_feature_stop_on_genome(last_seg,feature_id)}\t.\t{strand}\t.\t{annotation}\n' output_line=f'{chr}\tGrAnnoT\t{feature.type}\t{get_feature_start_on_genome(first_seg,feature_id)}\t{get_feature_stop_on_genome(last_seg,feature_id)}\t.\t{strand}\t.\t{annotation}\n'
return output_line return output_line
...@@ -285,7 +285,25 @@ def get_segment_sequence(seg_seq,segment): ...@@ -285,7 +285,25 @@ def get_segment_sequence(seg_seq,segment):
if (segment[0]==">") | (segment[0]=="+"): if (segment[0]==">") | (segment[0]=="+"):
return seg_seq[segment[1:]] return seg_seq[segment[1:]]
else: else:
return seg_seq[segment[1:]][::-1] return reverse_complement(seg_seq[segment[1:]])
def reverse_complement(sequence):
sequence_rc=""
for char in sequence:
sequence_rc+=complement(char)
return sequence_rc[::-1]
def complement(nucl):
match nucl:
case "A":
return "T"
case "C":
return "G"
case "G":
return "C"
case "T":
return "A"
return nucl
class Variation: class Variation:
def __init__(self,feature_id,feature_type,chr,start_new,stop_new,inversion,size_diff): def __init__(self,feature_id,feature_type,chr,start_new,stop_new,inversion,size_diff):
...@@ -317,7 +335,7 @@ def create_var(feature_id,first_seg,last_seg,paths): ...@@ -317,7 +335,7 @@ def create_var(feature_id,first_seg,last_seg,paths):
# get feature paths on the original genome and on the target genome # get feature paths on the original genome and on the target genome
list_segfeat_azu=get_feature_path(paths,first_seg,last_seg) list_segfeat_azu=get_feature_path(paths,first_seg,last_seg)
list_segfeat_nb=feature.segments_list list_segfeat_nb=feature.segments_list
[list_segfeat_nb,list_segfeat_azu,inversion]=detect_inversion(list_segfeat_nb,list_segfeat_azu) [list_segfeat_nb,list_segfeat_azu,inversion]=detect_gene_inversion(list_segfeat_nb,list_segfeat_azu)
variation=Variation(feature_id,feature.type,feature.chr,start_new_genome,stop_new_genome,inversion,size_diff) variation=Variation(feature_id,feature.type,feature.chr,start_new_genome,stop_new_genome,inversion,size_diff)
...@@ -437,13 +455,11 @@ def detect_orient_inversion(list_1,list_2): ...@@ -437,13 +455,11 @@ def detect_orient_inversion(list_1,list_2):
strand_inversion=False strand_inversion=False
else: else:
strand_inversion=True strand_inversion=True
if same_strand_count!=0:
print("ignored strand difference :/")
return [strand_inversion,list_1_unstrand,list_2_unstrand] return [strand_inversion,list_1_unstrand,list_2_unstrand]
# takes two lists of segments for two genes, check if the first list is an inversion of the second one (if the segments in common are on the opposite strand) # takes two lists of segments for two genes, check if the first list is an inversion of the second one (if the segments in common are on the opposite strand)
def detect_inversion(list_1,list_2): def detect_gene_inversion(list_1,list_2):
# check if we have an inversion of the orientation of the segments # check if we have an inversion of the orientation of the segments
[strand_inversion,list_1_unstrand,list_2_unstrand]=detect_orient_inversion(list_1,list_2) [strand_inversion,list_1_unstrand,list_2_unstrand]=detect_orient_inversion(list_1,list_2)
...@@ -472,3 +488,80 @@ def invert_segment_list(seg_list): ...@@ -472,3 +488,80 @@ def invert_segment_list(seg_list):
return list(reversed(list_inverted)) return list(reversed(list_inverted))
# pour faire l'inférence, lire le fichier variations.txt et l'interpréter. pour chaque variation, regarder ce que ça fait.
# donc première étape quand on appelle la fct "inférence", c'est d'ajouter dans les features leur séquence avec seg_seq et la liste des segments.
def add_feature_sequence(feature,seg_seq):
feature_sequence=""
for segment in feature.segments_list:
if segment==feature.segments_list[0]:
feature_sequence+=get_segment_sequence(seg_seq,segment)[feature.pos_start:] # vérifier les +/- 1 pour la position
elif segment==feature.segments_list[-1]:
feature_sequence+=get_segment_sequence(seg_seq,segment)[0:feature.pos_stop] # vérifier les +/- 1 pour la position
else:
feature_sequence+=get_segment_sequence(seg_seq,segment)
feature.sequence=feature_sequence
# penser à transcrire la séquence codante du gène !!
def get_aa(codon):
match codon[0:2]:
case "UU":
if (codon[2]=="U") | (codon[2]=="C"):
return "Phe"
else:
return "Leu"
case "UC":
return "Ser"
case "UA":
if (codon[2]=="U") | (codon[2]=="C"):
return "Tyr"
else:
return "Stop"
case "UG":
if (codon[2]=="U") | (codon[2]=="C"):
return "Cys"
elif codon[2]=="A":
return "Stop"
else:
return "Trp"
case "CU":
return "Leu"
case "CC":
return "Pro"
case "CA":
if (codon[2]=="U") | (codon[2]=="C"):
return "His"
else:
return "Gln"
case "CG":
return "Arg"
case "AU":
if codon[2]=="G":
return "Met"
else:
return "Ile"
case "AC":
return "Thr"
case "AA":
if (codon[2]=="U") | (codon[2]=="C"):
return "Asn"
else:
return "Lys"
case "AG":
if (codon[2]=="U") | (codon[2]=="C"):
return "Ser"
else:
return "Arg"
case "GU":
return "Val"
case "GC":
return "Ala"
case "GA":
if (codon[2]=="U") | (codon[2]=="C"):
return "Asp"
else:
return "Glu"
case "GG":
return "Gly"
\ No newline at end of file
...@@ -208,10 +208,18 @@ def print_variations(first_seg,last_seg,feat,paths,seg_seq): ...@@ -208,10 +208,18 @@ def print_variations(first_seg,last_seg,feat,paths,seg_seq):
init_new_var(variation,"deletion",list_segfeat_nb,list_segfeat_azu,i,j,seg_seq,feature) init_new_var(variation,"deletion",list_segfeat_nb,list_segfeat_azu,i,j,seg_seq,feature)
i+=1 i+=1
else : # if both segments are present in the other genome but not at the same position. probably substitution ? weird case never found yet else : # if both segments are present in the other genome but not at the same position. weird case never found yet
line=f'weird order change\n' # can be a substitution. check later if its not an inversion
write_line(line,output_variations,False) variation.last_seg_in_target=list_segfeat_azu[j][1:]
var_count+=1;i+=1;j+=1 if (variation.type=='insertion') | (variation.type=='deletion'): # print the current variation before treating the substitution
print_current_var(variation,feat_start,list_segfeat_azu,feat,i)
reset_var(variation)
var_count+=1
if variation.type=='substitution':
continue_var(variation,seg_seq,list_segfeat_nb,list_segfeat_azu,i,j,0)
else: # initiate substitution
init_new_var(variation,"substitution",list_segfeat_nb,list_segfeat_azu,i,j,seg_seq,feature)
i+=1;j+=1
else: # segment present in both, no variation. print the running indel if there is one else: # segment present in both, no variation. print the running indel if there is one
if variation.type!='': # print the current variation if there is one if variation.type!='': # print the current variation if there is one
...@@ -245,16 +253,35 @@ def print_current_var(variation,feat_start,list_segfeat_azu,feat,i): ...@@ -245,16 +253,35 @@ def print_current_var(variation,feat_start,list_segfeat_azu,feat,i):
line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tdeletion\t{variation.ref}\t-\t{len(variation.ref)}\t{pos_old}\t{pos_new}\n' line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tdeletion\t{variation.ref}\t-\t{len(variation.ref)}\t{pos_old}\t{pos_new}\n'
write_line(line,output_variations,False) write_line(line,output_variations,False)
elif variation.type=='substitution': elif variation.type=='substitution':
warning=detect_small_inversion(variation)
[pos_old,pos_new]=get_old_new_pos_substitution(feat_start,variation,list_segfeat_azu,feat) [pos_old,pos_new]=get_old_new_pos_substitution(feat_start,variation,list_segfeat_azu,feat)
if len(variation.ref) == len(variation.alt): # if the substituion is between two segment of the same size, print it if len(variation.ref) == len(variation.alt): # if the substituion is between two segment of the same size, print it
size_subs=len(variation.ref) size_subs=len(variation.ref)
line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tsubstitution\t{variation.ref}\t{variation.alt}\t{size_subs}\t{pos_old}\t{pos_new}\n' line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tsubstitution\t{variation.ref}\t{variation.alt}\t{size_subs}\t{pos_old}\t{pos_new}{warning}\n'
else : else :
# if the segments of the substitution have a different size, print deletion then insertion at the same position. # if the segments of the substitution have a different size, print deletion then insertion at the same position.
line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tdeletion\t{variation.ref}\t-\t{len(variation.ref)}\t{pos_old}\t{pos_new}\n' line=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tdeletion\t{variation.ref}\t-\t{len(variation.ref)}\t{pos_old}\t{pos_new}{warning}\n'
line+=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tinsertion\t-\t{variation.alt}\t{len(variation.alt)}\t{pos_old}\t{pos_new}\n' line+=f'{variation.feature_id}\t{variation.feature_type}\t{variation.chr}\t{variation.start_new}\t{variation.stop_new}\t{variation.size_new}\t{variation.inversion}\t{variation.size_diff}\tinsertion\t-\t{variation.alt}\t{len(variation.alt)}\t{pos_old}\t{pos_new}{warning}\n'
write_line(line,output_variations,False) write_line(line,output_variations,False)
def detect_small_inversion(variation):
[list_ref_common,list_alt_common]=[list(),list()]
list_ref_unstrand=[segment_stranded[1:] for segment_stranded in variation.seg_ref]
list_alt_unstrand=[segment_stranded[1:] for segment_stranded in variation.seg_alt]
for seg in variation.seg_ref:
if seg[1:] in list_alt_unstrand:
list_ref_common.append(seg)
for seg in variation.seg_alt:
if seg[1:] in list_ref_unstrand:
list_alt_common.append(seg)
if (len(list_ref_common)>len(list_ref_unstrand)*0.5) & (len(list_alt_common)>len(list_alt_unstrand)*0.5):
print("eee")
return f'\t# Suspected inversion within this substitution.'
else:
return ''
def print_last_deletion(variation,list_segfeat_nb,i,feat_start,feature,seg_seq): def print_last_deletion(variation,list_segfeat_nb,i,feat_start,feature,seg_seq):
pos_old=int(Segments[list_segfeat_nb[i][1:]].start)-int(feat_start)+1 pos_old=int(Segments[list_segfeat_nb[i][1:]].start)-int(feat_start)+1
del_sequence=get_sequence_list_seg(list_segfeat_nb,i,feature,seg_seq) del_sequence=get_sequence_list_seg(list_segfeat_nb,i,feature,seg_seq)
......
...@@ -8,6 +8,24 @@ from Functions_output import * ...@@ -8,6 +8,24 @@ from Functions_output import *
run="test" run="test"
if run=="chr3":
intersect_path='/home/nina/annotpangenome/chr3/intersect_segments_genes_irgsp_chr3.bed'
load_intersect(intersect_path)
# outputs the gff of the graph for chr10
output_gff='graph_chr3.gff'
gfa="test_graph"
graph_gff(output_gff)
pos_seg="seg_coord/AzucenaRS1_chromosome3_corrected.bed"
out="azucena_chr3.gff"
gff="graph_chr3.gff"
# outputs the gff of a genome for the chr10
genome_gff(pos_seg,gff,out,gfa)
if run=="reel": if run=="reel":
# creates segments and features for the intersect between the graph for chr10 and the gff of IRGSP # creates segments and features for the intersect between the graph for chr10 and the gff of IRGSP
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment