Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
G
GrAnnoT
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
DIADE
dynadiv
GrAnnoT
Commits
2dff4cf5
Commit
2dff4cf5
authored
1 year ago
by
nina.marthe_ird.fr
Browse files
Options
Downloads
Patches
Plain Diff
corrected the inference when there is a frameshift. it simplfied the algorithm (less conditions)
parent
13c4e598
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
inference.py
+58
-92
58 additions, 92 deletions
inference.py
with
58 additions
and
92 deletions
inference.py
+
58
−
92
View file @
2dff4cf5
...
@@ -213,7 +213,7 @@ def findOtherStart(cds,segments_on_target_genome): # look for another start codo
...
@@ -213,7 +213,7 @@ def findOtherStart(cds,segments_on_target_genome): # look for another start codo
if
True
not
in
(
(
stop_pos
%
3
==
start_pos_frame
)
&
(
stop_pos
>
start_pos
)
for
stop_pos
in
stop_pos_list
)
:
if
True
not
in
(
(
stop_pos
%
3
==
start_pos_frame
)
&
(
stop_pos
>
start_pos
)
for
stop_pos
in
stop_pos_list
)
:
#print("codon start candidat trouvé dans l'arn messager,",n,"bases en amont du cds")
#print("codon start candidat trouvé dans l'arn messager,",n,"bases en amont du cds")
# calculer le décalage : si on en trouve un 2 bases en amont, ça décale le cadre de lecture !
# calculer le décalage : si on en trouve un 2 bases en amont, ça décale le cadre de lecture !
frame_shift
=
(
frame_shift
+
n
)
%
3
# vérifier le frame
shift !!
frame_shift
=
(
frame_shift
+
n
)
%
3
# vérifier le frameshift !!
print
(
"
the start codon at the position
"
,
start_pos
,
"
,
"
,
n
,
"
bases before the CDS, doesn
'
t have a stop codon after in the same reading frame
"
)
print
(
"
the start codon at the position
"
,
start_pos
,
"
,
"
,
n
,
"
bases before the CDS, doesn
'
t have a stop codon after in the same reading frame
"
)
else
:
else
:
print
(
"
the start codon at the position
"
,
start_pos
,
"
,
"
,
n
,
"
bases before the CDS, has a stop codon after in the same reading frame
"
)
print
(
"
the start codon at the position
"
,
start_pos
,
"
,
"
,
n
,
"
bases before the CDS, has a stop codon after in the same reading frame
"
)
...
@@ -256,7 +256,7 @@ for feature in Features.values(): # add the sequence of all features
...
@@ -256,7 +256,7 @@ for feature in Features.values(): # add the sequence of all features
for
cds_id
in
cds_var
.
keys
():
for
cds_id
in
cds_var
.
keys
():
cds
=
Features
[
cds_id
]
cds
=
Features
[
cds_id
]
print
(
"
analysis of the variations in the CDS
"
,
cds_id
,
"
:
\n
"
)
print
(
"
analysis of the variations in the CDS
"
,
cds_id
,
"
:
\n
"
)
frame
_
shift
=
0
frameshift
=
0
for
index
,
var
in
enumerate
(
cds_var
[
cds_id
]):
# for each variation in the current cds :
for
index
,
var
in
enumerate
(
cds_var
[
cds_id
]):
# for each variation in the current cds :
type_var
=
var
[
8
]
type_var
=
var
[
8
]
if
type_var
!=
"
no_var
"
:
# if there is a variation
if
type_var
!=
"
no_var
"
:
# if there is a variation
...
@@ -279,106 +279,72 @@ for cds_id in cds_var.keys():
...
@@ -279,106 +279,72 @@ for cds_id in cds_var.keys():
#findOtherStart(cds,segments_on_target_genome) # for now we don't look for another start codon
#findOtherStart(cds,segments_on_target_genome) # for now we don't look for another start codon
break
break
if
abs
(
length_alt
-
length_ref
)
%
3
==
0
:
# size diff 3k -> no frame shift.
old_frameshift
=
frameshift
frameshift
=
(
frameshift
+
length_ref
-
length_alt
)
%
3
if
(
posVar
[
0
])
%
3
==
0
:
#size diff 3k, position 3k
if
old_frameshift
!=
frameshift
:
print
(
"
variation between two codons not causing a frameshift
"
)
print
(
"
variation causing a frameshift
"
)
if
type_var
==
"
insertion
"
:
# frameshift=0 -> reading frame recovered. may need to get a base before.
print
(
"
insertion of
"
,
var
[
10
])
# frameshift=1 -> frameshift of 1 base to the right
elif
type_var
==
"
deletion
"
:
# frameshift=2 -> frameshift of 2 bases to the right
print
(
"
deletion of
"
,
var
[
9
])
if
frameshift
==
0
:
else
:
print
(
"
recovery of the original reading frame
"
)
print
(
"
substitution of
"
,
var
[
9
],
"
by
"
,
var
[
10
])
if
old_frameshift
==
0
:
print
(
"
loss of the original reading frame
"
)
#else:
# print("variation not causing a frameshift")
len_fragment_after
=
(
3
-
length_ref
)
%
3
deleted_sequence
=
cds
.
sequence
[
posVar
[
0
]:
posVar
[
0
]
+
length_ref
+
len_fragment_after
]
inserted_sequence
=
sequence_target
[
posVar
[
1
]:
posVar
[
1
]
+
length_alt
+
len_fragment_after
]
stop
=
print_variation_change
(
deleted_sequence
,
inserted_sequence
)
if
stop
:
break
else
:
# size diff 3k, position !=3k
if
type_var
==
"
insertion
"
:
print
(
"
variation in the middle of a codon not causing a frameshift
"
)
print
(
"
insertion of
"
,
var
[
10
])
if
type_var
==
"
insertion
"
:
elif
type_var
==
"
deletion
"
:
print
(
"
insertion of
"
,
var
[
10
])
print
(
"
deletion of
"
,
var
[
9
])
elif
type_var
==
"
deletion
"
:
else
:
print
(
"
deletion of
"
,
var
[
9
])
print
(
"
substitution of
"
,
var
[
9
],
"
by
"
,
var
[
10
])
else
:
print
(
"
substitution of
"
,
var
[
9
],
"
by
"
,
var
[
10
])
len_fragment_before
=
(
posVar
[
0
])
%
3
len_fragment_before
_del
=
(
posVar
[
0
])
%
3
len_fragment_after
=
(
3
-
(
len_fragment_before
+
length_ref
)
)
%
3
len_fragment_before
_ins
=
(
posVar
[
1
]
)
%
3
total_ins
=
sequence_target
[
posVar
[
1
]
-
len_fragment_before
:
posVar
[
1
]
+
length_alt
+
len_fragment_after
]
if
frameshift
==
0
:
total_del
=
cds
.
sequence
[
posVar
[
0
]
-
len_fragment_before
:
posVar
[
0
]
+
length_ref
+
len_fragment_after
]
# print only the local change.
len_fragment_after_del
=
(
3
-
(
len_fragment_before_del
+
length_ref
))
%
3
len_fragment_after_ins
=
(
3
-
(
len_fragment_before_ins
+
length_alt
))
%
3
total_ins
=
sequence_target
[
posVar
[
1
]
-
len_fragment_before_ins
:
posVar
[
1
]
+
length_alt
+
len_fragment_after_ins
]
total_del
=
cds
.
sequence
[
posVar
[
0
]
-
len_fragment_before_del
:
posVar
[
0
]
+
length_ref
+
len_fragment_after_del
]
stop
=
print_variation_change
(
total_del
,
total_ins
)
if
stop
:
break
stop
=
print_variation_change
(
total_del
,
total_ins
)
else
:
# print changes from local var to next var. at the next var, we will see if the reading frame is recovered.
print
(
"
frameshift of
"
,
frameshift
,
"
base(s) to the right.
"
)
if
index
==
len
(
cds_var
[
cds_id
])
-
1
:
# it is the last variation. translate until the end of the cds.
total_total_del
=
cds
.
sequence
[
posVar
[
0
]
-
len_fragment_before_del
:]
total_total_ins
=
sequence_target
[
posVar
[
1
]
-
len_fragment_before_ins
:]
stop
=
print_variation_change
(
total_total_del
,
total_total_ins
)
if
stop
:
if
stop
:
break
break
# possible that it prints too many variations : for ex if we have a snp on the first and the last base of a codon,
else
:
# not the last variation. translate until the next var.
# while printing the effect of the first snp we aso use the second one.
nextVar
=
cds_var
[
cds_id
][
index
+
1
]
posNextVar
=
[
int
(
nextVar
[
12
]),
int
(
nextVar
[
13
])]
else
:
# size diff !=3k
if
nextVar
[
8
]
==
"
insertion
"
:
print
(
"
variation causing a frameshift
"
)
length_ref_nextvar
=
0
old_frameshift
=
frame_shift
else
:
frame_shift
=
(
frame_shift
+
length_ref
-
length_alt
)
%
3
length_ref_nextvar
:
len
(
nextVar
[
9
])
# frameshift=0 -> reading frame recovered. may need to get a base before.
if
nextVar
[
8
]
==
"
deletion
"
:
# frameshift=1 -> frame shift of 1 base to the right
length_alt_nextvar
=
0
# frameshift=2 -> frame shift of 2 bases to the right
else
:
length_alt_nextvar
=
len
(
nextVar
[
10
])
if
type_var
==
"
insertion
"
:
print
(
"
insertion of
"
,
var
[
10
])
len_fragment_before_del_nextvar
=
(
posNextVar
[
0
])
%
3
elif
type_var
==
"
deletion
"
:
len_fragment_before_ins_nextvar
=
(
posNextVar
[
1
])
%
3
print
(
"
deletion of
"
,
var
[
9
])
total_total_del
=
cds
.
sequence
[
posVar
[
0
]
-
len_fragment_before_del
:
posNextVar
[
0
]
-
len_fragment_before_del_nextvar
]
else
:
total_total_ins
=
sequence_target
[
posVar
[
1
]
-
len_fragment_before_ins
:
posNextVar
[
1
]
-
len_fragment_before_ins_nextvar
]
print
(
"
substitution of
"
,
var
[
9
],
"
by
"
,
var
[
10
])
stop
=
print_variation_change
(
total_total_del
,
total_total_ins
)
len_fragment_before_del
=
(
posVar
[
0
])
%
3
len_fragment_before_ins
=
(
posVar
[
1
])
%
3
print
(
len_fragment_before_ins
,
len_fragment_before_del
)
if
frame_shift
==
0
:
# print only the local change.
len_fragment_after_del
=
(
3
-
(
len_fragment_before_del
+
length_ref
))
%
3
len_fragment_after_ins
=
(
3
-
(
len_fragment_before_ins
+
length_alt
))
%
3
total_ins
=
sequence_target
[
posVar
[
1
]
-
len_fragment_before_ins
:
posVar
[
1
]
+
length_alt
+
len_fragment_after_ins
]
total_del
=
cds
.
sequence
[
posVar
[
0
]
-
len_fragment_before_del
:
posVar
[
0
]
+
length_ref
+
len_fragment_after_del
]
print
(
"
recovery of the original reading frame
"
)
stop
=
print_variation_change
(
total_del
,
total_ins
)
if
stop
:
if
stop
:
break
break
# possible that it prints too many variations : for ex if we have a snp on the first and the last base of a codon,
else
:
# while printing the effect of the first snp we aso use the second one.
# print changes from local var to next var. at the next var, we will see if the reading frame is recovered.
print
(
"
creating frame shift of
"
,
frame_shift
,
"
base(s) to the right.
"
)
if
old_frameshift
==
0
:
print
(
"
loss of the original reading frame
"
)
if
index
==
len
(
cds_var
[
cds_id
])
-
1
:
# it is the last variation. translate until the end of the cds.
total_total_del
=
cds
.
sequence
[
posVar
[
0
]
-
len_fragment_before_del
:]
total_total_ins
=
sequence_target
[
posVar
[
1
]
-
len_fragment_before_ins
:]
stop
=
print_variation_change
(
total_total_del
,
total_total_ins
)
if
stop
:
break
else
:
nextVar
=
cds_var
[
cds_id
][
index
+
1
]
posNextVar
=
[
int
(
nextVar
[
12
]),
int
(
nextVar
[
13
])]
if
nextVar
[
8
]
==
"
insertion
"
:
length_ref_nextvar
=
0
else
:
length_ref_nextvar
:
len
(
nextVar
[
9
])
if
nextVar
[
8
]
==
"
deletion
"
:
length_alt_nextvar
=
0
else
:
length_alt_nextvar
=
len
(
nextVar
[
10
])
len_fragment_before_del_nextvar
=
(
posNextVar
[
0
])
%
3
len_fragment_before_ins_nextvar
=
(
posNextVar
[
1
])
%
3
total_total_del
=
cds
.
sequence
[
posVar
[
0
]
-
len_fragment_before_del
:
posNextVar
[
0
]
-
len_fragment_before_del_nextvar
]
total_total_ins
=
sequence_target
[
posVar
[
1
]
-
len_fragment_before_ins
:
posNextVar
[
1
]
-
len_fragment_before_ins_nextvar
]
stop
=
print_variation_change
(
total_total_del
,
total_total_ins
)
if
stop
:
break
print
(
"
\n
"
)
print
(
"
\n
"
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment