Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
G
GrAnnoT
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
DIADE
dynadiv
GrAnnoT
Commits
2dff4cf5
Commit
2dff4cf5
authored
1 year ago
by
nina.marthe_ird.fr
Browse files
Options
Downloads
Patches
Plain Diff
Corrected the inference when there is a frameshift. This simplified the algorithm (fewer conditions).
parent
13c4e598
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
inference.py
+58
-92
58 additions, 92 deletions
inference.py
with
58 additions
and
92 deletions
inference.py
+
58
−
92
View file @
2dff4cf5
...
...
@@ -213,7 +213,7 @@ def findOtherStart(cds,segments_on_target_genome): # look for another start codo
if
True
not
in
(
(
stop_pos
%
3
==
start_pos_frame
)
&
(
stop_pos
>
start_pos
)
for
stop_pos
in
stop_pos_list
)
:
#print("codon start candidat trouvé dans l'arn messager,",n,"bases en amont du cds")
# calculer le décalage : si on en trouve un 2 bases en amont, ça décale le cadre de lecture !
frame_shift
=
(
frame_shift
+
n
)
%
3
# vérifier le frame
shift !!
frame_shift
=
(
frame_shift
+
n
)
%
3
# vérifier le frameshift !!
print
(
"
the start codon at the position
"
,
start_pos
,
"
,
"
,
n
,
"
bases before the CDS, doesn
'
t have a stop codon after in the same reading frame
"
)
else
:
print
(
"
the start codon at the position
"
,
start_pos
,
"
,
"
,
n
,
"
bases before the CDS, has a stop codon after in the same reading frame
"
)
...
...
@@ -256,7 +256,7 @@ for feature in Features.values(): # add the sequence of all features
for
cds_id
in
cds_var
.
keys
():
cds
=
Features
[
cds_id
]
print
(
"
analysis of the variations in the CDS
"
,
cds_id
,
"
:
\n
"
)
frame
_
shift
=
0
frameshift
=
0
for
index
,
var
in
enumerate
(
cds_var
[
cds_id
]):
# for each variation in the current cds :
type_var
=
var
[
8
]
if
type_var
!=
"
no_var
"
:
# if there is a variation
...
...
@@ -279,106 +279,72 @@ for cds_id in cds_var.keys():
#findOtherStart(cds,segments_on_target_genome) # for now we don't look for another start codon
break
if
abs
(
length_alt
-
length_ref
)
%
3
==
0
:
# size diff 3k -> no frame shift.
if
(
posVar
[
0
])
%
3
==
0
:
#size diff 3k, position 3k
print
(
"
variation between two codons not causing a frameshift
"
)
if
type_var
==
"
insertion
"
:
print
(
"
insertion of
"
,
var
[
10
])
elif
type_var
==
"
deletion
"
:
print
(
"
deletion of
"
,
var
[
9
])
else
:
print
(
"
substitution of
"
,
var
[
9
],
"
by
"
,
var
[
10
])
old_frameshift
=
frameshift
frameshift
=
(
frameshift
+
length_ref
-
length_alt
)
%
3
if
old_frameshift
!=
frameshift
:
print
(
"
variation causing a frameshift
"
)
# frameshift=0 -> reading frame recovered. may need to get a base before.
# frameshift=1 -> frameshift of 1 base to the right
# frameshift=2 -> frameshift of 2 bases to the right
if
frameshift
==
0
:
print
(
"
recovery of the original reading frame
"
)
if
old_frameshift
==
0
:
print
(
"
loss of the original reading frame
"
)
#else:
# print("variation not causing a frameshift")
len_fragment_after
=
(
3
-
length_ref
)
%
3
deleted_sequence
=
cds
.
sequence
[
posVar
[
0
]:
posVar
[
0
]
+
length_ref
+
len_fragment_after
]
inserted_sequence
=
sequence_target
[
posVar
[
1
]:
posVar
[
1
]
+
length_alt
+
len_fragment_after
]
stop
=
print_variation_change
(
deleted_sequence
,
inserted_sequence
)
if
stop
:
break
else
:
# size diff 3k, position !=3k
print
(
"
variation in the middle of a codon not causing a frameshift
"
)
if
type_var
==
"
insertion
"
:
print
(
"
insertion of
"
,
var
[
10
])
elif
type_var
==
"
deletion
"
:
print
(
"
deletion of
"
,
var
[
9
])
else
:
print
(
"
substitution of
"
,
var
[
9
],
"
by
"
,
var
[
10
])
if
type_var
==
"
insertion
"
:
print
(
"
insertion of
"
,
var
[
10
])
elif
type_var
==
"
deletion
"
:
print
(
"
deletion of
"
,
var
[
9
])
else
:
print
(
"
substitution of
"
,
var
[
9
],
"
by
"
,
var
[
10
])
len_fragment_before
=
(
posVar
[
0
])
%
3
len_fragment_after
=
(
3
-
(
len_fragment_before
+
length_ref
)
)
%
3
len_fragment_before
_del
=
(
posVar
[
0
])
%
3
len_fragment_before
_ins
=
(
posVar
[
1
]
)
%
3
total_ins
=
sequence_target
[
posVar
[
1
]
-
len_fragment_before
:
posVar
[
1
]
+
length_alt
+
len_fragment_after
]
total_del
=
cds
.
sequence
[
posVar
[
0
]
-
len_fragment_before
:
posVar
[
0
]
+
length_ref
+
len_fragment_after
]
if
frameshift
==
0
:
# print only the local change.
len_fragment_after_del
=
(
3
-
(
len_fragment_before_del
+
length_ref
))
%
3
len_fragment_after_ins
=
(
3
-
(
len_fragment_before_ins
+
length_alt
))
%
3
total_ins
=
sequence_target
[
posVar
[
1
]
-
len_fragment_before_ins
:
posVar
[
1
]
+
length_alt
+
len_fragment_after_ins
]
total_del
=
cds
.
sequence
[
posVar
[
0
]
-
len_fragment_before_del
:
posVar
[
0
]
+
length_ref
+
len_fragment_after_del
]
stop
=
print_variation_change
(
total_del
,
total_ins
)
if
stop
:
break
stop
=
print_variation_change
(
total_del
,
total_ins
)
else
:
# print changes from local var to next var. at the next var, we will see if the reading frame is recovered.
print
(
"
frameshift of
"
,
frameshift
,
"
base(s) to the right.
"
)
if
index
==
len
(
cds_var
[
cds_id
])
-
1
:
# it is the last variation. translate until the end of the cds.
total_total_del
=
cds
.
sequence
[
posVar
[
0
]
-
len_fragment_before_del
:]
total_total_ins
=
sequence_target
[
posVar
[
1
]
-
len_fragment_before_ins
:]
stop
=
print_variation_change
(
total_total_del
,
total_total_ins
)
if
stop
:
break
# It is possible that this prints too many variations: e.g. if we have a SNP on the first and the last base of a codon,
# while printing the effect of the first SNP we also use the second one.
else
:
# not the last variation. translate until the next var.
nextVar
=
cds_var
[
cds_id
][
index
+
1
]
posNextVar
=
[
int
(
nextVar
[
12
]),
int
(
nextVar
[
13
])]
else
:
# size diff !=3k
print
(
"
variation causing a frameshift
"
)
old_frameshift
=
frame_shift
frame_shift
=
(
frame_shift
+
length_ref
-
length_alt
)
%
3
# frameshift=0 -> reading frame recovered. may need to get a base before.
# frameshift=1 -> frame shift of 1 base to the right
# frameshift=2 -> frame shift of 2 bases to the right
if
type_var
==
"
insertion
"
:
print
(
"
insertion of
"
,
var
[
10
])
elif
type_var
==
"
deletion
"
:
print
(
"
deletion of
"
,
var
[
9
])
else
:
print
(
"
substitution of
"
,
var
[
9
],
"
by
"
,
var
[
10
])
len_fragment_before_del
=
(
posVar
[
0
])
%
3
len_fragment_before_ins
=
(
posVar
[
1
])
%
3
print
(
len_fragment_before_ins
,
len_fragment_before_del
)
if
frame_shift
==
0
:
# print only the local change.
len_fragment_after_del
=
(
3
-
(
len_fragment_before_del
+
length_ref
))
%
3
len_fragment_after_ins
=
(
3
-
(
len_fragment_before_ins
+
length_alt
))
%
3
total_ins
=
sequence_target
[
posVar
[
1
]
-
len_fragment_before_ins
:
posVar
[
1
]
+
length_alt
+
len_fragment_after_ins
]
total_del
=
cds
.
sequence
[
posVar
[
0
]
-
len_fragment_before_del
:
posVar
[
0
]
+
length_ref
+
len_fragment_after_del
]
print
(
"
recovery of the original reading frame
"
)
stop
=
print_variation_change
(
total_del
,
total_ins
)
if
nextVar
[
8
]
==
"
insertion
"
:
length_ref_nextvar
=
0
else
:
length_ref_nextvar
:
len
(
nextVar
[
9
])
if
nextVar
[
8
]
==
"
deletion
"
:
length_alt_nextvar
=
0
else
:
length_alt_nextvar
=
len
(
nextVar
[
10
])
len_fragment_before_del_nextvar
=
(
posNextVar
[
0
])
%
3
len_fragment_before_ins_nextvar
=
(
posNextVar
[
1
])
%
3
total_total_del
=
cds
.
sequence
[
posVar
[
0
]
-
len_fragment_before_del
:
posNextVar
[
0
]
-
len_fragment_before_del_nextvar
]
total_total_ins
=
sequence_target
[
posVar
[
1
]
-
len_fragment_before_ins
:
posNextVar
[
1
]
-
len_fragment_before_ins_nextvar
]
stop
=
print_variation_change
(
total_total_del
,
total_total_ins
)
if
stop
:
break
else
:
# print changes from local var to next var. at the next var, we will see if the reading frame is recovered.
print
(
"
creating frame shift of
"
,
frame_shift
,
"
base(s) to the right.
"
)
if
old_frameshift
==
0
:
print
(
"
loss of the original reading frame
"
)
if
index
==
len
(
cds_var
[
cds_id
])
-
1
:
# it is the last variation. translate until the end of the cds.
total_total_del
=
cds
.
sequence
[
posVar
[
0
]
-
len_fragment_before_del
:]
total_total_ins
=
sequence_target
[
posVar
[
1
]
-
len_fragment_before_ins
:]
stop
=
print_variation_change
(
total_total_del
,
total_total_ins
)
if
stop
:
break
else
:
nextVar
=
cds_var
[
cds_id
][
index
+
1
]
posNextVar
=
[
int
(
nextVar
[
12
]),
int
(
nextVar
[
13
])]
if
nextVar
[
8
]
==
"
insertion
"
:
length_ref_nextvar
=
0
else
:
length_ref_nextvar
:
len
(
nextVar
[
9
])
if
nextVar
[
8
]
==
"
deletion
"
:
length_alt_nextvar
=
0
else
:
length_alt_nextvar
=
len
(
nextVar
[
10
])
len_fragment_before_del_nextvar
=
(
posNextVar
[
0
])
%
3
len_fragment_before_ins_nextvar
=
(
posNextVar
[
1
])
%
3
total_total_del
=
cds
.
sequence
[
posVar
[
0
]
-
len_fragment_before_del
:
posNextVar
[
0
]
-
len_fragment_before_del_nextvar
]
total_total_ins
=
sequence_target
[
posVar
[
1
]
-
len_fragment_before_ins
:
posNextVar
[
1
]
-
len_fragment_before_ins_nextvar
]
stop
=
print_variation_change
(
total_total_del
,
total_total_ins
)
if
stop
:
break
# It is possible that this prints too many variations: e.g. if we have a SNP on the first and the last base of a codon,
# while printing the effect of the first SNP we also use the second one.
print
(
"
\n
"
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment