Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
G
GrAnnoT
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
DIADE
dynadiv
GrAnnoT
Commits
2bbe863d
Commit
2bbe863d
authored
1 year ago
by
NMarthe
Browse files
Options
Downloads
Patches
Plain Diff
amélioré la gestion des substitutions quand le nombre de segments est différent
parent
409c59ea
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
Functions.py
+16
-7
16 additions, 7 deletions
Functions.py
Functions_output.py
+41
-17
41 additions, 17 deletions
Functions_output.py
with
57 additions
and
24 deletions
Functions.py
+
16
−
7
View file @
2bbe863d
...
...
@@ -2,6 +2,10 @@ from Graph_gff import Segments, Features, get_feature_start_on_segment, get_feat
global
segments_on_target_genome
segments_on_target_genome
=
{}
global
target_genome_name
target_genome_name
=
"
CM020642.1_Azucena_chromosome10
"
target_genome_name
=
"
genome2_chr10
"
# get the start position of the features on the linear genome, using their coordinates on the graph and the coordinantes of the segments on the genome
def
get_feature_start_on_genome
(
start_seg
,
feat_id
):
seg_start_pos
=
segments_on_target_genome
[
start_seg
][
1
]
...
...
@@ -247,11 +251,11 @@ def get_feature_path(paths,first_seg,last_seg):
first_seg_stranded
=
first_strand
+
first_seg
last_strand
=
convert_strand
(
segments_on_target_genome
[
last_seg
][
3
])
last_seg_stranded
=
last_strand
+
last_seg
index_first_seg
=
int
(
paths
[
"
CM020642.1_Azucena_chromosome10
"
].
index
(
first_seg_stranded
))
index_last_seg
=
int
(
paths
[
"
CM020642.1_Azucena_chromosome10
"
].
index
(
last_seg_stranded
))
index_first_seg
=
int
(
paths
[
target_genome_name
].
index
(
first_seg_stranded
))
index_last_seg
=
int
(
paths
[
target_genome_name
].
index
(
last_seg_stranded
))
first_index
=
min
(
index_first_seg
,
index_last_seg
)
last_index
=
max
(
index_last_seg
,
index_first_seg
)
list_segfeat_azu
=
paths
[
"
CM020642.1_Azucena_chromosome10
"
][
first_index
:
last_index
+
1
]
list_segfeat_azu
=
paths
[
target_genome_name
][
first_index
:
last_index
+
1
]
list_segfeat_azu_corrected
=
[
convert_strand
(
segment_stranded
[
0
])
+
segment_stranded
[
1
:]
for
segment_stranded
in
list_segfeat_azu
]
return
list_segfeat_azu_corrected
...
...
@@ -318,7 +322,7 @@ def reset_var(variation):
variation
.
ref
=
''
variation
.
alt
=
''
def
get_old_new_pos_substitution
(
feat_start
,
variation
,
list_segfeat_azu
,
feat
,
j
):
def
get_old_new_pos_substitution
(
feat_start
,
variation
,
list_segfeat_azu
,
feat
):
pos_old
=
str
(
int
(
Segments
[
variation
.
start_var
].
start
)
-
int
(
feat_start
)
+
1
)
start_feat
=
get_feature_start_on_genome
(
list_segfeat_azu
[
0
][
1
:],
feat
)
start_var
=
int
(
segments_on_target_genome
[
variation
.
start_on_target
][
1
])
...
...
@@ -368,11 +372,16 @@ def init_new_var(variation,type,list_segfeat_nb,list_segfeat_azu,i,j,seg_seq,fea
variation
.
ref
=
seg_seq
[
list_segfeat_nb
[
i
][
1
:]]
variation
.
alt
=
"
-
"
def
continue_var
(
variation
,
seg_seq
,
list_segfeat_nb
,
list_segfeat_azu
,
i
,
j
):
def
continue_var
(
variation
,
seg_seq
,
list_segfeat_nb
,
list_segfeat_azu
,
i
,
j
,
genome_to_continue
):
if
variation
.
type
==
"
substitution
"
:
variation
.
ref
+=
seg_seq
[
list_segfeat_nb
[
i
][
1
:]]
variation
.
alt
+=
seg_seq
[
list_segfeat_azu
[
j
][
1
:]]
if
genome_to_continue
==
0
:
# genome_to_continue allows to choose if the substitution continues for the original or the target genome, or both.
variation
.
ref
+=
seg_seq
[
list_segfeat_nb
[
i
][
1
:]]
variation
.
alt
+=
seg_seq
[
list_segfeat_azu
[
j
][
1
:]]
elif
genome_to_continue
==
1
:
# deletion
variation
.
ref
+=
seg_seq
[
list_segfeat_nb
[
i
][
1
:]]
elif
genome_to_continue
==
2
:
# insertion
variation
.
alt
+=
seg_seq
[
list_segfeat_azu
[
j
][
1
:]]
elif
variation
.
type
==
"
insertion
"
:
variation
.
alt
+=
seg_seq
[
list_segfeat_azu
[
j
][
1
:]]
elif
variation
.
type
==
"
deletion
"
:
...
...
This diff is collapsed.
Click to expand it.
Functions_output.py
+
41
−
17
View file @
2bbe863d
...
...
@@ -55,14 +55,14 @@ def genome_gff(pos_seg, gff, out, gfa):
print
(
"
generation of the genome
'
s gff
"
)
# create variables and open files
[
once
,
detail
,
var
,
stats
]
=
[
False
,
Fals
e
,
True
,
False
]
[
once
,
detail
,
var
,
stats
]
=
[
True
,
Tru
e
,
True
,
False
]
if
var
:
[
paths
,
seg_seq
]
=
get_segments_sequence_and_paths
(
gfa
)
file_out_var
=
open
(
"
variations
_chr10
.txt
"
,
'
w
'
)
file_out_var
=
open
(
"
test_data/
variations.txt
"
,
'
w
'
)
global
output_variations
output_variations
=
[
0
,
""
,
file_out_var
]
if
detail
:
file_out_detail
=
open
(
"
azucena
_detail
_chr10
.gff
"
,
'
w
'
)
file_out_detail
=
open
(
"
test_data/genome2
_detail.gff
"
,
'
w
'
)
global
output_detail_gff
output_detail_gff
=
[
0
,
""
,
file_out_detail
]
if
once
:
...
...
@@ -160,34 +160,55 @@ def print_variations(first_seg,last_seg,feat,paths,seg_seq):
# if both segments are absent in the other genome, its a substitution
variation
.
last_seg_in_target
=
list_segfeat_azu
[
j
][
1
:]
if
(
variation
.
type
==
'
insertion
'
)
|
(
variation
.
type
==
'
deletion
'
):
# print the current variation before treating the substitution
print_current_var
(
variation
,
feat_start
,
list_segfeat_azu
,
feat
,
i
,
j
)
print_current_var
(
variation
,
feat_start
,
list_segfeat_azu
,
feat
,
i
)
print
(
list_segfeat_nb
[
i
],
list_segfeat_azu
[
j
],
variation
.
type
,
"
\n
"
)
reset_var
(
variation
)
var_count
+=
1
if
variation
.
type
==
'
substitution
'
:
continue_var
(
variation
,
seg_seq
,
list_segfeat_nb
,
list_segfeat_azu
,
i
,
j
)
continue_var
(
variation
,
seg_seq
,
list_segfeat_nb
,
list_segfeat_azu
,
i
,
j
,
0
)
else
:
# initiate substitution
init_new_var
(
variation
,
"
substitution
"
,
list_segfeat_nb
,
list_segfeat_azu
,
i
,
j
,
seg_seq
,
feature
)
i
+=
1
;
j
+=
1
else
:
# azu segment not in nb, but nb segment in azu : insertion
if
(
variation
.
type
==
'
substitution
'
)
|
(
variation
.
type
==
'
deletion
'
):
# print the current variation before treating the insertion
print_current_var
(
variation
,
feat_start
,
list_segfeat_azu
,
feat
,
i
,
j
)
else
:
# azu segment not in nb, but nb segment in azu : insertion or continue substitution
if
variation
.
type
==
'
deletion
'
:
# print the current variation before treating the insertion
print_current_var
(
variation
,
feat_start
,
list_segfeat_azu
,
feat
,
i
)
print
(
list_segfeat_nb
[
i
],
list_segfeat_azu
[
j
],
variation
.
type
,
"
\n
"
)
reset_var
(
variation
)
var_count
+=
1
variation
.
last_seg_in_target
=
list_segfeat_azu
[
j
][
1
:]
if
variation
.
type
==
'
insertion
'
:
continue_var
(
variation
,
seg_seq
,
list_segfeat_nb
,
list_segfeat_azu
,
i
,
j
)
continue_var
(
variation
,
seg_seq
,
list_segfeat_nb
,
list_segfeat_azu
,
i
,
j
,
0
)
elif
variation
.
type
==
"
substitution
"
:
while
list_segfeat_azu
[
j
]
!=
list_segfeat_nb
[
i
]:
continue_var
(
variation
,
seg_seq
,
list_segfeat_nb
,
list_segfeat_azu
,
i
,
j
,
2
)
j
+=
1
print_current_var
(
variation
,
feat_start
,
list_segfeat_azu
,
feat
,
i
)
print
(
list_segfeat_nb
[
i
],
list_segfeat_azu
[
j
],
variation
.
type
,
"
\n
"
)
reset_var
(
variation
)
var_count
+=
1
j
-=
1
else
:
# intiate insertion
init_new_var
(
variation
,
"
insertion
"
,
list_segfeat_nb
,
list_segfeat_azu
,
i
,
j
,
seg_seq
,
feature
)
j
+=
1
elif
list_segfeat_nb
[
i
]
not
in
list_segfeat_azu
:
# nb segment not in azu, but azu segment in nb : deletion
if
(
variation
.
type
==
'
substitution
'
)
|
(
variation
.
type
==
'
insertion
'
):
# print the current variation before treating the deletion
print_current_var
(
variation
,
feat_start
,
list_segfeat_azu
,
feat
,
i
,
j
)
elif
list_segfeat_nb
[
i
]
not
in
list_segfeat_azu
:
# nb segment not in azu, but azu segment in nb : deletion or continue substitution
if
variation
.
type
==
'
insertion
'
:
# print the current variation before treating the deletion
print_current_var
(
variation
,
feat_start
,
list_segfeat_azu
,
feat
,
i
)
print
(
list_segfeat_nb
[
i
],
list_segfeat_azu
[
j
],
variation
.
type
,
"
\n
"
)
reset_var
(
variation
)
var_count
+=
1
if
variation
.
type
==
'
deletion
'
:
continue_var
(
variation
,
seg_seq
,
list_segfeat_nb
,
list_segfeat_azu
,
i
,
j
)
continue_var
(
variation
,
seg_seq
,
list_segfeat_nb
,
list_segfeat_azu
,
i
,
j
,
0
)
elif
variation
.
type
==
"
substitution
"
:
while
list_segfeat_azu
[
j
]
!=
list_segfeat_nb
[
i
]:
continue_var
(
variation
,
seg_seq
,
list_segfeat_nb
,
list_segfeat_azu
,
i
,
j
,
1
)
i
+=
1
print_current_var
(
variation
,
feat_start
,
list_segfeat_azu
,
feat
,
i
)
print
(
list_segfeat_nb
[
i
],
list_segfeat_azu
[
j
],
variation
.
type
,
"
\n
"
)
reset_var
(
variation
)
var_count
+=
1
i
-=
1
else
:
# intiate deletion
init_new_var
(
variation
,
"
deletion
"
,
list_segfeat_nb
,
list_segfeat_azu
,
i
,
j
,
seg_seq
,
feature
)
i
+=
1
...
...
@@ -199,19 +220,22 @@ def print_variations(first_seg,last_seg,feat,paths,seg_seq):
else
:
# segment present in both, no variation. print the running indel if there is one
if
variation
.
type
!=
''
:
# print the current variation if there is one
print_current_var
(
variation
,
feat_start
,
list_segfeat_azu
,
feat
,
i
,
j
)
print_current_var
(
variation
,
feat_start
,
list_segfeat_azu
,
feat
,
i
)
print
(
list_segfeat_nb
[
i
],
list_segfeat_azu
[
j
],
variation
.
type
,
"
\n
"
)
var_count
+=
1
reset_var
(
variation
)
variation
.
last_seg_in_target
=
list_segfeat_azu
[
j
][
1
:]
i
+=
1
;
j
+=
1
if
(
variation
.
type
!=
''
):
# if there was a current variation when we reached the end, print it
print_current_var
(
variation
,
feat_start
,
list_segfeat_azu
,
feat
,
i
,
j
)
print_current_var
(
variation
,
feat_start
,
list_segfeat_azu
,
feat
,
i
)
print
(
list_segfeat_nb
[
i
],
list_segfeat_azu
[
j
-
1
],
variation
.
type
,
"
\n
"
)
var_count
+=
1
reset_var
(
variation
)
if
i
<=
len
(
list_segfeat_nb
)
-
1
:
# if we didn't reach the length of the segment list for the first genome, the end is missing for the second genome
print_last_deletion
(
variation
,
list_segfeat_nb
,
i
,
feat_start
,
feature
,
seg_seq
)
print
(
list_segfeat_nb
[
i
],
"
end deletion
"
,
"
\n
"
)
var_count
+=
1
reset_var
(
variation
)
...
...
@@ -219,7 +243,7 @@ def print_variations(first_seg,last_seg,feat,paths,seg_seq):
print_novar
(
variation
)
def
print_current_var
(
variation
,
feat_start
,
list_segfeat_azu
,
feat
,
i
,
j
):
def
print_current_var
(
variation
,
feat_start
,
list_segfeat_azu
,
feat
,
i
):
if
variation
.
type
==
'
insertion
'
:
[
pos_old
,
pos_new
]
=
get_old_new_pos_insertion
(
variation
,
feat_start
,
list_segfeat_azu
,
feat
)
line
=
f
'
{
variation
.
feature_id
}
\t
{
variation
.
feature_type
}
\t
{
variation
.
chr
}
\t
{
variation
.
start_new
}
\t
{
variation
.
stop_new
}
\t
{
variation
.
size_new
}
\t
{
variation
.
inversion
}
\t
{
variation
.
size_diff
}
\t
insertion
\t
-
\t
{
variation
.
alt
}
\t
{
len
(
variation
.
alt
)
}
\t
{
pos_old
}
\t
{
pos_new
}
\n
'
...
...
@@ -229,7 +253,7 @@ def print_current_var(variation,feat_start,list_segfeat_azu,feat,i,j):
line
=
f
'
{
variation
.
feature_id
}
\t
{
variation
.
feature_type
}
\t
{
variation
.
chr
}
\t
{
variation
.
start_new
}
\t
{
variation
.
stop_new
}
\t
{
variation
.
size_new
}
\t
{
variation
.
inversion
}
\t
{
variation
.
size_diff
}
\t
deletion
\t
{
variation
.
ref
}
\t
-
\t
{
len
(
variation
.
ref
)
}
\t
{
pos_old
}
\t
{
pos_new
}
\n
'
write_line
(
line
,
output_variations
,
False
)
elif
variation
.
type
==
'
substitution
'
:
[
pos_old
,
pos_new
]
=
get_old_new_pos_substitution
(
feat_start
,
variation
,
list_segfeat_azu
,
feat
,
j
)
[
pos_old
,
pos_new
]
=
get_old_new_pos_substitution
(
feat_start
,
variation
,
list_segfeat_azu
,
feat
)
if
len
(
variation
.
ref
)
==
len
(
variation
.
alt
):
# if the substituion is between two segment of the same size, print it
size_subs
=
len
(
variation
.
ref
)
line
=
f
'
{
variation
.
feature_id
}
\t
{
variation
.
feature_type
}
\t
{
variation
.
chr
}
\t
{
variation
.
start_new
}
\t
{
variation
.
stop_new
}
\t
{
variation
.
size_new
}
\t
{
variation
.
inversion
}
\t
{
variation
.
size_diff
}
\t
substitution
\t
{
variation
.
ref
}
\t
{
variation
.
alt
}
\t
{
size_subs
}
\t
{
pos_old
}
\t
{
pos_new
}
\n
'
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment