Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
G
GrAnnoT
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
DIADE
dynadiv
GrAnnoT
Commits
5bad4c14
Commit
5bad4c14
authored
1 year ago
by
nina.marthe_ird.fr
Browse files
Options
Downloads
Patches
Plain Diff
nom du génome cible maintenant spécifié dans le main, plus simple pour le changer
parent
85ed4724
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
Functions.py
+3
-6
3 additions, 6 deletions
Functions.py
Functions_output.py
+4
-4
4 additions, 4 deletions
Functions_output.py
main.py
+51
-23
51 additions, 23 deletions
main.py
with
58 additions
and
33 deletions
Functions.py
+
3
−
6
View file @
5bad4c14
...
@@ -2,9 +2,6 @@ from Graph_gff import Segments, Features, get_feature_start_on_segment, get_feat
...
@@ -2,9 +2,6 @@ from Graph_gff import Segments, Features, get_feature_start_on_segment, get_feat
global
segments_on_target_genome
global
segments_on_target_genome
segments_on_target_genome
=
{}
segments_on_target_genome
=
{}
global
target_genome_name
target_genome_name
=
"
CM020642.1_Azucena_chromosome10
"
target_genome_name
=
"
genome4_chr10
"
# get the start position of the features on the linear genome, using their coordinates on the graph and the coordinates of the segments on the genome
# get the start position of the features on the linear genome, using their coordinates on the graph and the coordinates of the segments on the genome
def
get_feature_start_on_genome
(
start_seg
,
feat_id
):
def
get_feature_start_on_genome
(
start_seg
,
feat_id
):
...
@@ -245,7 +242,7 @@ def compare_strand(list_1,list_2,list_1_unstrand,list_2_unstrand):
...
@@ -245,7 +242,7 @@ def compare_strand(list_1,list_2,list_1_unstrand,list_2_unstrand):
same_strand_count
+=
1
same_strand_count
+=
1
return
[
seg_common
,
same_strand_count
]
return
[
seg_common
,
same_strand_count
]
def
get_feature_path
(
paths
,
first_seg
,
last_seg
):
def
get_feature_path
(
paths
,
first_seg
,
last_seg
,
target_genome_name
):
# find the path in azucena.
# find the path in azucena.
first_strand
=
convert_strand
(
segments_on_target_genome
[
first_seg
][
3
])
first_strand
=
convert_strand
(
segments_on_target_genome
[
first_seg
][
3
])
first_seg_stranded
=
first_strand
+
first_seg
first_seg_stranded
=
first_strand
+
first_seg
...
@@ -325,7 +322,7 @@ class Variation:
...
@@ -325,7 +322,7 @@ class Variation:
#def __str__(self):
#def __str__(self):
# return f"id={self.id}, position on the original genome={self.chr}:{self.start}-{self.stop}, size={self.size}, features={self.features}"
# return f"id={self.id}, position on the original genome={self.chr}:{self.start}-{self.stop}, size={self.size}, features={self.features}"
def
create_var
(
feature_id
,
first_seg
,
last_seg
,
paths
):
def
create_var
(
feature_id
,
first_seg
,
last_seg
,
paths
,
target_genome_name
):
feature
=
Features
[
feature_id
]
feature
=
Features
[
feature_id
]
start_new_genome
=
get_feature_start_on_genome
(
first_seg
,
feature_id
)
start_new_genome
=
get_feature_start_on_genome
(
first_seg
,
feature_id
)
stop_new_genome
=
get_feature_stop_on_genome
(
last_seg
,
feature_id
)
stop_new_genome
=
get_feature_stop_on_genome
(
last_seg
,
feature_id
)
...
@@ -333,7 +330,7 @@ def create_var(feature_id,first_seg,last_seg,paths):
...
@@ -333,7 +330,7 @@ def create_var(feature_id,first_seg,last_seg,paths):
size_diff
=
str
(
size_new_genome
-
feature
.
size
)
size_diff
=
str
(
size_new_genome
-
feature
.
size
)
# get feature paths on the original genome and on the target genome
# get feature paths on the original genome and on the target genome
list_segfeat_azu
=
get_feature_path
(
paths
,
first_seg
,
last_seg
)
list_segfeat_azu
=
get_feature_path
(
paths
,
first_seg
,
last_seg
,
target_genome_name
)
list_segfeat_nb
=
feature
.
segments_list
list_segfeat_nb
=
feature
.
segments_list
[
list_segfeat_nb
,
list_segfeat_azu
,
inversion
]
=
detect_gene_inversion
(
list_segfeat_nb
,
list_segfeat_azu
)
[
list_segfeat_nb
,
list_segfeat_azu
,
inversion
]
=
detect_gene_inversion
(
list_segfeat_nb
,
list_segfeat_azu
)
...
...
This diff is collapsed.
Click to expand it.
Functions_output.py
+
4
−
4
View file @
5bad4c14
...
@@ -51,7 +51,7 @@ def gff_one(first_seg,last_seg,feature_id,list_seg,max_diff):
...
@@ -51,7 +51,7 @@ def gff_one(first_seg,last_seg,feature_id,list_seg,max_diff):
# writes the gff of azucena using the gff of the graph
# writes the gff of azucena using the gff of the graph
def
genome_gff
(
pos_seg
,
gff
,
gfa
,
out_once
,
out_detail
,
out_var
):
def
genome_gff
(
pos_seg
,
gff
,
gfa
,
out_once
,
out_detail
,
out_var
,
target_genome_name
):
print
(
"
generation of the genome
'
s gff
"
)
print
(
"
generation of the genome
'
s gff
"
)
# create variables and open files
# create variables and open files
...
@@ -112,7 +112,7 @@ def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var):
...
@@ -112,7 +112,7 @@ def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var):
# outputs the detail of variations of the feature :
# outputs the detail of variations of the feature :
if
var
:
if
var
:
print_variations
(
first_seg
,
last_seg
,
feat
,
paths
,
seg_seq
)
print_variations
(
first_seg
,
last_seg
,
feat
,
paths
,
seg_seq
,
target_genome_name
)
write_line
(
""
,
output_variations
,
True
)
write_line
(
""
,
output_variations
,
True
)
if
stats
==
True
:
if
stats
==
True
:
...
@@ -141,10 +141,10 @@ def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var):
...
@@ -141,10 +141,10 @@ def genome_gff(pos_seg, gff, gfa, out_once, out_detail, out_var):
# functions to get the detail of the variations in the features
# functions to get the detail of the variations in the features
def
print_variations
(
first_seg
,
last_seg
,
feat
,
paths
,
seg_seq
):
def
print_variations
(
first_seg
,
last_seg
,
feat
,
paths
,
seg_seq
,
target_genome_name
):
if
(
first_seg
!=
''
):
# if the feature is not completely absent # add the else, output absent features
if
(
first_seg
!=
''
):
# if the feature is not completely absent # add the else, output absent features
[
variation
,
list_segfeat_nb
,
list_segfeat_azu
]
=
create_var
(
feat
,
first_seg
,
last_seg
,
paths
)
# removes the strands in the segment lists
[
variation
,
list_segfeat_nb
,
list_segfeat_azu
]
=
create_var
(
feat
,
first_seg
,
last_seg
,
paths
,
target_genome_name
)
# removes the strands in the segment lists
feature
=
Features
[
feat
]
feature
=
Features
[
feat
]
feat_start
=
feature
.
start
feat_start
=
feature
.
start
# loop to go through both paths with i and j
# loop to go through both paths with i and j
...
...
This diff is collapsed.
Click to expand it.
main.py
+
51
−
23
View file @
5bad4c14
...
@@ -7,7 +7,53 @@ from Graph_gff import *
...
@@ -7,7 +7,53 @@ from Graph_gff import *
from
Functions_output
import
*
from
Functions_output
import
*
#from inference import *
#from inference import *
run
=
"
test
"
run
=
"
command_line
"
if run == "command_line":
    # Command-line mode: the input paths are read from sys.argv and every
    # output file name is derived from the input file names.
    #   argv[1]  intersect file
    #   argv[2]  gfa file with walks
    #   argv[3]  bed file with positions of the segments on the target genome
    #   argv[4]  optional name of the target genome; when omitted, the name
    #            is derived from the bed file name
    import sys

    usage_message = "expected input : intersect, gfa file with walks, bed file with positions of the segments on the target genome"

    def _file_stem(path):
        # File name without its directory, cut at the first ".".
        # Same result as the former split(".")[0:-1][0] for dotted names,
        # but does not raise IndexError when the name has no "." at all.
        return path.split("/")[-1].split(".")[0]

    # The help flag is tested before the argument count; otherwise a lone
    # "-h" is reported as a missing-argument error and the description of
    # the outputs is never printed.
    if len(sys.argv) > 1 and sys.argv[1] == "-h":
        print(usage_message)
        print("output : graph gff, graph gaf, target genome gff*2+variations")
        sys.exit(1)  # exit status kept at 1, as before
    if len(sys.argv) < 4:  # intersect, gfa, pos_seg
        print(usage_message)
        sys.exit(1)

    intersect = sys.argv[1]
    gfa = sys.argv[2]
    pos_seg = sys.argv[3]

    # graph outputs are named after the gfa file,
    # target genome outputs after the bed file (pos_seg)
    gfa_stem = _file_stem(gfa)
    pos_seg_stem = _file_stem(pos_seg)
    out_gff = gfa_stem + ".gff"
    out_gaf = gfa_stem + ".gaf"
    out_once = pos_seg_stem + ".gff"
    out_detail = pos_seg_stem + "_detail.gff"
    out_var = pos_seg_stem + "_variations.txt"

    # The optional fourth argument is sys.argv[4]; the previous code read
    # sys.argv[5] when len(sys.argv) == 5, which always raised IndexError.
    if len(sys.argv) >= 5:
        target_genome_name = sys.argv[4]
    else:
        target_genome_name = pos_seg_stem
    print(target_genome_name)

    # input : intersect, gfa, pos_seg.
    # out_gff, out_gaf, out_once, out_var, out_detail.
    # out_once/detail: take pos_seg, remove the .bed, add _detail when needed.
    # same for var; rename it azu_var_chr10.txt ?

    load_intersect(intersect)

    # outputs the gff and gaf of the graph
    graph_gff(out_gff)
    graph_gaf(out_gaf, gfa)

    # outputs the gff of the target genome
    genome_gff(pos_seg, out_gff, gfa, out_once, out_detail, out_var, target_genome_name)
if
run
==
"
test
"
:
if
run
==
"
test
"
:
intersect_path
=
'
/home/nina/annotpangenome/test_data/input_data_inf/intersect.bed
'
intersect_path
=
'
/home/nina/annotpangenome/test_data/input_data_inf/intersect.bed
'
...
@@ -27,29 +73,10 @@ if run=="test":
...
@@ -27,29 +73,10 @@ if run=="test":
gff
=
"
test_data/graph.gff
"
gff
=
"
test_data/graph.gff
"
# outputs the gff of a genome for the chr10
target_genome_name
=
"
genome4_chr10
"
genome_gff
(
pos_seg
,
gff
,
gfa
,
out_once
,
out_detail
,
out_var
)
if
run
==
"
chr3
"
:
intersect_path
=
'
/home/nina/annotpangenome/chr3/intersect_segments_genes_irgsp_chr3.bed
'
load_intersect
(
intersect_path
)
# outputs the gff of the graph for chr10
output_gff
=
'
graph_chr3.gff
'
gfa
=
"
test_graph
"
graph_gff
(
output_gff
)
pos_seg
=
"
seg_coord/AzucenaRS1_chromosome3_corrected.bed
"
out_once
=
"
azucena_chr3.gff
"
out_var
=
"
variations_chr3.txt
"
out_detail
=
"
azucena_detail_chr3.gff
"
gff
=
"
graph_chr3.gff
"
# outputs the gff of a genome for the chr10
# outputs the gff of a genome for the chr10
genome_gff
(
pos_seg
,
gff
,
gfa
,
out_once
,
out_detail
,
out_var
)
genome_gff
(
pos_seg
,
gff
,
gfa
,
out_once
,
out_detail
,
out_var
,
target_genome_name
)
if
run
==
"
reel
"
:
if
run
==
"
reel
"
:
...
@@ -79,8 +106,9 @@ if run=="reel":
...
@@ -79,8 +106,9 @@ if run=="reel":
out_var
=
"
variations_chr10.gff
"
out_var
=
"
variations_chr10.gff
"
gff
=
"
graph_chr10.gff
"
gff
=
"
graph_chr10.gff
"
target_genome_name
=
"
CM020642.1_Azucena_chromosome10
"
# outputs the gff of a genome for the chr10
# outputs the gff of a genome for the chr10
genome_gff
(
pos_seg
,
gff
,
gfa
,
out_once
,
out_detail
,
out_var
)
genome_gff
(
pos_seg
,
gff
,
gfa
,
out_once
,
out_detail
,
out_var
,
target_genome_name
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment