Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
O
Oceano2python
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
US191
Oceano2python
Commits
e1ced3d2
Commit
e1ced3d2
authored
2 years ago
by
Jacques Grelet
Browse files
Options
Downloads
Patches
Plain Diff
use regex for filename to reorder the list, dict now
parent
fdde2570
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
config.toml
+6
-9
6 additions, 9 deletions
config.toml
profile.py
+33
-16
33 additions, 16 deletions
profile.py
with
39 additions
and
25 deletions
config.toml
+
6
−
9
View file @
e1ced3d2
...
...
@@ -26,8 +26,8 @@ PI = "BOURLES"
CREATOR
=
"Jacques.Grelet@ird.fr"
[ctd]
cruisePrefix
=
"fr29"
station
PrefixLength
=
3
# extract profile/station number from fileName
#
station
= 'fr29(\d{3})'
titleSummary
=
"CTD profiles processed during PIRATA-FR29 cruise"
typeInstrument
=
"SBE911+"
instrumentNumber
=
"09P1263"
...
...
@@ -87,8 +87,7 @@ julianOrigin = 1
[btl]
cruisePrefix
=
"fr29"
stationPrefixLength
=
3
station
=
'fr29(\d{3})'
typeInstrument
=
"SBE32 standard 24 Niskin bottles"
instrumentNumber
=
"unknown"
titleSummary
=
"Water sample during PIRATA-FR32 cruise with 22 levels"
...
...
@@ -149,8 +148,7 @@ instrumentNumber = 102
TE35
=
16
[xbt]
cruisePrefix
=
"fr29"
stationPrefixLength
=
3
station
=
'[CT]\d+_(\d{5})'
typeInstrument
=
"SIPPICAN+"
instrumentNumber
=
"N/A"
acquisitionSoftware
=
"WinMK21"
...
...
@@ -165,7 +163,7 @@ comment = "Extract from .edf files"
[xbt.header]
endHeader
=
'^Depth\s*\(m\)'
station
=
'Sequence\s*#\s*:\s*(\d*)'
#
station = 'Sequence\s*#\s*:\s*(\d*)'
TIME
=
'Time of Launch\s*[:=]\s*(\d+):(\d+):(\d+)'
DATE
=
'Date of Launch\s*[:=]\s*(\d+)/(\d+)/(\d+)'
DATETIME
=
'System UpLoad Time\s*=\s*(\w+)\s+(\d+)\s+(\d+)\s+(\d+):(\d+):(\d+)'
...
...
@@ -179,8 +177,7 @@ comment = "Extract from .edf files"
[
ladcp
]
cruisePrefix
=
"fr29"
stationPrefixLength
=
3
station
=
'fr29(\d{3})'
typeInstrument
=
"LADCP WH150/WH300"
instrumentNumber
=
"24543/24085"
acquisitionSoftware
=
"BBTALK"
...
...
This diff is collapsed.
Click to expand it.
profile.py
+
33
−
16
View file @
e1ced3d2
...
...
@@ -4,7 +4,8 @@ file_extractor.py
import
fileinput
import
linecache
import
logging
from
operator
import
length_hint
from
operator
import
length_hint
,
ne
from
tkinter
import
N
import
toml
import
sys
import
argparse
...
...
@@ -228,14 +229,19 @@ class Profile:
self
.
m
=
m
self
.
n
=
n
if
n
==
0
:
sys
.
exit
(
"
No file read, check for a match between the file names and the toml configuration file
"
)
def
read_files
(
self
,
cfg
,
device
):
logging
.
debug
(
"
Enter in read_files()
"
)
# initialize datetime object
dt
=
datetime
station_regex
=
None
fileName_dict
=
{}
new_fileName_dict
=
{}
# get the dictionary from toml block, device must be is in lower case
# get the dictionary from toml
split
block, device must be in lower case
hash
=
cfg
[
device
.
lower
()][
'
split
'
]
# set separator field if declared in toml section, none by default
...
...
@@ -246,24 +252,35 @@ class Profile:
if
'
julianOrigin
'
in
cfg
[
device
.
lower
()]:
self
.
__julianOrigin
=
cfg
[
device
.
lower
()][
'
julianOrigin
'
]
# prepare the regex to extract station number from filename
# by default, station or profile number is extracted from the filename
if
'
cruisePrefix
'
in
cfg
[
device
.
lower
()]:
cruisePrefix
=
cfg
[
device
.
lower
()][
'
cruisePrefix
'
]
print
(
cruisePrefix
)
if
'
stationPrefixLength
'
in
cfg
[
device
.
lower
()]:
stationPrefixLength
=
cfg
[
device
.
lower
()][
'
stationPrefixLength
'
]
print
(
stationPrefixLength
)
station_regex
=
re
.
compile
(
f
"
{
cruisePrefix
}
(\d{{
{
stationPrefixLength
}
}})
"
)
# read each file and extract header and data and fill sqlite tables
for
file
in
self
.
fname
:
# prepare the regex to extract station number from filename by default
# if [device]['station'] defined
if
'
station
'
in
cfg
[
device
.
lower
()]:
station_regex
=
re
.
compile
(
cfg
[
device
.
lower
()][
'
station
'
])
logging
.
debug
(
f
"
Station regex:
{
station_regex
}
"
)
# Sometimes, when files start with different letters, the argv list is not well ordered
for
file
in
self
.
fname
:
if
station_regex
.
search
(
file
):
[
station
]
=
station_regex
.
search
(
file
).
groups
()
fileName_dict
[
int
(
station
)]
=
file
else
:
# filename doesn't match regex
continue
# use list comprehension to reorder the dictionary fileName_dict
for
v
in
sorted
(
fileName_dict
.
keys
()):
new_fileName_dict
[
v
]
=
fileName_dict
[
v
]
# [(fileName_dict[key]= value) for (key, value) in sorted(fileName_dict.items(), key=lambda x: x[1])]
else
:
# we have to build a dictionary from the list of files
for
i
in
range
(
1
,
len
(
self
.
fname
)):
new_fileName_dict
[
i
]
=
self
.
fname
[
i
-
1
]
# read each file from dict and extract header and data, fill sqlite tables and array
for
station
,
file
in
new_fileName_dict
.
items
():
process_header
=
False
process_data
=
False
sql
=
{}
# by default, station or profile number is extracted from the filename
if
station_regex
.
search
(
file
):
if
station_regex
!=
None
and
station_regex
.
search
(
file
):
[
station
]
=
station_regex
.
search
(
file
).
groups
()
sql
[
'
station
'
]
=
int
(
station
)
logging
.
debug
(
f
"
Station match:
{
sql
[
'
station
'
]
}
"
)
...
...
@@ -394,7 +411,7 @@ class Profile:
# now, extract and process all data
# split the line, remove leading and trailing space before
p
=
line
.
strip
().
split
(
self
.
__separator
)
#
logging.debug(f"line split: {p}")
logging
.
debug
(
f
"
line split:
{
p
}
"
)
#logging.debug(f"line end: {p[-1]}")
# skip to next line in file when skipLineWith is defined
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment