Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
O
Oceano2python
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
US191
Oceano2python
Commits
91783768
"README.md" did not exist on "79cc36b57c1d00308263ba74c6ad8a53802f955a"
Commit
91783768
authored
2 years ago
by
Jacques Grelet
Browse files
Options
Downloads
Patches
Plain Diff
first version read station and data
parent
df2f7553
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
db.py
+2
-0
2 additions, 0 deletions
db.py
file_extractor.py
+78
-197
78 additions, 197 deletions
file_extractor.py
tests/test.toml
+2
-1
2 additions, 1 deletion
tests/test.toml
with
82 additions
and
198 deletions
db.py
+
2
−
0
View file @
91783768
...
...
@@ -6,6 +6,8 @@ from notanorm import SqliteDb
# https://www.sqlite.org
# https://stackoverflow.com/questions/1309989/parameter-substitution-for-a-sqlite-in-clause
# https://www.sqlitetutorial.net/sqlite-autoincrement/
# https://pymotw.com/2/sqlite3/
# https://sebastianraschka.com/Articles/2014_sqlite_in_python_tutorial.html
def
main
():
...
...
This diff is collapsed.
Click to expand it.
file_extractor.py
+
78
−
197
View file @
91783768
...
...
@@ -2,6 +2,7 @@
file_extractor.py
'''
import
fileinput
import
linecache
import
logging
import
toml
import
sys
...
...
@@ -11,6 +12,30 @@ import re
from
datetime
import
datetime
import
tools
from
physical_parameter
import
Roscop
from
notanorm
import
SqliteDb
# define SQL station table
table_station
=
"""
CREATE TABLE station (
id INTEGER PRIMARY KEY,
header TEXT,
date_time TEXT NOT NULL UNIQUE,
julian_day REAL NOT NULL UNIQUE,
latitude REAL NOT NULL,
longitude REAL NOT NULL,
max_depth REAL,
bottom_depth REAL
);
"""
# define the profile table
# the id is actually the rowid AUTOINCREMENT column.
table_profile
=
"""
CREATE TABLE profile (
id INTEGER PRIMARY KEY,
station_id INTEGER,
FOREIGN KEY (station_id)
REFERENCES station (id)
);
"""
class
FileExtractor
:
...
...
@@ -29,7 +54,7 @@ class FileExtractor:
'''
# constructor with values by default
def
__init__
(
self
,
fname
,
roscop
,
keys
,
separator
=
None
):
def
__init__
(
self
,
fname
,
roscop
,
keys
,
separator
=
None
,
dbname
=
"
:memory:
"
):
# attributes
# public:
self
.
fname
=
fname
...
...
@@ -38,10 +63,11 @@ class FileExtractor:
self
.
n
=
0
self
.
m
=
0
self
.
lineHeader
=
0
self
.
db
=
SqliteDb
(
dbname
)
# private:
self
.
__separator
=
separator
self
.
__header
=
{}
self
.
__header
=
''
self
.
__data
=
{}
self
.
__regex
=
{}
# replace this constant with roscop fill value
...
...
@@ -87,213 +113,68 @@ class FileExtractor:
for
key
in
d
.
keys
():
self
.
__regex
[
key
]
=
re
.
compile
(
d
[
key
])
def
first_pass
(
self
):
'''
Returns
------
out : [n,m]
The size of array.
'''
lineHeader
=
0
lineData
=
0
filesRead
=
0
indMax
=
0
isHeader
=
True
def
read_files
(
self
,
cfg
,
device
):
for
file
in
self
.
fname
:
with
fileinput
.
input
(
file
,
openhook
=
fileinput
.
hook_encoded
(
"
ISO-8859-1
"
))
as
f
:
lineData
=
0
lineHeader
=
0
isHeader
=
True
filesRead
+=
1
for
line
in
f
:
# header detection, skip header lines
if
isHeader
:
if
'
isHeader
'
in
self
.
__regex
:
if
self
.
__regex
[
'
isHeader
'
].
match
(
line
):
lineHeader
+=
1
continue
elif
'
endHeader
'
in
self
.
__regex
:
if
self
.
__regex
[
'
endHeader
'
].
match
(
line
):
lineHeader
+=
1
isHeader
=
False
else
:
lineHeader
+=
1
continue
# increment the line number
lineData
+=
1
print
(
'
Create table station
'
)
#db.query("DROP DATABASE IF EXISTS '{}'".format(fname))
self
.
db
.
query
(
table_station
)
if
lineData
>
indMax
:
indMax
=
lineData
logging
.
debug
(
"
{} -> header: {:>{w}} data: {:>{w2}}
"
.
format
(
file
,
lineHeader
,
lineData
,
w
=
3
,
w2
=
6
))
# the size of arrays
self
.
n
=
filesRead
self
.
m
=
indMax
self
.
lineHeader
=
lineHeader
print
(
'
Create table profile
'
)
self
.
db
.
query
(
table_profile
)
# second pass, extract data from roscop code in fname and fill array
def
second_pass
(
self
,
cfg
,
device
,
variables_1D
):
'''
Read the file to its internal dict
for
pm
in
self
.
keys
:
print
(
'
\t
Update table profile with new column {}
'
.
format
(
pm
))
addColumn
=
"
ALTER TABLE profile ADD COLUMN {} REAL NOT NULL
"
.
format
(
pm
)
self
.
db
.
query
(
addColumn
)
Parameters
----------
keys: sequence, a list of physical parameter to read.
ex: [
'
PRES
'
,
'
TEMP
'
,
'
PSAL
'
]
cfg: toml.load() instance, configuration file
device: str, instrument
ex: CTD,XBT or LADCP
'''
n
=
0
m
=
0
# initialize datetime object
dt
=
datetime
# get the dictionary from toml block, device must be in lower case
hash
=
cfg
[
'
split
'
][
device
.
lower
()]
# set separator field if declared in toml section, none by default
if
'
separator
'
in
cfg
[
device
.
lower
()]:
self
.
__separator
=
cfg
[
device
.
lower
()][
'
separator
'
]
# set skipHeader is declared in toml section, 0 by default
# get the dictionary from toml block, device must be in lower case
hash
=
cfg
[
'
split
'
][
device
.
lower
()]
# initialize arrays, move at the end of firstPass ?
for
key
in
variables_1D
:
#self.__data[key] = np.ones((self.n)) * self.__FillValue
if
'
_FillValue
'
in
self
.
roscop
[
key
]:
self
.
__data
[
key
]
=
np
.
full
(
self
.
n
,
self
.
roscop
[
key
][
'
_FillValue
'
])
else
:
self
.
__data
[
key
]
=
np
.
empty
(
self
.
n
)
for
key
in
self
.
keys
:
# mult by __fillValue next
# the shape parameter has to be an int or sequence of ints
if
'
_FillValue
'
in
self
.
roscop
[
key
]:
self
.
__data
[
key
]
=
np
.
full
([
self
.
n
,
self
.
m
],
self
.
roscop
[
key
][
'
_FillValue
'
])
else
:
self
.
__data
[
key
]
=
np
.
empty
([
self
.
n
,
self
.
m
])
for
file
in
self
.
fname
:
with
fileinput
.
input
(
file
,
openhook
=
fileinput
.
hook_encoded
(
"
ISO-8859-1
"
))
as
f
:
logging
.
debug
(
file
)
day
=
month
=
year
=
hour
=
minute
=
second
=
0
file
,
openhook
=
fileinput
.
hook_encoded
(
"
ISO-8859-1
"
))
as
f
:
for
line
in
f
:
if
f
.
filelineno
()
<
self
.
lineHeader
+
1
:
if
self
.
__regex
[
'
isHeader
'
].
match
(
line
):
self
.
__header
+=
line
continue
if
self
.
__regex
[
'
endHeader
'
].
match
(
line
):
# read and decode header
for
k
in
self
.
__regex
.
keys
():
# key is DATETIME
if
k
==
"
DATETIME
"
and
self
.
__regex
[
k
].
search
(
line
):
(
month
,
day
,
year
,
hour
,
minute
,
second
)
=
\
self
.
__regex
[
k
].
search
(
line
).
groups
()
# format date and time to "May 09 2011 16:33:53"
dateTime
=
"
%s/%s/%s %s:%s:%s
"
%
(
day
,
month
,
year
,
hour
,
minute
,
second
)
# set datetime object
dt
=
dt
.
strptime
(
dateTime
,
"
%d/%b/%Y %H:%M:%S
"
)
# dt.strptime(dateTime, "%d/%b/%Y %H:%M:%S")# dateTime conversion to "09/05/2011 16:33:53"
# dateTime = "%s" % \
# (dt.strptime(dateTime, "%d/%b/%Y %H:%M:%S").strftime("%d/%m/%Y %H:%M:%S"))
# # conversion to "20110509163353"
# epic_date = "%s" % \
# (dt.strptime(dateTime, "%d/%m/%Y %H:%M:%S").strftime("%Y%m%d%H%M%S"))
# # conversion to julian day
# julian = float((dt.strptime(dateTime, "%d/%m/%Y %H:%M:%S").strftime("%j"))) \
# + ((float(hour) * 3600.) + (float(minute) * 60.) + float(second) ) / 86400.
# # we use julian day with origine 0
# julian -= 1
self
.
__data
[
'
TIME
'
][
n
]
=
tools
.
dt2julian
(
dt
)
# key is DATE
if
k
==
"
DATE
"
and
self
.
__regex
[
k
].
search
(
line
):
if
device
.
lower
()
==
'
ladcp
'
:
(
year
,
month
,
day
)
=
\
self
.
__regex
[
k
].
search
(
line
).
groups
()
else
:
(
month
,
day
,
year
)
=
\
self
.
__regex
[
k
].
search
(
line
).
groups
()
# key is TIME
if
k
==
"
TIME
"
and
self
.
__regex
[
k
].
search
(
line
):
(
hour
,
minute
,
second
)
=
\
self
.
__regex
[
k
].
search
(
line
).
groups
()
# format date and time to "May 09 2011 16:33:53"
dateTime
=
"
%s/%s/%s %s:%s:%s
"
%
(
day
,
month
,
year
,
hour
,
minute
,
second
)
# dateTime conversion to "09/05/2011 16:33:53"
dateTime
=
"
%s
"
%
\
(
dt
.
strptime
(
dateTime
,
"
%d/%m/%Y %H:%M:%S
"
).
strftime
(
"
%d/%m/%Y %H:%M:%S
"
))
# set datetime object
dt
=
dt
.
strptime
(
dateTime
,
"
%d/%m/%Y %H:%M:%S
"
)
# # conversion to "20110509163353"
# epic_date = "%s" % \
# (dt.strptime(dateTime, "%d/%m/%Y %H:%M:%S").strftime("%Y%m%d%H%M%S"))
# # conversion to julian day
# julian = float((dt.strptime(dateTime, "%d/%m/%Y %H:%M:%S").strftime("%j"))) \
# + ((float(hour) * 3600.) + (float(minute) * 60.) + float(second) ) / 86400.
# # we use julian day with origine 0
# julian -= 1
self
.
__data
[
'
TIME
'
][
n
]
=
tools
.
dt2julian
(
dt
)
# key is LATITUDE
if
k
==
"
LATITUDE
"
and
self
.
__regex
[
k
].
search
(
line
):
if
device
.
lower
()
==
'
ladcp
'
:
[
latitude
]
=
self
.
__regex
[
k
].
search
(
line
).
groups
()
else
:
(
lat_deg
,
lat_min
,
lat_hemi
)
=
self
.
__regex
[
k
].
search
(
line
).
groups
()
# format latitude to string
latitude_str
=
"
%s%c%s %s
"
%
(
lat_deg
,
tools
.
DEGREE
,
lat_min
,
lat_hemi
)
# transform to decimal using ternary operator
latitude
=
float
(
lat_deg
)
+
(
float
(
lat_min
)
/
60.
)
if
lat_hemi
==
'
N
'
else
\
(
float
(
lat_deg
)
+
(
float
(
lat_min
)
/
60.
))
*
-
1
self
.
__data
[
'
LATITUDE
'
][
n
]
=
latitude
# key is LONGITUDE
if
k
==
"
LONGITUDE
"
and
self
.
__regex
[
k
].
search
(
line
):
if
device
.
lower
()
==
'
ladcp
'
:
[
longitude
]
=
self
.
__regex
[
k
].
search
(
line
).
groups
()
else
:
(
lon_deg
,
lon_min
,
lon_hemi
)
=
self
.
__regex
[
k
].
search
(
line
).
groups
()
# format longitude to string
longitude_str
=
"
%s%c%s %s
"
%
(
lon_deg
,
tools
.
DEGREE
,
lon_min
,
lon_hemi
)
# transform to decimal using ternary operator
longitude
=
float
(
lon_deg
)
+
(
float
(
lon_min
)
/
60.
)
if
lon_hemi
==
'
E
'
else
\
(
float
(
lon_deg
)
+
(
float
(
lon_min
)
/
60.
))
*
-
1
self
.
__data
[
'
LONGITUDE
'
][
n
]
=
longitude
# key is BATH
if
k
==
"
BATH
"
and
self
.
__regex
[
k
].
search
(
line
):
[
bath
]
=
self
.
__regex
[
k
].
search
(
line
).
groups
()
self
.
__data
[
'
BATH
'
][
n
]
=
bath
#for k in self.__regex.keys():
# fake
print
(
"
insert station
"
)
self
.
db
.
insert
(
"
station
"
,
id
=
1
,
header
=
self
.
__header
,
date_time
=
"
2022-04-06 12:00:00.000
"
,
julian_day
=
10.5
,
latitude
=
-
10.2
,
longitude
=
23.6
)
continue
print
(
line
)
# split the line, remove leading and trailing space before
p
=
line
.
strip
().
split
(
self
.
__separator
)
str
=
'
'
# fill array with extracted value of line for eack key (physical parameter)
sql
=
'
station_id = 1
'
# for key in self.keys:
# # Insert data in profile table
# sql += ", {} = {}".format(key, p[hash[key]])
# print(sql)
#sql = ', '.join(['{} = {}'.format(key, p[hash[key]]) for key in self.keys])
#print(sql)
sql
=
{}
#[sql[key] = p[hash[key]] for key in self.keys]
sql
[
'
station_id
'
]
=
1
for
key
in
self
.
keys
:
self
.
__data
[
key
][
n
,
m
]
=
p
[
hash
[
key
]]
# debug info
str
+=
"
{:>{width}}
"
.
format
(
p
[
hash
[
key
]],
width
=
8
)
logging
.
debug
(
str
)
sql
[
key
]
=
p
[
hash
[
key
]]
print
(
sql
)
self
.
db
.
insert
(
"
profile
"
,
sql
)
#self.db.insert("profile", station_id = 1, PRES = 1, TEMP = 20, PSAL = 35, DOX2 = 20, DENS = 30)
# increment m index (the line number)
m
+=
1
n
+=
1
m
=
0
print
(
'
get sizes:
'
)
st
=
self
.
db
.
query
(
'
SELECT COUNT(id) FROM station
'
)
max_press
=
self
.
db
.
query
(
'
SELECT count(PRES) FROM profile
'
)
print
(
st
,
max_press
)
# for testing in standalone context
...
...
@@ -335,10 +216,10 @@ if __name__ == "__main__":
print
(
"
File(s): {}, Config: {}
"
.
format
(
args
.
fname
,
args
.
config
))
cfg
=
toml
.
load
(
args
.
config
)
fe
.
set_regex
(
cfg
,
args
.
instrument
)
fe
.
first_pass
(
)
print
(
"
Indices: {} x {}
\n
keys: {}
"
.
format
(
fe
.
n
,
fe
.
m
,
fe
.
keys
))
fe
.
second_pass
(
cfg
,
args
.
instrument
,
[
'
TIME
'
,
'
LATITUDE
'
,
'
LONGITUDE
'
,
'
BATH
'
])
# debug
print
(
fe
[
'
PRES
'
])
print
(
fe
[
'
TEMP
'
])
print
(
fe
[
'
PSAL
'
])
fe
.
read_files
(
cfg
,
args
.
instrument
)
#
print("Indices: {} x {}\nkeys: {}".format(fe.n, fe.m, fe.keys))
#
fe.second_pass(cfg, args.instrument, ['TIME', 'LATITUDE', 'LONGITUDE','BATH'])
#
#
debug
#
print(fe['PRES'])
#
print(fe['TEMP'])
#
print(fe['PSAL'])
This diff is collapsed.
Click to expand it.
tests/test.toml
+
2
−
1
View file @
91783768
...
...
@@ -34,7 +34,8 @@ typeInstrument = "SBE911+"
instrumentNumber
=
"09P1263"
[ctd.header]
isHeader
=
'^[*#]'
isHeader
=
'^[*#]+\s+'
endHeader
=
'^[*]END[*]'
DATETIME
=
'System UpLoad Time\s*=\s*(\w+)\s+(\d+)\s+(\d+)\s+(\d+):(\d+):(\d+)'
LATITUDE
=
'NMEA\s+Latitude\s*[:
=
]\s*(\d+)\s+(\d+\.\d+)\s+(\w)
'
LONGITUDE
=
'NMEA\s+Longitude\s*[:
=
]\s*(\d+)\s+(\d+.\d+)\s+(\w)
'
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment