Commit 7782e4a1 authored by jacques.grelet_ird.fr

add ctd.header section

decode TIME, LATITUDE and LONGITUDE
save in netcdf (hard coded)
parent 8dc6f622
@@ -8,7 +8,9 @@ import sys
 import argparse
 import numpy as np
 import re
+from datetime import datetime
+DEGREE = 176
 class FileExtractor:
@@ -39,6 +41,7 @@ class FileExtractor:
         self.__separator = separator
         self.__header = {}
         self.__data = {}
+        self.__regex = {}
         # replace this constante with roscop fill value
         self.__FillValue = 1e36
@@ -65,9 +68,16 @@ class FileExtractor:
             buf += "{}\n".format(self.__data[key])
         return buf
-    #def re_compile(self):
+    def set_regex(self, cfg):
     # first pass on file(s)
+        d = cfg['ctd']['header']
+        #print(d, end='\n')
+        for key in d.keys():
+            print("{}: {}".format(key, d[key]))
+            self.__regex[key] = re.compile(d[key])
+        print(end='\n')
     def first_pass(self):
         '''
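As a reading aid, here is a minimal, self-contained sketch of what set_regex does: it compiles each named pattern from the [ctd.header] table once so later passes can reuse the compiled objects. The inline cfg dict is only a stand-in for the parsed TOML file shown further below.

import re

# stand-in for the parsed TOML configuration (see the [ctd.header] table below)
cfg = {'ctd': {'header': {
    'isHeader': r'^[*#]',
    'TIME': r'System UpLoad Time\s*=\s*(\w+)\s+(\d+)\s+(\d+)\s+(\d+):(\d+):(\d+)',
}}}

regex = {}
for key, pattern in cfg['ctd']['header'].items():
    # compile each named pattern once; the file passes only use the compiled objects
    regex[key] = re.compile(pattern)

print(regex['isHeader'].match('* Sea-Bird SBE 9 Data File:') is not None)   # True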
@@ -85,7 +95,7 @@ class FileExtractor:
                 file, openhook=fileinput.hook_encoded("ISO-8859-1")) as f:
             filesRead += 1
             for line in f:
-                if line[0] == '#' or line[0] == '*':
+                if self.__regex['isHeader'].match(line):
                     continue
                 # increment the line number
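The compiled '^[*#]' pattern replaces the previous first-character test in first_pass; a quick, illustrative check (sample lines invented) that the two are equivalent for Sea-Bird .cnv headers:

import re

is_header = re.compile(r'^[*#]')
samples = ['* NMEA Latitude = 05 30.00 N', '# nquan = 12', '      1.000    20.5000']
for line in samples:
    # the regex match and the old character comparison agree on every sample
    assert bool(is_header.match(line)) == (line[0] == '#' or line[0] == '*')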
@@ -103,7 +113,7 @@ class FileExtractor:
         # return self.n, self.m
     # second pass, extract data from roscop code in fname and fill array
-    def second_pass(self, cfg, device):
+    def second_pass(self, cfg, device, variables_1D):
         '''
         Read the file to its internal dict
@@ -117,6 +127,8 @@ class FileExtractor:
         '''
         n = 0
         m = 0
+        # initialize datetime object
+        dt = datetime
         # set skipHeader is declared in toml section, 0 by default
         if 'separator' in cfg[device.lower()]:
@@ -129,6 +141,9 @@ class FileExtractor:
             hash = cfg['split'][device.lower()]
         # initialize arrays, move at the end of firstPass ?
+        for key in variables_1D:
+            self.__data[key] = np.ones((self.n)) * self.__FillValue
         for key in self.keys:
             # mult by __fillValue next
             # the shape parameter has to be an int or sequence of ints
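The 1D arrays added here are created already filled with the fill value; np.ones(n) * fill is equivalent to np.full(n, fill), as this small sketch (arbitrary sizes) shows:

import numpy as np

fill_value = 1e36   # same role as self.__FillValue
n = 4               # number of profiles, arbitrary here

time = np.ones((n)) * fill_value
assert np.array_equal(time, np.full(n, fill_value))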
@@ -140,7 +155,55 @@ class FileExtractor:
             for line in f:
                 if f.filelineno() < self.__skip_header + 1:
                     continue
-                if line[0] == '#' or line[0] == '*':
+                # read and decode header
+                if self.__regex['isHeader'].match(line):
+                    if self.__regex['TIME'].search(line):
+                        (month, day, year, hour, minute, second) = \
+                            self.__regex['TIME'].search(line).groups()
+                        # format date and time to "May 09 2011 16:33:53"
+                        dateTime = "%s/%s/%s %s:%s:%s" % (day, month, year, hour, minute, second)
+                        # dateTime conversion to "09/05/2011 16:33:53"
+                        dateTime = "%s" % \
+                            (dt.strptime(dateTime, "%d/%b/%Y %H:%M:%S").strftime("%d/%m/%Y %H:%M:%S"))
+                        # conversion to "20110509163353"
+                        epic_date = "%s" % \
+                            (dt.strptime(dateTime, "%d/%m/%Y %H:%M:%S").strftime("%Y%m%d%H%M%S"))
+                        # conversion to julian day
+                        julian = float((dt.strptime(dateTime, "%d/%m/%Y %H:%M:%S").strftime("%j"))) \
+                            + ((float(hour) * 3600.) + (float(minute) * 60.) + float(second)) / 86400.
+                        # we use julian day with origine 0
+                        julian -= 1
+                        print("{:07.4f} : {} / {}".format(julian, dateTime, epic_date))
+                        self.__data['TIME'][n] = julian
+                    if self.__regex['LATITUDE'].search(line):
+                        (lat_deg, lat_min, lat_hemi) = self.__regex['LATITUDE'].search(line).groups()
+                        # format latitude to string
+                        latitude_str = "%s%c%s %s" % (lat_deg, DEGREE, lat_min, lat_hemi)
+                        # transform to decimal using ternary operator
+                        latitude = float(lat_deg) + (float(lat_min) / 60.) if lat_hemi == 'N' else \
+                            (float(lat_deg) + (float(lat_min) / 60.)) * -1
+                        print("{:07.4f} : {}".format(latitude, latitude_str))
+                        self.__data['LATITUDE'][n] = latitude
+                    if self.__regex['LONGITUDE'].search(line):
+                        (lon_deg, lon_min, lon_hemi) = self.__regex['LONGITUDE'].search(line).groups()
+                        # format longitude to string
+                        longitude_str = "%s%c%s %s" % (lon_deg, DEGREE, lon_min, lon_hemi)
+                        # transform to decimal using ternary operator
+                        longitude = float(lon_deg) + (float(lon_min) / 60.) if lon_hemi == 'E' else \
+                            (float(lon_deg) + (float(lon_min) / 60.)) * -1
+                        print("{:07.4f} : {}".format(longitude, longitude_str))
+                        self.__data['LONGITUDE'][n] = longitude
+                    continue
                 # split the line, remove leading and trailing space before
                 p = line.strip().split(self.__separator)
...
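The header decoding added above can be exercised on its own. The following standalone sketch, with invented header values, reproduces the strptime/strftime chain down to a 0-origin julian day and an equivalent, compressed form of the degrees-plus-minutes conversion to signed decimal degrees:

import re
from datetime import datetime

line = '* System UpLoad Time = May 09 2011 16:33:53'
month, day, year, hour, minute, second = re.search(
    r'System UpLoad Time\s*=\s*(\w+)\s+(\d+)\s+(\d+)\s+(\d+):(\d+):(\d+)', line).groups()

# "09/May/2011 16:33:53" -> datetime, then day of year with origin 0 plus the day fraction
stamp = datetime.strptime('%s/%s/%s %s:%s:%s' % (day, month, year, hour, minute, second),
                          '%d/%b/%Y %H:%M:%S')
julian = float(stamp.strftime('%j')) - 1 \
    + (int(hour) * 3600 + int(minute) * 60 + int(second)) / 86400.
print(stamp.strftime('%Y%m%d%H%M%S'), round(julian, 4))   # 20110509163353 128.6902

# latitude "DD MM.MM H" -> signed decimal degrees, negative in the southern hemisphere
lat_deg, lat_min, lat_hemi = re.search(
    r'NMEA\s+Latitude\s*[:=]\s*(\d+)\s+(\d+\.\d+)\s+(\w)',
    '* NMEA Latitude = 05 30.00 S').groups()
latitude = (float(lat_deg) + float(lat_min) / 60.) * (1 if lat_hemi == 'N' else -1)
print(latitude)   # -5.5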
@@ -4,13 +4,13 @@ from numpy import arange, dtype
 from physical_parameter import Roscop
-def writeNetCDF(fileName, fe):
+def writeNetCDF(fileName, fe, variables_1D):
     # ncvars is a dictionary that store a netcdf variable for each physical parameter key
     ncvars = {}
     # variables and dimensions use for 1D and 2D variables
-    variables_1D = ['TIME', 'LATITUDE', 'LONGITUDE']
+    #variables_1D = ['TIME', 'LATITUDE', 'LONGITUDE']
     variables = variables_1D.copy()
     dims_2D = ['TIME', 'DEPTH']
@@ -21,6 +21,7 @@ def writeNetCDF(fileName, fe):
     nc = Dataset(fileName, "w", format="NETCDF3_CLASSIC")
     logging.debug(' ' + nc.data_model)
     print('writing netCDF file: {}'.format(fileName))
     # create dimensions
+    # n is number of profiles, m the max size of profiles
     time = nc.createDimension("TIME", fe.n)
@@ -70,8 +71,10 @@ def writeNetCDF(fileName, fe):
     # write the ncvars
     for key in variables:
         if any(key in item for item in variables_1D):
+            #print("Key: {}, {}".format(key,fe[key]))
             ncvars[key][:] = fe[key]
         else:
+            #print("Key: {}, {}".format(key,fe[key]))
             ncvars[key][:, :] = fe[key]
     # close the netcdf file
...
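For readers less familiar with the netCDF4 API used in writeNetCDF, a minimal self-contained sketch of the same pattern: create the file, a TIME dimension sized to the number of profiles, one 1D variable per parameter, then assign the arrays. The file name, sizes and values here are invented:

import numpy as np
from netCDF4 import Dataset

n = 2   # number of profiles (fe.n in the code above), arbitrary here
with Dataset('example.nc', 'w', format='NETCDF3_CLASSIC') as nc:
    nc.createDimension('TIME', n)
    for key in ('TIME', 'LATITUDE', 'LONGITUDE'):
        # one 1D netCDF variable per key, pre-filled with the fill value
        nc.createVariable(key, 'd', ('TIME',), fill_value=1e36)
    nc.variables['TIME'][:] = np.array([128.6902, 129.0])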
@@ -14,6 +14,7 @@ import netcdf
 # typeInstrument is a dictionary as key: files extension
 typeInstrument = {'CTD': ('cnv', 'CNV'), 'XBT': (
     'EDF', 'edf'), 'LADCP': ('lad', 'LAD'), 'TSG': 'COLCOR'}
+variables_1D = ['TIME', 'LATITUDE', 'LONGITUDE']
 ti = typeInstrument # an alias
 filesBrowsePosition_row = 2
 filesBrowsePosition_column = 1
@@ -136,10 +137,12 @@ def process(args, cfg, ti):
     # fileExtractor
     fe = FileExtractor(args.files, args.keys)
+    fe.set_regex(cfg)
     # cfg = toml.load(args.config)
     fe.first_pass()
     # fe.secondPass(['PRES', 'TEMP', 'PSAL', 'DOX2'], cfg, 'ctd')
-    fe.second_pass(cfg, ti)
+    fe.second_pass(cfg, ti, variables_1D)
     # fe.disp(['PRES', 'TEMP', 'PSAL', 'DOX2'])
     return fe
@@ -273,4 +276,4 @@ if __name__ == "__main__":
     fe = process(args, cfg, device)
     #print("Dimensions: {} x {}".format(fe.m, fe.n))
     #print(fe.disp())
-    netcdf.writeNetCDF( 'output/test.nc', fe)
+    netcdf.writeNetCDF( 'output/test.nc', fe, variables_1D)
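The '# cfg = toml.load(args.config)' comment above refers to the TOML configuration whose [ctd.header] table is added below; loading it is a one-liner with the toml package. The inline document here is only a stand-in for the real configuration file:

import toml

cfg = toml.loads(r"""
[ctd.header]
isHeader = '^[*#]'
TIME = 'System UpLoad Time\s*=\s*(\w+)\s+(\d+)\s+(\d+)\s+(\d+):(\d+):(\d+)'
""")
print(sorted(cfg['ctd']['header']))   # ['TIME', 'isHeader']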
@@ -21,16 +21,18 @@ stationPrefixLength = 3
 titleSummary = "CTD profiles processed during PIRATA-FR29 cruise"
 typeInstrument = "SBE911+"
 instrumentNumber = "09P1263"
-isHeader = '^[*#]'
-isDevice = [ '^\*\s+(Sea-Bird)','(\*END\*)' ]
-time = 'System UpLoad Time\s*=\s*(\w+)\s+(\d+)\s+(\d+)\s+(\d+):(\d+):(\d+)'
-latitude = 'NMEA\s+Latitude\s*[:=]\s*(\d+)\s+(\d+.\d+)\s+(\w)'
-longitude = 'NMEA\s+Longitude\s*[:=]\s*(\d+)\s+(\d+.\d+)\s+(\w)'
-date = 'Date\s*:\s*(\d+)/(\d+)/(\d+)'
-hour = '[Heure|Hour]\s*:\s*(\d+)[:hH](\d+):(\d+)'
-bottomDepth = 'Bottom Depth\s*:\s*(\d*\.?\d+?)\s*\S*'
-operator = 'Operator\s*:\s*(.*)'
-type = 'Type\s*:\s*(.*)'
+[ctd.header]
+isHeader = '^[*#]'
+isDevice = '^\*\s+(Sea-Bird)'
+TIME = 'System UpLoad Time\s*=\s*(\w+)\s+(\d+)\s+(\d+)\s+(\d+):(\d+):(\d+)'
+LATITUDE = 'NMEA\s+Latitude\s*[:=]\s*(\d+)\s+(\d+\.\d+)\s+(\w)'
+LONGITUDE = 'NMEA\s+Longitude\s*[:=]\s*(\d+)\s+(\d+.\d+)\s+(\w)'
+date = 'Date\s*:\s*(\d+)/(\d+)/(\d+)'
+hour = '[Heure|Hour]\s*:\s*(\d+)[:hH](\d+):(\d+)'
+bottomDepth = 'Bottom Depth\s*:\s*(\d*\.?\d+?)\s*\S*'
+operator = 'Operator\s*:\s*(.*)'
+type = 'Type\s*:\s*(.*)'
 [btl]
 typeInstrument = "SBE32 standard 24 Niskin bottles"
...
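To illustrate the remaining capture groups with invented sample header lines (note the new LONGITUDE pattern keeps an unescaped dot; it still matches the literal '.' because '.' matches any character):

import re

print(re.search(r'NMEA\s+Longitude\s*[:=]\s*(\d+)\s+(\d+.\d+)\s+(\w)',
                '* NMEA Longitude = 020 15.30 W').groups())    # ('020', '15.30', 'W')
print(re.search(r'Bottom Depth\s*:\s*(\d*\.?\d+?)\s*\S*',
                '** Bottom Depth : 4523 m').groups())          # ('4523',)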