Skip to content
Snippets Groups Projects
Commit 7782e4a1 authored by jacques.grelet_ird.fr's avatar jacques.grelet_ird.fr
Browse files

add ctd.header section

decode TIME, LATITUDE and LONGITUDE
save in netcdf (hard coded)
parent 8dc6f622
No related branches found
No related tags found
No related merge requests found
......@@ -8,7 +8,9 @@ import sys
import argparse
import numpy as np
import re
from datetime import datetime
DEGREE = 176
class FileExtractor:
......@@ -39,6 +41,7 @@ class FileExtractor:
self.__separator = separator
self.__header = {}
self.__data = {}
self.__regex = {}
# replace this constant with the roscop fill value
self.__FillValue = 1e36
......@@ -65,9 +68,16 @@ class FileExtractor:
buf += "{}\n".format(self.__data[key])
return buf
#def re_compile(self):
def set_regex(self, cfg):
    '''
    Compile the header regular expressions declared in the configuration.

    Each entry of cfg['ctd']['header'] is printed, compiled with
    re.compile and stored in the private regex dictionary under the
    same key, so later passes can look the compiled pattern up by name.

    Parameters
    ----------
    cfg : dict
        configuration mapping; cfg['ctd']['header'] must map each key
        to a regular expression string

    Raises
    ------
    KeyError
        if cfg has no ['ctd']['header'] section
    re.error
        if a pattern is not a valid regular expression
    TypeError
        if a pattern is not a string (e.g. a list of patterns)
    '''
    header = cfg['ctd']['header']
    for key, pattern in header.items():
        print("{}: {}".format(key, pattern))
        try:
            self.__regex[key] = re.compile(pattern)
        except re.error as err:
            # keep the exception type but say which config entry is broken
            raise re.error(
                "invalid regex for header key '{}': {}".format(key, err)) from err
        except TypeError as err:
            raise TypeError(
                "header key '{}' must be a regex string: {}".format(key, err)) from err
    print(end='\n')
def first_pass(self):
'''
......@@ -85,7 +95,7 @@ class FileExtractor:
file, openhook=fileinput.hook_encoded("ISO-8859-1")) as f:
filesRead += 1
for line in f:
if line[0] == '#' or line[0] == '*':
if self.__regex['isHeader'].match(line):
continue
# increment the line number
......@@ -103,7 +113,7 @@ class FileExtractor:
# return self.n, self.m
# second pass, extract data from roscop code in fname and fill array
def second_pass(self, cfg, device):
def second_pass(self, cfg, device, variables_1D):
'''
Read the file to its internal dict
......@@ -117,6 +127,8 @@ class FileExtractor:
'''
n = 0
m = 0
# initialize datetime object
dt = datetime
# set skipHeader is declared in toml section, 0 by default
if 'separator' in cfg[device.lower()]:
......@@ -129,6 +141,9 @@ class FileExtractor:
hash = cfg['split'][device.lower()]
# initialize arrays, move at the end of firstPass ?
for key in variables_1D:
self.__data[key] = np.ones((self.n)) * self.__FillValue
for key in self.keys:
# mult by __fillValue next
# the shape parameter has to be an int or sequence of ints
......@@ -140,7 +155,55 @@ class FileExtractor:
for line in f:
if f.filelineno() < self.__skip_header + 1:
continue
if line[0] == '#' or line[0] == '*':
# read and decode header
if self.__regex['isHeader'].match(line):
if self.__regex['TIME'].search(line):
(month, day, year, hour, minute, second) = \
self.__regex['TIME'].search(line).groups()
# format date and time to "09/May/2011 16:33:53"
dateTime = "%s/%s/%s %s:%s:%s" % (day, month, year, hour, minute, second)
# dateTime conversion to "09/05/2011 16:33:53"
dateTime = "%s" % \
(dt.strptime(dateTime, "%d/%b/%Y %H:%M:%S").strftime("%d/%m/%Y %H:%M:%S"))
# conversion to "20110509163353"
epic_date = "%s" % \
(dt.strptime(dateTime, "%d/%m/%Y %H:%M:%S").strftime("%Y%m%d%H%M%S"))
# conversion to julian day
julian = float((dt.strptime(dateTime, "%d/%m/%Y %H:%M:%S").strftime("%j"))) \
+ ((float(hour) * 3600.) + (float(minute) * 60.) + float(second) ) / 86400.
# we use julian day with origin 0
julian -= 1
print("{:07.4f} : {} / {}".format(julian, dateTime, epic_date))
self.__data['TIME'][n] = julian
if self.__regex['LATITUDE'].search(line):
(lat_deg, lat_min, lat_hemi) = self.__regex['LATITUDE'].search(line).groups()
# format latitude to string
latitude_str = "%s%c%s %s" % (lat_deg, DEGREE, lat_min, lat_hemi)
# transform to decimal using ternary operator
latitude = float(lat_deg) + (float(lat_min) / 60.) if lat_hemi == 'N' else \
(float(lat_deg) + (float(lat_min) / 60.)) * -1
print("{:07.4f} : {}".format(latitude, latitude_str))
self.__data['LATITUDE'][n] = latitude
if self.__regex['LONGITUDE'].search(line):
(lon_deg, lon_min, lon_hemi) = self.__regex['LONGITUDE'].search(line).groups()
# format longitude to string
longitude_str = "%s%c%s %s" % (lon_deg, DEGREE, lon_min, lon_hemi)
# transform to decimal using ternary operator
longitude = float(lon_deg) + (float(lon_min) / 60.) if lon_hemi == 'E' else \
(float(lon_deg) + (float(lon_min) / 60.)) * -1
print("{:07.4f} : {}".format(longitude, longitude_str))
self.__data['LONGITUDE'][n] = longitude
continue
# split the line, remove leading and trailing space before
p = line.strip().split(self.__separator)
......
......@@ -4,13 +4,13 @@ from numpy import arange, dtype
from physical_parameter import Roscop
def writeNetCDF(fileName, fe):
def writeNetCDF(fileName, fe, variables_1D):
# ncvars is a dictionary that stores a netcdf variable for each physical parameter key
ncvars = {}
# variables and dimensions used for 1D and 2D variables
variables_1D = ['TIME', 'LATITUDE', 'LONGITUDE']
#variables_1D = ['TIME', 'LATITUDE', 'LONGITUDE']
variables = variables_1D.copy()
dims_2D = ['TIME', 'DEPTH']
......@@ -21,6 +21,7 @@ def writeNetCDF(fileName, fe):
nc = Dataset(fileName, "w", format="NETCDF3_CLASSIC")
logging.debug(' ' + nc.data_model)
print('writing netCDF file: {}'.format(fileName))
# create dimensions
# n is number of profiles, m the max size of profiles
time = nc.createDimension("TIME", fe.n)
......@@ -70,8 +71,10 @@ def writeNetCDF(fileName, fe):
# write the ncvars
for key in variables:
if any(key in item for item in variables_1D):
#print("Key: {}, {}".format(key,fe[key]))
ncvars[key][:] = fe[key]
else:
#print("Key: {}, {}".format(key,fe[key]))
ncvars[key][:, :] = fe[key]
# close the netcdf file
......
......@@ -14,6 +14,7 @@ import netcdf
# typeInstrument is a dictionary mapping each instrument type to its file extension(s)
typeInstrument = {'CTD': ('cnv', 'CNV'), 'XBT': (
'EDF', 'edf'), 'LADCP': ('lad', 'LAD'), 'TSG': 'COLCOR'}
variables_1D = ['TIME', 'LATITUDE', 'LONGITUDE']
ti = typeInstrument # an alias
filesBrowsePosition_row = 2
filesBrowsePosition_column = 1
......@@ -136,10 +137,12 @@ def process(args, cfg, ti):
# fileExtractor
fe = FileExtractor(args.files, args.keys)
fe.set_regex(cfg)
# cfg = toml.load(args.config)
fe.first_pass()
# fe.secondPass(['PRES', 'TEMP', 'PSAL', 'DOX2'], cfg, 'ctd')
fe.second_pass(cfg, ti)
fe.second_pass(cfg, ti, variables_1D)
# fe.disp(['PRES', 'TEMP', 'PSAL', 'DOX2'])
return fe
......@@ -273,4 +276,4 @@ if __name__ == "__main__":
fe = process(args, cfg, device)
#print("Dimensions: {} x {}".format(fe.m, fe.n))
#print(fe.disp())
netcdf.writeNetCDF( 'output/test.nc', fe)
netcdf.writeNetCDF( 'output/test.nc', fe,variables_1D)
......@@ -21,16 +21,18 @@ stationPrefixLength = 3
titleSummary = "CTD profiles processed during PIRATA-FR29 cruise"
typeInstrument = "SBE911+"
instrumentNumber = "09P1263"
isHeader = '^[*#]'
isDevice = [ '^\*\s+(Sea-Bird)','(\*END\*)' ]
time = 'System UpLoad Time\s*=\s*(\w+)\s+(\d+)\s+(\d+)\s+(\d+):(\d+):(\d+)'
latitude = 'NMEA\s+Latitude\s*[:=]\s*(\d+)\s+(\d+.\d+)\s+(\w)'
longitude = 'NMEA\s+Longitude\s*[:=]\s*(\d+)\s+(\d+.\d+)\s+(\w)'
date = 'Date\s*:\s*(\d+)/(\d+)/(\d+)'
hour = '[Heure|Hour]\s*:\s*(\d+)[:hH](\d+):(\d+)'
bottomDepth = 'Bottom Depth\s*:\s*(\d*\.?\d+?)\s*\S*'
operator = 'Operator\s*:\s*(.*)'
type = 'Type\s*:\s*(.*)'
[ctd.header]
# Regular expressions applied to the CTD file header lines.
# Single-quoted TOML literal strings keep the backslashes verbatim.
isHeader = '^[*#]'
isDevice = '^\*\s+(Sea-Bird)'
# groups: month name, day, year, hour, minute, second
TIME = 'System UpLoad Time\s*=\s*(\w+)\s+(\d+)\s+(\d+)\s+(\d+):(\d+):(\d+)'
# groups: degrees, decimal minutes, hemisphere letter
LATITUDE = 'NMEA\s+Latitude\s*[:=]\s*(\d+)\s+(\d+\.\d+)\s+(\w)'
# decimal point escaped so it only matches a literal '.', as for LATITUDE
LONGITUDE = 'NMEA\s+Longitude\s*[:=]\s*(\d+)\s+(\d+\.\d+)\s+(\w)'
date = 'Date\s*:\s*(\d+)/(\d+)/(\d+)'
# NOTE(review): [Heure|Hour] is a character class (any one of H, e, u, r, |, o),
# not an alternation; (?:Heure|Hour) was probably intended - confirm before changing
hour = '[Heure|Hour]\s*:\s*(\d+)[:hH](\d+):(\d+)'
# NOTE(review): the lazy \d+? followed by the optional \s*\S* looks like it keeps
# only the first decimal digit (e.g. "45.67" -> "45.6") - confirm
bottomDepth = 'Bottom Depth\s*:\s*(\d*\.?\d+?)\s*\S*'
operator = 'Operator\s*:\s*(.*)'
type = 'Type\s*:\s*(.*)'
[btl]
typeInstrument = "SBE32 standard 24 Niskin bottles"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment