Newer
Older
'''
file_extractor.py
'''
import argparse
import fileinput
import logging
import sys

import numpy as np
This class reads multiple ASCII files, extracts physical parameters, identified by their ROSCOP codes,
from the given columns and fills arrays with them.
Parameters
----------
fname : file, str, pathlib.Path, list of str
File, filename, or list to read.
skip_header : int, optional
The number of lines to skip at the beginning of the file.
'''
# Constructor: default values of the public attributes.
# NOTE(review): the enclosing `def __init__(...)` line is not visible in this
# view -- confirm the signature against the full file.
# n and m are the array dimensions; firstPass() overwrites them with the
# number of files read and the largest per-file line count.
self.n = 0
self.m = 0
# Value used by secondPass() to pre-fill every array.
# TODO: replace this constant with the ROSCOP fill value.
self.FillValue = 1e36
def __str__(self):
    '''
    Return a one-line description of the extractor: the file name(s) it
    was given and its length as reported by len(self).
    '''
    template = 'Class FileExtractor, file: %s, size = %d'
    values = (self.fname, len(self))
    return template % values
# NOTE(review): fragment -- neither the enclosing `def` line nor a loop header
# binding `key` is visible in this view; as shown, `key` is unbound and the
# `return` is outside any function. Confirm both against the full file.
# Earlier print-based variant, kept for reference:
# for key in keys:
# print("{}:".format(key))
# print(self.__data[key])
# Build a text buffer holding "<key>:" on one line followed by the data
# stored for that key on the next, then hand the buffer back to the caller.
buf = ''
buf += "{}:\n".format(key)
buf += "{}\n".format(self.__data[key])
return buf
# first pass on file(s)
def firstPass(self):
    '''
    Scan every input file once to find the dimensions of the arrays.

    Lines whose first character is '#' or '*' are comments and are not
    counted. Files are decoded as ISO-8859-1.

    Returns
    -------
    out : (n, m)
        n is the number of files read and m the largest number of
        non-comment lines found in a single file. Both values are also
        stored on the instance as self.n and self.m.
    '''
    # fname may be a single name or a list of names; wrap a lone str/path
    # so that the loop below never iterates over its characters
    files = self.fname
    if isinstance(files, str) or hasattr(files, '__fspath__'):
        files = [files]

    filesRead = 0
    indMax = 0
    for file in files:
        with fileinput.input(
                file, openhook=fileinput.hook_encoded("ISO-8859-1")) as f:
            filesRead += 1
            # count the data lines of this file only
            lineRead = sum(
                1 for line in f if not line.startswith(('#', '*')))
        indMax = max(indMax, lineRead)
        logging.debug(
            " file: {} -> read: {:>{width}}".format(
                file, lineRead, width=6))

    # the size of arrays
    self.n = filesRead
    self.m = indMax
    return self.n, self.m
# second pass, extract data from roscop code in fname and fill array
def secondPass(self, keys, cfg, device):
    '''
    Read the file(s) a second time and store the requested parameters in
    the internal dict of arrays.

    For every key an array of shape (self.n, self.m) is created and
    pre-filled with self.FillValue, so firstPass() has to be run first.
    Row n receives the values of the n-th file; cells past the last data
    line of a file keep the fill value.

    Parameters
    ----------
    keys: sequence, a list of physical parameter to read.
        ex: ['PRES', 'TEMP', 'PSAL']
    cfg: toml.load() instance, configuration file
        cfg['split'][device] is looked up per key to locate the value in
        the split line; cfg[device] may define 'skipHeader'.
    device: str, instrument
        ex: CTD,XBT or LADCP

    Raises
    ------
    ValueError
        If an extracted field cannot be converted to float.
    '''
    # the toml sections are named after the device, in lower case
    section = device.lower()

    # skipHeader is optional in the toml section; the value already held by
    # the instance is kept when it is absent
    if 'skipHeader' in cfg[section]:
        self.__skip_header = cfg[section]['skipHeader']
    logging.debug(self.__skip_header)

    # NOTE(review): assumed to map each key to the index of its column in
    # the whitespace-split line -- confirm against the toml file
    columns = cfg['split'][section]

    # initialize arrays, move at the end of firstPass ?
    for key in keys:
        # the shape parameter has to be an int or sequence of ints
        self.__data[key] = np.full(
            (self.n, self.m), self.FillValue, dtype=float)

    # fname may be a single name or a list of names
    files = self.fname
    if isinstance(files, str) or hasattr(files, '__fspath__'):
        files = [files]

    # n is the file indice, m the line indice inside the current file
    for n, file in enumerate(files):
        m = 0
        with fileinput.input(
                file, openhook=fileinput.hook_encoded("ISO-8859-1")) as f:
            for line in f:
                # skip the header lines, then the comment lines
                if f.filelineno() < self.__skip_header + 1:
                    continue
                if line.startswith(('#', '*')):
                    continue

                # split the line; a blank line carries no value
                fields = line.split()
                if not fields:
                    continue

                debug_line = ' '
                # fill array with extracted value of line for each key
                # (physical parameter)
                for key in keys:
                    value = fields[columns[key]]
                    self.__data[key][n, m] = float(value)
                    # debug info
                    debug_line += "{:>{width}}".format(value, width=10)
                logging.debug(debug_line)

                # increment m indice (the line number)
                m += 1
# for testing in standalone context
# ---------------------------------
if __name__ == "__main__":
    # Stand-alone test driver: parse the command line, run the first pass on
    # the given file(s) and print the resulting array dimensions.
    # usage:
    # > python file_extractor.py data/cnv/dfr2900[1-3].cnv -d
    # > python file_extractor.py data/cnv/dfr2900[1-3].cnv -k PRES TEMP PSAL DOX2 DENS
    # > python file_extractor.py data/cnv/dfr29*.cnv -d
    from glob import glob

    parser = argparse.ArgumentParser(
        description='This class read multiple ASCII file, extract physical parameter '
        'from ROSCOP codification at the given column and fill arrays ',
        epilog='J. Grelet IRD US191 - March 2019')
    parser.add_argument('-d', '--debug', help='display debug informations',
                        action='store_true')
    parser.add_argument('-c', '--config', help="toml configuration file, (default: %(default)s)",
                        default='tests/test.toml')
    parser.add_argument('-k', '--key', nargs='+', default=['PRES', 'TEMP', 'PSAL'],
                        help='display dictionary for key(s), (default: %(default)s)')
    # positional file list read back below as args.fname
    # NOTE(review): nargs and the glob-expanded default are chosen to match
    # the help text -- confirm against the intended command line
    parser.add_argument('fname', nargs='*',
                        default=sorted(glob('data/cnv/dfr29*.cnv')),
                        help='cnv file(s) to parse, (default: data/cnv/dfr29*.cnv)')

    # display extra logging info
    # see: https://stackoverflow.com/questions/14097061/easier-way-to-enable-verbose-logging
    # https://docs.python.org/2/howto/argparse.html
    args = parser.parse_args()
    if args.debug:
        logging.basicConfig(
            format='%(levelname)s:%(message)s', level=logging.DEBUG)

    fe = FileExtractor(args.fname)
    print("File(s): {}, Config: {}".format(args.fname, args.config))
    n, m = fe.firstPass()
    print("Indices:", n, m)
    # the second pass is not exercised here, it needs the loaded toml
    # configuration:
    # fe.secondPass(['PRES', 'TEMP', 'PSAL', 'DOX2'], cdf, 'ctd')