Commit e1ced3d2 authored by Jacques Grelet

Use a regex on the filename to reorder the file list, which is now stored as a dict

parent fdde2570
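In short: a station number is extracted from each filename with a regular expression, the files are stored in a dict keyed by that number, and the dict is then walked in ascending key order so that processing no longer depends on the argv order. A minimal sketch of that logic, with hypothetical filenames and a hard-coded pattern (the real code builds the pattern from cruisePrefix and stationPrefixLength in the TOML configuration):

import re

files = ["fr29003.cnv", "fr29001.cnv", "fr29002.cnv"]   # hypothetical argv order
station_regex = re.compile(r"fr29(\d{3})")

file_by_station = {}
for f in files:
    m = station_regex.search(f)
    if m is None:                       # skip files that do not match the pattern
        continue
    file_by_station[int(m.group(1))] = f

for station in sorted(file_by_station):  # ascending station order: 1, 2, 3
    print(station, file_by_station[station])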
@@ -26,8 +26,8 @@ PI = "BOURLES"
CREATOR = "Jacques.Grelet@ird.fr"
[ctd]
cruisePrefix = "fr29"
stationPrefixLength = 3
# extract profile/station number from fileName
#station = 'fr29(\d{3})'
titleSummary = "CTD profiles processed during PIRATA-FR29 cruise"
typeInstrument = "SBE911+"
instrumentNumber = "09P1263"
@@ -87,8 +87,7 @@ julianOrigin = 1
[btl]
cruisePrefix = "fr29"
stationPrefixLength = 3
station = 'fr29(\d{3})'
typeInstrument = "SBE32 standard 24 Niskin bottles"
instrumentNumber = "unknown"
titleSummary = "Water sample during PIRATA-FR32 cruise with 22 levels"
@@ -149,8 +148,7 @@ instrumentNumber = 102
TE35 = 16
[xbt]
cruisePrefix = "fr29"
stationPrefixLength = 3
station = '[CT]\d+_(\d{5})'
typeInstrument = "SIPPICAN+"
instrumentNumber = "N/A"
acquisitionSoftware = "WinMK21"
@@ -165,7 +163,7 @@ comment = "Extract from .edf files"
[xbt.header]
endHeader = '^Depth\s*\(m\)'
station = 'Sequence\s*#\s*:\s*(\d*)'
#station = 'Sequence\s*#\s*:\s*(\d*)'
TIME = 'Time of Launch\s*[:=]\s*(\d+):(\d+):(\d+)'
DATE = 'Date of Launch\s*[:=]\s*(\d+)/(\d+)/(\d+)'
DATETIME = 'System UpLoad Time\s*=\s*(\w+)\s+(\d+)\s+(\d+)\s+(\d+):(\d+):(\d+)'
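For reference, the TIME and DATE patterns above can be exercised on hypothetical header lines (the values below are made up; the real code scans each .edf header line until the endHeader pattern matches):

import re

TIME = re.compile(r'Time of Launch\s*[:=]\s*(\d+):(\d+):(\d+)')
DATE = re.compile(r'Date of Launch\s*[:=]\s*(\d+)/(\d+)/(\d+)')

header = [
    "Date of Launch:  09/02/2019",     # hypothetical values
    "Time of Launch:  14:32:05",
]
for line in header:
    for name, rx in (("DATE", DATE), ("TIME", TIME)):
        m = rx.search(line)
        if m:
            print(name, m.groups())    # e.g. DATE ('09', '02', '2019')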
@@ -179,8 +177,7 @@ comment = "Extract from .edf files"
[ladcp]
cruisePrefix = "fr29"
stationPrefixLength = 3
station = 'fr29(\d{3})'
typeInstrument = "LADCP WH150/WH300"
instrumentNumber = "24543/24085"
acquisitionSoftware = "BBTALK"
@@ -4,7 +4,8 @@ file_extractor.py
import fileinput
import linecache
import logging
from operator import length_hint
from operator import length_hint, ne
from tkinter import N
import toml
import sys
import argparse
@@ -228,14 +229,19 @@ class Profile:
self.m = m
self.n = n
if n == 0:
sys.exit("No file read, check for a match between the file names and the toml configuration file")
def read_files(self, cfg, device):
logging.debug("Enter in read_files()")
# initialize datetime object
dt = datetime
station_regex = None
fileName_dict = {}
new_fileName_dict ={}
# get the dictionary from the toml block, device must be in lower case
# get the dictionary from the toml split block, device must be in lower case
hash = cfg[device.lower()]['split']
# set separator field if declared in toml section, none by default
@@ -246,24 +252,35 @@ class Profile:
if 'julianOrigin' in cfg[device.lower()]:
self.__julianOrigin = cfg[device.lower()]['julianOrigin']
# prepare the regex to extract station number from filename
# by default, station or profile number is extracted from the filename
if 'cruisePrefix' in cfg[device.lower()]:
cruisePrefix = cfg[device.lower()]['cruisePrefix']
print(cruisePrefix)
if 'stationPrefixLength' in cfg[device.lower()]:
stationPrefixLength = cfg[device.lower()]['stationPrefixLength']
print(stationPrefixLength)
station_regex = re.compile(f"{cruisePrefix}(\d{{{stationPrefixLength}}})")
# read each file and extract header and data and fill sqlite tables
for file in self.fname:
# prepare the regex to extract the station number from the filename
# if [device]['station'] is defined in the configuration
if 'station' in cfg[device.lower()]:
station_regex = re.compile(cfg[device.lower()]['station'])
logging.debug(f"Station regex: {station_regex}")
# Sometimes, when filenames start with different letters, the argv list is not ordered by station
for file in self.fname:
if station_regex.search(file):
[station] = station_regex.search(file).groups()
fileName_dict[int(station)] = file
else: # filename doesn't match the regex
continue
# reorder the dictionary fileName_dict by ascending station number
for v in sorted(fileName_dict.keys()):
new_fileName_dict[v]= fileName_dict[v]
# [(fileName_dict[key]= value) for (key, value) in sorted(fileName_dict.items(), key=lambda x: x[1])]
else:
# we have to build a dictionary from the list of files
for i in range(1, len(self.fname)):
new_fileName_dict[i] = self.fname[i-1]
# read each file from the dict, extract header and data, and fill the sqlite tables and array
for station, file in new_fileName_dict.items():
process_header = False
process_data = False
sql = {}
# by default, station or profile number is extracted from the filename
if station_regex.search(file):
if station_regex != None and station_regex.search(file):
[station] = station_regex.search(file).groups()
sql['station'] = int(station)
logging.debug(f"Station match: {sql['station']}")
@@ -394,7 +411,7 @@ class Profile:
# now, extract and process all data
# split the line, remove leading and trailing space before
p = line.strip().split(self.__separator)
#logging.debug(f"line split: {p}")
logging.debug(f"line split: {p}")
#logging.debug(f"line end: {p[-1]}")
# skip to next line in file when skipLineWith is defined