#!/usr/bin/python # # ctd.py # $Id$ """ Python script for extracting data from SBE-processing files and write out in various format used by datagui, ODV, Netcdf. usage: $ python3 ctd.py [options] <files> options are: -h, --help : display this help and quit -v, --version : display program version and quit -e, --echo : display processed file name -q, --quiet : don't display output -o, --output=<file> : output file name without extention, default is pirata_fr24 -r --roscop=<file> : load file for code roscop description --cycle_mesure=<name> : cycle_mesure name --plateforme=<name> : ship or plateforme name --date_debut=JJ/MM/YYYY : starting date from cycle_mesure --date_fin=JJ/MM/YYYY : end date from cycle_mesure --institut=<name> : institute name --code_oopc=<value> : processing code --pi=<pi_name> : chief scientist --ascii : ASCII output instead XML --xml : XML output (default) --odv : ODV (Ocean Data View) spreadsheet output --netcdf : NetCDF OceanSITES output --all : ASCII, XML, ODV and NetCDF outputs --local : use local DTD for XML --secondary : use T/C secondary sensor --sn=<serial_number> --type=<instrument_type> example: $ python3 ctd.py --cycle_mesure=PIRATA-FR29 --institut=IRD --plateforme="THALASSA" --sn=09P01263 --type=SBE911+ --pi=BOURLES --date_debut=01/03/2019 --date_fin=04/04/2019 data/asc/fr29???.hdr --echo --local --ascii """ import sys import re import getopt import string import fileinput import csv from datetime import datetime # a lire dans la premier fichier seasave_version = "7.21b" # initialize constants VERSION = "V1.1 J Grelet - IRD - US191 IMAGO, Plouzane - March 2019" DEGREE = 176 CODE = -1 CONTEXTE = "PIRATA" TIMEZONE = "GMT" FORMAT_DATE = "DMY" # initialize variables and options # -------------------------------- echo = False quiet = True xml = True ascii = False odv = False netcdf = False dtd = True debug = 0 code_oopc = '0A' cycle_mesure = 'PIRATA-FR29' plateforme = 'THALASSA' institut = 'IRD' sn = '09P01263' type = 'SBE911+' pi = 'BOURLES' date_debut = '01/03/2019' date_fin = '04/04/2019' output_file = 'pirata-fr29' latitude = 0. latitude_str = '' longitude = 0. longitude_str = '' julian = 0 epic_date = '' Pres = 0 bottom_depth = 0 # initialize datetime object # -------------------------- dt = datetime # --------------------------------------------------------------- # prepare compiled patterns for regular expressions # --------------------------------------------------------------- # extract station number # ------------------------------------------------- re_station_number = re.compile(r"fr29(\d{3})") # extract CTD station start date # ------------------------------ re_ctd_upload_time = \ re.compile( r"System UpLoad Time\s*=\s*(\w+)\s+(\d+)\s+(\d+)\s+(\d+):(\d+):(\d+)") # extract CTD station latitude # ---------------------------- re_latitude = re.compile(r"NMEA\s+Latitude\s*[:=]\s*(\d+)\s+(\d+.\d+)\s+(\w)") # extract CTD station longitude # ----------------------------- re_longitude = re.compile( r"NMEA\s+Longitude\s*[:=]\s*(\d+)\s+(\d+.\d+)\s+(\w)") # extract bottom depth at station position from echo sounder # ------------------------------------------------------------ re_bottom_depth = re.compile(r"Bottom\sDepth\s*[:=]\s*(\d+)") # --------------------------------------------------------------- # display help using text inside """ """" at the beginning of the # script with special attribute __doc__ and quit # --------------------------------------------------------------- def usage(): print(__doc__) sys.exit() # ------------------------------------------------ # display version and quit # ------------------------------------------------ def version(): print("%s: %s" % (sys.argv[0], VERSION)) sys.exit() # ------------------------------------------------------------------------------ # entete XML # ------------------------------------------------------------------------------ def entete_xml(fd): # my $today = &dateFormat(undef,"%d/%m/%Y"); today = '06/03/2019' fd.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n") # les commentaires ne sont pas acceptes par XML Toolbox Matlab de Geodise if (dtd): # fd.write("<!DOCTYPE OCEANO SYSTEM \"M:\ACQUISIT\oceano.dtd\">\n') fd.write("<!DOCTYPE OCEANO SYSTEM \"/m/oceano.dtd\">\n") # fd.write("<!DOCTYPE OCEANO PUBLIC \"-//US191//DTD OCEANO//FR\" \ # \"http://antea/acquisit/oceano.dtd\">\n') else: fd.write("<!DOCTYPE OCEANO PUBLIC \"-//US191//DTD OCEANO//FR\" \ \"http://www.brest.ird.fr/us191/database/oceano.dtd\">\n") fd.write("<OCEANO TYPE=\"PROFIL\">\n") fd.write(" <ENTETE>\n") fd.write(" <PLATEFORME>\n") fd.write(" <LIBELLE>%s</LIBELLE>\n" % plateforme) fd.write(" </PLATEFORME>\n") fd.write(" <CYCLE_MESURE CONTEXTE=\"%s\" TIMEZONE=\"%s\" FORMAT=\"%s\">\n" % (CONTEXTE, TIMEZONE, FORMAT_DATE)) fd.write(" <LIBELLE>%s</LIBELLE>\n" % cycle_mesure) fd.write(" <DATE_DEBUT>%s</DATE_DEBUT>\n" % date_debut) fd.write(" <DATE_FIN>%s</DATE_FIN>\n" % date_fin) fd.write(" <INSTITUT>%s</INSTITUT>\n" % institut) fd.write(" <RESPONSABLE>%s</RESPONSABLE>\n" % pi) fd.write(" <ACQUISITION LOGICIEL=\"SEASAVE\" VERSION=\"%s\"></ACQUISITION>\n" % seasave_version) fd.write(" <TRAITEMENT LOGICIEL=\"SBEDATAPROCESSING\" VERSION=\"%s\"></TRAITEMENT>\n" % seasave_version) fd.write(" <VALIDATION LOGICIEL=\"%s\" VERSION=\"%s\" DATE=\"%s\" OPERATEUR=\"%s\" CODIFICATION=\"OOPC\">\n" % ( sys.argv[0], VERSION, today, pi)) fd.write(" <CODE>%s</CODE>\n" % code_oopc) fd.write( " <COMMENTAIRE>Extraction realisee avant la post-calibration</COMMENTAIRE>\n") fd.write(" </VALIDATION>\n") fd.write(" </CYCLE_MESURE>\n") fd.write(" <INSTRUMENT TYPE=\"%s\" NUMERO_SERIE=\"%s\">\n" % (type, sn)) fd.write(" </INSTRUMENT>\n") # decode_con_file(fileName) fd.write(" </ENTETE>\n") fd.write(" <DATA>\n") # ------------------------------------------------------------------------------ # read code roscop file # ------------------------------------------------------------------------------ def codeRoscop(file): print("Code roscop file: %s" % code_roscop) with open(code_roscop, 'rt') as f: reader = csv.DictReader(f, delimiter=';') for row in reader: print("%s : %s : %s : %s : %s" % (row['key'], row['long_name'], row['standard_name'], row['units'], row['format'])) sys.exit() # display help with no arg on command-line # ---------------------------------------- if len(sys.argv) == 1: usage() # setting and get otpions # ----------------------- try: # The return args consists of two elements: the first is a list of # (option, arg) pairs; the second is the list of program arguments # left after the option list was stripped # see optparse, a powerful library for parsing command-line options # The gnu version of getopt means that option and non-option # arguments may be intermixed # The -e is a simple flag, while -d, -o and -r require an argument. # The option definition string is "ed:r:". # To mix option and non-option arguments on the command line in any order, # use gnu_getopt() instead getopt # ------------------------------------------------------------------ options, args = getopt.gnu_getopt(sys.argv[1:], 'ed:qo:hvr:', ['echo', 'debug=', 'quiet', 'output=', 'help', 'version', 'roscop=', 'cycle_mesure=', 'plateforme=', 'date_debut=', 'date_fin=', 'institut=', 'code_oopc=', 'pi=', 'ascii', 'xml', 'odv', 'netcdf', 'all', 'local', 'secondary', 'sn=', 'type=']) # if bad option, display an error message and usage # ------------------------------------------------- except getopt.GetoptError as err: print(str(err)) usage() # iterate over options list # ------------------------- for option, arg in options: if option in ('-e', '--echo'): echo = True elif option in ('-q', '--quiet'): quiet = False elif option in ('-d', '--debug'): debug = int(arg) elif option in ('-o', '--output'): outputfile = arg elif option in ('-h', '--help'): usage() elif option in ('-v', '--version'): version() elif option in ('-r', '--roscop'): code_roscop = arg codeRoscop(code_roscop) elif option == '--cycle_mesure': cycle_mesure = arg elif option == '--plateforme': plateforme = arg elif option == '--date_debut': date_debut = arg elif option == '--date_fin': date_fin = arg elif option == '--institut': institut = arg elif option == '--code_oopc': code_oopc = arg elif option == '--pi': pi = arg elif option == '--ascii': ascii = True elif option == '--xml': xml = True elif option == '--odv': odv = True elif option == '--netcdf': netcdf = True elif option == '--all': ascii = xml = odv = netcdf = True # for debug only, display arg list # -------------------------------- if debug == 1: print("Args: %s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (cycle_mesure, plateforme, date_debut, date_fin, institut, code_oopc, pi)) # open ascii files # ---------------- if ascii: hdr_file = open(output_file + '.ctd', "w") ascii_file = open(output_file + '_ctd', "w") # write header to ascii files # --------------------------------- hdr_file.write("//%s %s %s %s %s %s\n" % (cycle_mesure, plateforme, institut, type, sn, pi)) hdr_file.write("St Date Time Latitude Longitude Depth\n") ascii_file.write("//%s %s %s %s %s %s\n" % (cycle_mesure, plateforme, institut, type, sn, pi)) ascii_file.write( "PRFL PRES TEMP PSAL DENS SVEL DOX2 FLU2 TUR3 NAVG\n") # open xml file # ------------- if xml: xml_file = open(output_file + '_ctd.xml', "w") # write header to xml files # --------------------------------- entete_xml(xml_file) xml_file.write( "PRFL PRES TEMP PSAL DENS SVEL DOX2 FLU2 TUR3 NAVG\n") if echo: # display selected output format # ------------------------------ print("Output: ") if (ascii): print("ASCII ") if (xml): print("XML ") if (odv): print("ODV ") # display header # -------------- print("\n//%s %s %s %s %s %s" % (cycle_mesure, plateforme, institut, type, sn, pi)) print(" File St Date Time Latitude Longitude Depth Bottom", end='') # iterate over file list getting with getopt.gnu_getopt # ---------------------------------------------------- for fileName in args: # display processed filename # suppress '\n' character written at the end with a comma # ------------------------------------------------------- if echo: print("\n%s" % (fileName), end='') # extract station number from file name using compiled regexp # ----------------------------------------------------------- if re_station_number.search(fileName): station = int(re_station_number.search(fileName).group(1)) # open current file # TODOS: add some tests on file existence # --------------------------------------- file = open(fileName, "r", encoding="ISO-8859-1") # read each line of current file # ------------------------------ for line in file.readlines(): # extract CTD station start date # ------------------------------ if re_ctd_upload_time.search(line): (month, day, year, hour, minute, second) = \ re_ctd_upload_time.search(line).groups() # format date and time to "May 09 2011 16:33:53" # ----------------------------------------------- dateTime = "%s/%s/%s %s:%s:%s" % (day, month, year, hour, minute, second) # dateTime conversion to "09/05/2011 16:33:53" # -------------------------------------------- dateTime = "%s" % \ (dt.strptime(dateTime, "%d/%b/%Y %H:%M:%S").strftime("%d/%m/%Y %H:%M:%S")) # conversion to "20110509163353" # -------------------------------------------- epic_date = "%s" % \ (dt.strptime(dateTime, "%d/%m/%Y %H:%M:%S").strftime("%Y%m%d%H%M%S")) # conversion to julian day # ------------------------ julian = float((dt.strptime(dateTime, "%d/%m/%Y %H:%M:%S").strftime("%j"))) \ + ((float(hour) * 3600.) + (float(minute) * 60.) + float(second)) / \ 86400. # we use julian day with origine 0 # -------------------------------- julian -= 1 # extract latitude conversion # -------------------------------------------- if re_latitude.search(line): (lat_deg, lat_min, lat_hemi) = re_latitude.search(line).groups() # format latitude to string # ------------------------- latitude_str = "%s%c%s %s" % (lat_deg, DEGREE, lat_min, lat_hemi) # transform to decimal using ternary operator # ------------------------------------------- latitude = float(lat_deg) + (float(lat_min) / 60.) if lat_hemi == 'N' else \ (float(lat_deg) + (float(lat_min) / 60.)) * -1 # extract longitude conversion # -------------------------------------------- if re_longitude.search(line): (lon_deg, lon_min, lon_hemi) = re_longitude.search(line).groups() # format longitude to string # ------------------------- longitude_str = "%s%c%s %s" % (lon_deg, DEGREE, lon_min, lon_hemi) # transform to decimal using ternary operator # ------------------------------------------- longitude = float(lon_deg) + (float(lon_min) / 60.) if lon_hemi == 'E' \ else (float(lon_deg) + (float(lon_min) / 60.)) * -1 # extract bottom depth # -------------------------------------------- if re_bottom_depth.search(line): bottom_depth = re_bottom_depth.search(line).group(1) # close header file # ----------------- file.close() # display header information in console # ------------------------------------- if echo: print(" %03d %s %s %s %5.0f %5.0f" % (station, dateTime, latitude_str, longitude_str, float(Pres), float(bottom_depth)), end='') # write station header to ascii files # ----------------------------------- if ascii: hdr_file.write("%03d %s %s %s %5.0f\n" % (station, dateTime, latitude_str, longitude_str, float(Pres))) # write station header in decimal with PRES = -1, and 5 decimals # -------------------------------------------------------------- ascii_file.write("%3d %4d %9.5f %8.5f %9.5f %s 1e36 1e36 1e36 1e36\n" % (station, CODE, julian, latitude, longitude, epic_date)) # write station header in decimal with PRES = -1, and 5 decimals # -------------------------------------------------------------- xml_file.write("%3d %4d %9.5f %8.5f %9.5f %s 1e36 1e36 1e36 1e36\n" % (station, CODE, julian, latitude, longitude, epic_date)) # substitute .hdr or .HDR in fileName with .asc # --------------------------------------------- fileName = re.sub(r'\.(?i)hdr$', '.asc', fileName) # we don't use __builtin__ readline method of file object that haven't # method to get read line number # -------------------------------------------------------------------- file = fileinput.input( fileName, openhook=fileinput.hook_encoded("ISO-8859-1")) # iterate over the lines of opened file "fileName" # ------------------------------------------------ for line in file: # skip header line # ---------------- if file.isfirstline(): continue else: # extract data # ------------ (scan, TimeJ, Pres, Depth, T0, T1, C0, C1, v1, v2, v1dt, v2dt, Xmiss, FlC, Aqua, Ox0, Ox1, S0, S1, sigmateta0, sigmateta1, sndvel0, sndvel1, nbin, flag) = line.split() # write data to ascii file # ------------------------ if ascii: ascii_file.write( "%3d %4d %6.5g %6.5g %6.3f %7.2f %7.6g %6.4g %7.6g %3d\n" % (station, float(Pres), float(T0), float(S0), float(sigmateta0), float(sndvel0), float(Ox0), float(FlC), float(Xmiss), int(nbin))) # write data to xml file # ------------------------ if xml: xml_file.write( "%3d %4d %6.5g %6.5g %6.3f %7.2f %7.6g %6.4g %7.6g %3d\n" % (station, float(Pres), float(T0), float(S0), float(sigmateta0), float(sndvel0), float(Ox0), float(FlC), float(Xmiss), int(nbin))) # close current file # ------------------ file.close() # end of loop over files # ---------------------- if echo: print("\n") # close files # ----------- if ascii: hdr_file.close() ascii_file.close() if xml: xml_file.write(" </DATA>\n") xml_file.write("</OCEANO>\n") xml_file.close()