#!/usr/bin/env python
"""
Functions to call ECMWF Reanalysis with the CDS API.

- ERA5-land hourly request
- ERA5-land daily request
- request a list of hourly variables dedicated to the calculation of ET0
  and the generation of MODSPA daily forcing files

@author: rivalland
"""

import os  # for path exploration
from typing import List  # to declare variables
import numpy as np  # for math on arrays
import xarray as xr  # to manage nc files
from datetime import datetime  # to manage dates
from p_tqdm import p_map  # for multiprocessing with progress bars
from dateutil.rrule import rrule, MONTHLY
from fnmatch import fnmatch  # for file name matching
import re  # for string comparison
import warnings  # to suppress pandas warning

# CDS API external library
# source:
import cdsapi  # to download cds data
import requests  # to request data

# FAO ET0 calculator external library
# Notes
# source:
# documentation:
import eto  # to calculate ET0

def era5_enclosing_shp_aera(area: List[float], pas: float) -> tuple:
    """
    Find the four coordinates of the ERA5 grid box that encloses a scene
    bounding box, so that the requested area agrees with the grid resolution.

    System projection: WGS84 lat/lon, decimal degrees.

    Parameters
    ----------
    area : list of 4 float
        [lat north, lon west, lat south, lon east] in degrees WGS84.
    pas : float
        Grid size (grid step) in degrees.

    Returns
    -------
    tuple of 4 float
        (N, W, S, E) corners of the grid in decimal degrees, rounded to
        two decimals.

    Notes
    -----
    Every bound is first floored to a multiple of ``pas``; the north and
    east bounds are then pushed one grid step further, so the returned box
    always contains the input box.

    gdal coordinates reference the upper left corner of a pixel whereas
    ERA5 coordinates refer to the centre of a grid cell.
    NOTE(review): the original note mentions a ``pas/2`` offset to resolve
    this difference, but no such offset is applied in this function --
    confirm whether it is handled by the caller.
    """
    lat_max, lon_min, lat_min, lon_max = area

    def _snap(value: float, extra_steps: int) -> float:
        # Floor onto the grid, optionally move `extra_steps` cells further.
        return round((value // pas + extra_steps) * pas, 2)

    era5_lat_max = _snap(lat_max, 1)  # North: next grid line above
    era5_lon_min = _snap(lon_min, 0)  # West: grid line at or below
    era5_lat_min = _snap(lat_min, 0)  # South: grid line at or below
    era5_lon_max = _snap(lon_max, 1)  # East: next grid line above

    return era5_lat_max, era5_lon_min, era5_lat_min, era5_lon_max  # [N,W,S,E]

def call_era5land_daily(args: tuple) -> None:
    """
    Query one month of daily ERA5-land data for a selected variable and a
    selected daily statistic, and save it as a netCDF file.

    The arguments are packed in a single tuple so that the function can be
    mapped over a list of requests by a process pool.

    Parameters
    ----------
    args : tuple
        ``(year, month, variable, statistic, area, output_path)`` with:

        year : str
            year in YYYY format.
        month : str
            month in MM format.
        variable : str
            user-selectable ERA5-land variable name.
        statistic : str
            daily statistic, one of ``daily_mean``, ``daily_minimum`` or
            ``daily_maximum``.
        area : list of 4 float
            ``[lat_max, lon_min, lat_min, lon_max]``.
        output_path : str
            directory in which the output file is written.

    Returns
    -------
    None
        A netCDF file is written in ``output_path``. Nothing is downloaded
        when the file already exists. A failed request is reported on
        standard output and does not raise, so that the other requests of a
        batch keep running.

    Notes
    -----
    NOTE(review): several lines of this function were missing from the
    reviewed source (tail of the file name, ``else``/``try``/``except``
    structure, ``params=`` wrapper, chunk write). They have been
    reconstructed; check them against the project history.
    NOTE(review): this relies on the CDS Toolbox workflow service
    (``Client.service``) -- confirm it is still available with the
    installed cdsapi version.
    """
    year, month, variable, statistic, area, output_path = args

    # Name of the output file for each month (statistic, variable, year, month).
    # NOTE(review): reconstructed so that it satisfies the consumers visible
    # in this module: load_variable() matches '*era5-land*<variable>_<statistic>*'
    # and filename_to_datetime() looks for a 'YYYY_MM' pattern.
    output_filename = \
        output_path + os.sep + \
        'era5-land_' + year + '_' + month + '_' + variable + '_' + statistic + '.nc'

    if os.path.isfile(output_filename):
        print(output_filename, ' already exist')
        return None

    # Download into a temporary name first: an interrupted transfer must not
    # leave a truncated file that a later run would consider as done.
    partial_filename = output_filename + '.part'
    try:
        c = cdsapi.Client(timeout=300)

        result = c.service("tool.toolbox.orchestrator.workflow",
                           params={
                               "realm": "c3s",
                               "project": "app-c3s-daily-era5-statistics",
                               "version": "master",
                               "kwargs": {
                                   "dataset": "reanalysis-era5-land",
                                   "product_type": "reanalysis",
                                   "variable": variable,
                                   "statistic": statistic,
                                   "year": year,
                                   "month": month,
                                   "time_zone": "UTC+00:0",
                                   "frequency": "1-hourly",
                                   "grid": "0.1/0.1",
                                   # the service expects [south, north] / [west, east]
                                   "area": {"lat": [area[2], area[0]],
                                            "lon": [area[1], area[3]]}
                               },
                               "workflow_name": "application"
                           })

        location = result[0]['location']
        res = requests.get(location, stream=True)
        # Do not store an HTTP error page as if it were a netCDF file
        res.raise_for_status()
        print("Writing data to " + output_filename)
        with open(partial_filename, 'wb') as fh:
            for r in res.iter_content(chunk_size=1024):
                fh.write(r)
        os.replace(partial_filename, output_filename)
    except Exception as err:  # best effort: report and let other requests run
        print('!! request', variable, '  failed !! -> year', year, 'month', month)
        print(err)
        if os.path.isfile(partial_filename):
            os.remove(partial_filename)
    return None

def call_era5land_daily_for_MODSPA(start_date: str, end_date: str, area: List[float], output_path: str, processes: int = 9) -> None:
    """
    Request the ERA5-land daily variables needed for the ET0 calculation and
    the MODSPA forcing (dataset ``reanalysis-era5-land``).

    One request is issued per variable, per daily statistic and per month
    of the period; requests are dispatched to a pool of processes.

    Called land surface variables:

    - 2m_temperature (daily minimum and maximum)
    - 2m_dewpoint_temperature (daily minimum and maximum)
    - 10m_u_component_of_wind (daily mean)
    - 10m_v_component_of_wind (daily mean)
    - total_precipitation (daily mean)
    - surface_solar_radiation_downwards (daily mean)

    Parameters
    ----------
    start_date : str
        first date of the period, ``YYYY-MM-DD``. The whole month
        containing this date is requested.
    end_date : str
        last date of the period, ``YYYY-MM-DD``. The whole month
        containing this date is requested.
    area : list of 4 float
        [north, west, south, east] in degrees WGS84.
    output_path : str
        directory in which the monthly files are written.
    processes : int, default 9
        number of logical processors to use.

    Returns
    -------
    None
    """
    # First day of each month of the period (both end months included)
    strt_dt = datetime.strptime(start_date, '%Y-%m-%d').replace(day=1)
    end_dt = datetime.strptime(end_date, '%Y-%m-%d').replace(day=1)

    periods = list(rrule(
        freq=MONTHLY, dtstart=strt_dt, until=end_dt, bymonthday=1))

    # Daily statistics to download for each variable
    statistics_per_variable = {
        '2m_temperature': ['daily_minimum', 'daily_maximum'],
        '10m_u_component_of_wind': ['daily_mean'],
        '10m_v_component_of_wind': ['daily_mean'],
        'total_precipitation': ['daily_mean'],
        'surface_solar_radiation_downwards': ['daily_mean'],
        '2m_dewpoint_temperature': ['daily_minimum', 'daily_maximum'],
    }

    args = []
    for variable, statistics in statistics_per_variable.items():
        for statistic in statistics:
            for dt in periods:
                year = str(dt.year)
                month = '{:02d}'.format(dt.month)  # zero-padded MM
                # one ERA5-land request
                args.append((year, month, variable, statistic, area, output_path))

    # Start pool
    p_map(call_era5land_daily, args, num_cpus=processes)
    return None

def filename_to_datetime(filename: str) -> datetime:
    """
    filename_to_datetime returns a `datetime` object for the date of the
    given file name.

    ## Arguments
    1. filename: `str`
        name or path of the product

    ## Returns
    1. date: `datetime` object, date of the product (first day of the
        month, since only year and month are read from the name)

    ## Raises
    `ValueError` when no `yyyy_mm` pattern is found in `filename`, or when
    the matched text is not a valid year/month.

    NOTE(review): the `re.search(` call, the return annotation and the
    `return` statement were missing from the reviewed source and have been
    reconstructed.
    """
    # Search for a date pattern (yyyy_mm) in the product name or path.
    # The first occurrence is used, so a directory name matching the
    # pattern takes precedence over the file name itself.
    match = re.search(r'\d{4}_\d{2}', filename)
    if match is None:
        raise ValueError("no 'yyyy_mm' date pattern found in: " + filename)

    date_format = '%Y_%m'
    datetime_object = datetime.strptime(match[0], date_format)
    return datetime_object

def concat_monthly_nc_file(list_era5land_monthly_files: List[str], list_variables: List[str], output_path: str) -> List[str]:
    """
    Concatenate monthly netcdf datasets into a single file for each given variable.

    ## Arguments
    1. list_era5land_monthly_files: `List[str]`
        list of daily files per month
    2. list_variables: `List[str]`
        names of the required variables as written in the filename
    3. output_path: `str`
        path to which save the aggregated files

    ## Returns
    1. list_era5land_files: `List[str]`
        the list of paths to the aggregated files

    ## Raises
    `ValueError` when no monthly file matches one of the variables.

    NOTE(review): the bodies of the inner loops and the final `append`
    were missing from the reviewed source; they have been reconstructed
    from the remaining comments and local variables.
    """
    os.makedirs(output_path, exist_ok=True)

    list_era5land_files = []
    # concatenate all dates into a single file for each variable
    for variable in list_variables:
        # find the files of this variable, with their date
        dated_files = [
            (filename_to_datetime(file), file)
            for file in list_era5land_monthly_files
            if fnmatch(file, '*' + variable + '*')
        ]
        if not dated_files:
            raise ValueError('no monthly file found for variable: ' + variable)

        # Chronological order: the output name uses the first and last
        # dates, and the concatenation must follow the time axis.
        dated_files.sort(key=lambda item: item[0])
        dates = [date for date, _ in dated_files]

        # Create file name
        concatenated_file = output_path + os.sep + 'era5-land_' + dates[0].strftime('%m-%Y') + '_' + dates[-1].strftime('%m-%Y') + '_' + variable + '.nc'

        # open all months for the given variable
        curr_datasets = [xr.open_dataset(file) for _, file in dated_files]
        try:
            # Concatenate monthly datasets
            concatenated_dataset = xr.concat(curr_datasets, dim='time')
            # Save datasets
            concatenated_dataset.to_netcdf(path=concatenated_file, mode='w')
        finally:
            # release the file handles of the monthly files
            for dataset in curr_datasets:
                dataset.close()

        # Add filename to output list
        list_era5land_files.append(concatenated_file)
    return list_era5land_files

def uz_to_u2(u_z: List[float], h: float) -> List[float]:
    """
    Adjust a wind speed measured at a height other than 2 m to the
    standard 2 m height, with the logarithmic wind profile

        u2 = u_z * 4.87 / ln(67.8 * h - 5.42)

    Parameters
    ----------
    u_z : float array
        measured wind speed h m above the ground surface, m s-1.
        Any array-like (list, numpy array, scalar) is accepted.
    h : float
        height of the measurement above the ground surface, m.

    Returns
    -------
    u2 : float array
        wind speed in metres per second (m s-1) at 2 m above the ground,
        as a numpy array (or numpy scalar for a scalar input).
    """
    # np.asarray lets plain Python lists be used, as the annotation promises
    u2 = np.asarray(u_z) * 4.87 / (np.log(67.8 * h - 5.42))
    return u2

def ea_calc(T: float) -> float:
    """
    Actual vapour pressure (ea) derived from dewpoint temperature.

    Applied to an air temperature instead of a dewpoint temperature, the
    same expression gives the saturation vapour pressure.

    Parameters
    ----------
    T : float or array-like
        Temperature in degrees Celsius.

    Returns
    -------
    e_a : float or array-like
        the actual vapour pressure in kPa.

    Notes
    -----
    FAO-56 formulation: ``0.6108 * exp(17.27 * T / (T + 237.3))``.
    The denominator constant is 237.3 (the reviewed source used 237.15,
    which is not the FAO-56 coefficient).
    """
    e_a = 0.6108 * np.exp(17.27 * T / (T + 237.3))
    return e_a

def load_variable(file_name: str) -> xr.Dataset:
    """
    Loads an ERA5 meteorological variable into a xarray
    dataset according to the modspa architecture.

    Temperature and dewpoint temperature variables are renamed according
    to the daily statistic found in the file name (max or min); every
    other variable is loaded without modification.

    ## Arguments
    1. file_name: `str`
        netcdf file to load

    ## Returns
    1. variable: `xr.Dataset`
        output xarray dataset
    """
    # (file name pattern, renaming to apply); the first match wins
    renamings = (
        ('*era5-land*2m_temperature_daily_maximum*', {'t2m': 't2m_max'}),  # maximum temperature
        ('*era5-land*2m_temperature_daily_minimum*', {'t2m': 't2m_min'}),  # minimum temperature
        ('*era5-land*2m_dewpoint_temperature_daily_maximum*', {'d2m': 'd2m_max'}),  # maximum dewpoint temperature
        ('*era5-land*2m_dewpoint_temperature_daily_minimum*', {'d2m': 'd2m_min'}),  # minimum dewpoint temperature
    )

    variable = xr.open_dataset(file_name)
    for pattern, renaming in renamings:
        if fnmatch(file_name, pattern):
            variable = variable.rename(renaming)
            break

    # netcdfs from ERA5 carry an unnecessary 'realization' coordinate, so it
    # is dropped; files that do not have it are accepted as they are
    return variable.drop_vars('realization', errors='ignore')

def calculate_ET0_pixel(pixel_dataset: xr.Dataset, lat: float, lon: float, h: float = 10) -> np.ndarray:
    """
    Calculate ET0 over the year for a single pixel of the ERA5 weather dataset.

    ## Arguments
    1. pixel_dataset: `xr.Dataset`
        extracted dataset that contains all information for a single pixel
        (variables `d2m_max`, `d2m_min`, `t2m_max`, `t2m_min`, `tp`, `u10`,
        `v10` and `ssrd`)
    2. lat: `float`
        latitudinal coordinate of that pixel
    3. lon: `float`
        longitudinal coordinate of that pixel
    4. h: `float` `default = 10`
        height of ERA5 wind measurement in meters

    ## Returns
    1. ET0_values: `np.ndarray`
        numpy array containing the ET0 values for each day

    NOTE(review): the right-hand side of the `Rain` column and the head of
    the `param_est` call were missing from the reviewed source; both have
    been reconstructed -- check the list of columns passed to `param_est`.
    """
    kelvin_offset = 273.15  # conversion of temperatures from K to degrees C

    # Conversion of xarray dataset to dataframe for ET0 calculation
    ET0 = pixel_dataset.d2m_max.to_dataframe().rename(columns={'d2m_max': 'Dew_Point_T_max'}) - kelvin_offset

    ET0['Dew_Point_T_min'] = pixel_dataset.d2m_min.to_dataframe()['d2m_min'].values - kelvin_offset
    ET0['T_min'] = pixel_dataset.t2m_min.to_dataframe()['t2m_min'].values - kelvin_offset
    ET0['T_max'] = pixel_dataset.t2m_max.to_dataframe()['t2m_max'].values - kelvin_offset
    # conversion of total precipitation from meters to millimeters
    ET0['Rain'] = pixel_dataset.tp.to_dataframe()['tp'].values * 1000
    # Conversion of eastward and northward wind values to scalar wind
    ET0['U_z'] = np.sqrt(pixel_dataset.u10.to_dataframe()['u10'].values**2 + pixel_dataset.v10.to_dataframe()['v10'].values**2)
    # Relative humidity from dew point temperature and temperature: the
    # daily maximum is paired with the minimum temperatures and vice versa
    ET0['RH_max'] = 100 * ea_calc(ET0['Dew_Point_T_min']) / ea_calc(ET0['T_min'])
    ET0['RH_min'] = 100 * ea_calc(ET0['Dew_Point_T_max']) / ea_calc(ET0['T_max'])
    # to convert downward total radiation from J/m2 to MJ/m2
    ET0['R_s'] = pixel_dataset.ssrd.to_dataframe()['ssrd'].values / 1e6

    ET0.drop(columns=['Dew_Point_T_max', 'Dew_Point_T_min'], inplace=True)  # drop unnecessary columns

    # Start ET0 calculation
    eto_calc = eto.ETo()

    # Silence the pandas warnings only while the ETo library runs, instead
    # of switching every warning off for the rest of the process
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')

        # ET0 calculation for given pixel (lat, lon) values
        eto_calc.param_est(ET0[['R_s', 'T_max', 'T_min', 'U_z', 'RH_max', 'RH_min']],
                           freq='D',  # daily frequency
                           # Elevation of the met station above mean sea level (m) (only needed if P is not in df).
                           z_msl=0.,
                           lat=lat,
                           lon=lon,
                           TZ_lon=None,
                           z_u=h)  # h: height of raw wind speed measurement

        # Retrieve ET0 values
        ET0_values = eto_calc.eto_fao(max_ETo=15, min_ETo=0, interp=True, maxgap=10).values  # ETo_FAO_mm
    return ET0_values

def era5Land_nc_daily_to_ET0(list_era5land_files: List[str], output_nc_file: str, ndvi_path: str, h: float = 10) -> None:
    """
    Calculate ET0 values from the ERA5 netcdf weather variables.
    Output netcdf contains the ET0 values for each day in the selected
    time period and for each ERA5 pixel covering the required area.

    ## Arguments
    1. list_era5land_files: `List[str]`
        list of netcdf files containing the necessary variables
    2. output_nc_file: `str`
        output netcdf file to save
    3. ndvi_path: `str`
        not used by this function; kept in the signature for the callers
    4. h: `float` `default = 10`
        height of ERA5 wind measurements in meters

    ## Returns
    `None`

    ## Raises
    `ValueError` when `list_era5land_files` is empty.
    """
    if not list_era5land_files:
        raise ValueError('list_era5land_files is empty, no weather variable to load')

    # Load all files into a single xarray dataset that contains all dates (daily frequency)
    raw_weather_ds = xr.merge([load_variable(file) for file in list_era5land_files])

    # Create ET0 variable (that will be saved), spanning every dimension of the dataset
    dims = tuple(raw_weather_ds.sizes)
    shape = tuple(raw_weather_ds.sizes[d] for d in dims)
    raw_weather_ds = raw_weather_ds.assign(ET0=(dims, np.zeros(shape, dtype='float32')))

    # Loop on latitude and longitude coordinates to calculate ET0 per "pixel"
    for lat in raw_weather_ds.coords['lat'].values:
        for lon in raw_weather_ds.coords['lon'].values:
            # Select whole time period for given (lat, lon) values
            select_ds = raw_weather_ds.sel({'lat': lat, 'lon': lon}).drop_vars(['lat', 'lon'])

            # Calculate ET0 values for given pixel
            ET0_values = calculate_ET0_pixel(select_ds, lat, lon, h)

            # Write ET0 values in xarray Dataset
            raw_weather_ds['ET0'].loc[{'lat': lat, 'lon': lon}] = ET0_values

    # Get necessary data for final dataset and rewrite netcdf attributes
    final_weather_ds = raw_weather_ds.drop_vars(names=['ssrd', 'v10', 'u10', 't2m_max', 't2m_min', 'd2m_max', 'd2m_min'])  # remove unwanted variables
    final_weather_ds['tp'] = final_weather_ds['tp'] * 1000  # conversion from m to mm

    # Change datatype to reduce memory usage: values are stored as unsigned
    # 16 bit integers with a scale factor of 1000. They are clipped to the
    # representable range first, so that slightly negative or very large
    # values saturate instead of wrapping around.
    final_weather_ds = (final_weather_ds * 1000).clip(min=0, max=65535).astype('u2')

    # Set variable attributes
    final_weather_ds['ET0'].attrs['units'] = 'mm'
    final_weather_ds['ET0'].attrs['standard_name'] = 'Potential evapotranspiration'
    final_weather_ds['ET0'].attrs['comment'] = 'Potential evapotranspiration accumulated over the day, calculated with the FAO-56 method'
    final_weather_ds['ET0'].attrs['scale factor'] = '1000'
    final_weather_ds['tp'].attrs['units'] = 'mm'
    final_weather_ds['tp'].attrs['standard_name'] = 'Precipitation'
    final_weather_ds['tp'].attrs['comment'] = 'Volume of total daily precipitation expressed as water height in milimeters'
    final_weather_ds['tp'].attrs['scale factor'] = '1000'

    # Save dataset to netcdf, still in wgs84 (lat, lon) coordinates
    final_weather_ds.to_netcdf(path=output_nc_file, encoding={"ET0": {"dtype": "u2"}, "tp": {"dtype": "u2"}})
    return None