# -*- coding: UTF-8 -*-
# Python
"""
04-07-2023
@author: rivallandv, heavily modified by jeremy auclair

Download ERA5 daily weather files for modspa
"""

import sys  # for path management
import os  # for path exploration
import geopandas as gpd  # to manage shapefiles
from psutil import cpu_count  # to get number of physical cores available
from p_tqdm import p_map  # for multiprocessing with progress bars
import modspa_pixel.preprocessing.lib_era5_land_pixel as era5land  # custom built functions for ERA5-Land data download
from modspa_pixel.config.config import config  # to load modspa config file
from modspa_pixel.preprocessing.parcel_to_pixel import convert_dataframe_to_xarray


def request_ER5_weather(config_file: str, ndvi_path: str, raw_S2_image_ref: str = None, shapefile: str = None,  mode: str = 'pixel') -> str:
    """
    Download ERA5 reanalysis daily weather files, concatenate and calculate ET0
    to obtain a netCDF4 dataset for precipitation and ET0 values. Weather data
    reprojection and conversion can take some time for large spatial windows.

    Arguments
    =========
    
    1. config_file: ``str``
        json configuration file
    2. ndvi_path: ``str``
        path to ndvi cube, used for weather data reprojection
    3. raw_S2_image_ref: ``str`` ``default = None``
        unmodified sentinel-2 image at correct resolution for
        weather data reprojection in pixel mode
    4. shapefile: ``str`` ``default = None``
        path to shapefile for extraction in parcel mode
    5. mode: ``str`` ``default = 'pixel'``
        choose between ``'pixel'`` and ``'parcel'`` mode

    Returns
    =======
    
    1. weather_file: ``str``
        path to netCDF4 file containing weather data
    """
    
    # Get config file
    config_params = config(config_file)
    
    # Get parameters
    data_path = config_params.data_path
    shapefile_path = config_params.shapefile_path
    run_name = config_params.run_name
    mode = config_params.mode
    start_date = config_params.start_date
    end_date = config_params.end_date
    weather_overwrite = config_params.weather_overwrite
    
    # Set output paths
    outpath = os.path.join(data_path, 'WEATHER', run_name)

    # Create daily wheather ncfile name
    weather_daily_ncFile = os.path.join(outpath, start_date + '_' + end_date + '_' + run_name + '_era5-land-daily-meteo')
    
    # Check if file exists and ndvi overwrite is false
    if os.path.exists(weather_daily_ncFile + '.nc' * (mode == 'pixel') + '_parcel.nc' * (mode == 'parcel' )) and not weather_overwrite:
        
        return weather_daily_ncFile

    # Geometry configuration
    wgs84_epsg = 'epsg:4326'  # WGS84 is the ERA5 epsg
    
    # ERA5 product parameters
    grid_size = 0.25
    wind_height = 10  # height of ERA5 wind measurements in meters

    print('REQUEST CONFIGURATION INFORMATIONS:')
    if shapefile_path:
        if os.path.exists(shapefile_path):
            print('shapeFile: ', shapefile_path)
        else:
            print('shapeFile not found')

    else:
        # print('specify either shapeFile, boxbound or point coordinate in json file')
        print('specify shapeFile in json file')
        sys.exit(-1)
    print('period: ', start_date, ' - ', end_date)
    print('experiment name:', run_name)
    if os.path.exists(outpath):
        print('path for nc files: ', outpath)
    else:
        os.mkdir(outpath)
        print('mkdir path for nc files: ', outpath)
    print('----------')

    # Request ERA5-land BoxBound Determination
    if shapefile_path:
        # Load shapefile to access geometrics informations for ERA5-Land request
        gdf_expe_polygons = gpd.read_file(shapefile_path)
        print('Input polygons CRS :', gdf_expe_polygons.crs)
        expe_epsg = gdf_expe_polygons.crs

        # verification que les polygones sont tous fermés
        liste_polygons_validity = gdf_expe_polygons.geometry.is_valid
        if list(liste_polygons_validity).count(False) > 0:
            
            print('some polygons of Shapefile are not valid')
            polygons_invalid = liste_polygons_validity.loc[liste_polygons_validity == False]
            print('invalid polygons:', polygons_invalid)
            
            for i in polygons_invalid.index:
                gdf_expe_polygons.geometry[i]

            # Application d'un buffer de zero m
            gdf_expe_polygons_clean = gdf_expe_polygons.geometry.buffer(0)
            gdf_expe_polygons = gdf_expe_polygons_clean

        # search for the total extent of the whole polygons in lat/lon [xlo/ylo/xhi/yhi] [W S E N]
        expe_polygons_boxbound = gdf_expe_polygons.total_bounds
        expe_polygons_boxbound = list(expe_polygons_boxbound)
        print('shape extend in ', expe_epsg.srs, ':', expe_polygons_boxbound)

        if expe_epsg.srs != wgs84_epsg:
            print('--- convert extend in wgs84 coordinates ---')
            
            # idem en wgs84 pour des lat/lon en degree (format utilisé par google earth engine)
            expe_polygons_boxbound_wgs84 = gdf_expe_polygons.to_crs(wgs84_epsg).total_bounds

        else:
            expe_polygons_boxbound_wgs84 = expe_polygons_boxbound

    print('boxbound [N W S E] extend in ', wgs84_epsg)
    print(expe_polygons_boxbound_wgs84)

    print('--start request--')

    # Get number of available CPUs
    nb_processes = 2 * min([cpu_count(logical = False), len(os.sched_getaffinity(0)), config_params.max_cpu])  # downloading data demands very little computing power, each processor core can manage multiple downloads

    # Define variables
    variables = ['2m_temperature', '2m_dewpoint_temperature', 'surface_solar_radiation_downwards', '10m_u_component_of_wind', '10m_v_component_of_wind', 'total_precipitation']
    
    # Split download per year if necessary
    dates = era5land.split_dates_by_year(start_date, end_date)
    if len(dates) == 1:
        
        # Generate arguments
        args = [(var, outpath, start_date, end_date, expe_polygons_boxbound_wgs84, grid_size) for var in variables]
    else:
        args = []
        for d in dates:
            date1, date2 = d
            temp = [(var, outpath, date1, date2, expe_polygons_boxbound_wgs84, grid_size) for var in variables]
            args.extend(temp)
    
    # Download in multiprocess
    weather_files = p_map(era5land.call_era5landhourly, args, **{"num_cpus": nb_processes})
    
    print('\nPath to daily weather netCDF file: ', outpath)
    
    # Generate pandas dataframe for parcel mode
    if mode == 'parcel':
        
        # Create save path
        weather_datframe = weather_daily_ncFile + '_df.csv'
        weather_dataset = weather_daily_ncFile + '_parcel.nc'
        
        # Check if weather dataset already exists
        if os.path.exists(weather_dataset) and not weather_overwrite:
            return weather_dataset
        
        # Generate daily weather datasets as Geotiffs for each variable
        weather_daily_rain, weather_daily_ET0 = era5land.era5Land_daily_to_yearly_parcel(weather_files, variables, weather_daily_ncFile, start_date, end_date, h = wind_height)
        
        # Generate and save weather dataframe
        era5land.extract_weather_dataframe(weather_daily_rain, weather_daily_ET0, shapefile, config_file, weather_datframe, max_cpu = config_params.max_cpu)
        
        # Convert dataframe to xarray dataset
        convert_dataframe_to_xarray(weather_datframe, weather_dataset, variables = ['Rain', 'ET0'], data_types = ['u2', 'u2'])
    
        print('\nWeather dataset:', weather_daily_ncFile)
        
        return weather_dataset
    
    # Calculate ET0 over the whole time period
    weather_daily_ncFile = era5land.era5Land_daily_to_yearly_pixel(weather_files, variables, weather_daily_ncFile, raw_S2_image_ref, ndvi_path, start_date, end_date, h = wind_height, max_ram = config_params.max_ram, weather_overwrite = weather_overwrite)
    
    print('\nWeather dataset:', weather_daily_ncFile)

    return weather_daily_ncFile