diff --git a/main_run_samir.py b/main_run_samir.py index aeed8abe21fdd2ac662abf2b2182903307b3d514..d46765a0de4765c0e726eb13b68036738b99310e 100644 --- a/main_run_samir.py +++ b/main_run_samir.py @@ -55,12 +55,11 @@ if __name__ == '__main__': elif preferred_provider == 'usgs': ndvi_path = os.path.join(data_path, 'IMAGERY', + 'USGS', 'NDVI', run_name, run_name + '_NDVI_cube_' + start_date + '_' + end_date + '.nc') - # TODO: change weather path once weather update is done ## Weather if mode == 'pixel': - weather_path = os.path.join(data_path, 'WEATHER', run_name, 'ncdailyfiles', start_date + '_' + end_date + '_' + run_name + '_era5-land-daily-meteo.nc') + weather_path = os.path.join(data_path, 'WEATHER', run_name, start_date + '_' + end_date + '_' + run_name + '_era5-land-daily-meteo.nc') else: - weather_path = os.path.join(data_path, 'WEATHER', run_name, 'ncdailyfiles', start_date + '_' + end_date + '_' + run_name + '_era5-land-daily-meteo_parcel.nc') + weather_path = os.path.join(data_path, 'WEATHER', run_name, start_date + '_' + end_date + '_' + run_name + '_era5-land-daily-meteo_parcel.nc') # Kcb_max path Kcb_max_obs_path = os.path.join(os.path.dirname(ndvi_path), 'Kcb_max_obs.nc') diff --git a/postprocessing/explore_parcel_outputs.ipynb b/postprocessing/explore_parcel_outputs.ipynb index 8cd649a9f50708136ee99e3213acd3ba3edcae3d..1e12422d43add691d86ae7784739031a5f4aae5d 100644 --- a/postprocessing/explore_parcel_outputs.ipynb +++ b/postprocessing/explore_parcel_outputs.ipynb @@ -80,7 +80,7 @@ "elif config_params.preferred_provider == 'usgs':\n", " ndvi_path = os.path.join(data_path, 'IMAGERY', 'USGS', 'NDVI', run_name, run_name + '_NDVI_cube_' + start_date + '_' + end_date + '.nc')\n", " \n", - "weather_path = os.path.join(data_path, 'WEATHER', run_name, 'ncdailyfiles', start_date + '_' + end_date + '_' + run_name + '_era5-land-daily-meteo_df.csv')\n", + "weather_path = os.path.join(data_path, 'WEATHER', run_name, start_date + '_' + end_date + '_' + run_name + '_era5-land-daily-meteo_df.csv')\n", "\n", "# Open Land Cover raster\n", "land_cover = xr.open_dataarray(land_cover_path)\n", diff --git a/postprocessing/explore_pixel_outputs.ipynb b/postprocessing/explore_pixel_outputs.ipynb index a5a08a5976b5394382321bf07fff0cbcea7d031a..dbb207d756f651e4fdfd97d942699e4b619bc623 100644 --- a/postprocessing/explore_pixel_outputs.ipynb +++ b/postprocessing/explore_pixel_outputs.ipynb @@ -69,7 +69,7 @@ "elif config_params.preferred_provider == 'usgs':\n", " ndvi_path = os.path.join(data_path, 'IMAGERY', 'USGS', 'NDVI', run_name, run_name + '_NDVI_cube_' + start_date + '_' + end_date + '.nc')\n", " \n", - "weather_path = os.path.join(data_path, 'WEATHER', run_name, 'ncdailyfiles', start_date + '_' + end_date + '_' + run_name + '_era5-land-daily-meteo.nc')\n", + "weather_path = os.path.join(data_path, 'WEATHER', run_name, start_date + '_' + end_date + '_' + run_name + '_era5-land-daily-meteo.nc')\n", "\n", "# Open Land Cover raster\n", "land_cover = xr.open_dataarray(land_cover_path)\n", diff --git a/preprocessing/custom_inputs_parcel.ipynb b/preprocessing/custom_inputs_parcel.ipynb index bbaaf0faf18526dbfd62329128d5b813fb03477c..17423252db495d17427eea9c31db2a7eca14b893 100644 --- a/preprocessing/custom_inputs_parcel.ipynb +++ b/preprocessing/custom_inputs_parcel.ipynb @@ -40,7 +40,7 @@ "rcParams.update({'font.size': 15})\n", "\n", "# Open config file\n", - "config_file = currentdir + os.sep + 'config' + os.sep + 'config_modspa.json'\n", + "config_file = os.path.join(currentdir, 'config', 'config_modspa.json')\n", "\n", "# Open config file and load parameters\n", "config_params = config(config_file)\n", @@ -458,7 +458,7 @@ "shapefile_raw = '/home/auclairj/notebooks/Shapefiles/Aurade_parcel_raw/Aurade_parcel_raw.shp'\n", "\n", "# Path to csv conversion file\n", - "conversion_csv_file = currentdir + os.sep + 'preprocessing' + os.sep + 'csv_files' + os.sep + 'class_conversion_parcel.csv'\n", + "conversion_csv_file = os.path.join(currentdir, 'preprocessing', 'csv_files', 'class_conversion_parcel.csv')\n", "\n", "# Open shapefile\n", "shapefile = gpd.read_file(shapefile_raw)\n", @@ -579,10 +579,10 @@ " \n", " # Replace the nodata values with nan\n", " cropped_raster = cropped_raster.astype(np.float32)\n", - " cropped_raster[cropped_raster == nodata] = np.NaN\n", + " cropped_raster[cropped_raster == nodata] = np.nan\n", " \n", " masked_raster = masked_raster.astype(np.float32)\n", - " masked_raster[masked_raster == nodata] = np.NaN\n", + " masked_raster[masked_raster == nodata] = np.nan\n", " \n", " # Calculate the zonal statistics\n", " raster_stats.extend([[id, np.nanmean(masked_raster), LC]])\n", diff --git a/preprocessing/custom_inputs_pixel.ipynb b/preprocessing/custom_inputs_pixel.ipynb index dd8388f9907e3a19bb71c163e531ff712daac590..d87f4d771ed33af3ea38f6030342f715d9a2650b 100644 --- a/preprocessing/custom_inputs_pixel.ipynb +++ b/preprocessing/custom_inputs_pixel.ipynb @@ -80,7 +80,7 @@ " return array\n", "\n", "# Open config file\n", - "config_file = currentdir + os.sep + 'config' + os.sep + 'config_modspa.json'\n", + "config_file = os.path.join(currentdir, 'config', 'config_modspa.json')\n", "\n", "# Open config file and load parameters\n", "config_params = config(config_file)\n", @@ -145,7 +145,7 @@ "raw_lc_path = '/mnt/e/MODSPA/LAND_COVER/Aurade_test/Aurade_test_LC.tif'\n", "\n", "# Path to csv conversion file\n", - "conversion_csv_file = currentdir + os.sep + 'preprocessing' + os.sep + 'csv_files' + os.sep + 'class_conversion.csv'\n", + "conversion_csv_file = os.path.join(currentdir, 'preprocessing', 'csv_files', 'class_conversion.csv')\n", "\n", "# Open and plot land cover raster\n", "landcover = xr.open_dataarray(raw_lc_path).squeeze(dim = ['band'], drop = True).rename('class').astype(np.uint8)\n", @@ -405,7 +405,7 @@ "plt.title('Field capacity');\n", "\n", "# Save low resolution dataset\n", - "original_save_path = soil_path + os.sep + 'Soil_low_resolution.nc'\n", + "original_save_path = os.path.join(soil_path, 'Soil_low_resolution.nc')\n", "soil_params.to_netcdf(original_save_path)" ] }, @@ -458,15 +458,15 @@ "\n", "# Reference image to reproject on\n", "if config_params.preferred_provider == 'copernicus':\n", - " image_path = config_params.data_path + os.sep + 'IMAGERY' + os.sep + 'SCIHUB'\n", - " ndvi_path = image_path + os.sep + 'NDVI'\n", + " image_path = os.path.join(config_params.data_path, 'IMAGERY', 'SCIHUB')\n", + " ndvi_path = os.path.join(image_path, 'NDVI')\n", "elif config_params.preferred_provider == 'theia':\n", - " image_path = config_params.data_path + os.sep + 'IMAGERY' + os.sep + 'THEIA'\n", - " ndvi_path = image_path + os.sep + 'NDVI'\n", + " image_path = os.path.join(config_params.data_path, 'IMAGERY', 'THEIA')\n", + " ndvi_path = os.path.join(image_path, 'NDVI')\n", "elif config_params.preferred_provider == 'usgs':\n", - " image_path = config_params.data_path + os.sep + 'IMAGERY' + os.sep + 'USGS'\n", - " ndvi_path = image_path + os.sep + 'NDVI'\n", - "reference_image = ndvi_path + os.sep + config_params.run_name + os.sep + config_params.run_name + '_grid_reference.tif'\n", + " image_path = os.path.join(config_params.data_path, 'IMAGERY', 'USGS')\n", + " ndvi_path = os.path.join(image_path, 'NDVI')\n", + "reference_image = os.path.join(ndvi_path, config_params.run_name, config_params.run_name + '_grid_reference.tif')\n", "\n", "# Get bounds of simulation area\n", "shapefile = gpd.read_file(shapefile_bounds).to_crs('EPSG:4326')\n", @@ -664,7 +664,7 @@ "data_path = config_params.data_path\n", "\n", "# Set paths\n", - "weather_path = data_path + os.sep + 'WEATHER' + os.sep + run_name + os.sep + 'ncdailyfiles' + os.sep + start_date + '_' + end_date + '_' + run_name + '_era5-land-daily-meteo.nc'\n", + "weather_path = os.path.join(data_path, 'WEATHER', run_name, start_date + '_' + end_date + '_' + run_name + '_era5-land-daily-meteo.nc')\n", "\n", "# Get time window\n", "start_date_obj = datetime.strptime(start_date, '%Y-%m-%d')\n", @@ -691,13 +691,13 @@ "\n", "# Get NDVI structure\n", "if preferred_provider == 'copernicus':\n", - " image_path = data_path + os.sep + 'IMAGERY' + os.sep + 'SCIHUB' + os.sep + 'NDVI'\n", + " image_path = data_path, 'IMAGERY', 'SCIHUB', 'NDVI'\n", "elif preferred_provider == 'theia':\n", - " image_path = data_path + os.sep + 'IMAGERY' + os.sep + 'THEIA' + os.sep + 'NDVI'\n", + " image_path = data_path, 'IMAGERY', 'THEIA', 'NDVI'\n", "elif preferred_provider == 'usgs':\n", - " image_path = data_path + os.sep + 'IMAGERY' + os.sep + 'USGS' + os.sep + 'NDVI'\n", + " image_path = data_path, 'IMAGERY', 'USGS', 'NDVI'\n", "\n", - "ndvi_path = image_path + os.sep + run_name + os.sep + run_name + '_NDVI_cube_' + start_date + '_' + end_date + '.nc'\n", + "ndvi_path = image_path, run_name, run_name + '_NDVI_cube_' + start_date + '_' + end_date + '.nc'\n", "\n", "# Create empty weather dataset with NDVI structure\n", "uniform_weather = xr.open_dataset(ndvi_path).rename_vars({'NDVI': 'Rain'}).copy(deep = True).transpose('time', 'y', 'x')\n", diff --git a/preprocessing/download_ERA5_weather.py b/preprocessing/download_ERA5_weather.py index 59ef77121d161e499daaee4638d395cfd909aca6..f1373e4fbd71fd69e3a12b2dfebc2042bb7605f3 100644 --- a/preprocessing/download_ERA5_weather.py +++ b/preprocessing/download_ERA5_weather.py @@ -7,12 +7,11 @@ Download ERA5 daily weather files for modspa """ -import glob # for path management import sys # for path management import os # for path exploration import geopandas as gpd # to manage shapefiles -from datetime import datetime # manage dates from psutil import cpu_count # to get number of physical cores available +from p_tqdm import p_map # for multiprocessing with progress bars import modspa_pixel.preprocessing.lib_era5_land_pixel as era5land # custom built functions for ERA5-Land data download from modspa_pixel.config.config import config # to load modspa config file from modspa_pixel.preprocessing.parcel_to_pixel import convert_dataframe_to_xarray @@ -73,6 +72,7 @@ def request_ER5_weather(config_file: str, ndvi_path: str, raw_S2_image_ref: str wgs84_epsg = 'epsg:4326' # WGS84 is the ERA5 epsg # ERA5 product parameters + grid_size = 0.25 wind_height = 10 # height of ERA5 wind measurements in meters print('REQUEST CONFIGURATION INFORMATIONS:') @@ -118,7 +118,7 @@ def request_ER5_weather(config_file: str, ndvi_path: str, raw_S2_image_ref: str gdf_expe_polygons = gdf_expe_polygons_clean # search for the total extent of the whole polygons in lat/lon [xlo/ylo/xhi/yhi] [W S E N] - expe_polygons_boxbound = gdf_expe_polygons.geometry.total_bounds + expe_polygons_boxbound = gdf_expe_polygons.total_bounds expe_polygons_boxbound = list(expe_polygons_boxbound) print('shape extend in ', expe_epsg.srs, ':', expe_polygons_boxbound) @@ -126,32 +126,39 @@ def request_ER5_weather(config_file: str, ndvi_path: str, raw_S2_image_ref: str print('--- convert extend in wgs84 coordinates ---') # idem en wgs84 pour des lat/lon en degree (format utilisé par google earth engine) - expe_polygons_boxbound_wgs84 = gdf_expe_polygons.to_crs(wgs84_epsg).geometry.total_bounds - - # convert to list for earth engine - expe_polygons_boxbound_wgs84 = list(expe_polygons_boxbound_wgs84) + expe_polygons_boxbound_wgs84 = gdf_expe_polygons.to_crs(wgs84_epsg).total_bounds + else: expe_polygons_boxbound_wgs84 = expe_polygons_boxbound - # switch coordinates order to agree with ECMWF order: N W S E - expe_area = expe_polygons_boxbound_wgs84[3], expe_polygons_boxbound_wgs84[0], expe_polygons_boxbound_wgs84[1], expe_polygons_boxbound_wgs84[2] - print('boxbound [N W S E] extend in ', wgs84_epsg) - print(expe_area) - # determine boxbound for ECMWF request (included shape boxbound) - era5_expe_polygons_boxbound_wgs84 = era5land.era5_enclosing_shp_aera(expe_area, 0.1) - print('boxbound [N W S E] request extend in ', wgs84_epsg) - print(era5_expe_polygons_boxbound_wgs84) + print(expe_polygons_boxbound_wgs84) print('--start request--') # Get number of available CPUs - nb_processes = 4 * min([cpu_count(logical = False), len(os.sched_getaffinity(0)), config_params.max_cpu]) # downloading data demands very little computing power, each processor core can manage multiple downloads + nb_processes = 2 * min([cpu_count(logical = False), len(os.sched_getaffinity(0)), config_params.max_cpu]) # downloading data demands very little computing power, each processor core can manage multiple downloads - # Call daily data - weather_file = era5land.call_era5land_daily(start_date, end_date, era5_expe_polygons_boxbound_wgs84, output_path = outpath) + # Define variables + variables = ['2m_temperature', '2m_dewpoint_temperature', 'surface_solar_radiation_downwards', '10m_u_component_of_wind', '10m_v_component_of_wind', 'total_precipitation'] + + # Split download per year if necessary + dates = era5land.split_dates_by_year(start_date, end_date) + if len(dates) == 1: + + # Generate arguments + args = [(var, outpath, start_date, end_date, expe_polygons_boxbound_wgs84, grid_size) for var in variables] + else: + args = [] + for d in dates: + date1, date2 = d + temp = [(var, outpath, date1, date2, expe_polygons_boxbound_wgs84, grid_size) for var in variables] + args.extend(temp) + + # Download in multiprocess + weather_files = p_map(era5land.call_era5landhourly, args, **{"num_cpus": nb_processes}) - print('path to daily weather netCDF file: ', weather_file) + print('\nPath to daily weather netCDF file: ', outpath) # Generate pandas dataframe for parcel mode if mode == 'parcel': @@ -165,7 +172,7 @@ def request_ER5_weather(config_file: str, ndvi_path: str, raw_S2_image_ref: str return weather_dataset # Generate daily weather datasets as Geotiffs for each variable - weather_daily_rain, weather_daily_ET0 = era5land.era5Land_daily_to_yearly_parcel(weather_file, weather_daily_ncFile, start_date, end_date, h = wind_height) + weather_daily_rain, weather_daily_ET0 = era5land.era5Land_daily_to_yearly_parcel(weather_files, variables, weather_daily_ncFile, start_date, end_date, h = wind_height) # Generate and save weather dataframe era5land.extract_weather_dataframe(weather_daily_rain, weather_daily_ET0, shapefile, config_file, weather_datframe) @@ -178,7 +185,7 @@ def request_ER5_weather(config_file: str, ndvi_path: str, raw_S2_image_ref: str return weather_dataset # Calculate ET0 over the whole time period - weather_daily_ncFile = era5land.era5Land_daily_to_yearly_pixel(weather_file, weather_daily_ncFile, raw_S2_image_ref, ndvi_path, start_date, end_date, h = wind_height, max_ram = config_params.max_ram, weather_overwrite = weather_overwrite) + weather_daily_ncFile = era5land.era5Land_daily_to_yearly_pixel(weather_files, variables, weather_daily_ncFile, raw_S2_image_ref, ndvi_path, start_date, end_date, h = wind_height, max_ram = config_params.max_ram, weather_overwrite = weather_overwrite) print('\nWeather dataset:', weather_daily_ncFile) diff --git a/preprocessing/lib_era5_land_pixel.py b/preprocessing/lib_era5_land_pixel.py index 3db3087925978bf6eb7cbbed3cba846956141206..f7009bddb909470f338ae2fbadad7e96c4f9ed8a 100644 --- a/preprocessing/lib_era5_land_pixel.py +++ b/preprocessing/lib_era5_land_pixel.py @@ -13,13 +13,10 @@ Functions to call ECMWF Reanalysis with CDS-api """ import os # for path exploration and file management -from typing import List, Tuple # to declare variables import numpy as np # for math on arrays import xarray as xr # to manage nc files import rioxarray # to manage georeferenced images from datetime import datetime # to manage dates -from p_tqdm import p_map # for multiprocessing with progress bars -from dateutil.rrule import rrule, MONTHLY from fnmatch import fnmatch # for file name matching import pandas as pd # to manage dataframes import rasterio as rio # to manage geotiff images @@ -33,13 +30,11 @@ from psutil import virtual_memory # to check available ram from psutil import cpu_count # to get number of physical cores available from modspa_pixel.config.config import config # to import config file from modspa_pixel.source.modspa_samir import calculate_time_slices_to_load # to optimise I/O operations -import re # for string comparison import warnings # to suppress pandas warning # CDS API external library # source: https://pypi.org/project/cdsapi/ import cdsapi # to download cds data -import requests # to request data # FAO ET0 calculator external library # Notes @@ -48,7 +43,7 @@ import requests # to request data import eto # to calculate ET0 -def era5_enclosing_shp_aera(area: List[float], pas: float) -> Tuple[float, float, float, float]: +def era5_enclosing_shp_aera(bbox: list[float], pas: float) -> tuple[float, float, float, float]: """ Find the four coordinates including the boxbound scene to agree with gridsize resolution @@ -57,7 +52,7 @@ def era5_enclosing_shp_aera(area: List[float], pas: float) -> Tuple[float, float Arguments ========= - 1. area: ``List[float]`` + 1. area: ``list[float]`` bounding box of the demanded area list of floats: [lat north, lon west, lat south, lon east] in degree WGS84 2. pas: ``float`` @@ -66,7 +61,7 @@ def era5_enclosing_shp_aera(area: List[float], pas: float) -> Tuple[float, float Returns ======= - 1. era5_area: ``Tuple[float, float, float, float]`` + 1. era5_area: ``tuple[float, float, float, float]`` coordinates list corresponding to N,W,S,E corners of the grid in decimal degree .. note:: @@ -75,308 +70,154 @@ def era5_enclosing_shp_aera(area: List[float], pas: float) -> Tuple[float, float """ - lat_max, lon_min, lat_min, lon_max = area - + lat_max, lon_min, lat_min, lon_max = bbox[3], bbox[0], bbox[1], bbox[2] + # North - era5_lat_max = round((lat_max//pas+2)*pas, 2) + era5_lat_max = round((lat_max // pas + 1) * pas, 2) # West - era5_lon_min = round((lon_min//pas)*pas, 2) + era5_lon_min = round((lon_min // pas) * pas, 2) # South - era5_lat_min = round((lat_min//pas)*pas, 2) + era5_lat_min = round((lat_min // pas) * pas, 2) # Est - era5_lon_max = round((lon_max//pas+2)*pas, 2) + era5_lon_max = round((lon_max // pas + 1) * pas, 2) - era5_area = era5_lat_max, era5_lon_min, era5_lat_min, era5_lon_max + era5_area = [era5_lat_max, era5_lon_min, era5_lat_min, era5_lon_max] return era5_area # [N,W,S,E] -# TODO: create new function to get daily statistics from hourly data, adapt parameters for each variable type, take inspiration from pySPARSE weather part -def call_era5land_daily(start_date: str, end_date: str, area: list[float, float, float, float], output_path: str) -> None: +def split_dates_by_year(start_date: str, end_date: str) -> list[tuple[str, str]] | list: """ - Query of one month of daily ERA5-land data of a selected variable - according to a selected statistic - - Documentation: - `cds_climate <https://datastore.copernicus-climate.eu/documents/app-c3s-daily-era5-statistics/C3S_Application-Documentation_ERA5-daily-statistics-v2.pdf>`_ - + Given a start and end date, returns tuples of start and end dates IN THE SAME YEAR. Arguments ========= - - (packed in args: ``tuple``) - + 1. start_date: ``str`` start date in YYYY-MM-DD format 2. end_date: ``str`` end date in YYYY-MM-DD format - 3. area: ``List[int]`` - bounding box of the demanded area - area = [lat_max, lon_min, lat_min, lon_max] - 4. output_path: ``str`` - path for output file. Returns ======= - - ``None`` + + 1. dates: ``list[tuple[str, str]] | list`` + output tuples of start and end dates """ - # Variable dictionnary - dico = { - '2m_temperature': ['daily_minimum', 'daily_maximum'], - '2m_dewpoint_temperature': ['daily_minimum', 'daily_maximum'], - '10m_u_component_of_wind': ['daily_mean'], - '10m_v_component_of_wind': ['daily_mean'], - 'total_precipitation': ['daily_maximum'], - 'surface_solar_radiation_downwards': ['daily_maximum'] - } - - # Create name of output fileatistic+".nc" - output_filename = os.path.join(output_path, 'ERA5-land_' + start_date + '_' + end_date + '.nc') - print(output_filename) - - # Get datetime objects for start and end date start = datetime.strptime(start_date, '%Y-%m-%d') end = datetime.strptime(end_date, '%Y-%m-%d') - # if os.path.isfile(output_filename): - # print(output_filename, ' already exist') - # else: - # try: - - # c = cdsapi.Client(timeout=300) - - # result = c.service("tool.toolbox.orchestrator.workflow", - # params={ - # "realm": "c3s", - # "project": "app-c3s-daily-era5-statistics", - # "version": "master", - # "kwargs": { - # "dataset": "reanalysis-era5-land", - # "product_type": "reanalysis", - # "variable": variable, - # "statistic": statistic, - # "year": year, - # "month": month, - # "time_zone": "UTC+00:0", - # "frequency": "1-hourly", - # "grid": "0.1/0.1", - # "area": {"lat": [area[2], area[0]], - # "lon": [area[1], area[3]]} - # }, - # "workflow_name": "application" - # }) - - # location = result[0]['location'] - # res = requests.get(location, stream=True) - # print("Writing data to " + output_filename) - # with open(output_filename, 'wb') as fh: - # for r in res.iter_content(chunk_size=1024): - # fh.write(r) - # fh.close() - # except: - # print('!! request', variable, ' failed !! -> year', year, 'month', month) - - - # Initialize the CDS API client - c = cdsapi.Client(timeout = 300) - - # Define the parameters for the data request - params = { - "dataset": "reanalysis-era5-single-levels", - "product_type": "reanalysis", - "variable": list(dico.keys()), - "statistic": [stat for stats in dico.values() for stat in stats], - "year": [str(year) for year in range(start.year, end.year + 1)], - "month": [f"{month:02d}" for month in range(1, 13)], - "day": [f"{day:02d}" for day in range(1, 32)], - "time_zone": "UTC+00:00", - "frequency": "daily", - "grid": [0.1, 0.1], - "area": [area[2], area[0], area[1], area[3]] # North, West, South, East - } - - # Request the data - result = c.service("tool.toolbox.orchestrator.workflow", { - "realm": "c3s", - "project": "app-c3s-daily-era5-statistics", - "version": "master", - "kwargs": params, - "workflow_name": "application" - }) - - # Get the location of the downloaded data - location = result[0]['location'] - - # Download the data - res = requests.get(location, stream=True) - print("Writing data to " + output_filename) - with open(output_filename, 'wb') as fh: - for r in res.iter_content(chunk_size=1024): - fh.write(r) - - return None - + if start.year == end.year: + return [(start_date, end_date)] -def call_era5land_daily_for_MODSPA(start_date: str, end_date: str, area: List[float], output_path: str, processes: int = 9) -> None: - """ - request ERA5-land daily variables needed for ET0 calculus and MODSPA forcing - `reanalysis_era5 <https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-land?tab=overview>`_ + dates = [] + current_start = start + while current_start.year <= end.year: + if current_start.year == end.year: + current_end = end + else: + current_end = datetime(current_start.year, 12, 31) - Information on requested variables - ---------------------------------- - - called land surface variables : - * **2m_temperature** - * **surface_solar_radiation_downward** - * **total_precipitation** - * **10m_u_component_of_wind** - * **10m_v_component_of_wind** - - Arguments - ========= - - 1. start_date: ``str`` - start date in YYYY-MM-DD format - 2. end_date: ``str`` - end date in YYYY-MM-DD format - 3. area: ``List[float]`` - bounding box of the demanded area - area = [lat_max, lon_min, lat_min, lon_max] - 4. output_path: ``str`` - output file name, ``.nc`` extension - 5. processes: ``int`` ``default = 9`` - number of logical processors on which to run the download command. - can be higher than your actual number of processor cores, - download operations have a low CPU demand. + dates.append((current_start.strftime('%Y-%m-%d'), current_end.strftime('%Y-%m-%d'))) - Returns - ======= - - ``None`` - """ + current_start = datetime(current_start.year + 1, 1, 1) - # list of first day of each month date into period - strt_dt = datetime.strptime(start_date, '%Y-%m-%d').replace(day=1) - end_dt = datetime.strptime(end_date, '%Y-%m-%d').replace(day=1) - - periods = [dt for dt in rrule( - freq=MONTHLY, dtstart=strt_dt, until=end_dt, bymonthday=1)] - - dico = { - '2m_temperature': ['daily_minimum', 'daily_maximum'], - '10m_u_component_of_wind': ['daily_mean'], - '10m_v_component_of_wind': ['daily_mean'], - 'total_precipitation': ['daily_maximum'], - 'surface_solar_radiation_downwards': ['daily_maximum'] - } - - args = [] - - # loop on variable to upload - for variable in dico.keys(): - # loop on statistic associated to variable to upload - for statistic in dico[variable]: - # loop on year and month - for dt in periods: - year = str(dt.year) - month = '0'+str(dt.month) - month = month[-2:] - # Requete ERA5-land - args.append((year, month, variable, statistic, area, output_path)) - - # Start pool - p_map(call_era5land_daily, args, **{"num_cpus": processes}) - - return None + return dates -def filename_to_datetime(filename: str) -> datetime.date: +def call_era5landhourly(args: tuple) -> None: """ - filename_to_datetime returns a ``datetime.date`` object for the date of the given file name. + Download weather data for the given variable. Arguments are packed in a tuple for multiprocessing. Arguments ========= - 1. filename: ``str`` - name or path of the product - - Returns - ======= - - 1. date: ``datetime.date`` - datetime.date object, date of the product - """ - - # Search for a date pattern (yyyy_mm_dd) in the product name or path - match = re.search('\d{4}_\d{2}', filename) - format = '%Y_%m' - datetime_object = datetime.strptime(match[0], format) - return datetime_object.date() - - -def concat_monthly_nc_file(list_era5land_monthly_files: List[str], list_variables: List[str], output_path: str) -> List[str]: - """ - Concatenate monthly netcdf datasets into a single file for each given variable. - - Arguments - ========= - - 1. list_era5land_monthly_files: ``List[str]`` - list of daily files per month - 2. list_variables: ``List[str]`` - names of the required variables as written in the filename - 3. output_path: ``List[str]`` - path to which save the aggregated files + 1. variable: ``str`` + name of ER5-Land weather variable + 2. output_path: ``str`` + output path to download netcdf file + 3. start_date: ``str`` + start date in YYYY-MM-DD format + (start and end date must be in the same + year to reduce data to download) + 4. end_date: ``str`` + end date in YYYY-MM-DD format + (start and end date must be in the same + year to reduce data to download) + 5. bbox: ``list[float, float, float, float]`` + bounding box of area to download data + 6. gridsize: ``float`` ``default = 0.1`` + gridsize of data to download Returns ======= - 1. list_era5land_files: ``List[str]`` - the list of paths to the aggregated files - """ + 1. output_filename: ``str`` + output file name + """ - if not os.path.exists(output_path): os.mkdir(output_path) + variable, output_path, start_date, end_date, bbox, gridsize = args - list_era5land_monthly_files.sort() + # full path name of the output file + output_filename = os.path.join(output_path, 'ERA5-land_' + variable + '_' + start_date + '_' + end_date + '.nc') - list_era5land_files = [] + # Get time periods for download + start_date = datetime.strptime(start_date, '%Y-%m-%d') + end_date = datetime.strptime(end_date, '%Y-%m-%d') - # concatenate all dates into a single file for each variable - for variable in list_variables: - curr_var_list = [] - dates = [] - for file in list_era5land_monthly_files: - # find specific variable - if fnmatch(file, '*' + variable + '*'): - curr_var_list.append(file) - dates.append(filename_to_datetime(file)) - - curr_datasets = [] - for file in curr_var_list: - # open all months for the given variable - curr_datasets.append(xr.open_dataset(file)) + # Generate time inputs + months = [] + current = start_date + while current <= end_date: + month_str = current.strftime('%m') + if month_str not in months: + months.append(month_str) + # Move to the next month + if current.month == 12: + current = current.replace(year=current.year + 1, month=1) + else: + current = current.replace(month=current.month + 1) - # Create file name - try: - concatenated_file = os.path.join(output_path, 'era5-land_' + dates[0].strftime('%m-%Y') + '_' + dates[-1].strftime('%m-%Y') + '_' + variable + '.nc') - except: - print(variable) - - # Concatenate monthly datasets - concatenated_dataset = xr.concat(curr_datasets, dim = 'time') - - # Save datasets - concatenated_dataset.to_netcdf(path = concatenated_file, mode = 'w',) - - # Add filename to output list - list_era5land_files.append(concatenated_file) + # Generate the list of days + days = [f'{day:02}' for day in range(1, 32)] - return list_era5land_files + # Generate time + time = [f'{hour:02}:00' for hour in range(0, 24)] + # Get modified bbox + area = era5_enclosing_shp_aera(bbox, gridsize) -def uz_to_u2(u_z: List[float], h: float) -> List[float]: + # Check if file already exists + if os.path.isfile(output_filename): + print('\n', output_filename, 'already exist !\n') + else: + # cds api request + client = cdsapi.Client(timeout = 300) + try: + client.retrieve('reanalysis-era5-single-levels', + request = { + 'product_type': ['reanalysis'], + 'variable': [variable], + 'year': [start_date.strftime(format = '%Y')], + 'month': months, + 'day': days, + 'time': time, + 'data_format': 'netcdf', + 'download_format': 'unarchived', + 'area': area, + 'grid': [gridsize, gridsize], + }, + target = output_filename) + print('\n', output_filename, ' downloaded !\n') + + except Exception as e: + print('\nRequest failed, error message:\n\n', e, '\n') + + return output_filename + + +def uz_to_u2(u_z: list[float], h: float) -> list[float]: """ The wind speed measured at heights other than 2 m can be adjusted according to the follow equation @@ -414,48 +255,6 @@ def ea_calc(T: float) -> float: return 0.6108 * np.exp(17.27 * T / (T + 237.15)) -def load_variable(file_name: str) -> xr.Dataset: - """ - Loads an ERA5 meteorological variable into a xarray - dataset according to the modspa architecture. - - Arguments - ========= - - 1. file_name: ``str`` - netcdf file to load - - Returns - ======= - - 1. variable: ``xr.Dataset`` - output xarray dataset - """ - - # Rename temperature variables according to the statistic (max or min) - if fnmatch(file_name, '*era5-land*2m_temperature_daily_maximum*'): # maximum temperature - variable = xr.open_dataset(file_name).rename({'t2m': 't2m_max'}).drop_vars('realization') # netcdfs from ERA5 carry an unecessary 'realization' coordinate, so it is dropped - - elif fnmatch(file_name, '*era5-land*2m_temperature_daily_minimum*'): # minimum temperature - variable = xr.open_dataset(file_name).rename({'t2m': 't2m_min'}).drop_vars('realization') - - elif fnmatch(file_name, '*era5-land*2m_dewpoint_temperature_daily_maximum*'): # maximum dewpoint temperature - variable = xr.open_dataset(file_name).rename({'d2m': 'd2m_max'}).drop_vars('realization') - - elif fnmatch(file_name, '*era5-land*2m_dewpoint_temperature_daily_minimum*'): # minimum temperature - variable = xr.open_dataset(file_name).rename({'d2m': 'd2m_min'}).drop_vars('realization') - - # Other variables can be loaded without modification - else: - try: - variable = xr.open_dataset(file_name).drop_vars('realization') - - except: - variable = xr.open_dataset(file_name) - - return variable - - def combine_weather2netcdf(rain_file: str, ET0_tile: str, ndvi_path: str, save_path: str, available_ram: int) -> None: """ Convert the Rain and ET0 geotiffs into a single weather netcdf dataset. @@ -618,10 +417,18 @@ def calculate_ET0_pixel(pixel_dataset: xr.Dataset, lat: float, lon: float, h: fl # TODO: adapt for safran # Conversion of xarray dataset to dataframe for ET0 calculation # Conversion of temparature - ET0 = pixel_dataset.t2m_min.to_dataframe().rename(columns = {'t2m_min' : 'T_min'}) - 273.15 # conversion of temperatures from K to °C - ET0['T_max'] = pixel_dataset.t2m_max.to_dataframe()['t2m_max'].values - 273.15 # conversion of temperatures from K to °C + ET0 = pixel_dataset.t2m.resample(time = '1D').min().to_dataframe().rename(columns = {'t2m' : 'T_min'}) - 273.15 # conversion of temperatures from K to °C + ET0['T_max'] = pixel_dataset.t2m.resample(time = '1D').max().to_dataframe()['t2m'].values - 273.15 # conversion of temperatures from K to °C + + ET0['R_s'] = pixel_dataset.ssrd.resample(time = '1D').sum().to_dataframe()['ssrd'].values / 1e6 # to convert downward total radiation from J/m² to MJ/m² - ET0['R_s'] = pixel_dataset.ssrd.to_dataframe()['ssrd'].values / 1e6 # to convert downward total radiation from J/m² to MJ/m² + # Calculate relative humidity + pixel_dataset['ea'] = ea_calc(pixel_dataset.t2m - 273.15) + pixel_dataset['es'] = ea_calc(pixel_dataset.d2m - 273.15) + pixel_dataset['rh'] = np.clip(100.*(pixel_dataset.es / pixel_dataset.ea), a_min = 0, a_max = 100) + + ET0['RH_max'] = pixel_dataset.rh.resample(time = '1D').max().to_dataframe()['rh'].values + ET0['RH_min'] = pixel_dataset.rh.resample(time = '1D').min().to_dataframe()['rh'].values if safran: # Add relative humidity @@ -633,7 +440,7 @@ def calculate_ET0_pixel(pixel_dataset: xr.Dataset, lat: float, lon: float, h: fl else: # Conversion of eastward and northward wind values to scalar wind - ET0['U_z'] = np.sqrt(pixel_dataset.u10.to_dataframe()['u10'].values**2 + pixel_dataset.v10.to_dataframe()['v10'].values**2) + ET0['U_z'] = np.sqrt(pixel_dataset.u10.resample(time = '1D').mean().to_dataframe()['u10'].values**2 + pixel_dataset.v10.resample(time = '1D').mean().to_dataframe()['v10'].values**2) # Start ET0 calculation eto_calc = eto.ETo() @@ -655,7 +462,7 @@ def calculate_ET0_pixel(pixel_dataset: xr.Dataset, lat: float, lon: float, h: fl return ET0_values -def convert_interleave_mode(args: Tuple[str, str, bool]) -> None: +def convert_interleave_mode(args: tuple[str, str, bool]) -> None: """ Convert Geotiff files obtained from OTB to Band interleave mode for faster band reading. @@ -698,7 +505,7 @@ def convert_interleave_mode(args: Tuple[str, str, bool]) -> None: return None -def era5Land_daily_to_yearly_pixel(weather_file: str, output_file: str, raw_S2_image_ref: str, ndvi_path: str, start_date: str, end_date: str, h: float = 10, max_ram: int = 8, use_OTB: bool = False, weather_overwrite: bool = False, safran: bool = False) -> str: +def era5Land_daily_to_yearly_pixel(weather_files: list[str], variables: list[str], output_file: str, raw_S2_image_ref: str, ndvi_path: str, start_date: str, end_date: str, h: float = 10, max_ram: int = 8, use_OTB: bool = False, weather_overwrite: bool = False, safran: bool = False) -> str: """ Calculate ET0 values from the ERA5 netcdf weather variables. Output netcdf contains the ET0 and precipitation values for @@ -710,27 +517,29 @@ def era5Land_daily_to_yearly_pixel(weather_file: str, output_file: str, raw_S2_i 1. weather_file: ``str`` path to netCDF raw weather files - 2. output_file: ``str`` + 2. variables: ``list[str]`` + list of variables downloaded from era5 + 3. output_file: ``str`` output file name without extension - 3. raw_S2_image_ref: ``str`` + 4. raw_S2_image_ref: ``str`` raw Sentinel 2 image at right resolution for reprojection - 4. ndvi_path: ``str`` + 5. ndvi_path: ``str`` path to ndvi dataset, used for attributes and coordinates - 5. start_date: ``str`` + 6. start_date: ``str`` beginning of the time window to download (format: ``YYYY-MM-DD``) - 6. end_date: ``str`` + 7. end_date: ``str`` end of the time window to download (format: ``YYYY-MM-DD``) - 7. h: ``float`` ``default = 10`` + 8. h: ``float`` ``default = 10`` height of ERA5 wind measurements in meters - 8. max_ram: ``int`` ``default = 8`` + 9. max_ram: ``int`` ``default = 8`` max ram (in GiB) for reprojection and conversion. Two subprocesses are spawned for OTB, each receiviving half of requested memory. - 9. use_OTB: ``bool`` ``default = False`` + 10. use_OTB: ``bool`` ``default = False`` boolean to choose to use OTB or not, tests will be added later - 10. weather_overwrite: ``bool`` ``default = False`` + 11. weather_overwrite: ``bool`` ``default = False`` boolean to choose to overwrite weather netCDF - 11. safran: ``bool`` ``default = False`` + 12. safran: ``bool`` ``default = False`` boolean to adapt to a custom SAFRAN weather dataset Returns @@ -749,14 +558,21 @@ def era5Land_daily_to_yearly_pixel(weather_file: str, output_file: str, raw_S2_i print('\nRequested', max_ram, 'GiB of memory when available memory is approximately', round(virtual_memory().available / (1024**3), 1), 'GiB.\n\nExiting script.\n') return None - # Load all monthly files into a single xarray dataset that contains all dates (daily frequency) - raw_weather_ds = xr.open_dataset(weather_file, decode_coords = 'all') + # Load all weather files in a single dataset + raw_weather_ds = xr.Dataset() + for var in variables: + temp = [] + for file in weather_files: + if fnmatch(file, '*' + var + '*'): + temp.append(file) + raw_weather_ds = xr.merge([raw_weather_ds, xr.open_mfdataset(temp).drop_vars(['number', 'expver']).rename({'valid_time': 'time', 'latitude': 'lat', 'longitude': 'lon'})]) # Clip extra dates raw_weather_ds = raw_weather_ds.sel({'time': slice(start_date, end_date)}).sortby(variables = 'time') + resampled_weather_ds = raw_weather_ds.resample(time = '1D').sum() # Create ET0 variable (that will be saved) and set attributes - raw_weather_ds = raw_weather_ds.assign(ET0 = (raw_weather_ds.sizes, np.zeros(tuple(raw_weather_ds.sizes[d] for d in list(raw_weather_ds.sizes)), dtype = np.float32))) + resampled_weather_ds = resampled_weather_ds.assign(ET0 = (resampled_weather_ds.sizes, np.zeros(tuple(resampled_weather_ds.sizes[d] for d in list(resampled_weather_ds.sizes)), dtype = np.float32))) if safran: # Loop on y and x coordinates to calculate ET0 per "pixel" @@ -766,15 +582,15 @@ def era5Land_daily_to_yearly_pixel(weather_file: str, output_file: str, raw_S2_i # Select whole time period for given (lat, lon) values select_ds = raw_weather_ds.sel({'y' : y, 'x' : x}).drop_vars(['y', 'x']) - # TODO: adapt for SAFRAN + # TODO: adapt for new ERA5 data # Calculate ET0 values for given pixel ET0_values = calculate_ET0_pixel(select_ds, y, x, h) # Write ET0 values in xarray Dataset - raw_weather_ds['ET0'].loc[{'y' : y, 'x' : x}] = ET0_values + resampled_weather_ds['ET0'].loc[{'y' : y, 'x' : x}] = ET0_values # Get necessary data for final dataset - final_weather_ds = raw_weather_ds.drop_vars(names = ['ssrd', 't2m_max', 't2m_min', 'RH_max', 'RH_min', 'U_z']) # remove unwanted variables + final_weather_ds = resampled_weather_ds.drop_vars(names = ['ssrd', 't2m', 'd2m', 'RH_max', 'RH_min', 'U_z']) # remove unwanted variables else: # Loop on lattitude and longitude coordinates to calculate ET0 per "pixel" @@ -788,10 +604,10 @@ def era5Land_daily_to_yearly_pixel(weather_file: str, output_file: str, raw_S2_i ET0_values = calculate_ET0_pixel(select_ds, lat, lon, h) # Write ET0 values in xarray Dataset - raw_weather_ds['ET0'].loc[{'lat' : lat, 'lon' : lon}] = ET0_values + resampled_weather_ds['ET0'].loc[{'lat' : lat, 'lon' : lon}] = ET0_values # Get necessary data for final dataset - final_weather_ds = raw_weather_ds.drop_vars(names = ['ssrd', 'v10', 'u10', 't2m_max', 't2m_min']) # remove unwanted variables + final_weather_ds = resampled_weather_ds.drop_vars(names = ['ssrd', 'v10', 'u10', 't2m', 'd2m']) # remove unwanted variables # Scale data and rewrite netcdf attributes final_weather_ds['tp'] = final_weather_ds['tp'] * 1000 # conversion from m to mm @@ -902,7 +718,7 @@ def era5Land_daily_to_yearly_pixel(weather_file: str, output_file: str, raw_S2_i return output_file_final -def era5Land_daily_to_yearly_parcel(weather_file: str, output_file: str, start_date: str, end_date: str, h: float = 10) -> str: +def era5Land_daily_to_yearly_parcel(weather_files: list[str], variables: list[str], output_file: str, start_date: str, end_date: str, h: float = 10) -> str: """ Calculate ET0 values from the ERA5 netcdf weather variables. Output netcdf contains the ET0 and precipitation values for @@ -911,9 +727,11 @@ def era5Land_daily_to_yearly_parcel(weather_file: str, output_file: str, start_d Arguments ========= - 1. weather_file: ``str`` + 1. weather_file: ``list[str]`` path to netCDF raw weather files - 2. output_file: ``str`` + 2. variables: ``list[str]`` + list of variables downloaded from era5 + 3. output_file: ``str`` output file name without extension 3. start_date: ``str`` beginning of the time window to download (format: ``YYYY-MM-DD``) @@ -931,8 +749,14 @@ def era5Land_daily_to_yearly_parcel(weather_file: str, output_file: str, start_d path to ``Geotiff`` file containing ET0 data """ - # Load all monthly files into a single xarray dataset that contains all dates (daily frequency) - raw_weather_ds = xr.open_dataset(weather_file, decode_coords = 'all') + # Load all weather files in a single dataset + raw_weather_ds = xr.Dataset() + for var in variables: + temp = [] + for file in weather_files: + if fnmatch(file, '*' + var + '*'): + temp.append(file) + raw_weather_ds = xr.merge([raw_weather_ds, xr.open_mfdataset(temp).drop_vars(['number', 'expver']).rename({'valid_time': 'time', 'latitude': 'lat', 'longitude': 'lon'})]) # Clip extra dates raw_weather_ds = raw_weather_ds.sel({'time': slice(start_date, end_date)}) @@ -981,7 +805,7 @@ def era5Land_daily_to_yearly_parcel(weather_file: str, output_file: str, start_d return output_file_rain, output_file_ET0 -def extract_rasterstats(args: tuple) -> List[float]: +def extract_rasterstats(args: tuple) -> list[float]: """ Generate a dataframe for a given raster and a geopandas shapefile object. It iterates over the features of the shapefile geometry (polygons). This @@ -1008,7 +832,7 @@ def extract_rasterstats(args: tuple) -> List[float]: Returns ======= - 1. raster_stats: ``List[float]`` + 1. raster_stats: ``list[float]`` list containing weather values and feature information for every polygon in the shapefile """