-
Jérémy AUCLAIR authored
Various code updates. Bug correction and improvements in preprocessing, new python only reprojection method.
Jérémy AUCLAIR authoredVarious code updates. Bug correction and improvements in preprocessing, new python only reprojection method.
download_ERA5_weather.py 8.96 KiB
# -*- coding: UTF-8 -*-
# Python
"""
04-07-2023
@author: rivallandv, heavily modified by jeremy auclair
Download ERA5 daily weather files for modspa
"""
import glob # for path management
import sys # for path management
import os # for path exploration
import geopandas as gpd # to manage shapefiles
from datetime import datetime # manage dates
from psutil import cpu_count # to get number of physical cores available
import modspa_pixel.preprocessing.lib_era5_land_pixel as era5land # custom built functions for ERA5-Land data download
from modspa_pixel.config.config import config # to load modspa config file
from modspa_pixel.preprocessing.parcel_to_pixel import convert_dataframe_to_xarray
def request_ER5_weather(config_file: str, ndvi_path: str, raw_S2_image_ref: str = None, shapefile: str = None, mode: str = 'pixel') -> str:
"""
Download ERA5 reanalysis daily weather files, concatenate and calculate ET0
to obtain a netCDF4 dataset for precipitation and ET0 values. Weather data
reprojection and conversion can take some time for large spatial windows.
Arguments
=========
1. config_file: ``str``
json configuration file
2. ndvi_path: ``str``
path to ndvi cube, used for weather data reprojection
3. raw_S2_image_ref: ``str`` ``default = None``
unmodified sentinel-2 image at correct resolution for
weather data reprojection in pixel mode
4. shapefile: ``str`` ``default = None``
path to shapefile for extraction in parcel mode
5. mode: ``str`` ``default = 'pixel'``
choose between ``'pixel'`` and ``'parcel'`` mode
Returns
=======
1. weather_file: ``str``
path to netCDF4 file containing weather data
"""
# Get config file
config_params = config(config_file)
# Get parameters
data_path = config_params.data_path
shapefile_path = config_params.shapefile_path
run_name = config_params.run_name
mode = config_params.mode
start_date = config_params.start_date
end_date = config_params.end_date
weather_overwrite = config_params.weather_overwrite
# Set output paths
outpath = data_path + os.sep + 'WEATHER' + os.sep + run_name
save_dir = outpath + os.sep + 'ncdailyfiles'
# Create daily wheather ncfile name
weather_daily_ncFile = save_dir + os.sep + start_date + '_' + end_date + '_' + run_name + '_era5-land-daily-meteo'
# Check if file exists and ndvi overwrite is false
if os.path.exists(weather_daily_ncFile + '.nc' * (mode == 'pixel') + '_parcel.nc' * (mode == 'parcel' )) and not weather_overwrite:
return weather_daily_ncFile
# Geometry configuration
wgs84_epsg = 'epsg:4326' # WGS84 is the ERA5 epsg
# ERA5 product parameters
wind_height = 10 # height of ERA5 wind measurements in meters
print('REQUEST CONFIGURATION INFORMATIONS:')
if shapefile_path:
if os.path.exists(shapefile_path):
print('shapeFile: ', shapefile_path)
else:
print('shapeFile not found')
else:
# print('specify either shapeFile, boxbound or point coordinate in json file')
print('specify shapeFile in json file')
sys.exit(-1)
print('period: ', start_date, ' - ', end_date)
print('experiment name:', run_name)
if os.path.exists(outpath):
print('path for nc files: ', outpath)
else:
os.mkdir(outpath)
print('mkdir path for nc files: ', outpath)
print('----------')
# Request ERA5-land BoxBound Determination
if shapefile_path:
# Load shapefile to access geometrics informations for ERA5-Land request
gdf_expe_polygons = gpd.read_file(shapefile_path)
print('Input polygons CRS :', gdf_expe_polygons.crs)
expe_epsg = gdf_expe_polygons.crs
# verification que les polygones sont tous fermés
liste_polygons_validity = gdf_expe_polygons.geometry.is_valid
if list(liste_polygons_validity).count(False) > 0:
print('some polygons of Shapefile are not valid')
polygons_invalid = liste_polygons_validity.loc[liste_polygons_validity == False]
print('invalid polygons:', polygons_invalid)
for i in polygons_invalid.index:
gdf_expe_polygons.geometry[i]
# Application d'un buffer de zero m
gdf_expe_polygons_clean = gdf_expe_polygons.geometry.buffer(0)
gdf_expe_polygons = gdf_expe_polygons_clean
# search for the total extent of the whole polygons in lat/lon [xlo/ylo/xhi/yhi] [W S E N]
expe_polygons_boxbound = gdf_expe_polygons.geometry.total_bounds
expe_polygons_boxbound = list(expe_polygons_boxbound)
print('shape extend in ', expe_epsg.srs, ':', expe_polygons_boxbound)
if expe_epsg.srs != wgs84_epsg:
print('--- convert extend in wgs84 coordinates ---')
# idem en wgs84 pour des lat/lon en degree (format utilisé par google earth engine)
expe_polygons_boxbound_wgs84 = gdf_expe_polygons.to_crs(wgs84_epsg).geometry.total_bounds
# convert to list for earth engine
expe_polygons_boxbound_wgs84 = list(expe_polygons_boxbound_wgs84)
else:
expe_polygons_boxbound_wgs84 = expe_polygons_boxbound
# switch coordinates order to agree with ECMWF order: N W S E
expe_area = expe_polygons_boxbound_wgs84[3], expe_polygons_boxbound_wgs84[0], expe_polygons_boxbound_wgs84[1], expe_polygons_boxbound_wgs84[2]
print('boxbound [N W S E] extend in ', wgs84_epsg)
print(expe_area)
# determine boxbound for ECMWF request (included shape boxbound)
era5_expe_polygons_boxbound_wgs84 = era5land.era5_enclosing_shp_aera(expe_area, 0.1)
print('boxbound [N W S E] request extend in ', wgs84_epsg)
print(era5_expe_polygons_boxbound_wgs84)
print('--start request--')
# Get number of available CPUs
nb_processes = 4 * min([cpu_count(logical = False), len(os.sched_getaffinity(0)), config_params.max_cpu]) # downloading data demands very little computing power, each processor core can manage multiple downloads
# Call daily data
era5land.call_era5land_daily_for_MODSPA(start_date, end_date, era5_expe_polygons_boxbound_wgs84, output_path = outpath, processes = nb_processes)
# Get list of files
list_era5land_hourly_ncFiles = []
for file in glob.glob(outpath + os.sep + 'ERA5-land_*'):
if era5land.filename_to_datetime(file) >= datetime.strptime(start_date, '%Y-%m-%d').replace(day = 1).date() and era5land.filename_to_datetime(file) <= datetime.strptime(end_date, '%Y-%m-%d').replace(day = 1).date() and file.endswith('.nc'):
list_era5land_hourly_ncFiles.append(file)
list_era5land_hourly_ncFiles.sort()
for ncfile in list_era5land_hourly_ncFiles:
print(ncfile)
if os.path.exists(outpath+os.sep+'ncdailyfiles'):
print('path for nc daily files: ', save_dir)
else:
os.mkdir(outpath+os.sep+'ncdailyfiles')
print('mkdir path for nc daily files: ', save_dir)
print('----------')
# Temporary save directory for daily file merge
variable_list = ['2m_temperature_daily_maximum', '2m_temperature_daily_minimum', 'total_precipitation_daily_maximum', '10m_u_component_of_wind_daily_mean', '10m_v_component_of_wind_daily_mean', 'surface_solar_radiation_downwards_daily_maximum']
# Aggregate monthly files
aggregated_files = era5land.concat_monthly_nc_file(list_era5land_hourly_ncFiles, variable_list, save_dir)
# Generate pandas dataframe for parcel mode
if mode == 'parcel':
# Create save path
weather_datframe = weather_daily_ncFile + '_df.csv'
weather_dataset = weather_daily_ncFile + '_parcel.nc'
# Check if weather dataset already exists
if os.path.exists(weather_dataset) and not weather_overwrite:
return weather_dataset
# Generate daily weather datasets as Geotiffs for each variable
weather_daily_rain, weather_daily_ET0 = era5land.era5Land_daily_to_yearly_parcel(aggregated_files, weather_daily_ncFile, start_date, end_date, h = wind_height)
# Generate and save weather dataframe
era5land.extract_weather_dataframe(weather_daily_rain, weather_daily_ET0, shapefile, config_file, weather_datframe)
# Convert dataframe to xarray dataset
convert_dataframe_to_xarray(weather_datframe, weather_dataset, variables = ['Rain', 'ET0'], data_types = ['u2', 'u2'])
print('\nWeather dataset:', weather_daily_ncFile)
return weather_dataset
# Calculate ET0 over the whole time period
weather_daily_ncFile = era5land.era5Land_daily_to_yearly_pixel(aggregated_files, weather_daily_ncFile, raw_S2_image_ref, ndvi_path, start_date, end_date, h = wind_height, max_ram = config_params.max_ram, weather_overwrite = weather_overwrite)
print('\nWeather dataset:', weather_daily_ncFile)
return weather_daily_ncFile