Commit e2a6509b authored by Jeremy Auclair

Changed weather data loading to GeoTIFF (no suitable tool found to convert TIFF to netCDF for large datasets), rewrote some function descriptions, minor refactoring.
parent ecf4ef49
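For context, the commit replaces the netCDF weather cube with multiband GeoTIFF files, one band per day. A minimal sketch (file name and paths are hypothetical, not taken from this commit) of how such a file can be read back with rasterio:

import rasterio  # to read the multiband GeoTIFF weather files
import numpy as np

precip_file = 'outputs/era5_precipitation.tif'  # hypothetical output of the download step

with rasterio.open(precip_file) as src:
    n_days = src.count          # one band per day
    first_day = src.read(1)     # rasterio band indices are 1-based
    cube = src.read()           # full (days, rows, cols) array
    print(n_days, cube.shape, np.nanmean(first_day))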
@@ -2,26 +2,43 @@
# Python
"""
04-07-2023
-@author: rivallandv, modified by jeremy auclair
+@author: rivallandv, heavily modified by jeremy auclair
-Download ERA5 weather files for modspa
+Download ERA5 daily weather files for modspa
"""
import glob  # for path management
import sys  # for path management
import os  # for path exploration
-import xarray as xr  # to manage nc files
-import pandas as pd  # to manage dataframes
+from typing import Tuple
import geopandas as gpd  # to manage shapefiles
from psutil import cpu_count  # to get number of physical cores available
import input.lib_era5_land_pixel as era5land  # custom built functions for ERA5-Land data download
from config.config import config  # to import config file
-def request_ER5_weather(input_file: str, raw_S2_image_ref: str) -> str:
+def request_ER5_weather(config_file: str, raw_S2_image_ref: str) -> Tuple[str, str]:
+    """
+    Download ERA5 reanalysis daily weather files, concatenate them and calculate ET0
+    to obtain a multiband GeoTIFF image (one band per day) for precipitation and
+    ET0 values.
+    ## Arguments
+    config_file: `str`
+        json configuration file
+    raw_S2_image_ref: `str`
+        unmodified Sentinel-2 image at the correct resolution for
+        weather data reprojection
+    ## Returns
+    1. precip_file: `str`
+        path to file containing precipitation data
+    2. ET0_file: `str`
+        path to file containing ET0 data
+    """
    # Get config file
-    config_params = config(input_file)
+    config_params = config(config_file)
    outpath = config_params.era5_path + os.sep + config_params.run_name
    # Geometry configuration
@@ -50,7 +67,7 @@ def request_ER5_weather(input_file: str, raw_S2_image_ref: str) -> str:
    print('mkdir path for nc files: ', outpath)
    print('----------')
-    # %% Request ERA5-land BoxBound Determination
+    # Request ERA5-land BoxBound Determination
    if config_params.shapefile_path:
        # Load shapefile to access geometric information for ERA5-Land request
        gdf_expe_polygons = gpd.read_file(config_params.shapefile_path)
@@ -60,9 +77,11 @@ def request_ER5_weather(input_file: str, raw_S2_image_ref: str) -> str:
        # check that all polygons are closed
        liste_polygons_validity = gdf_expe_polygons.geometry.is_valid
        if list(liste_polygons_validity).count(False) > 0:
            print('some polygons of Shapefile are not valid')
            polygons_invalid = liste_polygons_validity.loc[liste_polygons_validity == False]
            print('invalid polygons:', polygons_invalid)
            for i in polygons_invalid.index:
                gdf_expe_polygons.geometry[i]
@@ -77,17 +96,17 @@ def request_ER5_weather(input_file: str, raw_S2_image_ref: str) -> str:
        if expe_epsg.srs != wgs84_epsg:
            print('--- convert extend in wgs84 coordinates ---')
            # same in wgs84 for lat/lon in degrees (format used by google earth engine)
-            expe_polygons_boxbound_wgs84 = gdf_expe_polygons.to_crs(
-                wgs84_epsg).geometry.total_bounds
+            expe_polygons_boxbound_wgs84 = gdf_expe_polygons.to_crs(wgs84_epsg).geometry.total_bounds
            # convert to list for earth engine
            expe_polygons_boxbound_wgs84 = list(expe_polygons_boxbound_wgs84)
        else:
            expe_polygons_boxbound_wgs84 = expe_polygons_boxbound
        # switch coordinates order to agree with ECMWF order: N W S E
-        expe_area = expe_polygons_boxbound_wgs84[3], expe_polygons_boxbound_wgs84[0],\
-            expe_polygons_boxbound_wgs84[1], expe_polygons_boxbound_wgs84[2]
+        expe_area = expe_polygons_boxbound_wgs84[3], expe_polygons_boxbound_wgs84[0], expe_polygons_boxbound_wgs84[1], expe_polygons_boxbound_wgs84[2]
        print('boxbound [N W S E] extend in ', wgs84_epsg)
        print(expe_area)
@@ -101,8 +120,6 @@ def request_ER5_weather(input_file: str, raw_S2_image_ref: str) -> str:
    # Get number of available CPUs
    nb_processes = 4 * min([cpu_count(logical = False), len(os.sched_getaffinity(0)), config_params.max_cpu])  # downloading data demands very little computing power, each processor core can manage multiple downloads
-    #============================================================================================
    # Call daily data
    era5land.call_era5land_daily_for_MODSPA(config_params.start_date, config_params.end_date, era5_expe_polygons_boxbound_wgs84, output_path = outpath, processes = nb_processes)
@@ -129,8 +146,9 @@ def request_ER5_weather(input_file: str, raw_S2_image_ref: str) -> str:
    aggregated_files = era5land.concat_monthly_nc_file(list_era5land_hourly_ncFiles, variable_list, save_dir)
    # Calculate ET0 over the whole time period
-    era5land.era5Land_nc_daily_to_ET0(aggregated_files, weather_daily_ncFile, raw_S2_image_ref, config_params, h = wind_height)
+    precip_file, ET0_file = era5land.era5Land_nc_daily_to_ET0(aggregated_files, weather_daily_ncFile, raw_S2_image_ref, config_params, h = wind_height)
-    print('\n', weather_daily_ncFile + '.nc', '\n')
+    print(precip_file)
+    print(ET0_file)
-    return weather_daily_ncFile + '.nc'
+    return precip_file, ET0_file
\ No newline at end of file
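A hedged usage sketch of the updated function above: request_ER5_weather now returns the two GeoTIFF paths instead of a single netCDF path. The module path and file names below are assumptions for illustration, not taken from the repository.

from input.download_ERA5_weather import request_ER5_weather  # module path assumed

# Hypothetical paths; the json config drives dates, area and output directories.
precip_file, ET0_file = request_ER5_weather('config/config_modspa.json', 'data/S2_reference.tif')
print(precip_file)  # multiband GeoTIFF of daily precipitation
print(ET0_file)     # multiband GeoTIFF of daily ET0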
@@ -2,16 +2,17 @@
"""
Functions to call ECMWF Reanalysis with CDS-api
-- ERA5-land hourly request
- ERA5-land daily request
-- request a list of hourly variables dedicated to the calculus of ET0
+- request a list of daily variables dedicated to the calculus of ET0
and the generation of MODSPA daily forcing files
-@author: rivalland
+heavily modified from rivallandv's original file
@author: auclairj
"""
import os, shutil  # for path exploration and file management
-from typing import List  # to declare variables
+from typing import List, Tuple  # to declare variables
import numpy as np  # for math on arrays
import xarray as xr  # to manage nc files
from datetime import datetime  # to manage dates
@@ -21,7 +22,6 @@ from fnmatch import fnmatch  # for file name matching
import rasterio  # to manage geotiff images
from pandas import date_range
from rasterio.warp import reproject, Resampling  # to reproject
-from dask.diagnostics import ProgressBar
import re  # for string comparison
import warnings  # to suppress pandas warning
@@ -434,48 +434,7 @@ def calculate_ET0_pixel(pixel_dataset: xr.Dataset, lat: float, lon: float, h: fl
    return ET0_values
-def reproject_geotiff(source_image: str, destination_image: str, destination_crs: str):
-    # Open the original GeoTIFF file
-    with rasterio.open(source_image) as src:
-        # Get the source CRS and transform
-        src_crs = src.crs
-        src_transform = src.transform
-        # Read the data as a numpy array
-        source = src.read()
-        # Optionally, calculate the destination transform and shape based on the new CRS
-        dst_transform, dst_width, dst_height = rasterio.warp.calculate_default_transform(
-            src_crs, destination_crs, src.width, src.height, *src.bounds)
-        # Create an empty numpy array for the destination
-        destination = np.zeros((src.count, dst_height, dst_width))
-        # Reproject the source to the destination
-        reproject(
-            source,
-            destination,
-            src_transform=src_transform,
-            src_crs=src_crs,
-            dst_transform=dst_transform,
-            dst_crs=destination_crs,
-            resampling=Resampling.nearest)
-        # Save the reprojected data as a new GeoTIFF file
-        with rasterio.open(destination_image, "w", **src.meta) as dst:
-            # Update the metadata with the new CRS, transform and shape
-            dst.update(
-                crs=destination_crs,
-                transform=dst_transform,
-                width=dst_width,
-                height=dst_height)
-            # Write the reprojected data to the file
-            dst.write(destination)
-    return None
-def era5Land_nc_daily_to_ET0(list_era5land_files: List[str], output_file: str, raw_S2_image_ref: str, h: float = 10, max_ram: int = 12288) -> None:
+def era5Land_nc_daily_to_ET0(list_era5land_files: List[str], output_file: str, raw_S2_image_ref: str, h: float = 10, max_ram: int = 12288) -> Tuple[str, str]:
""" """
Calculate ET0 values from the ERA5 netcdf weather variables. Calculate ET0 values from the ERA5 netcdf weather variables.
Output netcdf contains the ET0 values for each day in the selected Output netcdf contains the ET0 values for each day in the selected
...@@ -494,7 +453,10 @@ def era5Land_nc_daily_to_ET0(list_era5land_files: List[str], output_file: str, r ...@@ -494,7 +453,10 @@ def era5Land_nc_daily_to_ET0(list_era5land_files: List[str], output_file: str, r
max ram (in MiB) to give to OTB max ram (in MiB) to give to OTB
## Returns ## Returns
`None` 1. output_file_prec: `str`
path to file containing precipitation data
2. output_file_ET0: `str`
path to file containing ET0 data
""" """
    # Load all monthly files into a single xarray dataset that contains all dates (daily frequency)
@@ -564,4 +526,4 @@ def era5Land_nc_daily_to_ET0(list_era5land_files: List[str], output_file: str, r
    os.remove(output_file_ET0)
    shutil.move(output_file_ET0_reproj, output_file_ET0)
-    return None
+    return output_file_prec, output_file_ET0
\ No newline at end of file
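The function above now returns the precipitation and ET0 GeoTIFF paths instead of None. As a rough illustration of the output format described in its docstring (a multiband GeoTIFF, one band per day), here is a standalone sketch; the variable name, CRS and bounds are placeholders, not the project's actual values:

import numpy as np
import rasterio
from rasterio.transform import from_bounds
import xarray as xr

# Hypothetical monthly netcdf files and variable name
ds = xr.open_mfdataset(['era5_2023_01.nc', 'era5_2023_02.nc'])
et0 = ds['ET0'].values.astype(np.float32)   # shape (days, rows, cols), name assumed

profile = {
    'driver': 'GTiff', 'dtype': 'float32',
    'count': et0.shape[0], 'height': et0.shape[1], 'width': et0.shape[2],
    'crs': 'EPSG:4326',                                                   # placeholder CRS
    'transform': from_bounds(0, 43, 4, 45, et0.shape[2], et0.shape[1]),   # placeholder extent
}
with rasterio.open('ET0_daily.tif', 'w', **profile) as dst:
    dst.write(et0)   # band i+1 holds day i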
@@ -486,7 +486,7 @@ if __name__ == '__main__':
@profile  # type: ignore
-def run_samir(json_config_file: str, csv_param_file: str, ndvi_cube_path: str, weather_path: str, soil_params_path: str, land_cover_path: str, chunk_size: dict, save_path: str, max_GB: int = 2) -> None:
+def run_samir(json_config_file: str, csv_param_file: str, ndvi_cube_path: str, rain_path: str, ET0_path: str, soil_params_path: str, land_cover_path: str, chunk_size: dict, save_path: str, max_GB: int = 2) -> None:
    # warnings.simplefilter("error", category = RuntimeWarning())
    warnings.filterwarnings("ignore", message="invalid value encountered in cast")
@@ -641,9 +641,10 @@ if __name__ == '__main__':
    with nc.Dataset(soil_params_path, mode = 'r') as ds:
        FC = ds.variables['FC'][:,:]
        WP = ds.variables['WP'][:,:]
-    with nc.Dataset(weather_path, mode ='r') as ds:
-        prec = ds.variables['prec'][0,:,:] / 1000
-        ET0 = ds.variables['ET0'][0,:,:] / 1000
+    with rio.open(rain_path, mode ='r') as ds:
+        prec = ds.read(1) / 1000
+    with rio.open(ET0_path, mode = 'r') as ds:
+        ET0 = ds.read(1) / 1000
    # Create progress bar
    progress_bar = tqdm(total = len(dates), desc = 'Running model', unit = ' days')
@@ -776,10 +777,11 @@ if __name__ == '__main__':
        with nc.Dataset(ndvi_cube_path, mode = 'r') as ds:
            # Dimensions of ndvi dataset : (time, x, y)
            ndvi = ds.variables['ndvi'][i,:,:] / 255
-        with nc.Dataset(weather_path, mode ='r') as ds:
-            prec = ds.variables['prec'][i,:,:] / 1000
-            ET0 = ds.variables['ET0'][i,:,:] / 1000
-            ET0_previous = ds.variables['ET0'][i-1,:,:] / 1000
+        with rio.open(rain_path, mode ='r') as ds:
+            prec = ds.read(i+1) / 1000
+        with rio.open(ET0_path, mode = 'r') as ds:
+            ET0 = ds.read(i+1) / 1000
+            ET0_previous = ds.read(i) / 1000
        # Update variables
        ## Fraction cover
...
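The run_samir change above reads daily weather from the GeoTIFFs with rasterio instead of netCDF. One detail worth noting: rasterio band indices are 1-based, so the band for day index i is i + 1, and the previous day's band is i. A minimal sketch of that loop pattern (paths are hypothetical; the mm-to-m scaling mirrors the diff):

import rasterio as rio

rain_path = 'outputs/era5_precipitation.tif'   # hypothetical path
ET0_path = 'outputs/era5_ET0.tif'              # hypothetical path

with rio.open(rain_path) as rain_ds, rio.open(ET0_path) as et0_ds:
    for i in range(1, rain_ds.count):          # start at day 1 so band i (previous day) exists
        prec = rain_ds.read(i + 1) / 1000      # day i, converted from mm to m as in the diff
        ET0 = et0_ds.read(i + 1) / 1000
        ET0_previous = et0_ds.read(i)  / 1000  # previous day's ET0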