Commit ba969ba9 authored by Jeremy Auclair

input management almost finished

parent 9fff7850
 tests.py
-test_samir.py
+test_numpy_xarray.py
 *__pycache__*
 *config_modspa.json
 dl_S2.csv
@@ -15,11 +15,16 @@
 import xarray as xr # to manage dataset
 import pandas as pd # to manage dataframes
 import rasterio as rio # to open geotiff files
 import geopandas as gpd # to manage shapefile crs projections
+from numpy import nan # to use xr.interpolate_na()
 from shapely.geometry import box # to create boundary box
+from config.config import config # to import config file
 from input.input_toolbox import product_str_to_datetime

-def calculate_ndvi(extracted_paths: Union[List[str], str], save_dir: str, boundary_shapefile_path: str, resolution: int = 20, chunk_size: dict = {'x': 4000, 'y': 4000, 'time': 8}, acorvi_corr: int = 500) -> str:
+def calculate_ndvi(extracted_paths: Union[List[str], str], save_dir: str, boundary_shapefile_path: str, config_file: str, resolution: int = 20, chunk_size: dict = {'x': 512, 'y': 256, 'time': -1}, acorvi_corr: int = 500) -> str:

+    # Open config_file
+    config_params = config(config_file)
+
     # Check resolution for Sentinel-2
     if not resolution in [10, 20]:
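The NDVI pre-processing now takes the json configuration file as an explicit argument and opens it through the config class, so the start and end dates used further down come from the same file as the rest of the chain. A hypothetical call of the updated function; every path below is a placeholder, only the keyword names and the new default chunking come from the diff:

    # assuming calculate_ndvi is imported from the NDVI preprocessing module of this repository
    ndvi_cube_path = calculate_ndvi(
        extracted_paths = '/data/S2/extracted',         # extracted product directory or list of band paths (placeholder)
        save_dir = '/data/S2/ndvi',                     # output directory (placeholder)
        boundary_shapefile_path = '/data/shp/aoi.shp',  # study area shapefile (placeholder)
        config_file = 'config_modspa.json',             # json file read through config(), new in this commit
        resolution = 20,
        chunk_size = {'x': 512, 'y': 256, 'time': -1})  # new default dask chunking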
@@ -72,13 +77,13 @@ def calculate_ndvi(extracted_paths: Union[List[str], str], save_dir: str, bounda
     dates = [product_str_to_datetime(prod) for prod in red_paths]

     # Open datasets with xarray
-    red = xr.open_mfdataset(red_paths, combine = 'nested', concat_dim = 'time', chunks = chunk_size, parallel = True).squeeze(dim = ['band'], drop = True).rename({'band_data': 'red'}).astype('f4')
-    nir = xr.open_mfdataset(nir_paths, combine = 'nested', concat_dim = 'time', chunks = chunk_size, parallel = True).squeeze(dim = ['band'], drop = True).rename({'band_data': 'nir'}).astype('f4')
-    mask = xr.open_mfdataset(mask_paths, combine = 'nested', concat_dim = 'time', chunks = chunk_size, parallel = True).squeeze(dim = ['band'], drop = True).rename({'band_data': 'mask'}).astype('f4')
+    red = xr.open_mfdataset(red_paths, combine = 'nested', concat_dim = 'time', chunks = chunk_size, parallel = True).squeeze(dim = ['band'], drop = True).rename({'band_data': 'red'})
+    nir = xr.open_mfdataset(nir_paths, combine = 'nested', concat_dim = 'time', chunks = chunk_size, parallel = True).squeeze(dim = ['band'], drop = True).rename({'band_data': 'nir'})
+    mask = xr.open_mfdataset(mask_paths, combine = 'nested', concat_dim = 'time', chunks = chunk_size, parallel = True).squeeze(dim = ['band'], drop = True).rename({'band_data': 'mask'})
     if resolution == 10:
-        mask = xr.where((mask == 4) | (mask == 5), 1, 0).interp(x = red.coords['x'], y = red.coords['y'], method = 'nearest')
+        mask = xr.where((mask == 4) | (mask == 5), 1, nan).interp(x = red.coords['x'], y = red.coords['y'], method = 'nearest')
     else:
-        mask = xr.where((mask == 4) | (mask == 5), 1, 0)
+        mask = xr.where((mask == 4) | (mask == 5), 1, nan)

     # Set time coordinate
     red['time'] = pd.to_datetime(dates)
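Replacing the 0 fill value with nan in the cloud mask means that cloud-flagged pixels propagate as NaN through the NDVI computation instead of as false zero values, which is what lets the later interpolate_na step fill them. A minimal, self-contained illustration of the difference (values are made up):

    import numpy as np
    import xarray as xr

    ndvi = xr.DataArray([0.62, 0.65, 0.70], dims = 'time', coords = {'time': [0, 1, 2]})
    cloud_mask = xr.DataArray([1.0, np.nan, 1.0], dims = 'time', coords = {'time': [0, 1, 2]})  # 1 = clear, nan = cloudy, as in the diff

    masked = ndvi * cloud_mask                                        # [0.62, nan, 0.70]: the cloudy date becomes a gap
    filled = masked.interpolate_na(dim = 'time', method = 'linear')   # [0.62, 0.66, 0.70]: the gap is filled
    # with a 0 fill value the cloudy date would become 0.65 * 0 = 0.0 and be kept as a real observation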
@@ -94,8 +99,20 @@ def calculate_ndvi(extracted_paths: Union[List[str], str], save_dir: str, bounda
     # Mask and scale ndvi
     ndvi['ndvi'] = xr.where(ndvi.ndvi < 0, 0, ndvi.ndvi)
     ndvi['ndvi'] = xr.where(ndvi.ndvi > 1, 1, ndvi.ndvi)
-    ndvi['ndvi'] = (ndvi.ndvi*255).sortby('time')
+    ndvi['ndvi'] = (ndvi.ndvi*255).chunk(chunk_size)
+
+    # Sort images by time
+    ndvi = ndvi.sortby('time')
+
+    # Interpolates on a daily frequency
+    daily_index = pd.date_range(start = config_params.start_date, end = config_params.end_date, freq = 'D')
+
+    # Resample the dataset to a daily frequency and reindex with the new DateTimeIndex
+    ndvi = ndvi.resample(time = '1D').asfreq().reindex(time = daily_index)
+
+    # Interpolate the dataset along the time dimension to fill nan values
+    ndvi = ndvi.interpolate_na(dim = 'time', method = 'linear', fill_value = 'extrapolate').astype('u1')

     # Write attributes
     ndvi['ndvi'].attrs['units'] = 'None'
     ndvi['ndvi'].attrs['standard_name'] = 'NDVI'
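The added block regrids the irregular Sentinel-2 acquisition dates onto a daily axis spanning config_params.start_date to config_params.end_date, then fills the gaps by linear interpolation along time. The same resample → reindex → interpolate_na pattern on a toy series, with made-up dates standing in for the config values (scipy is required for fill_value = 'extrapolate'):

    import numpy as np
    import pandas as pd
    import xarray as xr

    # toy NDVI series observed every 5 days, already scaled to 0-255
    obs_dates = pd.to_datetime(['2021-01-01', '2021-01-06', '2021-01-11'])
    ndvi = xr.Dataset({'ndvi': ('time', np.array([100., 150., 200.]))}, coords = {'time': obs_dates})

    # daily index standing in for config_params.start_date / config_params.end_date
    daily_index = pd.date_range(start = '2021-01-01', end = '2021-01-12', freq = 'D')

    # same pattern as the diff: daily grid, reindex to the full period, fill gaps along time
    ndvi = ndvi.resample(time = '1D').asfreq().reindex(time = daily_index)
    ndvi = ndvi.interpolate_na(dim = 'time', method = 'linear', fill_value = 'extrapolate')

    print(ndvi.ndvi.values)  # [100. 110. 120. ... 200. 210.], one value per day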
@@ -103,7 +120,7 @@ def calculate_ndvi(extracted_paths: Union[List[str], str], save_dir: str, bounda
     ndvi['ndvi'].attrs['scale factor'] = '255'

     # Create save path
-    ndvi_cube_path = save_dir + os.sep + 'NDVI_precube_' + dates[0].strftime('%d-%m-%Y') + '_' + dates[-1].strftime('%d-%m-%Y') + '.nc'
+    ndvi_cube_path = save_dir + os.sep + 'NDVI_cube_' + dates[0].strftime('%d-%m-%Y') + '_' + dates[-1].strftime('%d-%m-%Y') + '.nc'

     # Save NDVI cube to netcdf
     ndvi.to_netcdf(ndvi_cube_path, encoding = {"ndvi": {"dtype": "u1", "_FillValue": 0}})
@@ -18,7 +18,7 @@ import input.lib_era5_land_pixel as era5land # custom built functions for ERA5-
 from config.config import config # to import config file

-def request_ER5_weather(input_file: str, ndvi_path: str) -> str:
+def request_ER5_weather(input_file: str, raw_S2_image_ref: str) -> str:

     # Get config file
     config_params = config(input_file)

@@ -120,7 +120,7 @@ def request_ER5_weather(input_file: str, ndvi_path: str) -> str:
     print('----------')

     # Save daily weather data into ncfile
-    weather_daily_ncFile = save_dir + os.sep + config_params.start_date + '_' + config_params.end_date + '_' + config_params.run_name + '_era5-land-daily-meteo.nc'
+    weather_daily_ncFile = save_dir + os.sep + config_params.start_date + '_' + config_params.end_date + '_' + config_params.run_name + '_era5-land-daily-meteo'

     # Temporary save directory for daily file merge
     variable_list = ['2m_dewpoint_temperature_daily_maximum', '2m_dewpoint_temperature_daily_minimum', '2m_temperature_daily_maximum', '2m_temperature_daily_minimum', 'total_precipitation_daily_mean', '10m_u_component_of_wind_daily_mean', '10m_v_component_of_wind_daily_mean', 'surface_solar_radiation_downwards_daily_mean']

@@ -129,7 +129,8 @@ def request_ER5_weather(input_file: str, ndvi_path: str) -> str:
     aggregated_files = era5land.concat_monthly_nc_file(list_era5land_hourly_ncFiles, variable_list, save_dir)

     # Calculate ET0 over the whole time period
-    era5land.era5Land_nc_daily_to_ET0(aggregated_files, weather_daily_ncFile, ndvi_path, h = wind_height)
+    era5land.era5Land_nc_daily_to_ET0(aggregated_files, weather_daily_ncFile, raw_S2_image_ref, config_params, h = wind_height)

-    print(weather_daily_ncFile)
-    return weather_daily_ncFile
+    print('\n', weather_daily_ncFile + '.nc', '\n')
+
+    return weather_daily_ncFile + '.nc'
\ No newline at end of file
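The weather request now takes a raw Sentinel-2 band as spatial reference instead of the NDVI cube path, and returns the daily file name with the '.nc' extension appended. A hypothetical call, with placeholder paths (only the argument names come from the diff):

    # assuming request_ER5_weather is imported from the weather module of this repository
    weather_nc = request_ER5_weather(
        'config_modspa.json',                                      # json run configuration (placeholder name)
        raw_S2_image_ref = '/data/S2/extracted/T31TCJ_B4_20m.tif') # any raw S2 band at the target resolution (placeholder)
    print(weather_nc)  # <save_dir>/<start>_<end>_<run_name>_era5-land-daily-meteo.nc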
@@ -10,7 +10,7 @@ Functions to call ECMWF Reanalysis with CDS-api
 @author: rivalland
 """

-import os # for path exploration
+import os, shutil # for path exploration and file management
 from typing import List # to declare variables
 import numpy as np # for math on arrays
 import xarray as xr # to manage nc files
@@ -18,6 +18,11 @@ from datetime import datetime # to manage dates
 from p_tqdm import p_map # for multiprocessing with progress bars
 from dateutil.rrule import rrule, MONTHLY
 from fnmatch import fnmatch # for file name matching
+import rasterio # to manage geotiff images
+from pandas import date_range
+from rasterio.warp import reproject, Resampling # to reproject
+from dask.diagnostics import ProgressBar
 import re # for string comparison
 import warnings # to suppress pandas warning
@@ -429,19 +434,64 @@ def calculate_ET0_pixel(pixel_dataset: xr.Dataset, lat: float, lon: float, h: fl
     return ET0_values

-def era5Land_nc_daily_to_ET0(list_era5land_files: List[str], output_nc_file: str, h: float = 10) -> None:
+def reproject_geotiff(source_image: str, destination_image: str, destination_crs: str):
+    # Open the original GeoTIFF file
+    with rasterio.open(source_image) as src:
+        # Get the source CRS and transform
+        src_crs = src.crs
+        src_transform = src.transform
+        # Read the data as a numpy array
+        source = src.read()
+
+        # Optionally, calculate the destination transform and shape based on the new CRS
+        dst_transform, dst_width, dst_height = rasterio.warp.calculate_default_transform(
+            src_crs, destination_crs, src.width, src.height, *src.bounds)
+
+        # Create an empty numpy array for the destination
+        destination = np.zeros((src.count, dst_height, dst_width), dtype=source.dtype)
+
+        # Reproject the source to the destination
+        reproject(
+            source,
+            destination,
+            src_transform=src_transform,
+            src_crs=src_crs,
+            dst_transform=dst_transform,
+            dst_crs=destination_crs,
+            resampling=Resampling.nearest)
+
+        # Update the metadata with the new CRS, transform and shape
+        dst_meta = src.meta.copy()
+        dst_meta.update(
+            crs=destination_crs,
+            transform=dst_transform,
+            width=dst_width,
+            height=dst_height)
+
+        # Save the reprojected data as a new GeoTIFF file
+        with rasterio.open(destination_image, "w", **dst_meta) as dst:
+            # Write the reprojected data to the file
+            dst.write(destination)
+
+    return None
+
+
+def era5Land_nc_daily_to_ET0(list_era5land_files: List[str], output_file: str, raw_S2_image_ref: str, config_params, h: float = 10, max_ram: int = 12288) -> None:
     """
     Calculate ET0 values from the ERA5 netcdf weather variables.
     Output netcdf contains the ET0 values for each day in the selected
-    time period and for each ERA5 pixel covering the required area.
+    time period, reprojected on the same grid as the NDVI values.

     ## Arguments
     1. list_era5land_files: `List[str]`
         list of netcdf files containing the necessary variables
-    2. output_nc_file: `str`
-        output netcdf file to save
-    3. h: `float` `default = 10`
+    2. output_file: `str`
+        output file name without extension
+    3. raw_S2_image_ref: `str`
+        raw Sentinel 2 image at right resolution for reprojection
+    4. h: `float` `default = 10`
         height of ERA5 wind measurements in meters
+    5. max_ram: `int` `default = 12288`
+        max ram (in MiB) to give to OTB

     ## Returns
     `None`
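A hypothetical use of the new reproject_geotiff helper, with placeholder file names and a placeholder target CRS; only the function signature comes from the diff:

    # reproject a GeoTIFF from its native CRS to the CRS of the Sentinel-2 tile (all names are placeholders)
    reproject_geotiff('era5_ET0_wgs84.tif', 'era5_ET0_utm.tif', 'EPSG:32631')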
@@ -477,7 +527,10 @@ def era5Land_nc_daily_to_ET0(list_era5land_files: List[str], output_nc_file: str
     final_weather_ds['tp'] = final_weather_ds['tp'] * 1000 # conversion from m to mm

     # Change datatype to reduce memory usage
     final_weather_ds = (final_weather_ds * 1000).astype('u2')
+
+    # Write projection
+    final_weather_ds = final_weather_ds.rio.write_crs('EPSG:4326')

     # Set variable attributes
     final_weather_ds['ET0'].attrs['units'] = 'mm'
@@ -487,9 +540,28 @@ def era5Land_nc_daily_to_ET0(list_era5land_files: List[str], output_nc_file: str
     final_weather_ds['tp'].attrs['units'] = 'mm'
     final_weather_ds['tp'].attrs['standard_name'] = 'Precipitation'
     final_weather_ds['tp'].attrs['comment'] = 'Volume of total daily precipitation expressed as water height in milimeters'
     final_weather_ds['tp'].attrs['scale factor'] = '1000'

-    # Save dataset to netcdf, still in wgs84 (lat, lon) coordinates
-    final_weather_ds.to_netcdf(path = output_nc_file, encoding = {"ET0": {"dtype": "u2"}, "tp": {"dtype": "u2"}})
+    # Save dataset to geotiff, still in wgs84 (lat, lon) coordinates
+    output_file_prec = output_file + '_prec.tif'
+    output_file_ET0 = output_file + '_ET0.tif'
+    final_weather_ds.tp.rio.to_raster(output_file_prec, dtype = 'uint16')
+    final_weather_ds.ET0.rio.to_raster(output_file_ET0, dtype = 'uint16')
+
+    # Reprojected image paths
+    output_file_prec_reproj = output_file + '_prec_reproj.tif'
+    output_file_ET0_reproj = output_file + '_ET0_reproj.tif'
+
+    # Run otbcli_SuperImpose
+    OTB_command = 'otbcli_Superimpose -inr ' + raw_S2_image_ref + ' -inm ' + output_file_prec + ' -out ' + output_file_prec_reproj + ' uint16 -ram ' + str(max_ram)
+    os.system(OTB_command)
+    OTB_command = 'otbcli_Superimpose -inr ' + raw_S2_image_ref + ' -inm ' + output_file_ET0 + ' -out ' + output_file_ET0_reproj + ' uint16 -ram ' + str(max_ram)
+    os.system(OTB_command)
+
+    # remove old files and rename outputs
+    os.remove(output_file_prec)
+    shutil.move(output_file_prec_reproj, output_file_prec)
+    os.remove(output_file_ET0)
+    shutil.move(output_file_ET0_reproj, output_file_ET0)

     return None
\ No newline at end of file
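The commit drives OTB through os.system with concatenated strings. As an alternative sketch only (not what the commit does, and all paths are placeholders), the same Superimpose call can be expressed with subprocess.run and an argument list, which avoids shell-quoting issues with paths containing spaces:

    import subprocess

    def run_superimpose(reference_image: str, moving_image: str, output_image: str, max_ram: int = 12288) -> None:
        # same parameters as the otbcli_Superimpose call in the diff, passed as a list instead of one shell string
        command = ['otbcli_Superimpose',
                   '-inr', reference_image,         # image whose grid and projection are taken as reference
                   '-inm', moving_image,            # image to resample onto that grid
                   '-out', output_image, 'uint16',  # output path followed by the OTB output pixel type
                   '-ram', str(max_ram)]
        subprocess.run(command, check = True)       # raises if OTB returns a non-zero exit code

    # run_superimpose('/data/S2/raw_B4.tif', 'meteo_ET0.tif', 'meteo_ET0_reproj.tif')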