From e2a6509b52efa0dac497f0cdfa29a8d6889127fa Mon Sep 17 00:00:00 2001
From: Jeremy Auclair <jeremy.auclair@cesbio.cnes.fr>
Date: Tue, 25 Jul 2023 17:41:08 +0200
Subject: [PATCH] Changed weather data loading to geotiff (can't find tool to
 convert tif to netcdf for large datasets), rewrote some function description,
 minor refactoring.

---
 input/download_ERA5_weather.py | 50 +++++++++++++++++++---------
 input/lib_era5_land_pixel.py   | 60 +++++++---------------------------
 test_samir.py                  | 18 +++++-----
 3 files changed, 55 insertions(+), 73 deletions(-)

diff --git a/input/download_ERA5_weather.py b/input/download_ERA5_weather.py
index b82a9cc..d1a69b3 100644
--- a/input/download_ERA5_weather.py
+++ b/input/download_ERA5_weather.py
@@ -2,26 +2,43 @@
 # Python
 """
 04-07-2023
-@author: rivallandv, modified by jeremy auclair
+@author: rivallandv, heavily modified by jeremy auclair
 
-Download ERA5 weather files for modspa
+Download ERA5 daily weather files for modspa
 """
 
 import glob  # for path management
 import sys  # for path management
 import os  # for path exploration
-import xarray as xr  # to manage nc files
-import pandas as pd  # to manage dataframes
+from typing import Tuple
 import geopandas as gpd  # to manage shapefiles
 from psutil import cpu_count  # to get number of physical cores available
 import input.lib_era5_land_pixel as era5land  # custom built functions for ERA5-Land data download
 from config.config import config  # to import config file
 
 
-def request_ER5_weather(input_file: str, raw_S2_image_ref: str) -> str:
+def request_ER5_weather(config_file: str, raw_S2_image_ref: str) -> Tuple[str, str]:
+    """
+    Download ERA5 reanalysis daily weather files, concatenate and calculate ET0
+    to obtain a geotiff multiband (one band per day) image for precipitation and
+    ET0 values.
+
+    ## Arguments
+    config_file: `str`
+        json configuration file
+    raw_S2_image_ref: `str`
+        unmodified sentinel-2 image at correct resolution for
+        weather data reprojection
+
+    ## Returns
+    1. precip_file: `str`
+        path to file containing precipitation data
+    2. ET0_file: `str`
+        path to file containing ET0 data
+    """
     
     # Get config file
-    config_params = config(input_file)
+    config_params = config(config_file)
     outpath = config_params.era5_path + os.sep + config_params.run_name
 
     # Geometry configuration
@@ -50,7 +67,7 @@ def request_ER5_weather(input_file: str, raw_S2_image_ref: str) -> str:
         print('mkdir path for nc files: ', outpath)
     print('----------')
 
-    # %% Request ERA5-land BoxBound Determination
+    # Request ERA5-land BoxBound Determination
     if config_params.shapefile_path:
         # Load shapefile to access geometrics informations for ERA5-Land request
         gdf_expe_polygons = gpd.read_file(config_params.shapefile_path)
@@ -60,9 +77,11 @@ def request_ER5_weather(input_file: str, raw_S2_image_ref: str) -> str:
         # verification que les polygones sont tous fermés
         liste_polygons_validity = gdf_expe_polygons.geometry.is_valid
         if list(liste_polygons_validity).count(False) > 0:
+            
             print('some polygons of Shapefile are not valid')
             polygons_invalid = liste_polygons_validity.loc[liste_polygons_validity == False]
             print('invalid polygons:', polygons_invalid)
+            
             for i in polygons_invalid.index:
                 gdf_expe_polygons.geometry[i]
 
@@ -77,17 +96,17 @@ def request_ER5_weather(input_file: str, raw_S2_image_ref: str) -> str:
 
         if expe_epsg.srs != wgs84_epsg:
             print('--- convert extend in wgs84 coordinates ---')
+            
             # idem en wgs84 pour des lat/lon en degree (format utilisé par google earth engine)
-            expe_polygons_boxbound_wgs84 = gdf_expe_polygons.to_crs(
-                wgs84_epsg).geometry.total_bounds
+            expe_polygons_boxbound_wgs84 = gdf_expe_polygons.to_crs(wgs84_epsg).geometry.total_bounds
+            
             # convert to list for earth engine
             expe_polygons_boxbound_wgs84 = list(expe_polygons_boxbound_wgs84)
         else:
             expe_polygons_boxbound_wgs84 = expe_polygons_boxbound
 
         # switch coordinates order to agree with ECMWF order: N W S E
-        expe_area = expe_polygons_boxbound_wgs84[3], expe_polygons_boxbound_wgs84[0],\
-            expe_polygons_boxbound_wgs84[1], expe_polygons_boxbound_wgs84[2]
+        expe_area = expe_polygons_boxbound_wgs84[3], expe_polygons_boxbound_wgs84[0], expe_polygons_boxbound_wgs84[1], expe_polygons_boxbound_wgs84[2]
 
     print('boxbound [N W S E] extend in ', wgs84_epsg)
     print(expe_area)
@@ -101,8 +120,6 @@ def request_ER5_weather(input_file: str, raw_S2_image_ref: str) -> str:
     # Get number of available CPUs
     nb_processes = 4 * min([cpu_count(logical = False), len(os.sched_getaffinity(0)), config_params.max_cpu])  # downloading data demands very little computing power, each processor core can manage multiple downloads
 
-    #============================================================================================
-
     # Call daily data
     era5land.call_era5land_daily_for_MODSPA(config_params.start_date, config_params.end_date, era5_expe_polygons_boxbound_wgs84, output_path = outpath, processes = nb_processes)
 
@@ -129,8 +146,9 @@ def request_ER5_weather(input_file: str, raw_S2_image_ref: str) -> str:
     aggregated_files = era5land.concat_monthly_nc_file(list_era5land_hourly_ncFiles, variable_list, save_dir)
     
     # Calculate ET0 over the whole time period
-    era5land.era5Land_nc_daily_to_ET0(aggregated_files, weather_daily_ncFile, raw_S2_image_ref, config_params, h = wind_height)
+    precip_file, ET0_file = era5land.era5Land_nc_daily_to_ET0(aggregated_files, weather_daily_ncFile, raw_S2_image_ref, config_params, h = wind_height)
     
-    print('\n', weather_daily_ncFile + '.nc', '\n')
+    print(precip_file)
+    print(ET0_file)
 
-    return weather_daily_ncFile + '.nc'
\ No newline at end of file
+    return precip_file, ET0_file
\ No newline at end of file
diff --git a/input/lib_era5_land_pixel.py b/input/lib_era5_land_pixel.py
index 18f409a..fe18816 100644
--- a/input/lib_era5_land_pixel.py
+++ b/input/lib_era5_land_pixel.py
@@ -2,16 +2,17 @@
 """
 Functions to call ECMWF Reanalysis with CDS-api
 
-- ERA5-land hourly request
 - ERA5-land daily request
-- request a list of hourly variables dedicated to the calculus of ET0
+- request a list of daily variables dedicated to the calculus of ET0
  and the generation of MODSPA daily forcing files
+ 
+ heavily modified from rivallandv's original file
 
-@author: rivalland
+@author: auclairj
 """
 
 import os, shutil  # for path exploration and file management
-from typing import List  # to declare variables
+from typing import List, Tuple  # to declare variables
 import numpy as np  # for math on arrays
 import xarray as xr  # to manage nc files
 from datetime import datetime  # to manage dates
@@ -21,7 +22,6 @@ from fnmatch import fnmatch  # for file name matching
 import rasterio  # to manage geotiff images
 from pandas import date_range
 from rasterio.warp import reproject, Resampling  # to reproject
-from dask.diagnostics import ProgressBar
 
 import re  # for string comparison
 import warnings  # to suppress pandas warning
@@ -434,48 +434,7 @@ def calculate_ET0_pixel(pixel_dataset: xr.Dataset, lat: float, lon: float, h: fl
     return ET0_values
 
 
-def reproject_geotiff(source_image: str, destination_image: str, destination_crs: str):
-    
-    # Open the original GeoTIFF file
-    with rasterio.open(source_image) as src:
-        # Get the source CRS and transform
-        src_crs = src.crs
-        src_transform = src.transform
-        # Read the data as a numpy array
-        source = src.read()
-
-    # Optionally, calculate the destination transform and shape based on the new CRS
-    dst_transform, dst_width, dst_height = rasterio.warp.calculate_default_transform(
-    src_crs, destination_crs, src.width, src.height, *src.bounds)
-
-    # Create an empty numpy array for the destination
-    destination = np.zeros((src.count, dst_height, dst_width))
-
-    # Reproject the source to the destination
-    reproject(
-    source,
-    destination,
-    src_transform=src_transform,
-    src_crs=src_crs,
-    dst_transform=dst_transform,
-    dst_crs=destination_crs,
-    resampling=Resampling.nearest)
-
-    # Save the reprojected data as a new GeoTIFF file
-    with rasterio.open(destination_image, "w", **src.meta) as dst:
-        # Update the metadata with the new CRS, transform and shape
-        dst.update(
-        crs=destination_crs,
-        transform=dst_transform,
-        width=dst_width,
-        height=dst_height)
-        # Write the reprojected data to the file
-        dst.write(destination)
-    
-    return None
-
-
-def era5Land_nc_daily_to_ET0(list_era5land_files: List[str], output_file: str, raw_S2_image_ref: str, h: float = 10, max_ram: int = 12288) -> None:
+def era5Land_nc_daily_to_ET0(list_era5land_files: List[str], output_file: str, raw_S2_image_ref: str, h: float = 10, max_ram: int = 12288) -> Tuple[str, str]:
     """
     Calculate ET0 values from the ERA5 netcdf weather variables.
     Output netcdf contains the ET0 values for each day in the selected
@@ -494,7 +453,10 @@ def era5Land_nc_daily_to_ET0(list_era5land_files: List[str], output_file: str, r
         max ram (in MiB) to give to OTB
 
     ## Returns
-    `None`
+    1. output_file_prec: `str`
+        path to file containing precipitation data
+    2. output_file_ET0: `str`
+        path to file containing ET0 data
     """
     
     # Load all monthly files into a single xarray dataset that contains all dates (daily frequency)
@@ -564,4 +526,4 @@ def era5Land_nc_daily_to_ET0(list_era5land_files: List[str], output_file: str, r
     os.remove(output_file_ET0)
     shutil.move(output_file_ET0_reproj, output_file_ET0)
 
-    return None
\ No newline at end of file
+    return output_file_prec, output_file_ET0
\ No newline at end of file
diff --git a/test_samir.py b/test_samir.py
index 2bf4637..75cacba 100644
--- a/test_samir.py
+++ b/test_samir.py
@@ -486,7 +486,7 @@ if __name__ == '__main__':
 
 
     @profile  # type: ignore
-    def run_samir(json_config_file: str, csv_param_file: str, ndvi_cube_path: str, weather_path: str, soil_params_path: str, land_cover_path: str, chunk_size: dict, save_path: str, max_GB: int = 2) -> None:
+    def run_samir(json_config_file: str, csv_param_file: str, ndvi_cube_path: str, rain_path: str, ET0_path: str, soil_params_path: str, land_cover_path: str, chunk_size: dict, save_path: str, max_GB: int = 2) -> None:
         
         # warnings.simplefilter("error", category = RuntimeWarning())
         warnings.filterwarnings("ignore", message="invalid value encountered in cast")
@@ -641,9 +641,10 @@ if __name__ == '__main__':
         with nc.Dataset(soil_params_path, mode = 'r') as ds:
             FC = ds.variables['FC'][:,:]
             WP = ds.variables['WP'][:,:]
-        with nc.Dataset(weather_path, mode ='r') as ds:
-            prec = ds.variables['prec'][0,:,:] / 1000
-            ET0 = ds.variables['ET0'][0,:,:] / 1000
+        with rio.open(rain_path, mode ='r') as ds:
+            prec = ds.read(1) / 1000
+        with rio.open(ET0_path, mode = 'r') as ds:
+            ET0 = ds.read(1) / 1000
         
         # Create progress bar
         progress_bar = tqdm(total = len(dates), desc = 'Running model', unit = ' days')
@@ -776,10 +777,11 @@ if __name__ == '__main__':
             with nc.Dataset(ndvi_cube_path, mode = 'r') as ds:
                 # Dimensions of ndvi dataset : (time, x, y)
                 ndvi = ds.variables['ndvi'][i,:,:] / 255
-            with nc.Dataset(weather_path, mode ='r') as ds:
-                prec = ds.variables['prec'][i,:,:] / 1000
-                ET0 = ds.variables['ET0'][i,:,:] / 1000
-                ET0_previous = ds.variables['ET0'][i-1,:,:] / 1000
+            with rio.open(rain_path, mode ='r') as ds:
+                prec = ds.read(i+1) / 1000
+            with rio.open(ET0_path, mode = 'r') as ds:
+                ET0 = ds.read(i+1) / 1000
+                ET0_previous = ds.read(i) / 1000
         
             # Update variables
             ## Fraction cover
-- 
GitLab