Skip to content
Snippets Groups Projects
Commit 4d3ace19 authored by Jeremy Auclair's avatar Jeremy Auclair
Browse files

minor update. code comments

parent 632a9c15
No related branches found
No related tags found
No related merge requests found
......@@ -10,7 +10,6 @@ Download ERA5 daily weather files for modspa
import glob # for path management
import sys # for path management
import os # for path exploration
from typing import Tuple
import geopandas as gpd # to manage shapefiles
from psutil import cpu_count # to get number of physical cores available
import modspa_pixel.preprocessing.lib_era5_land_pixel as era5land # custom built functions for ERA5-Land data download
......
......@@ -731,7 +731,7 @@ def calculate_SWCe(Dei: np.ndarray, Dep: np.ndarray, fewi: np.ndarray, fewp: np.
return np.where((fewi + fewp) > 0, (TEW - (Dei * fewi + Dep * fewp) / (fewi + fewp)) / TEW, (TEW - (Dei + Dep) / 2) / TEW)
def calculate_memory_requirement(x_size: int, y_size: int, time_size: int, nb_inputs: int, nb_outputs: int, nb_variables: int, nb_params: int, nb_bits: int) -> float:
def calculate_memory_requirement(x_size: int, y_size: int, time_size: int, nb_inputs: int, nb_outputs: int, nb_variables: int, nb_params: int, nb_bytes: int) -> float:
"""
Calculate memory requirement (GiB) of calculation if all datasets were loaded in memory.
Used to determine how to divide the datasets in times chunks for more efficient I/O
......@@ -741,11 +741,11 @@ def calculate_memory_requirement(x_size: int, y_size: int, time_size: int, nb_in
=========
1. x_size: ``int``
x size of dataset
x size of dataset (pixels)
2. y_size: ``int``
y size of dataset
y size of dataset (pixels)
3. time_size: ``int``
number of time bands
number of time bands (dates)
4. nb_inputs: ``int``
number of input variables
5. nb_outputs: ``int``
......@@ -755,7 +755,7 @@ def calculate_memory_requirement(x_size: int, y_size: int, time_size: int, nb_in
7. nb_params: ``int``
number of raster parameters
8. nb_bits: ``int``
number of bits of datatype for inputs and outputs
number of bytes of datatype for inputs and outputs
Returns
=======
......@@ -765,13 +765,13 @@ def calculate_memory_requirement(x_size: int, y_size: int, time_size: int, nb_in
"""
# Memory requirement of input datasets
input_memory_requirement = (x_size * y_size * time_size * nb_inputs * nb_bits) / (1024**3)
input_memory_requirement = (x_size * y_size * time_size * nb_inputs * nb_bytes) / (1024**3)
# Memory requirement of calculation variables
calculation_memory_requirement = (x_size * y_size * (nb_params *2 + nb_variables * 4)) / (1024**3)  # calculation done in float32, params in int16
# Memory requirement of output datasets
output_memory_requirement = (x_size * y_size * time_size * nb_outputs * nb_bits) / (1024**3)
output_memory_requirement = (x_size * y_size * time_size * nb_outputs * nb_bytes) / (1024**3)
# Total memory requirement
total_memory_requirement = (input_memory_requirement + calculation_memory_requirement + output_memory_requirement) * 1.05 # 5% adjustment factor
......@@ -1385,7 +1385,7 @@ def run_samir(csv_param_file: str, ndvi_cube_path: str, weather_path: str, soil_
# Check how much memory the calculation would take if all the inputs would be loaded in memory
# Unit is GiB
# Datatype of variables is float32 for calculation
nb_bits = 2 # uint16 or int16
nb_bytes = 2 # uint16 or int16
nb_inputs = 3 # NDVI, Rain, ET0
if additional_outputs:
nb_outputs = 6 + len(additional_outputs) # DP, E, Irr, SWCe, SWCr, Tr
......@@ -1395,7 +1395,7 @@ def run_samir(csv_param_file: str, ndvi_cube_path: str, weather_path: str, soil_
security_factor = 0.8 # it is difficult to estimate true memory usage, apply a security factor to prevent memory overload
# Get memory requirement
total_memory_requirement = calculate_memory_requirement(x_size, y_size, time_size, nb_inputs, nb_outputs, nb_variables, len(params)/2, nb_bits)
total_memory_requirement = calculate_memory_requirement(x_size, y_size, time_size, nb_inputs, nb_outputs, nb_variables, len(params)/2, nb_bytes)
# Determine how many time slices can be loaded in memory at once
# This will allow faster I/O operations and a faster runtime
......@@ -1405,7 +1405,7 @@ def run_samir(csv_param_file: str, ndvi_cube_path: str, weather_path: str, soil_
print('\nApproximate memory requirement of calculation:', print_size, print_unit + ', available memory:', available_ram, 'GiB\n\nLoading blocks of', time_slice, 'time bands\n')
# ============ Prepare outputs ============ #
model_outputs = prepare_output_dataset(ndvi_cube_path, dimensions, scaling_dict, additional_outputs=additional_outputs)
model_outputs = prepare_output_dataset(ndvi_cube_path, dimensions, scaling_dict, additional_outputs = additional_outputs)
# Create encoding dictionary
encoding_dict = {}
......@@ -1444,7 +1444,7 @@ def run_samir(csv_param_file: str, ndvi_cube_path: str, weather_path: str, soil_
# ============ Time loop ============ #
# Create progress bar
progress_bar = tqdm(total=len(dates), desc='', unit=' days')
progress_bar = tqdm(total = len(dates), desc = '', unit = ' days')
for i in range(0, len(dates)):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment