diff --git a/source/code_toolbox.py b/source/code_toolbox.py
index cb6cc8cab7d918d06caf9fea85159d7d6bd8a810..d6492f9118c2309fa92e4633f129e2017acc39ac 100644
--- a/source/code_toolbox.py
+++ b/source/code_toolbox.py
@@ -8,8 +8,10 @@ Contains functions to facilitate code workflow, directory creation, Notebook rea
 """
 
 from os import path # os management
-from numpy import ceil # for vectorized maths
+import numpy as np # for vectorized maths
 from pandas import DataFrame # to manage dataframes
+import xarray as xr # to manage datasets
+from numba.typed import List # type: ignore, to type lists
 from modspa_pixel.parameters.params_samir_class import samir_parameters # to load SAMIR parameters
 
 
@@ -30,36 +32,24 @@ def format_duration(timedelta: float) -> None:
         ``None``
     """
 
-    if timedelta < 1e-6:
-        unit, scale = 'ns', 1e9
-    elif timedelta < 1e-3:
-        unit, scale = 'µs', 1e6
-    elif timedelta < 1:
-        unit, scale = 'ms', 1e3
+    if timedelta < 0.9e-6:
+        print(round(timedelta * 1e9, 1), 'ns')
+    elif timedelta < 0.9e-3:
+        print(round(timedelta * 1e6, 1), 'µs')
+    elif timedelta < 0.9:
+        print(round(timedelta * 1e3, 1), 'ms')
     elif timedelta < 60:
-        unit, scale = 's', 1
-    elif timedelta < 3600:
-        unit, scale = 'm', 1/60
-    elif timedelta < 86400:
-        unit, scale = 'h', 1/3600
+        print(round(timedelta, 1), 's')
+    elif timedelta < 3.6e3:
+        print(round(timedelta // 60), 'm', round(timedelta % 60, 1), 's')
+    elif timedelta < 24*3.6e3:
+        print(round((timedelta / 3.6e3) // 1), 'h', round((timedelta / 3.6e3) % 1 * 60 // 1), 'm', round((timedelta / 3.6e3) % 1 * 60 % 1 * 60, 1), 's')
+    elif timedelta < 48*3.6e3:
+        print(round((timedelta / (24 * 3.6e3)) // 1), 'day,', round(((timedelta / (24 * 3.6e3)) % 1 * 24) // 1), 'h,',
+              round((timedelta / (24*3.6e3)) % 1 * 24 % 1 * 60 // 1), 'm,', round((timedelta / (24 * 3.6e3)) % 1 * 24 % 1 * 60 % 1 * 60, 1), 's')
     else:
-        unit, scale = 'days', 1/86400
-
-    value = timedelta * scale
-    if unit in ['m', 'h']:
-        minutes, seconds = divmod(value, 60)
-        if unit == 'h':
-            hours, minutes = divmod(minutes, 60)
-            print(f"{int(hours)}h {int(minutes)}m {seconds:.1f}s")
-        else:
-            print(f"{int(minutes)}m {seconds:.1f}s")
-    elif unit == 'days':
-        days, remainder = divmod(value, 1)
-        hours, remainder = divmod(remainder * 24, 1)
-        minutes, seconds = divmod(remainder * 1440, 1)
-        print(f"{int(days)} days, {int(hours)}h, {int(minutes)}m, {seconds:.1f}s")
-    else:
-        print(f"{value:.1f} {unit}")
+        print(round((timedelta / (24 * 3.6e3)) // 1), 'days,', round(((timedelta / (24 * 3.6e3)) % 1 * 24) // 1), 'h,',
+              round((timedelta / (24 * 3.6e3)) % 1 * 24 % 1 * 60 // 1), 'm,', round((timedelta / (24 * 3.6e3)) % 1 * 24 % 1 * 60 % 1 * 60, 1), 's')
 
     return None
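Review note: the rewritten `format_duration` switches units at 0.9-scaled thresholds, so durations just under a unit boundary (e.g. 0.95 ms) are reported in the larger unit instead of as `950.0 µs`. A quick sanity check of the branches, assuming the patched module is importable as shown (expected outputs checked by hand; trailing digits may vary with floating-point rounding):

```python
from modspa_pixel.source.code_toolbox import format_duration

format_duration(4.2e-7)   # prints: 420.0 ns
format_duration(0.25)     # prints: 250.0 ms
format_duration(75.3)     # prints: 1 m 15.3 s
format_duration(3725.5)   # prints: 1 h 2 m 5.5 s
format_duration(90061.5)  # prints: 1 day, 1 h, 1 m, 1.5 s
```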
@@ -306,7 +296,7 @@ def calculate_x_slices(x_size, y_size, time_size, available_ram, additinal_outpu
         return [slice(0, x_size)], [x_size]
 
     # Estimate the number of chunks required to stay below memory limit
-    approx_chunks = int(ceil(total_memory / available_ram))
+    approx_chunks = int(np.ceil(total_memory / available_ram))
 
     # Calculate the base chunk size
     base_chunk_size = x_size // approx_chunks
@@ -317,7 +307,7 @@
     max_chunk_size = base_chunk_size + 1 if remainder > 0 else base_chunk_size
     if max_chunk_size > min_chunk_size * (1 + tolerance):
         # Recalculate chunks to maintain balance
-        approx_chunks = int(ceil(x_size / ((1 + tolerance) * base_chunk_size)))
+        approx_chunks = int(np.ceil(x_size / ((1 + tolerance) * base_chunk_size)))
         base_chunk_size = x_size // approx_chunks
         remainder = x_size % approx_chunks
         min_chunk_size = base_chunk_size
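Review note: the two `np.ceil` hunks above sit inside the chunk-balancing logic, which is easier to review in isolation. A minimal standalone sketch of that arithmetic with illustrative numbers (`tolerance` is the balance threshold argument of `calculate_x_slices`; here `min_chunk_size` equals `base_chunk_size`, as in the context lines):

```python
import numpy as np

x_size, total_memory, available_ram, tolerance = 1000, 7.3, 2.0, 0.1

# Number of x chunks needed to keep one block under the RAM budget
approx_chunks = int(np.ceil(total_memory / available_ram))  # ceil(3.65) -> 4

# Split the x dimension as evenly as possible
base_chunk_size = x_size // approx_chunks                   # 250
remainder = x_size % approx_chunks                          # 0
max_chunk_size = base_chunk_size + 1 if remainder > 0 else base_chunk_size

# Rebalance when the largest chunk would exceed the smallest by more than `tolerance`
if max_chunk_size > base_chunk_size * (1 + tolerance):
    approx_chunks = int(np.ceil(x_size / ((1 + tolerance) * base_chunk_size)))
    base_chunk_size = x_size // approx_chunks
```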
@@ -372,4 +362,194 @@ def print_memory_prevision(total_memory_requirement: float, actual_memory_requir
     else:
         print('\nApproximate memory requirement of calculation:', print_size1, print_unit1 + ', available memory:', available_ram, 'GiB\n\nLoading single block of', chunks[0], '*', y_size, '*', time_size, 'elements', '\n')
 
-    return None
\ No newline at end of file
+    return None
+
+
+def rasterize_samir_parameters(csv_param_file: str, land_cover_raster: str, irrigation_raster: str, init_RU_raster: str, x_slice: slice) -> tuple[np.ndarray, np.ndarray, np.ndarray, List]:
+    """
+    Creates the rasterized parameter array and scale factors from the csv parameter file
+    and the land cover raster. For each parameter, the function loops on land cover classes to fill the raster.
+
+    Before creating the arrays, it updates the parameter ``DataFrame`` and verifies the parameter ranges with
+    the ``test_samir_parameter()`` function.
+
+    Arguments
+    =========
+
+    1. csv_param_file: ``str``
+        path to csv parameter file
+    2. land_cover_raster: ``str``
+        path to land cover netcdf raster
+    3. irrigation_raster: ``str``
+        path to irrigation mask raster
+    4. init_RU_raster: ``str``
+        path to initial soil water content. None if no input.
+    5. x_slice: ``slice``
+        x_slice to load
+
+    Returns
+    =======
+
+    1. parameter_array, Irrig_auto, Init_RU, param_list
+        the rasterized parameters, the irrigation mask, the initial
+        soil water content and the numba typed list of parameter names
+
+    """
+
+    # Get path of min max csv file
+    min_max_param_file = path.join(path.dirname(csv_param_file), 'params_samir_min_max.csv')
+
+    # Load samir params into an object
+    table_param = samir_parameters(csv_param_file)
+
+    # Set general variables
+    class_count = table_param.table.shape[1] - 2 # remove dtype and Default columns
+
+    # Open land cover raster
+    land_cover = xr.open_dataarray(land_cover_raster).isel(x = x_slice).to_numpy()
+    shape = land_cover.shape
+
+    # Modify parameter table based on its content
+    table_param.test_samir_parameter(min_max_param_file)
+
+    # If test_samir_parameter returns 0, parameters are incorrect
+    if type(table_param.table) != DataFrame:
+        import sys # to exit script
+        print(f'\nModify the SAMIR parameter file: {csv_param_file}\n')
+        sys.exit()
+
+    # Count the spatialized parameters (Irrig_auto and Init_RU are handled separately)
+    count = 0
+    for parameter in table_param.table.index[1:]:
+        if parameter == 'Irrig_auto' or parameter == 'Init_RU':
+            pass
+        else:
+            count += 1
+
+    parameter_array = np.zeros((count,) + shape, dtype = np.float32, order = 'C')
+    param_list = []
+
+    i = 0
+    # Loop on samir parameters and fill the parameter array
+    for parameter in table_param.table.index[1:]:
+
+        if parameter == 'Irrig_auto' and table_param.table.at[parameter, 'load_raster'] == 1:
+
+            # Load Irrig mask
+            Irrig_auto = np.ascontiguousarray(xr.open_dataarray(irrigation_raster, decode_coords = 'all').isel(x = x_slice).astype(np.bool_).values, dtype = np.bool_)
+
+            continue
+
+        elif parameter == 'Init_RU' and table_param.table.at[parameter, 'load_raster'] == 1:
+
+            # Load initial soil water content
+            Init_RU = np.ascontiguousarray((xr.open_dataarray(init_RU_raster) / 1000).isel(x = x_slice).astype(np.float32).values, dtype = np.float32)
+
+            continue
+
+        # Kcmax is not defined per land cover class: fill with the Default value
+        elif parameter == 'Kcmax':
+
+            param_list.append(parameter)
+
+            # Take Default parameter value
+            parameter_array[i] = np.ones(shape = shape, dtype = np.float32) * np.float32(table_param.table.at[parameter, 'Default'])
+            i += 1
+
+            continue
+
+        # Check data type based on parameter
+        if (parameter != 'Irrig_auto') and (parameter != 'Init_RU'):
+            dtype = np.float32
+            param_list.append(parameter)
+
+        # Create 2D array from land cover
+        value = land_cover.copy().astype(dtype)
+
+        # Loop on classes to set parameter values for each class
+        for class_val, class_name in zip(range(1, class_count + 1), table_param.table.columns[2:]):
+
+            # Get parameter value
+            param_value = table_param.table.at[parameter, class_name]
+
+            # Load parameter value for each class
+            value = np.where(land_cover == class_val, np.float32(param_value), value)
+
+        # Assign parameter value
+        if parameter == 'Irrig_auto':
+            Irrig_auto = value.astype(np.bool_)
+        elif parameter == 'Init_RU':
+            Init_RU = value.astype(np.float32)
+        else:
+            parameter_array[i] = value.astype(dtype)
+            i += 1
+
+    # Ensure the parameter array is contiguous in memory
+    parameter_array = np.ascontiguousarray(parameter_array, dtype = np.float32)
+
+    # Change param_list type
+    param_list = List(param_list)
+
+    return parameter_array, Irrig_auto, Init_RU, param_list
+ """ + + # Standard scaling dict + scaling_array = [] + for var in scaling_dict.keys(): + scaling_array.append(scaling_dict[var]) + scaling_array = np.ascontiguousarray(np.array(scaling_array, dtype = np.int16)) + scaling_names = List(list(scaling_dict.keys())) + + # Manage additional output parameters + if additional_outputs is not None: + # Additional output scaling dict + additional_scaling_array = [] + for var in additional_outputs.keys(): + additional_scaling_array.append(additional_outputs[var]) + additional_scaling_array = np.array(additional_scaling_array, dtype = np.int16) + additional_names = list(additional_outputs.keys()) + field_name_to_index = {name: idx for idx, name in enumerate(additional_outputs)} + additional_idx = np.ascontiguousarray(np.array([field_name_to_index[var] for var in additional_names], dtype = np.int16)) + additional_names = List(additional_names) + write_additional_data = True + additional_outputs_data = None + else: + # Create empty variables for correct numba parsing + additional_scaling_array = np.array([0], dtype = np.int16) + additional_idx = np.array([0], dtype = np.int16) + additional_names = List(['0']) + additional_outputs_data = np.empty(shape = (1,1,1,1), dtype = np.int16) + write_additional_data = False + + return scaling_names, scaling_array, write_additional_data, additional_scaling_array, additional_names, additional_idx, additional_outputs_data \ No newline at end of file diff --git a/source/modspa_samir.py b/source/modspa_samir.py index 0530ede98bd3e82eeeb70408455cb3da3748e8ab..7f4e972cc561a46d92c152a756ad522bddfc0df1 100644 --- a/source/modspa_samir.py +++ b/source/modspa_samir.py @@ -14,142 +14,12 @@ import rioxarray # to better manage dataset projections from numba import njit, types, boolean, uint8, uint16, int16, float32, int64, set_num_threads # to compile functions in nopython mode for faster calculation from numba.typed import List #type: ignore, to type lists import numpy as np # for vectorized maths -import pandas as pd # to manage dataframes import netCDF4 as nc # to efficiently read and write netCDF files from tqdm import tqdm # to show a progress bar from psutil import virtual_memory # to check available ram from psutil import cpu_count # to get number of physical cores available from gc import collect # to free up unused memory -from modspa_pixel.parameters.params_samir_class import samir_parameters # to load SAMIR parameters -from modspa_pixel.source.code_toolbox import calculate_memory_requirement, calculate_x_slices, print_memory_prevision # to print memory requirements - - -def rasterize_samir_parameters(csv_param_file: str, land_cover_raster: str, irrigation_raster: str, init_RU_raster: str, x_slice: slice) -> tuple[np.ndarray, np.dtype]: - """ - Creates a dictionnary containing raster parameter values and the scale factors from the csv parameter file - and the land cover raster. For each parameter, the function loops on land cover classes to fill the raster. - - Before creating the dictionnary, it updates the parameter ``DataFrame`` and verifies the parameter range with - the ``test_samir_parameter()`` function. - - Arguments - ========= - - 1. csv_param_file: ``str`` - path to csv paramter file - 2. land_cover_raster: ``str`` - path to land cover netcdf raster - 3. irrigation_raster: ``str`` - path to irrigation mask raster - 4. init_RU_raster: ``str`` - path to initial soil water content. None if no input. - 5. x_slice: ``slice`` - x_slice to load - - Returns - ======= - - 1. 
diff --git a/source/modspa_samir.py b/source/modspa_samir.py
index 0530ede98bd3e82eeeb70408455cb3da3748e8ab..7f4e972cc561a46d92c152a756ad522bddfc0df1 100644
--- a/source/modspa_samir.py
+++ b/source/modspa_samir.py
@@ -14,142 +14,12 @@ import rioxarray # to better manage dataset projections
 from numba import njit, types, boolean, uint8, uint16, int16, float32, int64, set_num_threads # to compile functions in nopython mode for faster calculation
 from numba.typed import List #type: ignore, to type lists
 import numpy as np # for vectorized maths
-import pandas as pd # to manage dataframes
 import netCDF4 as nc # to efficiently read and write netCDF files
 from tqdm import tqdm # to show a progress bar
 from psutil import virtual_memory # to check available ram
 from psutil import cpu_count # to get number of physical cores available
 from gc import collect # to free up unused memory
-from modspa_pixel.parameters.params_samir_class import samir_parameters # to load SAMIR parameters
-from modspa_pixel.source.code_toolbox import calculate_memory_requirement, calculate_x_slices, print_memory_prevision # to print memory requirements
-
-
-def rasterize_samir_parameters(csv_param_file: str, land_cover_raster: str, irrigation_raster: str, init_RU_raster: str, x_slice: slice) -> tuple[np.ndarray, np.dtype]:
-    """
-    Creates a dictionnary containing raster parameter values and the scale factors from the csv parameter file
-    and the land cover raster. For each parameter, the function loops on land cover classes to fill the raster.
-
-    Before creating the dictionnary, it updates the parameter ``DataFrame`` and verifies the parameter range with
-    the ``test_samir_parameter()`` function.
-
-    Arguments
-    =========
-
-    1. csv_param_file: ``str``
-        path to csv paramter file
-    2. land_cover_raster: ``str``
-        path to land cover netcdf raster
-    3. irrigation_raster: ``str``
-        path to irrigation mask raster
-    4. init_RU_raster: ``str``
-        path to initial soil water content. None if no input.
-    5. x_slice: ``slice``
-        x_slice to load
-
-    Returns
-    =======
-
-    1. parameter_dict: ``Dict[str, np.ndarray]``
-        the dictionnary containing all the rasterized Parameters
-        and the scale factors
-
-    """
-
-    # Get path of min max csv file
-    min_max_param_file = os.path.join(os.path.dirname(csv_param_file), 'params_samir_min_max.csv')
-
-    # Load samir params into an object
-    table_param = samir_parameters(csv_param_file)
-
-    # Set general variables
-    class_count = table_param.table.shape[1] - 2 # remove dtype and Default columns
-
-    # Open land cover raster
-    land_cover = xr.open_dataarray(land_cover_raster).isel(x = x_slice).to_numpy()
-    shape = land_cover.shape
-
-    # Modify parameter table based on its content
-    table_param.test_samir_parameter(min_max_param_file)
-
-    # If test_samir_parameter returns 0, parameters are incorrect
-    if type(table_param.table) != pd.DataFrame:
-        import sys # to exit script
-        print(f'\nModify the SAMIR parameter file: {csv_param_file}\n')
-        sys.exit()
-
-    # Loop through parameters to count them
-    count = 0
-    for parameter in table_param.table.index[1:]:
-        if parameter == 'Irrig_auto' or parameter == 'Init_RU':
-            pass
-        else:
-            count+=1
-
-    parameter_array = np.zeros((count,) + shape, dtype = np.float32, order = 'C')
-    param_list = []
-
-    i = 0
-    # Loop on samir parameters and create
-    for parameter in table_param.table.index[1:]:
-
-        if parameter == 'Irrig_auto' and table_param.table.at[parameter, 'load_raster'] == 1:
-
-            # Load Irrig mask
-            Irrig_auto = np.ascontiguousarray(xr.open_dataarray(irrigation_raster, decode_coords = 'all').isel(x = x_slice).astype(np.bool_).values, dtype = np.bool_)
-
-            continue
-
-        elif parameter == 'Init_RU' and table_param.table.at[parameter, 'load_raster'] == 1:
-
-            # Load initial soil water content
-            Init_RU = np.ascontiguousarray((xr.open_dataarray(init_RU_raster) / 1000).isel(x = x_slice).astype(np.float32).values, dtype = np.float32)
-
-            continue
-
-        # If scale_factor == 0, then the parameter does not have to be spatialized, it can stay scalar
-        elif parameter == 'Kcmax':
-
-            param_list.append(parameter)
-
-            # Take Default parameter value
-            parameter_array[i] = np.ones(shape = shape, dtype = np.float32) * np.float32(table_param.table.at[parameter, 'Default'])
-            i+=1
-
-            continue
-
-        # Check data type based on parameter
-        if (parameter != 'Irrig_auto') and (parameter != 'Init_RU'):
-            dtype = np.float32
-            param_list.append(parameter)
-
-        # Create 2D array from land cover
-        value = land_cover.copy().astype(dtype)
-
-        # Loop on classes to set parameter values for each class
-        for class_val, class_name in zip(range(1, class_count + 1), table_param.table.columns[2:]):
-
-            # Get parameter value
-            param_value = table_param.table.at[parameter, class_name]
-
-            # Load parameter value for each class
-            value = np.where(land_cover == class_val, np.float32(param_value), value)
-
-        # Assign parameter value
-        if parameter == 'Irrig_auto':
-            Irrig_auto = value.astype(np.bool_)
-        elif parameter == 'Init_RU':
-            Init_RU = value.astype(np.float32)
-        else:
-            parameter_array[i] = value.astype(dtype)
-            i+=1
-
-    # Assure parameter array is contiguous in memory
-    parameter_array = np.ascontiguousarray(parameter_array, dtype = np.float32)
-
-    # Change param_list type
-    param_list = List(param_list)
-
-    return parameter_array, Irrig_auto, Init_RU, param_list
+from modspa_pixel.source.code_toolbox import calculate_memory_requirement, calculate_x_slices, print_memory_prevision, rasterize_samir_parameters, get_scaling_elements # import toolbox functions
 
 
 def prepare_output_dataset(ndvi_path: str, dimensions: dict[str, int], scaling_dict: dict[str, int] = {'E': 1000, 'Tr': 1000, 'SWCe': 1000, 'SWCr': 1000, 'DP': 100, 'Irr': 100}, additional_outputs: dict[str, int] = None) -> xr.Dataset:
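Review note: the function removed above is re-imported unchanged from `code_toolbox` (see the new import line). Its core per-class fill reduces to repeated `np.where` masking over the land cover grid; a toy standalone sketch with hypothetical class values (not taken from the parameter csv):

```python
import numpy as np

# Toy 2x3 land cover raster with classes 1 and 2
land_cover = np.array([[1, 2, 1],
                       [2, 2, 1]], dtype=np.float32)

# Hypothetical parameter value per land cover class
class_values = {1: 0.3, 2: 1.15}

value = land_cover.copy()
for class_val, param_value in class_values.items():
    # Pixels belonging to this class take the class parameter value
    value = np.where(land_cover == class_val, np.float32(param_value), value)

print(value)  # [[0.3  1.15 0.3 ]
              #  [1.15 1.15 0.3 ]]
```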
@@ -1253,32 +1123,7 @@ def run_samir(csv_param_file: str, ndvi_cube_path: str, weather_path: str, soil_
     model_outputs.close()
 
     # ============ Scaling factors ============ #
-    # Standard scaling dict
-    scaling_array = []
-    for var in scaling_dict.keys():
-        scaling_array.append(scaling_dict[var])
-    scaling_array = np.ascontiguousarray(np.array(scaling_array, dtype = np.int16))
-    scaling_names = List(list(scaling_dict.keys()))
-
-    # Manage additional output parameters
-    if additional_outputs is not None:
-        # Additional output scaling dict
-        additional_scaling_array = []
-        for var in additional_outputs.keys():
-            additional_scaling_array.append(additional_outputs[var])
-        additional_scaling_array = np.array(additional_scaling_array, dtype = np.int16)
-        additional_names = list(additional_outputs.keys())
-        field_name_to_index = {name: idx for idx, name in enumerate(additional_outputs)}
-        additional_idx = np.ascontiguousarray(np.array([field_name_to_index[var] for var in additional_names], dtype = np.int16))
-        additional_names = List(additional_names)
-        write_additional_data = True
-    else:
-        # Create empty variables for correct numba parsing
-        additional_scaling_array = np.array([0], dtype = np.int16)
-        additional_idx = np.array([0], dtype = np.int16)
-        additional_names = List(['0'])
-        additional_outputs_data = np.empty(shape = (1,1,1,1), dtype = np.int16)
-        write_additional_data = False
+    scaling_names, scaling_array, write_additional_data, additional_scaling_array, additional_names, additional_idx, additional_outputs_data = get_scaling_elements(scaling_dict, additional_outputs)
 
     # ============ Begining of x chunk loops ============ #
     # Create progress bar
     progress_bar = tqdm(total = x_size * y_size * time_size, desc = '', unit = ' it', unit_scale = True)
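Review note: the `List(...)` conversions kept in both files exist because numba's nopython mode cannot receive plain Python lists of strings (its legacy reflected lists do not support unicode elements). A minimal illustration of the constraint, assuming a recent numba release (toy function, not part of the model):

```python
from numba import njit
from numba.typed import List

@njit
def first_name_length(names):
    # nopython mode needs a numba typed List for sequences of strings
    return len(names[0])

typed_names = List(['SWCe', 'DP'])     # homogeneous typed list of unicode strings
print(first_name_length(typed_names))  # 4
# Passing a plain ['SWCe', 'DP'] instead would raise a numba TypingError
```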