Commit b6d2738b authored by Jeremy Auclair

Various modifications in the preprocessing code: improved data chunking, corrected bugs in the weather dataset download, small improvements in script speed. Added a notebook to help prepare land cover and soil data for pixel mode.
parent 6117b8db
{
"_comment": "Sart date of the period on which the model will run",
"start_date": "2020-01-01",
"start_date": "2019-01-01",
"_comment": "End date of the period on which the model will run",
"end_date": "2020-12-31",
"end_date": "2019-12-31",
"_comment": "Path to eodag configuration file for sentinel-2 image download",
"path_to_eodag_config_file": "/home/auclairj/.config/eodag/eodag.yml",
"_comment": "Path to the shapefile to run the model on",
"shapefile_path": "/mnt/e/DATA/SCIHUB/boundary.shp",
"shapefile_path": "/home/auclairj/notebooks/Shapefiles/Aurade_tests/aurade_square.shp",
"_comment": "Path to the directory on which the satellite image data will be downloaded",
"download_path": "/mnt/e/DATA",
@@ -17,12 +17,21 @@
"_comment": "output path for netcdf era5 files (Weather)",
"era5_path": "/mnt/e/DATA/WEATHER",
"_comment": "output path for output netcdf files (outpus will be stored in a subdirectory carrying the run_name)",
"output_path": "/mnt/e/DATA/OUTPUT",
"_comment": "Name of the current run, all output files will be saved under a subdirectory of Saves/ with that name, log file will also have that name",
"run_name": "TEST",
"run_name": "Aurade_test",
"_comment": "Prefered S2 data provider, choices = theia, copernicus",
"preferred_provider": "copernicus",
"_comment": "Choose between parcel or pixel mode for the run. You need a parcel shapefile for the parcel mode, and a boundary shapefile for the pixel mode.",
"mode": "pixel",
"_comment": "Resolution in meters in which to run the processing chain. Choice is either 10 or 20.",
"resolution": 10,
"_comment": "Overwrite NDVI images or not (set to true if you want the code to rewrite NDVI images, takes longer)",
"ndvi_overwrite": false,
@@ -34,6 +34,7 @@
│ ├── xls_NDVI_1000.nc
│ └── ndvi_S2.csv
├── test_samir_dask.py
├── extracted_S2.csv
├── tests.py
├── modspa_pixel_env.yml
├── source
@@ -52,6 +53,7 @@
│ ├── parcel_to_pixel.py
│ ├── lib_era5_land_pixel.py
│ ├── calculate_ndvi.py
│ ├── custom_inputs.ipynb
│ ├── input_toolbox.py
│ └── download_ERA5_weather.py
├── test_samir.py
@@ -25,35 +25,27 @@ from modspa_pixel.config.config import config # to import config file
from modspa_pixel.preprocessing.input_toolbox import product_str_to_datetime, read_product_info
def calculate_ndvi(extracted_paths: Union[List[str], str], save_dir: str, boundary_shapefile_path: str, resolution: int = 20, chunk_size: dict = {'x': 4096, 'y': 4096, 'time': 2}, acorvi_corr: int = 500) -> str:
def calculate_ndvi(extracted_paths: Union[List[str], str], config_file: str, chunk_size: dict = {'x': 1000, 'y': 1000, 'time': -1}, acorvi_corr: int = 500) -> str:
"""
Calculate NDVI images in an xarray dataset (a data cube) and save it.
NDVI values are scaled and saved as ``uint8`` (0 to 255).
.. warning:: Current version for Copernicus Sentinel-2 images
.. warning:: only 10 and 20 meters currently supported
Arguments
=========
1. extracted_paths: ``Union[List[str], str]``
list of paths to extracted sentinel-2 products
or path to ``csv`` file containing those paths
2. save_dir: ``str``
directory in which to save the ndvi pre-cube
3. boundary_shapefile_path: ``str``
shapefile path to save the geographical bounds
of the ndvi tile, used later to download weather
products
4. resolution: ``int`` ``default = 20``
resolution in meters
.. warning:: only 10 and 20 meters currently supported
5. chunk_size: ``dict`` ``default = {'x': 4096, 'y': 4096, 'time': 2}``
2. config_file: ``str``
path to configuration file
3. chunk_size: ``dict`` ``default = {'x': 1000, 'y': 1000, 'time': -1}``
dictionary containing the chunk size for
the xarray dask calculation
6. acorvi_corr: ``int`` ``default = 500``
4. acorvi_corr: ``int`` ``default = 500``
acorvi correction parameter to add to the red band
to adjust ndvi values
@@ -64,6 +56,18 @@ def calculate_ndvi(extracted_paths: Union[List[str], str], save_dir: str, bounda
path to save the ndvi pre-cube
"""
# Open config_file
config_params = config(config_file)
# Load parameters from config file
boundary_shapefile_path = config_params.shapefile_path
run_name = config_params.run_name
resolution = config_params.resolution
if config_params.preferred_provider == 'copernicus':
save_dir = config_params.download_path + os.sep + 'SCIHUB' + os.sep + 'NDVI'
else:
save_dir = config_params.download_path + os.sep + 'THEIA' + os.sep + 'NDVI'
# Check resolution for Sentinel-2
if not resolution in [10, 20]:
print('Resolution should be equal to 10 or 20 meters for sentinel-2')
@@ -83,30 +87,22 @@ def calculate_ndvi(extracted_paths: Union[List[str], str], save_dir: str, bounda
mask_paths = []
if resolution == 10:
for product in extracted_paths:
if fnmatch(product, '*_B04_10m*'):
red_paths.append(product)
elif fnmatch(product, '*_B08_10m*'):
nir_paths.append(product)
elif fnmatch(product, '*_SCL_20m*'):
mask_paths.append(product)
for file in os.listdir(product):
if fnmatch(file, '*_B04_10m*'):
red_paths.append(product + os.sep + file)
elif fnmatch(file, '*_B08_10m*'):
nir_paths.append(product + os.sep + file)
elif fnmatch(file, '*_SCL_20m*'):
mask_paths.append(product + os.sep + file)
else:
for product in extracted_paths:
if fnmatch(product, '*_B04_20m*'):
red_paths.append(product)
elif fnmatch(product, '*_B08_10m*'):
nir_paths.append(product)
elif fnmatch(product, '*_SCL_20m*'):
mask_paths.append(product)
# Create boundary shapefile from Sentinel-2 image for weather download
ra = rio.open(red_paths[0])
bounds = ra.bounds
geom = box(*bounds)
df = gpd.GeoDataFrame({"id":1,"geometry":[geom]})
df.crs = ra.crs
df.geometry = df.geometry.to_crs('epsg:4326')
df.to_file(boundary_shapefile_path)
del df, ra, geom, bounds
for file in os.listdir(product):
if fnmatch(file, '*_B04_20m*'):
red_paths.append(product + os.sep + file)
elif fnmatch(file, '*_B08_10m*'):
nir_paths.append(product + os.sep + file)
elif fnmatch(file, '*_SCL_20m*'):
mask_paths.append(product + os.sep + file)
# Sort and get dates
red_paths.sort()
@@ -114,10 +110,20 @@ def calculate_ndvi(extracted_paths: Union[List[str], str], save_dir: str, bounda
mask_paths.sort()
dates = [product_str_to_datetime(prod) for prod in red_paths]
# Open datasets with xarray
red = xr.open_mfdataset(red_paths, combine = 'nested', concat_dim = 'time', chunks = chunk_size, parallel = True).squeeze(dim = ['band'], drop = True).rename({'band_data': 'red'})
nir = xr.open_mfdataset(nir_paths, combine = 'nested', concat_dim = 'time', chunks = chunk_size, parallel = True).squeeze(dim = ['band'], drop = True).rename({'band_data': 'nir'})
mask = xr.open_mfdataset(mask_paths, combine = 'nested', concat_dim = 'time', chunks = chunk_size, parallel = True).squeeze(dim = ['band'], drop = True).rename({'band_data': 'mask'})
# Get crs
with rio.open(red_paths[0]) as temp:
crs = temp.crs
# Open shapefile to clip the dataset
shapefile = gpd.read_file(boundary_shapefile_path)
shapefile = shapefile.to_crs(crs)
bounds = shapefile.bounds.values[0]
# Open datasets with xarray, select only data inside the bounds of the input shapefile
red = xr.open_mfdataset(red_paths, combine = 'nested', concat_dim = 'time', chunks = chunk_size, parallel = True).squeeze(dim = ['band'], drop = True).rename({'band_data': 'red'}).sel(x = slice(bounds[0], bounds[2]), y = slice(bounds[3], bounds[1]))
nir = xr.open_mfdataset(nir_paths, combine = 'nested', concat_dim = 'time', chunks = chunk_size, parallel = True).squeeze(dim = ['band'], drop = True).rename({'band_data': 'nir'}).sel(x = slice(bounds[0], bounds[2]), y = slice(bounds[3], bounds[1]))
mask = xr.open_mfdataset(mask_paths, combine = 'nested', concat_dim = 'time', chunks = chunk_size, parallel = True).squeeze(dim = ['band'], drop = True).rename({'band_data': 'mask'}).sel(x = slice(bounds[0], bounds[2]), y = slice(bounds[3], bounds[1]))
if resolution == 10:
mask = xr.where((mask == 4) | (mask == 5), 1, 0).interp(x = red.coords['x'], y = red.coords['y'], method = 'nearest')
else:
@@ -129,6 +135,10 @@ def calculate_ndvi(extracted_paths: Union[List[str], str], save_dir: str, bounda
nir['time'] = pd.to_datetime(dates)
mask['time'] = pd.to_datetime(dates)
# Save single red band as reference tif for later use in reprojection algorithms
ref = xr.open_dataset(red_paths[0]).squeeze(dim = ['band'], drop = True).rename({'band_data': 'red'}).sel(x = slice(bounds[0], bounds[2]), y = slice(bounds[3], bounds[1])).rio.write_crs(crs)
ref.rio.to_raster(save_dir + os.sep + config_params.run_name + '_grid_reference.tif')
# Create ndvi dataset and calculate ndvi
ndvi = red
ndvi = ndvi.drop('red')
@@ -147,16 +157,17 @@ def calculate_ndvi(extracted_paths: Union[List[str], str], save_dir: str, bounda
ndvi['NDVI'].attrs['scale factor'] = '255'
# Create save path
ndvi_cube_path = save_dir + os.sep + 'NDVI_precube_' + dates[0].strftime('%d-%m-%Y') + '_' + dates[-1].strftime('%d-%m-%Y') + '.nc'
ndvi_cube_path = save_dir + os.sep + run_name + '_NDVI_precube_' + dates[0].strftime('%d-%m-%Y') + '_' + dates[-1].strftime('%d-%m-%Y') + '.nc'
# Save NDVI cube to netcdf
ndvi.to_netcdf(ndvi_cube_path, encoding = {"NDVI": {"dtype": "u1", "_FillValue": 0, "chunksizes": (4, 1024, 1024)}}) #, 'zlib': True, "complevel": 4}})
file_chunksize = (ndvi.dims['time'], 500, 500)
ndvi.to_netcdf(ndvi_cube_path, encoding = {"NDVI": {"dtype": "u1", "_FillValue": 0, "chunksizes": file_chunksize}})
ndvi.close()
return ndvi_cube_path
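
As a side note, the NDVI computation and ``uint8`` scaling performed above can be sketched on plain arrays as follows. This is a simplified, hypothetical helper (the actual function works on the full xarray cube), assuming the ACORVI correction is added to the red band in both numerator and denominator:

import numpy as np

def ndvi_to_uint8(red: np.ndarray, nir: np.ndarray, acorvi_corr: int = 500) -> np.ndarray:
    # Add the ACORVI correction to the red band, then compute the classic NDVI ratio
    ndvi = (nir - (red + acorvi_corr)) / (nir + (red + acorvi_corr))
    # Clip to [0, 1] and scale to 0..255 so the cube can be stored as uint8 (0 is kept as the fill value)
    return np.round(np.clip(ndvi, 0, 1) * 255).astype(np.uint8)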
def interpolate_ndvi(ndvi_path: str, save_dir: str, config_file: str, chunk_size: dict = {'x': 512, 'y': 512, 'time': -1}) -> str:
def interpolate_ndvi(ndvi_path: str, config_file: str, chunk_size: dict = {'x': 500, 'y': 500, 'time': -1}) -> str:
"""
Interpolate the ndvi cube to a daily frequency between the
desired dates defined in the ``json`` config file.
@@ -164,13 +175,11 @@ def interpolate_ndvi(ndvi_path: str, save_dir: str, config_file: str, chunk_size
Arguments
=========
ndvi_path: ``str``
1. ndvi_path: ``str``
path to ndvi pre-cube
save_dir: ``str``
path to save interpolated ndvi cube
config_file: ``str``
2. config_file: ``str``
path to ``json`` config file
chunk_size: ``dict`` ``default = {'x': 512, 'y': 512, 'time': -1}``
3. chunk_size: ``dict`` ``default = {'x': 500, 'y': 500, 'time': -1}``
chunk size to use by dask for calculation,
``'time' = -1`` means the chunk has the whole
time dimension in it. The Dataset can't be
@@ -185,6 +194,13 @@ def interpolate_ndvi(ndvi_path: str, save_dir: str, config_file: str, chunk_size
# Open config_file
config_params = config(config_file)
# Load parameters from config file
run_name = config_params.run_name
if config_params.preferred_provider == 'copernicus':
save_dir = config_params.download_path + os.sep + 'SCIHUB' + os.sep + 'NDVI'
else:
save_dir = config_params.download_path + os.sep + 'THEIA' + os.sep + 'NDVI'
# Open NDVI pre-cube
ndvi = xr.open_dataset(ndvi_path, chunks = chunk_size)
@@ -213,10 +229,14 @@ def interpolate_ndvi(ndvi_path: str, save_dir: str, config_file: str, chunk_size
ndvi['spatial_ref'] = spatial_ref
# Create save path
ndvi_cube_path = save_dir + os.sep + 'NDVI_cube_' + dates[0].strftime('%d-%m-%Y') + '_' + dates[-1].strftime('%d-%m-%Y') + '.nc'
ndvi_cube_path = save_dir + os.sep + run_name + '_NDVI_cube_' + dates[0].strftime('%d-%m-%Y') + '_' + dates[-1].strftime('%d-%m-%Y') + '.nc'
# Save NDVI cube to netcdf
ndvi.to_netcdf(ndvi_cube_path, encoding = {"NDVI": {"dtype": "u1", "_FillValue": 0, "chunksizes": (4, 1024, 1024)}}) #, 'zlib': True, "complevel": 4}})
if ndvi.dims['y'] > 1500 and ndvi.dims['x'] > 1500:
file_chunksize = (1, 1000, 1000)
else:
file_chunksize = (1, ndvi.dims['y'], ndvi.dims['x'])
ndvi.to_netcdf(ndvi_cube_path, encoding = {"NDVI": {"dtype": "u1", "_FillValue": 0, "chunksizes": file_chunksize}})
ndvi.close()
return ndvi_cube_path
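
The daily interpolation done here can be sketched directly with xarray (a minimal sketch: the file names are hypothetical and only follow the naming scheme above, and the real function also restricts the result to the start/end dates from the config file and writes the chunked encoding shown above):

import xarray as xr

ndvi = xr.open_dataset('Aurade_test_NDVI_precube_01-01-2019_31-12-2019.nc',
                       chunks = {'x': 500, 'y': 500, 'time': -1})
# Resample the irregular Sentinel-2 acquisition dates to a daily time step
ndvi_daily = ndvi.resample(time = '1D').interpolate('linear')
ndvi_daily.to_netcdf('Aurade_test_NDVI_cube_01-01-2019_31-12-2019.nc',
                     encoding = {'NDVI': {'dtype': 'u1', '_FillValue': 0}})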
@@ -17,6 +17,7 @@ import csv # for loading and saving path results in csv format
import zipfile as zp # to open zip archives
from tqdm import tqdm # to print progress bars during code execution
from fnmatch import fnmatch # for character string comparison
from modspa_pixel.preprocessing.input_toolbox import read_product_info
def download_S2_data(start_date: str, end_date: str, preferred_provider: str, save_path: str, shapefile: str, mode: str = 'pixel', cloud_cover_limit: int = 80) -> List[str]:
@@ -91,6 +92,28 @@ def download_S2_data(start_date: str, end_date: str, preferred_provider: str, sa
# Filter products that have more clouds than desired
products_to_download = all_products.filter_property(cloudCover = cloud_cover_limit, operator = 'lt')
# Choose only one tile if pixel mode
if mode == 'pixel':
tiles = []
for product in products_to_download:
_, tile, _, _ = read_product_info(product.properties['title'])
if tile not in tiles:
tiles.append(tile)
if len(tiles) > 1:
tile_index = int(input(f'\nMultiple tiles cover your shapefile ({tiles}), which one do you want to choose? Type in the index from 0 to {len(tiles) - 1}'))
chosen_tile = tiles[tile_index]
print(f'\nChosen tile: {chosen_tile}\n')
for product in products_to_download:
_, tile, _, _ = read_product_info(product.properties['title'])
if not tile == chosen_tile:
products_to_download.remove(product)
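# Note: removing elements from the collection while iterating over it can skip products.
# A safer alternative (a sketch, assuming products_to_download behaves like a list of
# eodag products) would be to rebuild the collection instead:
# products_to_download = [p for p in products_to_download
#                         if read_product_info(p.properties['title'])[1] == chosen_tile]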
# Download filtered products
product_paths = dag.download_all(products_to_download, extract = False) # No archive extraction
product_paths.sort()
@@ -143,6 +166,8 @@ def extract_zip_archives(download_path: str, list_paths: Union[List[str], str],
# Final product list
product_list = []
# Create progress bar
print('')
progress_bar = tqdm(total = len(list_paths))
for file_path in list_paths:
@@ -438,7 +438,7 @@ def interpolate_ndvi_feature(args: tuple) -> pd.DataFrame:
return interpolated_id
def interpolate_ndvi(ndvi_dataframe: Union[pd.DataFrame, str], save_path: str, start_date: str, end_date: str, interp_method: str = 'linear', threshold_ratio_deriv: int = 25, threshold_median: float = 0.1, median_window: int = 3, max_cpu: int = 4) -> pd.DataFrame:
def interpolate_ndvi_parcel(ndvi_dataframe: Union[pd.DataFrame, str], save_path: str, start_date: str, end_date: str, interp_method: str = 'linear', threshold_ratio_deriv: int = 25, threshold_median: float = 0.1, median_window: int = 3, max_cpu: int = 4) -> pd.DataFrame:
"""
interpolate_ndvi_parcel takes in the filtered NDVI ``DataFrame`` and builds a ``DataFrame`` with daily values
between the first and last dates and fills in the missing values with the chosen interpolation
@@ -6,13 +6,15 @@ Contains functions to facilitate code workflow, directory creation, Notebook rea
@author: jeremy auclair
"""
import os # for path management
from typing import List, Tuple # to declare argument types
import numpy as np # vectorized math
from fnmatch import fnmatch # to match character strings
import re # for character string search
import datetime # for date management
from scipy.signal import medfilt # scipy median filter
from numba import jit, njit
from numba import jit, njit # to compile functions for faster execution
from modspa_pixel.config.config import config # to import config file
def product_str_to_datetime(product_name: str) -> datetime.date:
@@ -162,6 +164,74 @@ def read_product_info(path_to_product: str) -> Tuple[datetime.date, str, str, st
return date, tile, provider, satellite
def prepare_directories(config_file: str) -> None:
"""
Creates the directories for the data inputs and outputs.
Arguments
=========
1. config_file: ``str``
path to the ``json`` config file from which the directory paths are read
Returns
=======
``None``
"""
# Open config file
config_params = config(config_file)
# Get parameters
download_path = config_params.download_path
era5_path = config_params.era5_path
run_name = config_params.run_name
output_path = config_params.output_path
# Create all the directories for downloaded data if they do not exist.
if not os.path.exists(download_path):
os.mkdir(download_path)
# Path for scihub data
scihub_path = download_path + os.sep + 'SCIHUB'
if not os.path.exists(scihub_path):
os.mkdir(scihub_path)
ndvi_scihub_path = scihub_path + os.sep + 'NDVI'
if not os.path.exists(ndvi_scihub_path):
os.mkdir(ndvi_scihub_path)
# Path for theia data
theia_path = download_path + os.sep + 'THEIA'
if not os.path.exists(theia_path):
os.mkdir(theia_path)
ndvi_theia_path = theia_path + os.sep + 'NDVI'
if not os.path.exists(ndvi_theia_path):
os.mkdir(ndvi_theia_path)
# Path for usgs data
usgs_path = download_path + os.sep + 'USGS'
if not os.path.exists(usgs_path):
os.mkdir(usgs_path)
ndvi_usgs_path = usgs_path + os.sep + 'NDVI'
if not os.path.exists(ndvi_usgs_path):
os.mkdir(ndvi_usgs_path)
# Create weather data directories
if not os.path.exists(era5_path):
os.mkdir(era5_path)
if not os.path.exists(era5_path + os.sep + run_name):
os.mkdir(era5_path + os.sep + run_name)
# Create output data directories
if not os.path.exists(output_path):
os.mkdir(output_path)
if not os.path.exists(output_path + os.sep + run_name):
os.mkdir(output_path + os.sep + run_name)
return None
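
As an aside, the same directory tree can be created more compactly with ``os.makedirs`` (an equivalent sketch, reusing the paths defined in the function above):

for path in [ndvi_scihub_path, ndvi_theia_path, ndvi_usgs_path,
             era5_path + os.sep + run_name, output_path + os.sep + run_name]:
    os.makedirs(path, exist_ok = True)  # creates parent directories as needed, no error if they already exist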
@jit(nopython = False, forceobj = True)
def intersection(list1: list, list2: list) -> list:
"""
@@ -69,20 +69,20 @@ def era5_enclosing_shp_aera(area: List[float], pas: float) -> Tuple[float, float
.. note::
gdal coordinates reference upper left corner of pixel, ERA5 coordinates refere to center of grid. To resolve this difference an offset of pas/2 is apply
GDAL coordinates reference the upper left corner of a pixel, while ERA5 coordinates refer to the center of the grid cell. To resolve this difference an offset of pas/2 is applied
"""
lat_max, lon_min, lat_min, lon_max = area
# North
era5_lat_max = round((lat_max//pas+1)*pas, 2)
era5_lat_max = round((lat_max//pas+2)*pas, 2)
# West
era5_lon_min = round((lon_min//pas)*pas, 2)
# South
era5_lat_min = round((lat_min//pas)*pas, 2)
# East
era5_lon_max = round((lon_max//pas+1)*pas, 2)
era5_lon_max = round((lon_max//pas+2)*pas, 2)
era5_area = era5_lat_max, era5_lon_min, era5_lat_min, era5_lon_max
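
For example (a quick check, assuming the function returns era5_area as built above), with area = [44.0, 1.0, 43.5, 1.5] and pas = 0.25 the enclosing ERA5 area becomes:

era5_enclosing_shp_aera([44.0, 1.0, 43.5, 1.5], 0.25)
# lat_max: (44.0 // 0.25 + 2) * 0.25 = 44.5
# lon_min: (1.0  // 0.25) * 0.25     = 1.0
# lat_min: (43.5 // 0.25) * 0.25     = 43.5
# lon_max: (1.5  // 0.25 + 2) * 0.25 = 2.0
# -> (44.5, 1.0, 43.5, 2.0)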
@@ -448,7 +448,7 @@ def combine_weather2netcdf(rain_file: str, ET0_tile: str, ndvi_path: str, save_p
ndvi = xr.open_dataset(ndvi_path)
# Create empty dataset with same structure
weather = ndvi.drop_vars(['ndvi']).copy(deep = True)
weather = ndvi.drop_vars(['NDVI']).copy(deep = True)
weather['Rain'] = (ndvi.dims, np.zeros(tuple(ndvi.dims[d] for d in list(ndvi.dims)), dtype = np.uint16))
weather['Rain'].attrs['units'] = 'mm'
@@ -468,8 +468,12 @@ def combine_weather2netcdf(rain_file: str, ET0_tile: str, ndvi_path: str, save_p
encoding_dict = {}
encod = {}
encod['dtype'] = 'u2'
# encod['zlib'] = True
# encod['complevel'] = 4
encod['_FillValue'] = 0
file_chunksize = (1, ndvi.dims['y'], ndvi.dims['x'])
encod['chunksizes'] = file_chunksize
# TODO: check if compression affects calculation speed
encod['zlib'] = True
encod['complevel'] = 1
encoding_dict[variable] = encod
# Save empty output
@@ -487,9 +491,9 @@ def combine_weather2netcdf(rain_file: str, ET0_tile: str, ndvi_path: str, save_p
security_factor = 0.8 # it is difficult to estimate true memory usage with compression algorithms, apply a security factor to prevent memory overload
# Get the number of time bands that can be loaded at once
time_slice, remainder, already_loaded = calculate_time_slices_to_load(memory_requirement, dims[0], security_factor, available_ram)
time_slice, remainder, already_written = calculate_time_slices_to_load(memory_requirement, dims[0], security_factor, available_ram)
print('\nApproximate memory requirement of conversion:', round(memory_requirement, 3), 'GiB\nAvailable memory:', available_ram, 'GiB\n\nLoading', time_slice, 'time slices at a time.\n' )
print('\nApproximate memory requirement of conversion:', round(memory_requirement, 3), 'GiB\nAvailable memory:', available_ram, 'GiB\n\nLoading blocks of', time_slice, 'time bands.\n')
# Open empty dataset
weather = nc.Dataset(save_path, mode = 'r+')
@@ -500,13 +504,13 @@ def combine_weather2netcdf(rain_file: str, ET0_tile: str, ndvi_path: str, save_p
# Data variables
for i in range(dims[0]):
if time_slice == dims[0] and not already_loaded: # if whole dataset fits in memory and it has not already been loaded
if time_slice == dims[0] and not already_written: # if whole dataset fits in memory and it has not already been loaded
weather.variables['Rain'][:,:,:] = rain_tif.read()
weather.variables['ET0'][:,:,:] = ET0_tif.read()
already_loaded = True
already_written = True
elif i % time_slice == 0: # load a time slice every time i is divisible by the size of the time slice
elif i % time_slice == 0 and not already_written: # load a time slice every time i is divisible by the size of the time slice
if i + time_slice <= dims[0]: # if the time slice does not go over the dataset size
weather.variables['Rain'][i: i + time_slice, :, :] = rain_tif.read(tuple(k+1 for k in range(i, i + time_slice)))
@@ -687,6 +691,7 @@ def era5Land_daily_to_yearly_pixel(list_era5land_files: List[str], output_file:
raw_weather_ds = raw_weather_ds.assign(ET0 = (raw_weather_ds.dims, np.zeros(tuple(raw_weather_ds.dims[d] for d in list(raw_weather_ds.dims)), dtype = 'float64')))
# Loop on latitude and longitude coordinates to calculate ET0 per "pixel"
# Fast enough for small datasets (low resolution)
for lat in raw_weather_ds.coords['lat'].values:
for lon in raw_weather_ds.coords['lon'].values:
# Select whole time period for given (lat, lon) values
@@ -732,8 +737,8 @@ def era5Land_daily_to_yearly_pixel(list_era5land_files: List[str], output_file:
output_file_final = output_file + '.nc'
# otbcli_SuperImpose commands
OTB_command_reproj1 = 'otbcli_Superimpose -inr ' + raw_S2_image_ref + ' -inm ' + output_file_rain + ' -out ' + output_file_rain_reproj + ' uint16 -interpolator nn -ram ' + str(int(max_ram * 1024/2))
OTB_command_reproj2 = 'otbcli_Superimpose -inr ' + raw_S2_image_ref + ' -inm ' + output_file_ET0 + ' -out ' + output_file_ET0_reproj + ' uint16 -interpolator nn -ram ' + str(int(max_ram * 1024/2))
OTB_command_reproj1 = 'otbcli_Superimpose -inr ' + raw_S2_image_ref + ' -inm ' + output_file_rain + ' -out ' + output_file_rain_reproj + ' uint16 -interpolator linear -ram ' + str(int(max_ram * 1024/2))
OTB_command_reproj2 = 'otbcli_Superimpose -inr ' + raw_S2_image_ref + ' -inm ' + output_file_ET0 + ' -out ' + output_file_ET0_reproj + ' uint16 -interpolator linear -ram ' + str(int(max_ram * 1024/2))
commands_reproj = [OTB_command_reproj1, OTB_command_reproj2]
with Pool(2) as p:
@@ -707,7 +707,7 @@ def read_inputs(ndvi_cube_path: str, weather_path: str, i: int, time_slice: int,
if load_all:
with nc.Dataset(ndvi_cube_path, mode='r') as ds:
# Dimensions of ndvi dataset : (time, x, y)
# Dimensions of ndvi dataset : (time, y, x)
NDVI = np.asarray(ds.variables['NDVI'][:, :, :] / 255, dtype = np.float32)
with nc.Dataset(weather_path, mode='r') as ds:
# Dimensions of ndvi dataset : (time, x, y)
@@ -718,7 +718,7 @@ def read_inputs(ndvi_cube_path: str, weather_path: str, i: int, time_slice: int,
else:
with nc.Dataset(ndvi_cube_path, mode='r') as ds:
# Dimensions of ndvi dataset : (time, x, y)
# Dimensions of ndvi dataset : (time, y, x)
NDVI = np.asarray(ds.variables['NDVI'][i: i + time_slice, :, :] / 255, dtype = np.float32)
with nc.Dataset(weather_path, mode='r') as ds:
# Dimensions of ndvi dataset : (time, x, y)
@@ -1028,7 +1028,7 @@ def samir_daily(NDVI: np.ndarray, ET0: np.ndarray, Rain: np.ndarray, Wfc: np.nda
# @profile # type: ignore
def run_samir(json_config_file: str, csv_param_file: str, ndvi_cube_path: str, weather_path: str, soil_params_path: str, land_cover_path: str, save_path: str, additional_outputs: dict = None, available_ram: int = 8, dask: bool = False) -> None:
def run_samir(json_config_file: str, csv_param_file: str, ndvi_cube_path: str, weather_path: str, soil_params_path: str, land_cover_path: str, save_path: str, additional_outputs: dict = None, available_ram: int = 8) -> None:
# Turn off numpy warnings
np.seterr(divide='ignore', invalid='ignore')
@@ -1086,9 +1086,8 @@ def run_samir(json_config_file: str, csv_param_file: str, ndvi_cube_path: str, w
# Write encoding dict
encod = {}
encod['dtype'] = 'i2'
# TODO: check if order of chunk size is correct
encod['chunksizes'] = (x_size, y_size, time_slice)
encod['zlib'] = True
encod['complevel'] = 3
encoding_dict[variable] = encod
# Save empty output