Skip to content
Snippets Groups Projects
Select Git revision
  • 5858df8ff096a4001afa6a0a4ef1580878ad4aee
  • main default protected
  • multiannot
  • syntegroup
  • inferences
  • 1.0.2
  • 1.0.1
  • 1.0.0
8 results

Functions.py

Blame
  • dev_samir_xarray.ipynb 720.04 KiB

    Build a validation ndvi and weather dataset

    In [1]:
    """
    Notebook for development and testing of the SAMIR Pixel model.
    """
    
    import xarray as xr
    import os
    import pandas as pd
    import numpy as np
    from tqdm import tqdm
    
    data_path = '/mnt/e/DATA/DEV_inputs_test'
    # data_path = './DEV_inputs_test'
    
    size = 10
    
    # Original sets
    ndvi_path = data_path + os.sep + 'ndvi_' + str(size) + '.nc'
    
    # Validation sets
    val_ndvi_path = data_path + os.sep + 'xls_NDVI_' + str(size) + '.nc'
    val_weather_path = data_path + os.sep + 'xls_weather_' + str(size) + '.nc'
    
    val_outputs = data_path + os.sep + 'xls_outputs_' + str(size) + '.nc'
    
    # Modspa excel file
    xls_file_path = '/home/auclairj/GIT/modspa_pixel/SAMIR_xls/SAMIRpixel_Reference_Simonneaux2012.xls'
    
    # Scaling dict
    additional_outputs = {'Zr': 10, 'Dei': 100, 'Dep': 100, 'Dr': 100, 'Dd': 100, 'Kei': 1e4, 'Kep': 1e4, 'Ks': 1e4, 'W': 1e4, 'Kcb': 1e4, 'fewi': 1e4, 'fewp': 1e4, 'TDW': 100, 'TAW': 100, 'FCov': 1e4, 'Tei': 1000, 'Tep': 1000, 'Diff_rei': 1e4, 'Diff_rep': 1e4, 'Diff_dr': 1e4}
    
    additional_outputs = {'Dr': 100, 'Dd': 100}
    # additional_outputs = {}
    
    normal_outputs = {'E': 1000, 'Tr': 1000, 'SWCe': 1000, 'SWCr': 1000, 'DP': 100, 'Irr': 100, 'ET0': 1000}
    
    additional_outputs.update(normal_outputs)
    In [1]:
    # Get input data
    modspa_excel = pd.read_excel(xls_file_path, sheet_name=0, header=10, index_col=0)
    modspa_excel = modspa_excel.loc[:, ~modspa_excel.columns.str.contains('^Unnamed')]
    
    # Dates
    dates = modspa_excel.index
    
    # Open empty dataset to get structure and reindex with correct dates
    empty_dataset = xr.open_dataset(ndvi_path)
    empty_dataset = empty_dataset.reindex(time = dates)
    
    # Transpose dimensions
    empty_dataset = empty_dataset.transpose('time', 'y', 'x')
    
    # Get the numpy array for 'ndvi'
    zero_values = empty_dataset['ndvi'].values
    
    # Transpose the numpy array for 'ndvi'
    zero_values = zero_values.transpose([0,2,1])
    empty_dataset['ndvi'] = empty_dataset.ndvi.transpose('time', 'y', 'x')
    
    # Assign the transposed numpy array back to 'ndvi'
    empty_dataset.ndvi.values = zero_values
    
    # Drop ndvi to get empty dataset
    empty_dataset = empty_dataset.drop_vars('ndvi')
    
    # Datasets
    ndvi_val = empty_dataset.copy(deep = True)
    weather_val = empty_dataset.copy(deep = True)
    outputs_val = empty_dataset.copy(deep = True)
    
    # Inputs
    ndvi_val['NDVI'] = (empty_dataset.dims, np.ones(tuple(empty_dataset.dims[d] for d in list(empty_dataset.dims)), dtype = 'uint8'))
    weather_val['Rain'] = (empty_dataset.dims, np.ones(tuple(empty_dataset.dims[d] for d in list(empty_dataset.dims)), dtype = 'uint16'))
    weather_val['ET0'] = (empty_dataset.dims, np.ones(tuple(empty_dataset.dims[d] for d in list(empty_dataset.dims)), dtype = 'uint16'))
    
    # variables
    variables = list(set(list(additional_outputs.keys())).intersection(set(modspa_excel.columns[0:-2])))
    variables.sort()
    
    # Outputs
    for var in variables:
        outputs_val[var] = (empty_dataset.dims, np.ones(tuple(empty_dataset.dims[d] for d in list(empty_dataset.dims)), dtype = 'int16'))
    
    # Progress bar
    pbar = tqdm(total = len(ndvi_val.coords['time'].values), desc = 'Writing datasets', unit = 'days')
    
    # Loop in time values
    i = 0
    for t in ndvi_val.coords['time'].values:
        # Inputs
        ndvi_val['NDVI'].loc[{'time' : t}] = ndvi_val['NDVI'].loc[{'time' : t}] * np.uint8(np.round(modspa_excel['NDVI'].values[i] * 255))
        weather_val['Rain'].loc[{'time' : t}] = weather_val['Rain'].loc[{'time' : t}] * np.uint16(np.round(modspa_excel['Rain'].values[i] * 100))
        weather_val['ET0'].loc[{'time' : t}] = weather_val['ET0'].loc[{'time' : t}] * np.uint16(np.round(modspa_excel['ET0'].values[i] * 1000))
    
        # Outputs
        for var in list(outputs_val.keys()):
            if var == 'NDVI':
                NDVI = np.round(modspa_excel[var].values[i] * 255)
                NDVI = NDVI / 255
                outputs_val[var].loc[{'time' : t}] = outputs_val[var].loc[{'time' : t}] * np.uint8(np.round(NDVI * additional_outputs[var]))
                continue
            
            outputs_val[var].loc[{'time' : t}] = outputs_val[var].loc[{'time' : t}] * np.int16(np.round(modspa_excel[var].values[i] * additional_outputs[var]))
        
        i+=1
        
        # Update progress bar
        pbar.update()
    
    # Close progress bar
    pbar.close()
    
    # Reorganize dimension order
    ndvi_val = ndvi_val.transpose('time', 'y', 'x')
    weather_val = weather_val.transpose('time', 'y', 'x')
    
    # Save datasets
    # Inputs
    ndvi_val.to_netcdf(val_ndvi_path, encoding = {"NDVI": {"dtype": "u1", "_FillValue": 0}})
    weather_val.to_netcdf(val_weather_path, encoding = {"Rain": {"dtype": "u2"}, "ET0": {"dtype": "u2"}})
    ndvi_val.close()
    weather_val.close()
    
    # Create encoding dictionnary
    for variable in list(outputs_val.keys()):
        # Write encoding dict
        encoding_dict = {}
        encod = {}
        encod['dtype'] = 'i2'
        encoding_dict[variable] = encod
    
    # Outputs
    outputs_val.to_netcdf(val_outputs, encoding = encoding_dict)
    outputs_val.close()
    Out [1]:
    ---------------------------------------------------------------------------
    NameError                                 Traceback (most recent call last)
    Cell In[1], line 2
          1 # Get input data
    ----> 2 modspa_excel = pd.read_excel(xls_file_path, sheet_name=0, header=10, index_col=0)
          3 modspa_excel = modspa_excel.loc[:, ~modspa_excel.columns.str.contains('^Unnamed')]
          5 # Dates
    
    NameError: name 'pd' is not defined
    

    Compare modspa-pixel and modspa-excel outputs

    In [1]:
    import os
    import xarray as xr
    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib import rcParams
    import matplotlib.dates as mdates
    
    # Settings for plots
    plt.style.use('default')
    rcParams['mathtext.fontset'] = 'stix'
    rcParams['font.family'] = 'STIXGeneral'
    rcParams.update({'font.size': 18})
    
    
    def plot_time_series(ds1: xr.Dataset, var: str, scale_factor: dict, ds2: xr.Dataset = None, label1: str = 'Dataset1', label2: str = 'Dataset2', unit: str = 'mm', title: str = 'variable comparison', date_intervals: int = 3, **kargs) -> None:
        """
        Plot times series of a uniform modspa dataset.
        Select first pixel (upper left corner) and plot
        its value over time.
    
        ## Arguments
        1. ds1: `xr.Dataset`
            first dataset to plot
        2. var: `str`
            name of variable to plot
        3. scale_factor1: `dict`
            scale factor dictionnary that contains
            scale factors for each variable
        4. ds2: `xr.Dataset` `default = None`
            second dataset to plot, optional
        5. label1: `str` `default = 'Dataset1'`
            label for first dataset
        6. label2: `str` `default = 'Dataset2'`
            label for second dataset, optional
        7. unit: `str` `default = 'mm'`
            unit of value
        8. title: `str` `default = 'variable comparison'`
            title of plot
        9. date_intervals: `int` `default = 3`
            date interval integer for the date format (`matplotlib.dates.WeekdayLocator`)
        10. **kwargs
            keyword parameters for the pyplot.plot() function (e.g `marker = 'o'`)
    
        ## Returns
        `None`
        """
        
        # Date format
        date_plot_format = mdates.WeekdayLocator(interval = date_intervals)
        date_format = mdates.DateFormatter('%Y-%m-%d')
        
        # Plot
        plt.figure(figsize = (14,7))
        plt.grid(color='silver', linestyle='--', axis = 'both', linewidth=1)
        plt.gca().xaxis.set_major_formatter(date_format)
        plt.gca().xaxis.set_major_locator(date_plot_format)
        (ds1.isel(x = 0, y = 0)[var] / scale_factor[var]).plot(label = label1, color = 'b', alpha = 0.8, **kargs)
        if ds2:
            (ds2.isel(x = 0, y = 0)[var] / scale_factor[var]).plot(label = label2, color = 'r', alpha = 0.8, **kargs)
        plt.title(var + ' ' + title)
        plt.ylabel(var + ' [' + unit + ']')
        plt.legend()
        
        return None
    
    
    data_path = '/mnt/e/DATA/DEV_inputs_test'
    # data_path = './DEV_inputs_test'
    
    size = 10
    
    # Inputs
    weather_path = data_path + os.sep + 'xls_weather_' + str(size) + '.nc'
    
    # Modspa-pixel output
    pix_outputs_path = data_path + os.sep + 'pix_outputs_' + str(size) + '.nc'
    
    # Excel output
    xls_outputs_path = data_path + os.sep + 'xls_outputs_' + str(size) + '.nc'
    
    # Open datasets
    pix_outputs = xr.open_dataset(pix_outputs_path)
    xls_outputs = xr.open_dataset(xls_outputs_path)
    weather = xr.open_dataset(weather_path)
    
    # Scaling dict
    additional_outputs = {'Zr': 10, 'Dei': 100, 'Dep': 100, 'Dr': 100, 'Dd': 100, 'Kei': 1e4, 'Kep': 1e4, 'Ks': 1e4, 'W': 1e4, 'Kcb': 1e4, 'Kri': 1e4, 'Krp': 1e4, 'NDVI': 1e4, 'fewi': 1e4, 'fewp': 1e4, 'TDW': 100, 'TAW': 100, 'FCov': 1e4, 'Tei': 1000, 'Tep': 1000, 'Diff_rei': 1e4, 'Diff_rep': 1e4, 'Diff_dr': 1e4, 'Rain': 100, 'p_cor': 1e4, 'Zd': 10, 'De_1': 100, 'Dei_1': 100, 'Dep_1': 100, 'Dr_1': 100, 'Dd_1': 100, 'Dei_2': 100, 'Dep_2': 100, 'Dr_2': 100, 'Dd_2': 100}
    normal_outputs = {'E': 1000, 'Tr': 1000, 'SWCe': 1000, 'SWCr': 1000, 'DP': 100, 'Irr': 100, 'ET0': 1000}
    
    additional_outputs.update(normal_outputs)
    
    # Parameters
    Irrig_auto = 1
    
    # Get list of common variables
    variables = list(set(list(pix_outputs.keys())).intersection(set(list(xls_outputs.keys()))))
    variables.sort()
    
    # Create empty dataset
    diff = pix_outputs.drop_vars(variables).copy(deep = True)
    
    # Compute differences (relative difference in %)
    for var in variables:
        if var == 'Irr':
            diff[var] = Irrig_auto * (np.abs(pix_outputs[var].copy(deep=True) - xls_outputs[var].copy(deep=True))) / np.abs(pix_outputs[var].copy(deep=True) + xls_outputs[var].copy(deep=True) + 0.00001) * 100
            continue
        diff[var] = (np.abs(pix_outputs[var].copy(deep=True) - xls_outputs[var].copy(deep=True))) / np.abs(pix_outputs[var].copy(deep=True) + xls_outputs[var].copy(deep=True) + 0.00001) * 100
    
    # Get valuesD
    differences_mean = {}
    for var in variables:
        differences_mean[var] = round(float(diff[var].mean(dim = 'time').mean().values), 3)
    
    # Plot difference on mean
    plt.figure(figsize=(16, 5))
    plt.grid(color='silver', linestyle='--', axis='y', linewidth=1)
    plt.bar(range(len(differences_mean)), list(differences_mean.values()), align='center', zorder = 2, color = 'firebrick')
    plt.xticks(range(len(differences_mean)), list(differences_mean.keys()), rotation='vertical')
    plt.ylabel('%')
    plt.title(r'Relative difference on mean $\mid \frac{pixel - excel}{pixel + excel} \mid$');
    plt.tight_layout()
    plt.savefig(data_path + os.sep + 'Images' + os.sep + 'relative_diff.png', dpi = 600)
    out [1]:
    In [2]:
    # Save directory
    im_save_dir = data_path + os.sep + 'Images'
    
    # Plot all variables and save
    for var in variables:
        
        # Save name
        im_save_name = im_save_dir + os.sep + var + '.png'
        
        # Plot
        plot_time_series(ds1 = xls_outputs, ds2 = pix_outputs, var = var, scale_factor = additional_outputs, label1 = 'Excel values', label2 = 'Pixel values', unit = 'mm', marker = None, markersize = 1.5)
        plt.tight_layout()
        plt.savefig(im_save_name, dpi = 600)
        plt.close()
    In [2]:
    # Plot two datasets to compare
    plot_time_series(ds1 = xls_outputs, ds2 = pix_outputs, var = 'E', label1 = 'Excel values', label2 = 'Pixel values', scale_factor = additional_outputs, unit = 'mm', marker = None, markersize = 1.5)
    plot_time_series(ds1 = xls_outputs, ds2 = pix_outputs, var = 'SWCr', label1 = 'Excel values', label2 = 'Pixel values', scale_factor = additional_outputs, unit = 'mm', marker = None, markersize = 1.5)
    plot_time_series(ds1 = xls_outputs, ds2 = pix_outputs, var = 'SWCe', label1 = 'Excel values', label2 = 'Pixel values', scale_factor = additional_outputs, unit = 'mm', marker = None, markersize = 1.5)
    
    # Plot only one dataset variable
    # plot_time_series(ds1 = pix_outputs, var = 'TEW', label1 = '$F_{Cov}$', scale_factor1 = 100, unit = 'mm')
    
    # Plot the relative difference dataset
    # plot_time_series(ds1 = diff, var = 'TAW', label1 = r'$\mid \frac{pixel - excel}{pixel + excel} \mid$ values', scale_factor1 = 1, unit = '%')
    # plot_time_series(ds1 = diff, var = 'Dep_2', label1 = r'$\mid \frac{pixel - excel}{pixel + excel} \mid$ values', scale_factor1 = 1, unit = '%')
    # plot_time_series(ds1 = diff, var = 'SWCe', label1 = r'$\mid \frac{pixel - excel}{pixel + excel} \mid$ values', scale_factor1 = 1, unit = '%')
    out [2]:
    In [3]:
    xls_outputs
    Out [3]:
    Coordinates:
      * x            (x) float64 7e+05 7e+05 7e+05 ... 7.019e+05 7.019e+05 7.02e+05
      * y            (y) float64 4.7e+06 4.7e+06 4.7e+06 ... 4.698e+06 4.698e+06
      * time         (time) datetime64[ns] 2006-02-06 2006-02-07 ... 2006-11-29
        spatial_ref  int64 ...
    In [2]:
    pix_outputs
    Out [2]: