parcel_to_pixel.py

# -*- coding: UTF-8 -*-
# Python
"""
29-08-2023
@author: jeremy auclair

Convert pandas dataframes to rasters (xarray netCDF files) for the pixel mode.
"""

from typing import List, Union  # to declare variables
import numpy as np  # for math on arrays
import xarray as xr  # to manage nc files
import pandas as pd  # to manage dataframes


def convert_dataframe_to_xarray(dataframe_in: Union[str, pd.DataFrame], save_path: str, variables: List[str], data_types: List[str]) -> None:
    """
    Convert a ``pandas dataframe`` of the parcel mode into an ``xarray dataset``
    for the model calculations and save it as a netCDF file. The resulting
    dataset has dimensions ``time: number of dates``, ``y: 1``,
    ``x: number of polygons`` *(to make a 3D dataset)*.

    The input table must have a ``date`` column, an ``id`` column and exactly
    one row per ``(date, id)`` pair. The ``x`` coordinate is a simple counter
    ``1..number of polygons`` following the sorted ``id`` values, it does not
    store the ids themselves. A dataframe passed as input is not modified.

    Arguments
    =========

    1. dataframe_in: ``Union[str, pd.DataFrame]``
        dataframe or path to the csv file of the dataframe to convert
    2. save_path: ``str``
        save path of output xarray dataset
    3. variables: ``List[str]``
        name of variables (or variable, list can have one element)
        to put in the output dataset
    4. data_types: ``List[str]``
        xarray datatypes corresponding to the variable names, for
        correct saving of the dataset. Variables without a matching
        entry are saved without an explicit encoding.

    Returns
    =======

    ``None``

    Raises
    ======

    ``ValueError``
        if the table does not hold exactly one row per ``(date, id)`` pair
    """

    if isinstance(dataframe_in, pd.DataFrame):
        # reset_index returns a new object: the caller's dataframe keeps its index
        dataframe = dataframe_in.reset_index(drop=True)
    else:
        # Open dataframe
        dataframe = pd.read_csv(dataframe_in)

    # Parse the dates BEFORE sorting, otherwise string dates are ordered
    # lexically and may not line up with the chronological time coordinate
    dataframe['date'] = pd.to_datetime(dataframe['date'])
    dataframe = dataframe.sort_values(by=['date', 'id'])

    # Get dimension sizes
    dates = np.unique(dataframe['date'].values)
    time_size = len(dates)
    x_size = dataframe['id'].nunique()
    y_size = 1

    # The reshape below is only meaningful on a complete (date, id) grid
    grid_is_complete = len(dataframe) == time_size * x_size
    if not grid_is_complete or dataframe.duplicated(subset=['date', 'id']).any():
        raise ValueError(
            f'cannot build a (time, y, x) raster: expected exactly one row per (date, id) pair, '
            f'got {len(dataframe)} rows for {time_size} dates and {x_size} ids'
        )

    # Create dimensions for xarray dataset
    x = list(range(1, x_size + 1))
    y = [1]

    # Create dimension tuples
    dim_size = (time_size, y_size, x_size)
    dims = ('time', 'y', 'x')

    # Reshape variables in correct format and put them in a dictionary
    # (rows are sorted by date then id, so a row-major reshape gives [time, y, x])
    data_variables = {var: (dims, np.reshape(dataframe[var].values, dim_size)) for var in variables}

    # Create xarray dataset
    xarray_dataset = xr.Dataset(data_vars=data_variables, coords={'time': dates, 'y': y, 'x': x})

    # Create encoding dictionary, each variable is stored as one single chunk
    encoding_dict = {var: {'dtype': dtype, 'chunksizes': dim_size} for var, dtype in zip(variables, data_types)}

    # Save dataset as netCDF4 file
    xarray_dataset.to_netcdf(save_path, encoding=encoding_dict)

    return None


def convert_geodataframe_to_xarray(geodataframe_in: Union[str, pd.DataFrame], save_path: str, name: str, variable: str, data_type: str) -> None:
    """
    Convert one column of a ``geopandas GeoDataFrame`` of the parcel mode into
    an ``xarray DataArray`` for the model calculations and save it as a netCDF
    file. The resulting dataarray has dimensions ``x: number of polygons``,
    ``y: 1`` *(to make a 2D dataset)*.

    The ``x`` coordinate is a simple counter ``1..number of rows`` that follows
    the row order of the input table.

    Arguments
    =========

    1. geodataframe_in: ``Union[str, pd.DataFrame]``
        geodataframe or path to the geodataframe to convert
        (a path is read with ``pandas.read_csv``)
    2. save_path: ``str``
        save path of output xarray dataarray
    3. name: ``str``
        name of dataarray to save
    4. variable: ``str``
        name of the variable (column) to extract
        and put in the output dataarray
    5. data_type: ``str``
        xarray datatype corresponding to the variable name, for
        correct saving of the dataarray

    Returns
    =======

    ``None``
    """

    if isinstance(geodataframe_in, pd.DataFrame):
        geodataframe = geodataframe_in
    else:
        # Open geodataframe (anything that is not a dataframe is handed to read_csv)
        geodataframe = pd.read_csv(geodataframe_in)

    # Create dimensions for xarray dataarray
    x = list(range(1, len(geodataframe) + 1))
    y = [1]

    # Create dimension tuples
    dims = ('x', 'y')
    dim_size = (len(x), len(y))

    # Reshape the variable into a 2D (x, y) array
    data = np.reshape(geodataframe[variable].values, dim_size)

    # Create xarray dataarray, dimension names are given explicitly
    # instead of being inferred from the key order of the coords dictionary
    xarray_dataarray = xr.DataArray(data=data, coords={'x': x, 'y': y}, dims=dims, name=name)

    # Create encoding dictionary, the variable is stored as one single chunk
    encoding_dict = {name: {'dtype': data_type, 'chunksizes': dim_size}}

    # Save dataarray as netCDF4 file
    xarray_dataarray.to_netcdf(save_path, encoding=encoding_dict)

    return None