# -*- coding: UTF-8 -*-
# Python
"""
29-08-2023
@author: jeremy auclair

Convert pandas dataframes to rasters for the pixel mode.
"""

from typing import List, Union  # to declare variables
import numpy as np  # for math on arrays
import xarray as xr  # to manage nc files
import pandas as pd  # to manage dataframes


def convert_dataframe_to_xarray(dataframe_in: Union[str, pd.DataFrame], save_path: str, variables: List[str], data_types: List[str]) -> None:
    """
    Convert ``pandas dataframes`` of the parcel mode into ``xarray datasets``
    for the model calculations. The resulting xarray dataset has dimensions
    ``time: number of dates``, ``x: number of polygons``, ``y: 1``
    *(to make a 3D dataset)*.

    The dataframe must contain a ``date`` column, an ``id`` column and one
    column per requested variable. Rows are ordered by ``(date, id)`` and
    reshaped to ``(time, y, x)``, so the number of rows has to be equal to
    ``number of dates * number of ids`` (``numpy`` raises a ``ValueError``
    otherwise).

    Arguments
    =========

    1. dataframe_in: ``Union[str, pd.DataFrame]``
        dataframe or path to a csv file to convert.
        A dataframe given as input is not modified
    2. save_path: ``str``
        save path of output xarray dataset
    3. variables: ``List[str]``
        name of variables (or variable, list can have one element)
        to put in the output dataset
    4. data_types: ``List[str]``
        xarray datatypes corresponding to the variable names,
        for correct saving of the dataset

    Returns
    =======

    ``None``
    """

    if isinstance(dataframe_in, str):
        # Open dataframe
        dataframe = pd.read_csv(dataframe_in)
    else:
        # reset_index returns a new dataframe: the caller's object is left untouched
        dataframe = dataframe_in.reset_index(drop=True)

    # Parse the dates before sorting, so that rows are in chronological order
    # (same order as the time coordinate) even for non-ISO date strings
    dataframe['date'] = pd.to_datetime(dataframe['date'])
    dataframe = dataframe.sort_values(by=['date', 'id'])

    # Create dimensions for xarray dataset (x is a 1-based polygon counter)
    x = list(range(1, len(set(dataframe['id'])) + 1))
    y = [1]
    dates = np.unique(dataframe['date'].values)

    # Get dimension sizes
    time_size = len(dates)
    x_size = len(x)
    y_size = len(y)

    # Create dimension tuples
    dim_size = (time_size, y_size, x_size)
    dims = ('time', 'y', 'x')

    # Reshape variables in correct format and put them in a dictionary
    data_variables = {
        var: (dims, np.reshape(dataframe[var].values, dim_size))
        for var in variables
    }

    # Create xarray dataset
    xarray_dataset = xr.Dataset(data_vars=data_variables, coords={'time': dates, 'y': y, 'x': x})

    # Create encoding dictionary: each variable is saved as one single chunk
    encoding_dict = {
        var: {'dtype': dtype, 'chunksizes': dim_size}
        for var, dtype in zip(variables, data_types)
    }

    # Save dataset as netCDF4 file
    xarray_dataset.to_netcdf(save_path, encoding=encoding_dict)

    return None


def convert_geodataframe_to_xarray(geodataframe_in: Union[str, pd.DataFrame], save_path: str, name: str, variable: str, data_type: str) -> None:
    """
    Convert ``geopandas GeoDataFrames`` of the parcel mode into
    ``xarray DataArray`` for the model calculations. The resulting
    xarray dataarray has dimensions ``x: number of polygons``, ``y: 1``
    *(to make a 2D dataset)*.

    Arguments
    =========

    1. geodataframe_in: ``Union[str, pd.DataFrame]``
        geodataframe or path to geodataframe to convert
        (a path is opened with ``pd.read_csv``)
    2. save_path: ``str``
        save path of output xarray dataarray
    3. name: ``str``
        name of dataarray to save
    4. variable: ``str``
        name of variable to extract to put in the output dataarray
    5. data_type: ``str``
        xarray datatype corresponding to the variable name,
        for correct saving of the dataset

    Returns
    =======

    ``None``
    """

    if isinstance(geodataframe_in, str):
        # Open geodataframe
        geodataframe = pd.read_csv(geodataframe_in)
    else:
        geodataframe = geodataframe_in

    # Create dimensions for xarray dataarray (x is a 1-based row counter)
    x = list(range(1, len(geodataframe.index) + 1))
    y = [1]

    # Get dimension sizes
    x_size = len(x)
    y_size = len(y)

    # Create dimension tuples
    dim_size = (x_size, y_size)
    dims = ('x', 'y')

    # Reshape variable in correct format
    data = np.reshape(geodataframe[variable].values, dim_size)

    # Create xarray dataarray, dimension names are given explicitly so that
    # the axis order does not rely on xarray inferring it from the coords dict
    xarray_dataarray = xr.DataArray(data=data, coords={'x': x, 'y': y}, dims=dims, name=name)

    # Create encoding dictionary: the variable is saved as one single chunk
    encoding_dict = {name: {'dtype': data_type, 'chunksizes': dim_size}}

    # Save dataarray as netCDF4 file
    xarray_dataarray.to_netcdf(save_path, encoding=encoding_dict)

    return None