# -*- coding: UTF-8 -*-
# Python
"""
29-08-2023
@author: jeremy auclair

Convert pandas dataframes to rasters for the pixel mode.
"""

from typing import List, Union  # to declare variables
import numpy as np  # for math on arrays
import xarray as xr  # to manage nc files
import pandas as pd  # to manage dataframes
import geopandas as gpd  # to manage shapefiles


def convert_dataframe_to_xarray(dataframe_in: Union[str, pd.DataFrame], save_path: str, variables: List[str], data_types: List[str], time_dimension: bool = True) -> None:
    """
    Convert ``pandas dataframes`` of the parcel mode into ``xarray datasets``
    for the model calculations. The resulting xarray dataset has dimensions:
    ``time: number of dates``, ``y: 1``, ``x: number of polygons`` *(to make a 3D dataset)*,
    or dimensions: ``y: 1``, ``x: number of polygons`` *(to make a 2D dataset)*.

    Arguments
    =========

    1. dataframe_in: ``Union[str, pd.DataFrame]``
        dataframe or path to dataframe to convert
    2. save_path: ``str``
        save path of output xarray dataset
    3. variables: ``List[str]``
        names of the variables to put in the output dataset (the list can have a single element)
    4. data_types: ``List[str]``
        xarray datatypes corresponding to the variable names, for correct saving of the dataset
    5. time_dimension: ``bool`` ``default = True``
        boolean to indicate if the dataframe has a time dimension

    Returns
    =======

    ``None``
    """

    # If dataframe has a time dimension (multiple dates)
    if time_dimension:

        if isinstance(dataframe_in, str):
            # Open dataframe
            dataframe = pd.read_csv(dataframe_in).sort_values(by = ['date', 'id'])
            dataframe['date'] = pd.to_datetime(dataframe['date'])
        else:
            dataframe_in.reset_index(drop = True, inplace = True)
            dataframe = dataframe_in.sort_values(by = ['date', 'id'])
            dataframe['date'] = pd.to_datetime(dataframe['date'])

        # Create dimensions for xarray dataset
        x = [i+1 for i in range(len(set(dataframe['id'])))]
        y = [1]
        dates = np.unique(dataframe['date'].values)

        # Get dimension sizes
        time_size = len(dates)
        x_size = len(x)
        y_size = len(y)

        # Create dimension tuples
        dim_size = (time_size, y_size, x_size)
        dims = ('time', 'y', 'x')

        # Reshape variables into the correct format and put them in a dictionary
        data_variables = {}
        for var in variables:
            data_variables[var] = (dims, np.reshape(dataframe[var].values, dim_size))

        # Create xarray dataset
        xarray_dataset = xr.Dataset(data_vars = data_variables, coords = {'time': dates, 'y': y, 'x': x})

        # Create encoding dictionary
        encoding_dict = {}
        for var, dtype in zip(variables, data_types):
            # Write encoding dict
            encod = {}
            encod['dtype'] = dtype
            encod['chunksizes'] = (time_size, y_size, x_size)
            encoding_dict[var] = encod

        # Save dataset as netCDF4 file
        xarray_dataset.to_netcdf(save_path, encoding = encoding_dict)

    else:

        if isinstance(dataframe_in, str):
            # Open dataframe
            dataframe = pd.read_csv(dataframe_in).sort_values(by = ['id'])
        else:
            dataframe_in.reset_index(drop = True, inplace = True)
            dataframe = dataframe_in.sort_values(by = ['id'])

        # Create dimensions for xarray dataset
        x = [i+1 for i in range(len(set(dataframe['id'])))]
        y = [1]

        # Get dimension sizes
        x_size = len(x)
        y_size = len(y)

        # Create dimension tuples
        dim_size = (y_size, x_size)
        dims = ('y', 'x')

        # Reshape variables into the correct format and put them in a dictionary
        data_variables = {}
        for var in variables:
            data_variables[var] = (dims, np.reshape(dataframe[var].values, dim_size))

        # Create xarray dataset
        xarray_dataset = xr.Dataset(data_vars = data_variables, coords = {'y': y, 'x': x})

        # Create encoding dictionary
        encoding_dict = {}
        for var, dtype in zip(variables, data_types):
            # Write encoding dict
            encod = {}
            encod['dtype'] = dtype
            encod['chunksizes'] = (y_size, x_size)
            encoding_dict[var] = encod

        # Save dataset as netCDF4 file
        xarray_dataset.to_netcdf(save_path, encoding = encoding_dict)

    return None
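
# Minimal usage sketch for convert_dataframe_to_xarray. The helper below is purely
# illustrative: the variable name 'LAI', the parcel ids and the output path
# 'lai_parcels.nc' are hypothetical, not values required by the model. Any dataframe
# with 'date', 'id' and the requested variable columns follows the same pattern.
def _example_convert_dataframe_to_xarray() -> None:

    # Two parcels observed on two dates, one variable
    example_dataframe = pd.DataFrame({
        'date': ['2023-08-29', '2023-08-29', '2023-08-30', '2023-08-30'],
        'id': [1, 2, 1, 2],
        'LAI': [0.5, 0.7, 0.6, 0.8]})

    # Writes a (time: 2, y: 1, x: 2) netCDF dataset with a float32 'LAI' variable
    convert_dataframe_to_xarray(example_dataframe, 'lai_parcels.nc', variables = ['LAI'], data_types = ['float32'])
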
def convert_geodataframe_to_xarray(geodataframe_in: Union[str, gpd.GeoDataFrame, pd.DataFrame], save_path: str, name: str, variable: str, data_type: str, global_attributes: List[dict] = []) -> None:
    """
    Convert ``geopandas GeoDataFrames`` of the parcel mode into ``xarray DataArrays``
    for the model calculations. The resulting xarray dataarray has dimensions:
    ``y: 1``, ``x: number of polygons`` *(to make a 2D dataset)*.

    Arguments
    =========

    1. geodataframe_in: ``Union[str, gpd.GeoDataFrame, pd.DataFrame]``
        geodataframe or path to geodataframe to convert
    2. save_path: ``str``
        save path of output xarray dataset
    3. name: ``str``
        name of dataarray to save
    4. variable: ``str``
        name of the variable to extract and put in the output dataset
    5. data_type: ``str``
        xarray datatype corresponding to the variable name, for correct saving of the dataset
    6. global_attributes: ``List[dict]``
        list of optional attributes to add to the netCDF file, given as a list of single element dictionaries {key: value}

    Returns
    =======

    ``None``
    """

    if isinstance(geodataframe_in, str):
        # Open geodataframe
        geodataframe = pd.read_csv(geodataframe_in)
    else:
        geodataframe = geodataframe_in

    # Create dimensions for xarray dataset
    x = [i+1 for i in range(len(geodataframe.index.values))]
    y = [1]

    # Get dimension sizes
    x_size = len(x)
    y_size = len(y)

    # Create dimension tuples
    dim_size = (y_size, x_size)

    # Reshape variable into the correct format
    data = np.reshape(geodataframe[variable].values, dim_size)

    # Create xarray dataarray
    xarray_dataarray = xr.DataArray(data = data, dims = ('y', 'x'), coords = {'y': y, 'x': x}, name = name)

    # Create encoding dictionary
    encoding_dict = {}
    encod = {}
    encod['dtype'] = data_type
    encod['chunksizes'] = (y_size, x_size)
    encoding_dict[name] = encod

    # Add potential attributes
    for attribute in global_attributes:
        key, val = list(attribute.items())[0]
        xarray_dataarray.attrs[key] = val

    # Save dataarray as netCDF4 file
    xarray_dataarray.to_netcdf(save_path, encoding = encoding_dict)

    return None
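
# Minimal usage sketch for convert_geodataframe_to_xarray, run only when the module
# is executed directly. The 'LC' (land cover) column, the global attribute and the
# output path are hypothetical examples chosen for illustration.
if __name__ == '__main__':

    # One land cover code per parcel; no geometry is needed for the conversion itself
    example_parcels = pd.DataFrame({'id': [1, 2, 3], 'LC': [12, 15, 12]})

    # Writes a (y: 1, x: 3) netCDF dataarray named 'land_cover' with an int16 dtype
    # and one global attribute
    convert_geodataframe_to_xarray(example_parcels, 'land_cover_parcels.nc', name = 'land_cover', variable = 'LC', data_type = 'int16', global_attributes = [{'source': 'example'}])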