# -*- coding: UTF-8 -*-
# Python
"""
29-08-2023
@author: jeremy auclair
Convert pandas dataframes to rasters for the pixel mode.
"""
from typing import List, Union # to declare variables
import numpy as np # for math on arrays
import xarray as xr # to manage nc files
import pandas as pd # to manage dataframes
import geopandas as gpd # to manage shapefiles
# (Sphinx "[docs]" link artifact from the rendered documentation page — kept as a comment so the module stays valid Python)
def convert_dataframe_to_xarray(dataframe_in: Union[str, pd.DataFrame], save_path: str, variables: List[str], data_types: List[str], time_dimension: bool = True) -> None:
    """
    Convert ``pandas dataframes`` of the parcel mode into ``xarray datasets``
    for the model calculations. The resulting xarray dataset has dimensions:
    ``time: number of dates``, ``y: 1``, ``x: number of polygons`` *(to make a 3D dataset)*,
    or dimensions: ``y: 1``, ``x: number of polygons`` *(to make a 2D dataset)*

    Arguments
    =========

    1. dataframe_in: ``Union[str, pd.DataFrame]``
        dataframe or path to dataframe (csv) to convert; a dataframe passed
        directly is copied, never modified in place
    2. save_path: ``str``
        save path of output xarray dataset (netCDF4 file)
    3. variables: ``List[str]``
        name of variables (or variable, list can have one element)
        to put in the output dataset
    4. data_types: ``List[str]``
        xarray datatypes corresponding to the variable names, for
        correct saving of the dataset
    5. time_dimension: ``bool`` ``default = True``
        boolean to indicate if the dataframe has a time dimension
        (a ``date`` column with one row per date and polygon)

    Returns
    =======

    ``None``
    """

    # Load from disk or take a copy, so the caller's dataframe is never
    # mutated (the previous implementation reset the caller's index in place)
    if isinstance(dataframe_in, str):
        dataframe = pd.read_csv(dataframe_in)
    else:
        dataframe = dataframe_in.reset_index(drop = True)

    # Sort rows so that np.reshape below lays values out as
    # (time, y, x): all polygons of date 1, then all polygons of date 2, ...
    if time_dimension:
        # Parse dates *before* sorting so chronological order does not depend
        # on the lexicographic order of the raw date strings
        dataframe['date'] = pd.to_datetime(dataframe['date'])
        dataframe = dataframe.sort_values(by = ['date', 'id'])
    else:
        dataframe = dataframe.sort_values(by = ['id'])

    # Create dimensions for the xarray dataset: a singleton y axis and one
    # pixel per polygon along x (polygons are numbered from 1)
    x = [i + 1 for i in range(dataframe['id'].nunique())]
    y = [1]
    coords = {'y': y, 'x': x}
    dims = ('y', 'x')
    dim_size = (len(y), len(x))
    if time_dimension:
        # Prepend the time dimension (unique sorted dates)
        dates = np.unique(dataframe['date'].values)
        coords = {'time': dates, **coords}
        dims = ('time',) + dims
        dim_size = (len(dates),) + dim_size

    # Reshape each variable into the (time, y, x) or (y, x) grid
    data_variables = {var: (dims, np.reshape(dataframe[var].values, dim_size)) for var in variables}

    # Create xarray dataset
    xarray_dataset = xr.Dataset(data_vars = data_variables, coords = coords)

    # Encoding: requested dtype per variable, one chunk covering the dataset
    encoding_dict = {var: {'dtype': dtype, 'chunksizes': dim_size} for var, dtype in zip(variables, data_types)}

    # Save dataset as netCDF4 file
    xarray_dataset.to_netcdf(save_path, encoding = encoding_dict)

    return None
def convert_geodataframe_to_xarray(geodataframe_in: Union[str, gpd.GeoDataFrame, pd.DataFrame], save_path: str, name: str, variable: str, data_type: str, global_attributes: Union[List[dict], None] = None) -> None:
    """
    Convert ``geopandas GeoDataFrames`` of the parcel mode into ``xarray DataArray``
    for the model calculations. The resulting xarray dataset has dimensions:
    ``y: 1``, ``x: number of polygons`` *(to make a 2D dataset)*.

    Arguments
    =========

    1. geodataframe_in: ``Union[str, gpd.GeoDataFrame, pd.DataFrame]``
        geodataframe or path to a csv file (read with pandas) to convert
    2. save_path: ``str``
        save path of output xarray dataset (netCDF4 file)
    3. name: ``str``
        name of dataarray to save
    4. variable: ``str``
        name of the column to extract into the output dataset
    5. data_type: ``str``
        xarray datatype corresponding to the variable name, for
        correct saving of the dataset
    6. global_attributes: ``List[dict]`` ``default = None``
        list of optional attributes to add to the netCDF file, given as a list
        of single-element dictionaries {key: value}; ``None`` means no attributes
        (a ``None`` default avoids the mutable-default-argument pitfall)

    Returns
    =======

    ``None``
    """

    # Load from disk if a path was given (note: a path is read as plain csv,
    # so the geometry column is not parsed in that case)
    if isinstance(geodataframe_in, str):
        geodataframe = pd.read_csv(geodataframe_in)
    else:
        geodataframe = geodataframe_in

    # Dimensions: a singleton y axis and one pixel per polygon along x
    x = [i + 1 for i in range(len(geodataframe.index))]
    y = [1]
    dim_size = (len(y), len(x))

    # Reshape the variable into the (y, x) grid
    data = np.reshape(geodataframe[variable].values, dim_size)

    # Create xarray dataarray
    xarray_dataarray = xr.DataArray(data = data, coords = {'y': y, 'x': x}, name = name)

    # Encoding: requested dtype, one chunk covering the array
    encoding_dict = {name: {'dtype': data_type, 'chunksizes': dim_size}}

    # Add optional attributes (each dict contributes its single key/value pair)
    for attribute in (global_attributes or []):
        key, val = next(iter(attribute.items()))
        xarray_dataarray.attrs[key] = val

    # Save dataset as netCDF4 file
    xarray_dataarray.to_netcdf(save_path, encoding = encoding_dict)

    return None