Newer
Older
Jeremy Auclair
committed
# -*- coding: UTF-8 -*-
# Python
"""
29-08-2023
@author: jeremy auclair
Jeremy Auclair
committed
Convert pandas dataframes to rasters for the pixel mode.
Jeremy Auclair
committed
"""
Jeremy Auclair
committed
from typing import List, Union # to declare variables
Jeremy Auclair
committed
import numpy as np # for math on arrays
import xarray as xr # to manage nc files
Jeremy Auclair
committed
import pandas as pd # to manage dataframes
Jeremy Auclair
committed
def convert_dataframe_to_xarray(dataframe_in: Union[str, pd.DataFrame], save_path: str, variables: List[str], data_types: List[str]) -> None:
    """
    Convert a ``pandas dataframe`` of the parcel mode into an ``xarray dataset``
    for the model calculations and save it as a netCDF file. The resulting
    dataset has dimensions ``time: number of dates``, ``y: 1``,
    ``x: number of polygons`` *(to make a 3D dataset)*.

    The input must contain a ``date`` column, an ``id`` column and one column
    per requested variable, with exactly one row per ``(date, id)`` pair.

    Arguments
    =========

    1. dataframe_in: ``Union[str, pd.DataFrame]``
        dataframe or path to a csv file to convert. A dataframe
        passed by the caller is left unmodified
    2. save_path: ``str``
        save path of output xarray dataset
    3. variables: ``List[str]``
        name of variables (or variable, list can have one element)
        to put in the output dataset
    4. data_types: ``List[str]``
        xarray datatypes corresponding to the variable names, for
        correct saving of the dataset

    Returns
    =======

    ``None``

    Raises
    ======

    ``ValueError``
        if the dataframe does not hold exactly one row for every
        ``(date, id)`` combination
    """

    if isinstance(dataframe_in, str):
        # Open dataframe from csv file
        dataframe = pd.read_csv(dataframe_in)
    else:
        # reset_index returns a new dataframe: the index is dropped as before,
        # but the caller's object is no longer modified in place
        dataframe = dataframe_in.reset_index(drop=True)

    # Parse the dates BEFORE sorting so that rows are ordered chronologically
    # (not alphabetically), consistently with the time coordinate built below
    dataframe['date'] = pd.to_datetime(dataframe['date'])
    dataframe = dataframe.sort_values(by=['date', 'id'])

    # Create dimensions for xarray dataset (polygons are numbered from 1)
    x = list(range(1, len(set(dataframe['id'])) + 1))
    y = [1]
    dates = np.unique(dataframe['date'].values)

    # Get dimension sizes
    time_size = len(dates)
    x_size = len(x)
    y_size = len(y)

    # The reshape below is only meaningful on a complete (date, id) grid
    if len(dataframe) != time_size * x_size or dataframe.duplicated(subset=['date', 'id']).any():
        raise ValueError(
            f'Input dataframe must contain exactly one row per (date, id) pair: '
            f'found {len(dataframe)} rows for {time_size} dates and {x_size} ids.'
        )

    # Create dimension tuples
    dim_size = (time_size, y_size, x_size)
    dims = ('time', 'y', 'x')

    # Reshape variables in correct format and put them in a dictionary
    # (rows are sorted by date then id, which matches the (time, y, x) layout)
    data_variables = {var: (dims, np.reshape(dataframe[var].values, dim_size)) for var in variables}

    # Create xarray dataset
    xarray_dataset = xr.Dataset(data_vars=data_variables, coords={'time': dates, 'y': y, 'x': x})

    # Create encoding dictionary: one single chunk covering the whole variable
    encoding_dict = {var: {'dtype': dtype, 'chunksizes': dim_size} for var, dtype in zip(variables, data_types)}

    # Save dataset as netCDF4 file
    xarray_dataset.to_netcdf(save_path, encoding=encoding_dict)

    return None
def convert_geodataframe_to_xarray(geodataframe_in: Union[str, pd.DataFrame], save_path: str, name: str, variable: str, data_type: str) -> None:
    """
    Convert a ``geopandas GeoDataFrame`` of the parcel mode into an ``xarray DataArray``
    for the model calculations and save it as a netCDF file. The resulting
    dataarray has dimensions ``x: number of polygons``, ``y: 1``
    *(to make a 2D dataset)*.

    Arguments
    =========

    1. geodataframe_in: ``Union[str, pd.DataFrame]``
        geodataframe to convert, or path to a file
        that is read with ``pandas.read_csv``
    2. save_path: ``str``
        save path of output xarray dataarray
    3. name: ``str``
        name of dataarray to save
    4. variable: ``str``
        name of the column to extract and
        put in the output dataarray
    5. data_type: ``str``
        xarray datatype corresponding to the variable name, for
        correct saving of the dataarray

    Returns
    =======

    ``None``
    """

    if isinstance(geodataframe_in, str):
        # Open geodataframe
        geodataframe = pd.read_csv(geodataframe_in)
    else:
        geodataframe = geodataframe_in

    # Extract the requested column (one value per row / polygon)
    values = geodataframe[variable].values

    # Create dimensions for xarray dataarray (polygons are numbered from 1)
    x = list(range(1, len(geodataframe.index) + 1))
    y = [1]

    # Create dimension tuples
    dims = ('x', 'y')
    dim_size = (len(x), len(y))

    # Reshape variable in correct format
    data = np.reshape(values, dim_size)

    # Create xarray dataarray. Dimension names are given explicitly instead of
    # being inferred from the coords dictionary, which not every xarray
    # version supports
    xarray_dataarray = xr.DataArray(data=data, coords={'x': x, 'y': y}, dims=dims, name=name)

    # Create encoding dictionary: one single chunk covering the whole array
    encoding_dict = {name: {'dtype': data_type, 'chunksizes': dim_size}}

    # Save dataarray as netCDF4 file
    xarray_dataarray.to_netcdf(save_path, encoding=encoding_dict)