Newer
Older
Jeremy Auclair
committed
# -*- coding: UTF-8 -*-
# Python
"""
29-08-2023
@author: jeremy auclair
Jeremy Auclair
committed
Convert pandas dataframes to rasters for the pixel mode.
Jeremy Auclair
committed
"""
Jeremy Auclair
committed
from typing import List, Union # to declare variables
Jeremy Auclair
committed
import numpy as np # for math on arrays
import xarray as xr # to manage nc files
Jeremy Auclair
committed
import pandas as pd # to manage dataframes
Jeremy Auclair
committed
import geopandas as gpd # to manage shapefiles
Jeremy Auclair
committed
Jeremy Auclair
committed
def convert_dataframe_to_xarray(dataframe_in: Union[str, pd.DataFrame], save_path: str, variables: List[str], data_types: List[str], time_dimension: bool = True) -> None:
    """
    Convert ``pandas dataframes`` of the parcel mode into ``xarray datasets``
    for the model calculations. The resulting xarray dataset has dimensions:
    ``time: number of dates``, ``y: 1``, ``x: number of polygons`` *(to make a 3D dataset)*,
    or dimensions: ``y: 1``, ``x: number of polygons`` *(to make a 2D dataset)*.

    The table must contain an ``id`` column, a ``date`` column when
    ``time_dimension`` is ``True``, and one column per requested variable.
    Rows are ordered by ``id`` (and by ``date`` first in the 3D case), and the
    ``x`` coordinate is numbered from 1 following that order. The dataframe
    passed by the caller is left untouched.

    Arguments
    =========

    1. dataframe_in: ``Union[str, pd.DataFrame]``
        dataframe or path to a csv file (read with ``pandas.read_csv``)
        to convert
    2. save_path: ``str``
        save path of output xarray dataset
    3. variables: ``List[str]``
        name of variables (or variable, list can have one element)
        to put in the output dataset
    4. data_types: ``List[str]``
        xarray datatypes corresponding to the variable names, for
        correct saving of the dataset
    5. time_dimension: ``bool`` ``default = True``
        boolean to indicate if the dataframe has a time dimension

    Returns
    =======

    ``None``

    Raises
    ======

    ``ValueError``
        if the rows cannot fill the output grid exactly (missing or
        duplicated ``id`` / ``date`` combinations)
    """

    # Open the table from disk, or work on a re-indexed copy so that the
    # caller's dataframe is never modified
    if isinstance(dataframe_in, str):
        dataframe = pd.read_csv(dataframe_in)
    else:
        dataframe = dataframe_in.reset_index(drop = True)

    # Parse the dates BEFORE sorting: sorting raw strings is only chronological
    # for ISO formatted dates, whereas the time coordinate built below with
    # np.unique is always chronological
    if time_dimension:
        dataframe = dataframe.assign(date = pd.to_datetime(dataframe['date']))
        sort_keys = ['date', 'id']
    else:
        sort_keys = ['id']
    dataframe = dataframe.sort_values(by = sort_keys)

    # Create dimensions for xarray dataset
    x = list(range(1, len(set(dataframe['id'])) + 1))
    y = [1]
    coords = {'y': y, 'x': x}
    dims = ('y', 'x')
    dim_size = (len(y), len(x))

    # Prepend the time axis for the 3D case
    if time_dimension:
        dates = np.unique(dataframe['date'].values)
        coords = {'time': dates, 'y': y, 'x': x}
        dims = ('time',) + dims
        dim_size = (len(dates),) + dim_size

    # The reshape below is purely positional: every cell of the grid needs
    # exactly one row, otherwise values end up under the wrong date / polygon
    expected_rows = int(np.prod(dim_size))
    duplicated_rows = time_dimension and bool(dataframe.duplicated(subset = sort_keys).any())
    if len(dataframe) != expected_rows or duplicated_rows:
        raise ValueError(f'dataframe has {len(dataframe)} rows but {expected_rows} unique rows are required to fill a grid of shape {dim_size}')

    # Reshape variables in correct format and put them in a dictionary
    data_variables = {var: (dims, np.reshape(dataframe[var].values, dim_size)) for var in variables}

    # Create xarray dataset
    xarray_dataset = xr.Dataset(data_vars = data_variables, coords = coords)

    # Create encoding dictionary (one chunk covering the whole array)
    encoding_dict = {var: {'dtype': dtype, 'chunksizes': dim_size} for var, dtype in zip(variables, data_types)}

    # Save dataset as netCDF4 file
    xarray_dataset.to_netcdf(save_path, encoding = encoding_dict)

    return None
Jeremy Auclair
committed
def convert_geodataframe_to_xarray(geodataframe_in: Union[str, gpd.GeoDataFrame, pd.DataFrame], save_path: str, name: str, variable: str, data_type: str, global_attributes: Union[List[dict], None] = None) -> None:
    """
    Convert ``geopandas GeoDataFrames`` of the parcel mode into ``xarray DataArray``
    for the model calculations. The resulting xarray dataarray has dimensions:
    ``y: 1``, ``x: number of polygons`` *(to make a 2D dataset)*.

    Rows are taken in the order of the input table, and the ``x`` coordinate
    is numbered from 1 following that order.

    Arguments
    =========

    1. geodataframe_in: ``Union[str, gpd.GeoDataFrame, pd.DataFrame]``
        geodataframe to convert, or path to a csv file
        (read with ``pandas.read_csv``)
    2. save_path: ``str``
        save path of output xarray dataarray
    3. name: ``str``
        name of dataarray to save
    4. variable: ``str``
        name of variable (column) to extract
        to put in the output dataarray
    5. data_type: ``str``
        xarray datatype corresponding to the variable name, for
        correct saving of the dataarray
    6. global_attributes: ``List[dict]`` ``default = None``
        list of optional attributes to add to the netCDF file, given as a
        list of dictionaries {key: value}. Every item of every dictionary
        is written, ``None`` or an empty list adds nothing

    Returns
    =======

    ``None``
    """

    # Open the table from disk if a path was given
    if isinstance(geodataframe_in, str):
        geodataframe = pd.read_csv(geodataframe_in)
    else:
        geodataframe = geodataframe_in

    # Create dimensions for xarray dataarray
    x = list(range(1, len(geodataframe.index) + 1))
    y = [1]
    dims = ('y', 'x')
    dim_size = (len(y), len(x))

    # Reshape variable in correct format
    data = np.reshape(geodataframe[variable].values, dim_size)

    # Create xarray dataarray; dimension names are given explicitly instead of
    # being inferred from the coords mapping
    xarray_dataarray = xr.DataArray(data = data, coords = {'y': y, 'x': x}, dims = dims, name = name)

    # Create encoding dictionary (one chunk covering the whole array)
    encoding_dict = {name: {'dtype': data_type, 'chunksizes': dim_size}}

    # Add potential attributes
    for attribute in global_attributes or []:
        xarray_dataarray.attrs.update(attribute)

    # Save dataarray as netCDF4 file
    xarray_dataarray.to_netcdf(save_path, encoding = encoding_dict)