{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Manage non-automatic inputs for parcel mode\n",
    "\n",
    "This notebook provides tools to prepare the inputs that cannot easily be preprocessed automatically, either because they come from different sources or because they require specific processing (like the land-cover or the soil data).\n",
    "\n",
    "### Load necessary libraries and define paths and functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os  # for path exploration\n",
    "import xarray as xr  # to manage dataset\n",
    "import rasterio as rio  # to open geotiff files\n",
    "import numpy as np  # vectorized math\n",
    "import pandas as pd  # to manage dataframes\n",
    "import geopandas as gpd  # to manage shapefile and crs projections\n",
    "import matplotlib.pyplot as plt  # to plot\n",
    "from matplotlib import rcParams  # plot parameters\n",
    "\n",
    "import sys  # system management\n",
    "currentdir = os.path.dirname(os.path.abspath(''))\n",
    "sys.path.insert(0, os.path.dirname(currentdir))\n",
    "from modspa_pixel.config.config import config  # to import config file\n",
    "from modspa_pixel.preprocessing.input_toolbox import prepare_directories  # to create necessary input directories\n",
    "from modspa_pixel.preprocessing.parcel_to_pixel import convert_geodataframe_to_xarray, convert_dataframe_to_xarray  # to convert geodataframe landcover or soil data to an xarray DataArray\n",
    "\n",
    "# Parameters for matplotlib\n",
    "plt.style.use('default')\n",
    "rcParams.update({\n",
    "    'mathtext.fontset': 'stix',\n",
    "    'font.family': 'STIXGeneral',\n",
    "    'font.size': 15,\n",
    "})\n",
    "\n",
    "# Path to the json configuration file (os.path.join uses the platform separator)\n",
    "config_file = os.path.join(currentdir, 'config', 'config_modspa.json')\n",
    "\n",
    "# Load parameters from the configuration file\n",
    "config_params = config(config_file)\n",
    "\n",
    "# Prepare directories\n",
    "prepare_directories(config_file)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Land Cover management"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><svg style=\"position: absolute; width: 0; height: 0; overflow: hidden\">\n",
       "<defs>\n",
       "<symbol id=\"icon-database\" viewBox=\"0 0 32 32\">\n",
       "<path d=\"M16 0c-8.837 0-16 2.239-16 5v4c0 2.761 7.163 5 16 5s16-2.239 16-5v-4c0-2.761-7.163-5-16-5z\"></path>\n",
       "<path d=\"M16 17c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n",
       "<path d=\"M16 26c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n",
       "</symbol>\n",
       "<symbol id=\"icon-file-text2\" viewBox=\"0 0 32 32\">\n",
       "<path d=\"M28.681 7.159c-0.694-0.947-1.662-2.053-2.724-3.116s-2.169-2.030-3.116-2.724c-1.612-1.182-2.393-1.319-2.841-1.319h-15.5c-1.378 0-2.5 1.121-2.5 2.5v27c0 1.378 1.122 2.5 2.5 2.5h23c1.378 0 2.5-1.122 2.5-2.5v-19.5c0-0.448-0.137-1.23-1.319-2.841zM24.543 5.457c0.959 0.959 1.712 1.825 2.268 2.543h-4.811v-4.811c0.718 0.556 1.584 1.309 2.543 2.268zM28 29.5c0 0.271-0.229 0.5-0.5 0.5h-23c-0.271 0-0.5-0.229-0.5-0.5v-27c0-0.271 0.229-0.5 0.5-0.5 0 0 15.499-0 15.5 0v7c0 0.552 0.448 1 1 1h7v19.5z\"></path>\n",
       "<path d=\"M23 26h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
       "<path d=\"M23 22h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
       "<path d=\"M23 18h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
       "</symbol>\n",
       "</defs>\n",
       "</svg>\n",
       "<style>/* CSS stylesheet for displaying xarray objects in jupyterlab.\n",
       " *\n",
       " */\n",
       "\n",
       ":root {\n",
       "  --xr-font-color0: var(--jp-content-font-color0, rgba(0, 0, 0, 1));\n",
       "  --xr-font-color2: var(--jp-content-font-color2, rgba(0, 0, 0, 0.54));\n",
       "  --xr-font-color3: var(--jp-content-font-color3, rgba(0, 0, 0, 0.38));\n",
       "  --xr-border-color: var(--jp-border-color2, #e0e0e0);\n",
       "  --xr-disabled-color: var(--jp-layout-color3, #bdbdbd);\n",
       "  --xr-background-color: var(--jp-layout-color0, white);\n",
       "  --xr-background-color-row-even: var(--jp-layout-color1, white);\n",
       "  --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n",
       "}\n",
       "\n",
       "html[theme=dark],\n",
       "body[data-theme=dark],\n",
       "body.vscode-dark {\n",
       "  --xr-font-color0: rgba(255, 255, 255, 1);\n",
       "  --xr-font-color2: rgba(255, 255, 255, 0.54);\n",
       "  --xr-font-color3: rgba(255, 255, 255, 0.38);\n",
       "  --xr-border-color: #1F1F1F;\n",
       "  --xr-disabled-color: #515151;\n",
       "  --xr-background-color: #111111;\n",
       "  --xr-background-color-row-even: #111111;\n",
       "  --xr-background-color-row-odd: #313131;\n",
       "}\n",
       "\n",
       ".xr-wrap {\n",
       "  display: block !important;\n",
       "  min-width: 300px;\n",
       "  max-width: 700px;\n",
       "}\n",
       "\n",
       ".xr-text-repr-fallback {\n",
       "  /* fallback to plain text repr when CSS is not injected (untrusted notebook) */\n",
       "  display: none;\n",
       "}\n",
       "\n",
       ".xr-header {\n",
       "  padding-top: 6px;\n",
       "  padding-bottom: 6px;\n",
       "  margin-bottom: 4px;\n",
       "  border-bottom: solid 1px var(--xr-border-color);\n",
       "}\n",
       "\n",
       ".xr-header > div,\n",
       ".xr-header > ul {\n",
       "  display: inline;\n",
       "  margin-top: 0;\n",
       "  margin-bottom: 0;\n",
       "}\n",
       "\n",
       ".xr-obj-type,\n",
       ".xr-array-name {\n",
       "  margin-left: 2px;\n",
       "  margin-right: 10px;\n",
       "}\n",
       "\n",
       ".xr-obj-type {\n",
       "  color: var(--xr-font-color2);\n",
       "}\n",
       "\n",
       ".xr-sections {\n",
       "  padding-left: 0 !important;\n",
       "  display: grid;\n",
       "  grid-template-columns: 150px auto auto 1fr 20px 20px;\n",
       "}\n",
       "\n",
       ".xr-section-item {\n",
       "  display: contents;\n",
       "}\n",
       "\n",
       ".xr-section-item input {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       ".xr-section-item input + label {\n",
       "  color: var(--xr-disabled-color);\n",
       "}\n",
       "\n",
       ".xr-section-item input:enabled + label {\n",
       "  cursor: pointer;\n",
       "  color: var(--xr-font-color2);\n",
       "}\n",
       "\n",
       ".xr-section-item input:enabled + label:hover {\n",
       "  color: var(--xr-font-color0);\n",
       "}\n",
       "\n",
       ".xr-section-summary {\n",
       "  grid-column: 1;\n",
       "  color: var(--xr-font-color2);\n",
       "  font-weight: 500;\n",
       "}\n",
       "\n",
       ".xr-section-summary > span {\n",
       "  display: inline-block;\n",
       "  padding-left: 0.5em;\n",
       "}\n",
       "\n",
       ".xr-section-summary-in:disabled + label {\n",
       "  color: var(--xr-font-color2);\n",
       "}\n",
       "\n",
       ".xr-section-summary-in + label:before {\n",
       "  display: inline-block;\n",
       "  content: '►';\n",
       "  font-size: 11px;\n",
       "  width: 15px;\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       ".xr-section-summary-in:disabled + label:before {\n",
       "  color: var(--xr-disabled-color);\n",
       "}\n",
       "\n",
       ".xr-section-summary-in:checked + label:before {\n",
       "  content: '▼';\n",
       "}\n",
       "\n",
       ".xr-section-summary-in:checked + label > span {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       ".xr-section-summary,\n",
       ".xr-section-inline-details {\n",
       "  padding-top: 4px;\n",
       "  padding-bottom: 4px;\n",
       "}\n",
       "\n",
       ".xr-section-inline-details {\n",
       "  grid-column: 2 / -1;\n",
       "}\n",
       "\n",
       ".xr-section-details {\n",
       "  display: none;\n",
       "  grid-column: 1 / -1;\n",
       "  margin-bottom: 5px;\n",
       "}\n",
       "\n",
       ".xr-section-summary-in:checked ~ .xr-section-details {\n",
       "  display: contents;\n",
       "}\n",
       "\n",
       ".xr-array-wrap {\n",
       "  grid-column: 1 / -1;\n",
       "  display: grid;\n",
       "  grid-template-columns: 20px auto;\n",
       "}\n",
       "\n",
       ".xr-array-wrap > label {\n",
       "  grid-column: 1;\n",
       "  vertical-align: top;\n",
       "}\n",
       "\n",
       ".xr-preview {\n",
       "  color: var(--xr-font-color3);\n",
       "}\n",
       "\n",
       ".xr-array-preview,\n",
       ".xr-array-data {\n",
       "  padding: 0 5px !important;\n",
       "  grid-column: 2;\n",
       "}\n",
       "\n",
       ".xr-array-data,\n",
       ".xr-array-in:checked ~ .xr-array-preview {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       ".xr-array-in:checked ~ .xr-array-data,\n",
       ".xr-array-preview {\n",
       "  display: inline-block;\n",
       "}\n",
       "\n",
       ".xr-dim-list {\n",
       "  display: inline-block !important;\n",
       "  list-style: none;\n",
       "  padding: 0 !important;\n",
       "  margin: 0;\n",
       "}\n",
       "\n",
       ".xr-dim-list li {\n",
       "  display: inline-block;\n",
       "  padding: 0;\n",
       "  margin: 0;\n",
       "}\n",
       "\n",
       ".xr-dim-list:before {\n",
       "  content: '(';\n",
       "}\n",
       "\n",
       ".xr-dim-list:after {\n",
       "  content: ')';\n",
       "}\n",
       "\n",
       ".xr-dim-list li:not(:last-child):after {\n",
       "  content: ',';\n",
       "  padding-right: 5px;\n",
       "}\n",
       "\n",
       ".xr-has-index {\n",
       "  font-weight: bold;\n",
       "}\n",
       "\n",
       ".xr-var-list,\n",
       ".xr-var-item {\n",
       "  display: contents;\n",
       "}\n",
       "\n",
       ".xr-var-item > div,\n",
       ".xr-var-item label,\n",
       ".xr-var-item > .xr-var-name span {\n",
       "  background-color: var(--xr-background-color-row-even);\n",
       "  margin-bottom: 0;\n",
       "}\n",
       "\n",
       ".xr-var-item > .xr-var-name:hover span {\n",
       "  padding-right: 5px;\n",
       "}\n",
       "\n",
       ".xr-var-list > li:nth-child(odd) > div,\n",
       ".xr-var-list > li:nth-child(odd) > label,\n",
       ".xr-var-list > li:nth-child(odd) > .xr-var-name span {\n",
       "  background-color: var(--xr-background-color-row-odd);\n",
       "}\n",
       "\n",
       ".xr-var-name {\n",
       "  grid-column: 1;\n",
       "}\n",
       "\n",
       ".xr-var-dims {\n",
       "  grid-column: 2;\n",
       "}\n",
       "\n",
       ".xr-var-dtype {\n",
       "  grid-column: 3;\n",
       "  text-align: right;\n",
       "  color: var(--xr-font-color2);\n",
       "}\n",
       "\n",
       ".xr-var-preview {\n",
       "  grid-column: 4;\n",
       "}\n",
       "\n",
       ".xr-index-preview {\n",
       "  grid-column: 2 / 5;\n",
       "  color: var(--xr-font-color2);\n",
       "}\n",
       "\n",
       ".xr-var-name,\n",
       ".xr-var-dims,\n",
       ".xr-var-dtype,\n",
       ".xr-preview,\n",
       ".xr-attrs dt {\n",
       "  white-space: nowrap;\n",
       "  overflow: hidden;\n",
       "  text-overflow: ellipsis;\n",
       "  padding-right: 10px;\n",
       "}\n",
       "\n",
       ".xr-var-name:hover,\n",
       ".xr-var-dims:hover,\n",
       ".xr-var-dtype:hover,\n",
       ".xr-attrs dt:hover {\n",
       "  overflow: visible;\n",
       "  width: auto;\n",
       "  z-index: 1;\n",
       "}\n",
       "\n",
       ".xr-var-attrs,\n",
       ".xr-var-data,\n",
       ".xr-index-data {\n",
       "  display: none;\n",
       "  background-color: var(--xr-background-color) !important;\n",
       "  padding-bottom: 5px !important;\n",
       "}\n",
       "\n",
       ".xr-var-attrs-in:checked ~ .xr-var-attrs,\n",
       ".xr-var-data-in:checked ~ .xr-var-data,\n",
       ".xr-index-data-in:checked ~ .xr-index-data {\n",
       "  display: block;\n",
       "}\n",
       "\n",
       ".xr-var-data > table {\n",
       "  float: right;\n",
       "}\n",
       "\n",
       ".xr-var-name span,\n",
       ".xr-var-data,\n",
       ".xr-index-name div,\n",
       ".xr-index-data,\n",
       ".xr-attrs {\n",
       "  padding-left: 25px !important;\n",
       "}\n",
       "\n",
       ".xr-attrs,\n",
       ".xr-var-attrs,\n",
       ".xr-var-data,\n",
       ".xr-index-data {\n",
       "  grid-column: 1 / -1;\n",
       "}\n",
       "\n",
       "dl.xr-attrs {\n",
       "  padding: 0;\n",
       "  margin: 0;\n",
       "  display: grid;\n",
       "  grid-template-columns: 125px auto;\n",
       "}\n",
       "\n",
       ".xr-attrs dt,\n",
       ".xr-attrs dd {\n",
       "  padding: 0;\n",
       "  margin: 0;\n",
       "  float: left;\n",
       "  padding-right: 10px;\n",
       "  width: auto;\n",
       "}\n",
       "\n",
       ".xr-attrs dt {\n",
       "  font-weight: normal;\n",
       "  grid-column: 1;\n",
       "}\n",
       "\n",
       ".xr-attrs dt:hover span {\n",
       "  display: inline-block;\n",
       "  background: var(--xr-background-color);\n",
       "  padding-right: 10px;\n",
       "}\n",
       "\n",
       ".xr-attrs dd {\n",
       "  grid-column: 2;\n",
       "  white-space: pre-wrap;\n",
       "  word-break: break-all;\n",
       "}\n",
       "\n",
       ".xr-icon-database,\n",
       ".xr-icon-file-text2,\n",
       ".xr-no-icon {\n",
       "  display: inline-block;\n",
       "  vertical-align: middle;\n",
       "  width: 1em;\n",
       "  height: 1.5em !important;\n",
       "  stroke-width: 0;\n",
       "  stroke: currentColor;\n",
       "  fill: currentColor;\n",
       "}\n",
       "</style><pre class='xr-text-repr-fallback'>&lt;xarray.DataArray &#x27;class&#x27; (y: 1, x: 7485)&gt;\n",
       "[7485 values with dtype=uint8]\n",
       "Coordinates:\n",
       "  * y        (y) int64 1\n",
       "  * x        (x) int64 1 2 3 4 5 6 7 8 ... 7479 7480 7481 7482 7483 7484 7485\n",
       "Attributes:\n",
       "    class_names:  [&#x27;no_sim&#x27;, &#x27;Strawcereals&#x27;, &#x27;Oilseeds&#x27;, &#x27;Soy&#x27;, &#x27;Sunflower&#x27;, ...</pre><div class='xr-wrap' style='display:none'><div class='xr-header'><div class='xr-obj-type'>xarray.DataArray</div><div class='xr-array-name'>'class'</div><ul class='xr-dim-list'><li><span class='xr-has-index'>y</span>: 1</li><li><span class='xr-has-index'>x</span>: 7485</li></ul></div><ul class='xr-sections'><li class='xr-section-item'><div class='xr-array-wrap'><input id='section-14186b10-e990-4719-a733-98e5c8a3cee0' class='xr-array-in' type='checkbox' checked><label for='section-14186b10-e990-4719-a733-98e5c8a3cee0' title='Show/hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-array-preview xr-preview'><span>...</span></div><div class='xr-array-data'><pre>[7485 values with dtype=uint8]</pre></div></div></li><li class='xr-section-item'><input id='section-63981347-1141-408a-951a-07d3e974dc12' class='xr-section-summary-in' type='checkbox'  checked><label for='section-63981347-1141-408a-951a-07d3e974dc12' class='xr-section-summary' >Coordinates: <span>(2)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>y</span></div><div class='xr-var-dims'>(y)</div><div class='xr-var-dtype'>int64</div><div class='xr-var-preview xr-preview'>1</div><input id='attrs-c9c9039b-8dd6-472e-a3f2-d874e70a49b6' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-c9c9039b-8dd6-472e-a3f2-d874e70a49b6' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-050cefff-0585-4957-8fba-8e416ed36923' class='xr-var-data-in' type='checkbox'><label for='data-050cefff-0585-4957-8fba-8e416ed36923' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use 
xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>array([1])</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>x</span></div><div class='xr-var-dims'>(x)</div><div class='xr-var-dtype'>int64</div><div class='xr-var-preview xr-preview'>1 2 3 4 5 ... 7482 7483 7484 7485</div><input id='attrs-7f693c8c-5b94-43e4-afc5-31652361568b' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-7f693c8c-5b94-43e4-afc5-31652361568b' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-5294234b-307b-428b-bd4a-b2c61281d73c' class='xr-var-data-in' type='checkbox'><label for='data-5294234b-307b-428b-bd4a-b2c61281d73c' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>array([   1,    2,    3, ..., 7483, 7484, 7485])</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-1c44b06f-3f32-4f13-9b3b-4a5920772fe5' class='xr-section-summary-in' type='checkbox'  ><label for='section-1c44b06f-3f32-4f13-9b3b-4a5920772fe5' class='xr-section-summary' >Indexes: <span>(2)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-index-name'><div>y</div></div><div class='xr-index-preview'>PandasIndex</div><div></div><input id='index-cb44c224-3cda-4f33-8959-1451253a44d5' class='xr-index-data-in' type='checkbox'/><label for='index-cb44c224-3cda-4f33-8959-1451253a44d5' title='Show/Hide index repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-index-data'><pre>PandasIndex(Index([1], dtype=&#x27;int64&#x27;, name=&#x27;y&#x27;))</pre></div></li><li 
class='xr-var-item'><div class='xr-index-name'><div>x</div></div><div class='xr-index-preview'>PandasIndex</div><div></div><input id='index-19f2ff73-4a11-4c11-b523-0efd0be8ad10' class='xr-index-data-in' type='checkbox'/><label for='index-19f2ff73-4a11-4c11-b523-0efd0be8ad10' title='Show/Hide index repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-index-data'><pre>PandasIndex(Index([   1,    2,    3,    4,    5,    6,    7,    8,    9,   10,\n",
       "       ...\n",
       "       7476, 7477, 7478, 7479, 7480, 7481, 7482, 7483, 7484, 7485],\n",
       "      dtype=&#x27;int64&#x27;, name=&#x27;x&#x27;, length=7485))</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-a57e4481-ae84-4274-944f-4dbcf5be9cdc' class='xr-section-summary-in' type='checkbox'  checked><label for='section-a57e4481-ae84-4274-944f-4dbcf5be9cdc' class='xr-section-summary' >Attributes: <span>(1)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><dl class='xr-attrs'><dt><span>class_names :</span></dt><dd>[&#x27;no_sim&#x27;, &#x27;Strawcereals&#x27;, &#x27;Oilseeds&#x27;, &#x27;Soy&#x27;, &#x27;Sunflower&#x27;, &#x27;Corn&#x27;, &#x27;Grasslands&#x27;, &#x27;Orchards&#x27;, &#x27;Vineyards&#x27;, &#x27;Broadleafforests&#x27;, &#x27;Coniferforests&#x27;, &#x27;Heathland&#x27;]</dd></dl></div></li></ul></div></div>"
      ],
      "text/plain": [
       "<xarray.DataArray 'class' (y: 1, x: 7485)>\n",
       "[7485 values with dtype=uint8]\n",
       "Coordinates:\n",
       "  * y        (y) int64 1\n",
       "  * x        (x) int64 1 2 3 4 5 6 7 8 ... 7479 7480 7481 7482 7483 7484 7485\n",
       "Attributes:\n",
       "    class_names:  ['no_sim', 'Strawcereals', 'Oilseeds', 'Soy', 'Sunflower', ..."
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Path where the converted shapefile is saved (from the configuration file)\n",
    "shapefile_path = config_params.shapefile_path\n",
    "\n",
    "# Raw parcel shapefile to convert\n",
    "# NOTE: absolute path specific to one machine, adapt it to the location of your data\n",
    "shapefile_raw = '/home/auclairj/notebooks/Shapefiles/Aurade_parcel_raw/Aurade_parcel_raw.shp'\n",
    "\n",
    "# Path to csv conversion file\n",
    "conversion_csv_file = os.path.join(currentdir, 'preprocessing', 'csv_files', 'class_conversion_parcel.csv')\n",
    "\n",
    "# Open shapefile\n",
    "shapefile = gpd.read_file(shapefile_raw)\n",
    "\n",
    "# Read conversion csv and list the new class names ordered by class value (duplicates removed)\n",
    "class_conversion_dataframe = pd.read_csv(conversion_csv_file)\n",
    "new_classes_sorted = class_conversion_dataframe[['new_class', 'new_value']].sort_values(by = 'new_value')\n",
    "new_class_names = list(dict.fromkeys(new_classes_sorted['new_class'].values))\n",
    "\n",
    "# Convert the raw land cover values to the new class values\n",
    "old_to_new_value = class_conversion_dataframe.set_index('old_value')['new_value']\n",
    "converted_land_cover = shapefile['LC'].map(old_to_new_value)\n",
    "\n",
    "# A value absent from the conversion table is mapped to NaN, which cannot be stored\n",
    "# in the unsigned integer ('u1') land cover array: stop before writing any file\n",
    "not_converted = converted_land_cover.isna()\n",
    "if not_converted.any():\n",
    "    unknown_values = shapefile.loc[not_converted, 'LC'].unique().tolist()\n",
    "    raise ValueError(f'Land cover values without conversion in {conversion_csv_file}: {unknown_values}')\n",
    "shapefile['LC'] = converted_land_cover\n",
    "\n",
    "# Save shapefile\n",
    "shapefile.to_file(shapefile_path, index = False)\n",
    "\n",
    "# Convert the land cover to an xarray DataArray saved at land_cover_path\n",
    "land_cover_path = config_params.land_cover_path\n",
    "convert_geodataframe_to_xarray(shapefile, land_cover_path, name = 'class', variable = 'LC', data_type = 'u1', global_attributes = [{'class_names': new_class_names}])\n",
    "\n",
    "# Check the result; the context manager closes the file even if the display fails\n",
    "with xr.open_dataarray(land_cover_path) as landcover:\n",
    "    landcover.attrs['class_names'] = new_class_names\n",
    "    display(landcover)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Soil data management\n",
    "\n",
    "### Define soil extraction function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tqdm import tqdm  # to follow progress\n",
    "from rasterio.mask import mask  # to mask images\n",
    "from shapely.geometry import box  # to extract parcel statistics\n",
    "from typing import List, Optional  # to declare variables\n",
    "\n",
    "\n",
    "def extract_soil_rasterstats(raster_path: str, shapefile: str) -> Optional[List[list]]:\n",
    "    \"\"\"\n",
    "    Extract the mean value of a raster over every polygon of a shapefile.\n",
    "\n",
    "    The shapefile is reprojected in the spatial reference of the raster, then\n",
    "    the raster is masked with each polygon (every pixel touched by the polygon\n",
    "    is kept) and the mean of the remaining pixels is computed over all the\n",
    "    bands. Nodata pixels and pixels outside of the polygon are ignored.\n",
    "\n",
    "    Arguments\n",
    "    =========\n",
    "\n",
    "    1. raster_path: ``str``\n",
    "        path to Geotiff\n",
    "    2. shapefile: ``str``\n",
    "        path to shapefile, it must contain a land cover (``LC``) attribute\n",
    "\n",
    "    Returns\n",
    "    =======\n",
    "\n",
    "    1. raster_stats: ``Optional[List[list]]``\n",
    "        one ``[id, mean, LC]`` list per polygon of the shapefile, where\n",
    "        ``id`` is the polygon index + 1. This list is returned to be later\n",
    "        aggregated in a ``DataFrame``. ``None`` is returned (after printing\n",
    "        a message) if a polygon does not overlap the raster.\n",
    "    \"\"\"\n",
    "\n",
    "    # List where zonal statistics will be stored\n",
    "    raster_stats = []\n",
    "\n",
    "    # Open raster, the context manager closes it even on the early return below\n",
    "    with rio.open(raster_path) as raster_dataset:\n",
    "\n",
    "        # Open shapefile with geopandas and reproject it in the raster spatial reference\n",
    "        parcels = gpd.read_file(shapefile).to_crs(raster_dataset.crs)\n",
    "\n",
    "        # Create progress bar (closed by its context manager)\n",
    "        with tqdm(total = len(parcels.index), desc='Extracting polygon values', unit=' polygons') as progress_bar:\n",
    "\n",
    "            # Loop on the individual polygons in the shapefile geometry\n",
    "            for index, row in parcels.iterrows():\n",
    "\n",
    "                # Mask the raster using the geometry. filled = False returns a masked\n",
    "                # array in which nodata pixels and pixels outside of the polygon are\n",
    "                # masked, so they are ignored even if the raster has no nodata value.\n",
    "                # rasterio raises a ValueError when the geometry does not overlap the raster.\n",
    "                try:\n",
    "                    parcel_pixels, _ = mask(raster_dataset, [row.geometry], crop = True, all_touched = True, filled = False)\n",
    "                except ValueError:\n",
    "                    print('\\nShapefile bounds are not contained in soil raster bounds.\\n\\nExiting script.')\n",
    "                    return None\n",
    "\n",
    "                # Replace the masked values with nan\n",
    "                parcel_values = parcel_pixels.astype(np.float32).filled(np.nan)\n",
    "\n",
    "                # Calculate the zonal statistics: [parcel id, mean value, land cover]\n",
    "                raster_stats.append([index + 1, np.nanmean(parcel_values), row.LC])\n",
    "\n",
    "                # Update progress bar\n",
    "                progress_bar.update(1)\n",
    "\n",
    "    return raster_stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Extracting polygon values: 100%|██████████| 7485/7485 [00:12<00:00, 590.73 polygons/s]\n",
      "Extracting polygon values: 100%|██████████| 7485/7485 [00:12<00:00, 592.57 polygons/s]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>Wwp</th>\n",
       "      <th>Wfc</th>\n",
       "      <th>LC</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0.067528</td>\n",
       "      <td>0.230739</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>0.080896</td>\n",
       "      <td>0.277372</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>0.080420</td>\n",
       "      <td>0.280211</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>0.080679</td>\n",
       "      <td>0.279007</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0.080795</td>\n",
       "      <td>0.278668</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7480</th>\n",
       "      <td>7481</td>\n",
       "      <td>0.080901</td>\n",
       "      <td>0.278317</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7481</th>\n",
       "      <td>7482</td>\n",
       "      <td>0.080946</td>\n",
       "      <td>0.278263</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7482</th>\n",
       "      <td>7483</td>\n",
       "      <td>0.080867</td>\n",
       "      <td>0.278621</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7483</th>\n",
       "      <td>7484</td>\n",
       "      <td>0.080867</td>\n",
       "      <td>0.278621</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7484</th>\n",
       "      <td>7485</td>\n",
       "      <td>0.080966</td>\n",
       "      <td>0.278179</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>7485 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        id       Wwp       Wfc  LC\n",
       "0        1  0.067528  0.230739  16\n",
       "1        2  0.080896  0.277372  16\n",
       "2        3  0.080420  0.280211  16\n",
       "3        4  0.080679  0.279007   9\n",
       "4        5  0.080795  0.278668   9\n",
       "...    ...       ...       ...  ..\n",
       "7480  7481  0.080901  0.278317  10\n",
       "7481  7482  0.080946  0.278263  13\n",
       "7482  7483  0.080867  0.278621  13\n",
       "7483  7484  0.080867  0.278621   6\n",
       "7484  7485  0.080966  0.278179   6\n",
       "\n",
       "[7485 rows x 4 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Get soil data save path\n",
    "soil_path = config_params.soil_path\n",
    "\n",
    "# Raw soil data and the geotiffs generated from it for the extraction\n",
    "# NOTE: absolute paths specific to one machine, adapt them to the location of your data\n",
    "raw_soil =  '/mnt/e/DATA/SOIL/Aurade_test/Soil_low_resolution.nc'\n",
    "raw_Wwp = '/mnt/e/DATA/SOIL/Aurade_test/Soil_low_resolution_Wwp.tif'\n",
    "raw_Wfc = '/mnt/e/DATA/SOIL/Aurade_test/Soil_low_resolution_Wfc.tif'\n",
    "\n",
    "# Open raw dataset, the context manager closes it once the geotiffs are written\n",
    "# NOTE(review): the .rio accessor is only available once rioxarray has been imported,\n",
    "# presumably by one of the modspa_pixel modules - confirm\n",
    "with xr.open_dataset(raw_soil) as raw_soil_dataset:\n",
    "    soil_dataset = raw_soil_dataset.rio.write_crs('EPSG:4326')\n",
    "\n",
    "    # Fill nan values of each variable with its spatial mean\n",
    "    for variable in ['Wfc', 'Wwp']:\n",
    "        soil_dataset[variable] = soil_dataset[variable].fillna(soil_dataset[variable].mean(dim = ['latitude', 'longitude']))\n",
    "\n",
    "    # Generate two geotiffs from the soil dataset\n",
    "    soil_dataset.Wwp.rio.to_raster(raw_Wwp)\n",
    "    soil_dataset.Wfc.rio.to_raster(raw_Wfc)\n",
    "\n",
    "# Shapefile path\n",
    "shapefile_path = config_params.shapefile_path\n",
    "\n",
    "# Extract zonal statistics\n",
    "soil_stats_Wwp = extract_soil_rasterstats(raw_Wwp, shapefile_path)\n",
    "soil_stats_Wfc = extract_soil_rasterstats(raw_Wfc, shapefile_path)\n",
    "\n",
    "# The extraction returns None on failure, stop here instead of building an empty dataframe\n",
    "if soil_stats_Wwp is None or soil_stats_Wfc is None:\n",
    "    raise RuntimeError('Soil extraction failed: extract_soil_rasterstats returned no statistics (see the message printed above).')\n",
    "\n",
    "# Build dataframe with zonal stats\n",
    "soil_df = pd.DataFrame(soil_stats_Wwp, columns = ['id', 'Wwp', 'LC'])\n",
    "soil_df['Wfc'] = pd.DataFrame(soil_stats_Wfc, columns = ['id', 'Wfc', 'LC'])['Wfc']\n",
    "soil_df = soil_df[['id', 'Wwp', 'Wfc', 'LC']]  # reorder variables\n",
    "soil_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Convert DataFrame to xarray dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert the zonal statistics DataFrame to an xarray dataset (soil_path is the soil data save path)\n",
    "convert_dataframe_to_xarray(\n",
    "    soil_df,\n",
    "    soil_path,\n",
    "    variables = ['Wwp', 'Wfc'],\n",
    "    data_types = ['f4', 'f4'],\n",
    "    time_dimension = False,\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "modspa_pixel",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}