Newer
Older

Jérémy AUCLAIR
committed
raster_dataset = rio.open(raster_path, mode = 'r')
Jeremy Auclair
committed
# Get input raster spatial reference and epsg code to reproject shapefile in the same spatial reference
Jeremy Auclair
committed
target_epsg = raster_dataset.crs
Jeremy Auclair
committed
# Open shapefile with geopandas and reproject its geometry
shapefile = gpd.read_file(shapefile)
shapefile['geometry'] = shapefile['geometry'].to_crs(target_epsg)
# Get no data value
Jeremy Auclair
committed
nodata = raster_dataset.nodata
Jeremy Auclair
committed
# Get the number of bands
Jeremy Auclair
committed
nbands = raster_dataset.count
Jeremy Auclair
committed
# Create progress bar

Jérémy AUCLAIR
committed
progress_bar = tqdm(total = len(shapefile.index), desc='Extracting zonal statistics', unit=' polygons')
Jeremy Auclair
committed
# Loop on the individual polygons in the shapefile geometry
for index, row in shapefile.iterrows():
# Get the feature geometry as a shapely object
geom = row.geometry
# id number of the current parcel geometry
id = index + 1
# Get land cover
LC = row.LC
# Crop the raster using the bounding box
try:

Jérémy AUCLAIR
committed
masked_raster, _ = mask(raster_dataset, [geom], crop = True)
Jeremy Auclair
committed
except:
print('\nShapefile bounds are not contained in weather dataset bounds.\n\nExiting script.')
return None
# Mask the raster using the geometry

Jérémy AUCLAIR
committed
masked_raster, _ = mask(raster_dataset, [geom], crop = True)
Jeremy Auclair
committed
# Replace the nodata values with nan
masked_raster = masked_raster.astype(np.float32)
masked_raster[masked_raster == nodata] = np.NaN
# Calculate the zonal statistics

Jérémy AUCLAIR
committed
mean = np.nanmean(masked_raster, axis = (1,2))
# Add statistics to output list
raster_stats.extend([[dates[i], id, mean[i], LC] for i in range(nbands)])
Jeremy Auclair
committed
# Update progress bar
progress_bar.update(1)
# Close dataset and progress bar
Jeremy Auclair
committed
raster_dataset.close()
Jeremy Auclair
committed
progress_bar.close()
Jeremy Auclair
committed
return raster_stats
Jeremy Auclair
committed
def extract_weather_dataframe(rain_path: str, ET0_path: str, shapefile: str, config_file: str, save_path: str) -> None:
"""
Extract a weather dataframe for each variable (Rain, ET0) and merge them in one
dataframe. This dataframe is saved as ``csv`` file.
Arguments
=========
Jeremy Auclair
committed
path to rain Geotiff file
Jeremy Auclair
committed
path to ET0 Geotiff file
Jeremy Auclair
committed
path to shapefile
Jeremy Auclair
committed
path to config file
Jeremy Auclair
committed
save path for weather dataframe
Returns
=======
``None``
"""
# Generate arguments for multiprocessing
args = [(rain_path, shapefile, config_file), (ET0_path, shapefile, config_file)]
print('\nStarting weather data extraction on two cores..\n')
# Extract weather values for both weather varialbes
with Pool(2) as p:
Jeremy Auclair
committed
results = p.map(extract_rasterstats, args)
Jeremy Auclair
committed
# Collect results in a single dataframe
weather_dataframe = pd.DataFrame(results[0], columns = ['date', 'id', 'Rain', 'LC'])
weather_dataframe['ET0'] = pd.DataFrame(results[1], columns = ['date', 'id', 'ET0', 'LC'])['ET0']
# Reorder columns
weather_dataframe = weather_dataframe.reindex(columns = ['date', 'id', 'Rain', 'ET0', 'LC'])
# Format datatypes
weather_dataframe['Rain'] = np.round(weather_dataframe['Rain']).astype(int)
weather_dataframe['ET0'] = np.round(weather_dataframe['ET0']).astype(int)
# Change date type
weather_dataframe['date'] = pd.to_datetime(weather_dataframe['date'])
# Save dataframe to csv
weather_dataframe.to_csv(save_path, index = False)
return None