lib_era5_land_pixel.py

    raster_dataset = rio.open(raster_path, mode = 'r')

    # Get input raster spatial reference and epsg code to reproject shapefile in the same spatial reference
    target_epsg = raster_dataset.crs

    # Open shapefile with geopandas and reproject its geometry
    shapefile = gpd.read_file(shapefile)
    shapefile['geometry'] = shapefile['geometry'].to_crs(target_epsg)

    # Get no data value
    nodata = raster_dataset.nodata
    
    # Get the number of bands
    nbands = raster_dataset.count
    
    # Create progress bar
    progress_bar = tqdm(total = len(shapefile.index), desc='Extracting zonal statistics', unit=' polygons')

    # Loop on the individual polygons in the shapefile geometry
    for index, row in shapefile.iterrows():
        
        # Get the feature geometry as a shapely object
        geom = row.geometry
        
        # id number of the current parcel geometry
        id = index + 1
        
        # Get land cover
        LC = row.LC
        
        # Crop the raster using the bounding box
        try:
            masked_raster, _ = mask(raster_dataset, [geom], crop = True)
        except:
            print('\nShapefile bounds are not contained in weather dataset bounds.\n\nExiting script.')
            return None
        
        # Mask the raster using the geometry
        masked_raster, _ = mask(raster_dataset, [geom], crop = True)
        
        # Replace the nodata values with nan
        masked_raster = masked_raster.astype(np.float32)
        masked_raster[masked_raster == nodata] = np.NaN
        
        # Calculate the zonal statistics
        mean = np.nanmean(masked_raster, axis = (1,2))
        
        # Add statistics to output list
        raster_stats.extend([[dates[i], id, mean[i], LC] for i in range(nbands)])
        
        # Update progress bar
        progress_bar.update(1)
    
    # Close dataset and progress bar
    raster_dataset.close()
    progress_bar.close()

    return raster_stats


def extract_weather_dataframe(rain_path: str, ET0_path: str, shapefile: str, config_file: str, save_path: str) -> None:
    """
    Extract a weather dataframe for each variable (Rain, ET0) and merge them in one
    dataframe. This dataframe is saved as ``csv`` file.

    Arguments
    =========

    1. rain_path: ``str``
        path to rain Geotiff file
    2. ET0_path: ``str``
        path to ET0 Geotiff file
    3. shapefile: ``str``
        path to shapefile
    4. config_file: ``str``
        path to config file
    5. save_path: ``str``
        save path for weather dataframe

    Returns
    =======

    ``None``
    """
    
    # Generate arguments for multiprocessing
    args = [(rain_path, shapefile, config_file), (ET0_path, shapefile, config_file)]
    
    print('\nStarting weather data extraction on two cores..\n')
    
    # Extract weather values for both weather varialbes
    with Pool(2) as p:
        results = p.map(extract_rasterstats, args)
    
    # Collect results in a single dataframe
    weather_dataframe = pd.DataFrame(results[0], columns = ['date', 'id', 'Rain', 'LC'])
    weather_dataframe['ET0'] = pd.DataFrame(results[1], columns = ['date', 'id', 'ET0', 'LC'])['ET0']
    
    # Reorder columns
    weather_dataframe = weather_dataframe.reindex(columns = ['date', 'id', 'Rain', 'ET0', 'LC'])
    
    # Format datatypes
    weather_dataframe['Rain'] = np.round(weather_dataframe['Rain']).astype(int)
    weather_dataframe['ET0'] = np.round(weather_dataframe['ET0']).astype(int)
    
    # Change date type
    weather_dataframe['date'] = pd.to_datetime(weather_dataframe['date'])
    
    # Save dataframe to csv
    weather_dataframe.to_csv(save_path, index = False)
    
    return None