Skip to content
Snippets Groups Projects
download_ERA5_weather.py 8.68 KiB
Newer Older
  • Learn to ignore specific revisions
  • @author: rivallandv, heavily modified by jeremy auclair
    
    Download ERA5 daily weather files for modspa
    
    """
    
    import glob  # for path management
    import sys  # for path management
    import os  # for path exploration
    import geopandas as gpd  # to manage shapefiles
    
    from datetime import datetime  # manage dates
    
    from psutil import cpu_count  # to get number of physical cores available
    
    import modspa_pixel.preprocessing.lib_era5_land_pixel as era5land  # custom built functions for ERA5-Land data download
    
    from modspa_pixel.config.config import config  # to load modspa config file
    
    from modspa_pixel.preprocessing.parcel_to_pixel import convert_dataframe_to_xarray
    
    def request_ER5_weather(config_file: str, ndvi_path: str, raw_S2_image_ref: str = None, shapefile: str = None,  mode: str = 'pixel') -> str:
    
        Download ERA5 reanalysis daily weather files, concatenate and calculate ET0
    
        to obtain a netCDF4 dataset for precipitation and ET0 values. Weather data
        reprojection and conversion can take some time for large spatial windows.
    
        Arguments
        =========
        
        1. config_file: ``str``
    
            path to ndvi cube, used for weather data reprojection
    
        3. raw_S2_image_ref: ``str`` ``default = None``
            unmodified sentinel-2 image at correct resolution for
            weather data reprojection in pixel mode
        4. shapefile: ``str`` ``default = None``
            path to shapefile for extraction in parcel mode
        5. mode: ``str`` ``default = 'pixel'``
            choose between ``'pixel'`` and ``'parcel'`` mode
    
        Returns
        =======
        
        1. weather_file: ``str``
            path to netCDF4 file containing weather data
    
        outpath = config_params.data_path + os.sep + 'WEATHER' + os.sep + config_params.run_name
    
    
        # Geometry configuration
        wgs84_epsg = 'epsg:4326'  # WGS84 is the ERA5 epsg
        
        # ERA5 product parameters
        wind_height = 10  # height of ERA5 wind measurements in meters
    
        print('REQUEST CONFIGURATION INFORMATIONS:')
        if config_params.shapefile_path:
            if os.path.exists(config_params.shapefile_path):
                print('shapeFile: ', config_params.shapefile_path)
            else:
                print('shapeFile not found')
    
        else:
            # print('specify either shapeFile, boxbound or point coordinate in json file')
            print('specify shapeFile in json file')
            sys.exit(-1)
        print('period: ', config_params.start_date, ' - ', config_params.end_date)
        print('experiment name:', config_params.run_name)
        if os.path.exists(outpath):
            print('path for nc files: ', outpath)
        else:
            os.mkdir(outpath)
            print('mkdir path for nc files: ', outpath)
        print('----------')
    
    
        # Request ERA5-land BoxBound Determination
    
        if config_params.shapefile_path:
            # Load shapefile to access geometrics informations for ERA5-Land request
            gdf_expe_polygons = gpd.read_file(config_params.shapefile_path)
            print('Input polygons CRS :', gdf_expe_polygons.crs)
            expe_epsg = gdf_expe_polygons.crs
    
            # verification que les polygones sont tous fermés
            liste_polygons_validity = gdf_expe_polygons.geometry.is_valid
            if list(liste_polygons_validity).count(False) > 0:
    
                print('some polygons of Shapefile are not valid')
                polygons_invalid = liste_polygons_validity.loc[liste_polygons_validity == False]
                print('invalid polygons:', polygons_invalid)
    
                for i in polygons_invalid.index:
                    gdf_expe_polygons.geometry[i]
    
                # Application d'un buffer de zero m
                gdf_expe_polygons_clean = gdf_expe_polygons.geometry.buffer(0)
                gdf_expe_polygons = gdf_expe_polygons_clean
    
            # search for the total extent of the whole polygons in lat/lon [xlo/ylo/xhi/yhi] [W S E N]
            expe_polygons_boxbound = gdf_expe_polygons.geometry.total_bounds
            expe_polygons_boxbound = list(expe_polygons_boxbound)
            print('shape extend in ', expe_epsg.srs, ':', expe_polygons_boxbound)
    
            if expe_epsg.srs != wgs84_epsg:
                print('--- convert extend in wgs84 coordinates ---')
    
                # idem en wgs84 pour des lat/lon en degree (format utilisé par google earth engine)
    
                expe_polygons_boxbound_wgs84 = gdf_expe_polygons.to_crs(wgs84_epsg).geometry.total_bounds
                
    
                # convert to list for earth engine
                expe_polygons_boxbound_wgs84 = list(expe_polygons_boxbound_wgs84)
            else:
                expe_polygons_boxbound_wgs84 = expe_polygons_boxbound
    
            # switch coordinates order to agree with ECMWF order: N W S E
    
            expe_area = expe_polygons_boxbound_wgs84[3], expe_polygons_boxbound_wgs84[0], expe_polygons_boxbound_wgs84[1], expe_polygons_boxbound_wgs84[2]
    
    
        print('boxbound [N W S E] extend in ', wgs84_epsg)
        print(expe_area)
        # determine boxbound for ECMWF request (included shape boxbound)
        era5_expe_polygons_boxbound_wgs84 = era5land.era5_enclosing_shp_aera(expe_area, 0.1)
        print('boxbound [N W S E] request extend in ', wgs84_epsg)
        print(era5_expe_polygons_boxbound_wgs84)
    
        print('--start request--')
    
        # Get number of available CPUs
        nb_processes = 4 * min([cpu_count(logical = False), len(os.sched_getaffinity(0)), config_params.max_cpu])  # downloading data demands very little computing power, each processor core can manage multiple downloads
    
        # Call daily data
        era5land.call_era5land_daily_for_MODSPA(config_params.start_date, config_params.end_date, era5_expe_polygons_boxbound_wgs84, output_path = outpath, processes = nb_processes)
    
        
        # Get list of files
        list_era5land_hourly_ncFiles = []
        for file in glob.glob(outpath + os.sep + 'ERA5-land_*'):
            if era5land.filename_to_datetime(file) >= datetime.strptime(config_params.start_date, '%Y-%m-%d').replace(day = 1).date() and era5land.filename_to_datetime(file) <= datetime.strptime(config_params.end_date, '%Y-%m-%d').replace(day = 1).date() and file.endswith('.nc'):
                list_era5land_hourly_ncFiles.append(file)
        list_era5land_hourly_ncFiles.sort()
        
    
        for ncfile in list_era5land_hourly_ncFiles:
            print(ncfile)
        
        save_dir = outpath + os.sep + 'ncdailyfiles'
        if os.path.exists(outpath+os.sep+'ncdailyfiles'):
            print('path for nc daily files: ', save_dir)
        else:
            os.mkdir(outpath+os.sep+'ncdailyfiles')
            print('mkdir path for nc daily files: ', save_dir)
        print('----------')
    
        # Save daily wheather data into ncfile
    
        weather_daily_ncFile = save_dir + os.sep + config_params.start_date + '_' + config_params.end_date + '_' + config_params.run_name + '_era5-land-daily-meteo'
    
    
        # Temporary save directory for daily file merge
    
        variable_list = ['2m_temperature_daily_maximum', '2m_temperature_daily_minimum', 'total_precipitation_daily_maximum', '10m_u_component_of_wind_daily_mean', '10m_v_component_of_wind_daily_mean', 'surface_solar_radiation_downwards_daily_maximum']
    
    
        # Aggregate monthly files
        aggregated_files = era5land.concat_monthly_nc_file(list_era5land_hourly_ncFiles, variable_list, save_dir)
        
    
        # Generate pandas dataframe for parcel mode
        if mode == 'parcel':
            
            # Create save path
            weather_datframe = weather_daily_ncFile + '_df.csv'
            weather_dataset = weather_daily_ncFile + '_parcel.nc'
            
    
            # Check if weather dataset already exists
            if os.path.exists(weather_dataset) and not config_params.weather_overwrite:
                return weather_dataset
            
            # Generate daily weather datasets as Geotiffs for each variable
    
            weather_daily_rain, weather_daily_ET0 = era5land.era5Land_daily_to_yearly_parcel(aggregated_files, weather_daily_ncFile, config_params.start_date, config_params.end_date, h = wind_height)
    
            # Generate and save weather dataframe
            era5land.extract_weather_dataframe(weather_daily_rain, weather_daily_ET0, shapefile, config_file, weather_datframe)
            
            # Convert dataframe to xarray dataset
            convert_dataframe_to_xarray(weather_datframe, weather_dataset, variables = ['Rain', 'ET0'], data_types = ['u2', 'u2'])
    
        
            print('\nWeather dataset:', weather_daily_ncFile)
    
        # Calculate ET0 over the whole time period
    
        weather_daily_ncFile = era5land.era5Land_daily_to_yearly_pixel(aggregated_files, weather_daily_ncFile, raw_S2_image_ref, ndvi_path, config_params.start_date, config_params.end_date, h = wind_height, max_ram = 16, weather_overwrite = config_params.weather_overwrite)
    
        print('\nWeather dataset:', weather_daily_ncFile)