diff --git a/README.md b/README.md index 6a81863217c0a85170bde0c78cc0fc11b4359067..386caa5fcfa0d3ba7761789ea1c5bfa131716819 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,9 @@ See https://www.gnu.org/licenses/gpl-3.0.en.html for details or the LICENSE.txt Author: ------- -Gilles Boulet -Jérémy Auclair +Gilles Boulet, +Jérémy Auclair, + CESBIO UMR5126, (CNRS,UPS,CNES,IRD) 13 avenue du Colonel Roche 31401 Toulouse cedex 9 diff --git a/config/config.json b/config/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/config/config.py b/config/config.py new file mode 100644 index 0000000000000000000000000000000000000000..7a2c7278e9bb15b548cb7f60c2434496b7ab59ee --- /dev/null +++ b/config/config.py @@ -0,0 +1,35 @@ +# -*- coding: UTF-8 -*- +# Python +""" +21-08-2024 +@author: jeremy auclair +""" +# Create class that contains configuration file data + +from json import load # to open config file + + +class config: + """ + The `Config` class contains input parameters necessary for the modspa chain to run. + + It loads all these parameters from the ``.json`` input file and loads it automatically. + + Attributes + ========== + + - ___: ``str`` + - ___: ``str`` + """ + + # Constructor + def __init__(self, config_file: str) -> None: + + # Load json file + with open(config_file, "r") as read_file: + input_data = load(read_file) + + # Add attributes in new object + for key, value in input_data.items(): + setattr(self, key.strip(), value) + diff --git a/config/parameters.csv b/config/parameters.csv new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/run_sparse.py b/run_sparse.py index a80a112ec18bafd85152846ba80ba795ec6ae940..3eef11f0adf37065d2ce99cc80e2963b2690a2be 100644 --- a/run_sparse.py +++ b/run_sparse.py @@ -12,16 +12,17 @@ To test model, see: https://runningfingers.com/seb.php if __name__ == '__main__': import numpy as np # vectorized calculations import src.pySPARSE as pySP # load SPARSE model function + import config.config as config # load config class # Inputs - Tsurf = np.full((2,2), 297.24) # 2D + Tsurf = np.full((2,2), 297.24) # 2D optical vza = 0 # 0D = 0 rg = np.full((2,2), 630) # 2D météo Ta = np.full((2,2), 293.15) # 2D météo rh = np.full((2,2), 50) # 2D météo ua = np.full((2,2), 2) # 2D météo - za = 3 # 0D météo -> doit être plus haut que Zf (significativement, loi de passage) - lai = np.full((2,2), 1.5) # 2D + za = 10 # 0D météo -> doit être plus haut que Zf (significativement, loi de passage) + lai = np.full((2,2), 1.5) # 2D optical glai = np.full((2,2), 1.5) # 2D 1er temps : comme LAI zf = np.full((2,2), 1) # 2D avec le landcover rstmin = np.full((2,2), 100) # 2D landcover : herbacé 100, arboré 200 @@ -29,7 +30,7 @@ if __name__ == '__main__': emisv = 0.98 # 0D emiss = 0.96 # 0D emissf = 0.97 # 0D - albe = np.full((2,2), 0.3) # 2D + albe = np.full((2,2), 0.3) # 2D optical xg = 0.315 # 0D sigmoy = 0.5 # 0D albmode = 'UnCapped' @@ -43,12 +44,9 @@ if __name__ == '__main__': - Write small python sripts to download, load or generate the 2D sources - Write a csv file containing 0D parameters for the function - Add function to generate a clean netCDF4 output file, with 2D variables as DataArrays and 0D variables as scalars, write all necessary attributes + - Add option to clip inputs with shapefile - * 2D Weather inputs: - ! Rg (incoming radiation - ERA5Land) - ! Ta (surface air temperature - ERA5Land) - ! RH (relative humidity - ERA5Land) - ! UA (surface wind speed - ERA5Land) + * Weather: done * 2D Optical inputs: ! Tsurf (surface temperature - LandSat) ! LAI (leaf area index - with BVNET from LandSat) diff --git a/src/landcover.py b/src/landcover.py new file mode 100644 index 0000000000000000000000000000000000000000..5cd887952e2818d772e2d534e0cf10e3c14d6871 --- /dev/null +++ b/src/landcover.py @@ -0,0 +1,21 @@ +# -*- coding: UTF-8 -*- +# Python +""" +23-08-2024 +@author: jeremy auclair + +Generate vegetation inputs for pySPARSE from land cover data. +""" + + + + +# For testing +if __name__ == '__main__': + from time import time # to time code excecution + + # Get start time of script + t0 = time() + + # Print runtime + print('\nExcecution time:', time() - t0, 's', end = '\n\n') \ No newline at end of file diff --git a/src/optical.py b/src/optical.py new file mode 100644 index 0000000000000000000000000000000000000000..9cac2cf089fd758f423749324e61a1eb66eb80ab --- /dev/null +++ b/src/optical.py @@ -0,0 +1,141 @@ +# -*- coding: UTF-8 -*- +# Python +""" +22-08-2024 +@author: jeremy auclair + +Generate optical inputs for pySPARSE from LandSat products. +""" + +import os # to manage file paths +from fnmatch import fnmatch # to find file names from regular expressions +import numpy as np # to manage vectorized calculations +import xarray as xr # to manage georeferenced datasets +from weather import product_str_to_datetime # to get datetime + + +def open_landsat_product(landsat_dir: str, output_dir: str, lai_file: str) -> str: + """ + Open LandSat image, calculate albedo, extract surface temperature and + load custom LAI to save in a single netCDF4 file. + + Arguments + ========= + + 1. landsat_dir: ``str`` + directory containing landsat bands + 2. output_dir: ``str`` + output directory to save inputs + 3. lai_file: ``str`` + path to file containing LAI data. + Must have the same resolution and projection as LandSat image. + + Returns + ======= + + 1. save_name: ``str`` + path to optical data netCDF4 dataset + """ + + # Define global variables + chunk_size3 = {'band': -1, 'y': 2048, 'x': 2048} + chunk_size2 = {'y': 2048, 'x': 2048} + # TODO: change mask value for other satellites + mask_val = 21824 + sr_scale = np.array([2.75e-5, - 0.2], dtype = np.float32) + st_scale = np.array([3.41802e-3, 149], dtype = np.float32) + + # Liang coefficients to calculate albedo + liang_coeffs_a = xr.DataArray(np.array([0.356, 0, 0.130, 0.373, 0.085, 0.072], dtype = np.float32), coords = {'band': 6}) + liang_coeffs_a['band'] = np.arange(2, 8) + liang_coeffs_b = np.float32(-0.0018) + + # Get xml file path + for root, _, files in os.walk(landsat_dir): + for name in files: + if fnmatch(name, '*_MTL.xml'): + xml_file = os.path.join(root, name) + + # Get date of image + passage_datetime = product_str_to_datetime(os.path.basename(landsat_dir), xml_file) + + # Create file list from image directory to load bands for albedo calculation + band_list = [] + for ext in ['*_SR_B2.TIF', '*_SR_B3.TIF', '*_SR_B4.TIF', '*_SR_B5.TIF', '*_SR_B6.TIF', '*_SR_B7.TIF']: + for root, _, files in os.walk(landsat_dir): + for name in files: + if fnmatch(name, ext): + band_list.append(os.path.join(root, name)) + + # Open required bands + input_image = xr.open_mfdataset(band_list, combine = 'nested', concat_dim = 'band').chunk(chunk_size3) * sr_scale[0] + sr_scale[1] + input_image['band'] = np.arange(2, 8) + crs = input_image.rio.crs + + # Calculate albedo + input_image = input_image.weighted(liang_coeffs_a).mean(dim = 'band').rename({'band_data': 'albedo'}).clip(min = 0, max = 1) + liang_coeffs_b + + # Get surface temperature band + for root, _, files in os.walk(landsat_dir): + for name in files: + if fnmatch(name, '*_ST_B10.TIF'): + st_file = os.path.join(root, name) + + # Open surface temperature + input_image['surface_temperature'] = xr.open_dataarray(st_file).squeeze(dim = 'band', drop = True).chunk(chunk_size2) * st_scale[0] + st_scale[1] + + # Open LAI image (same resolution and projection) + input_image['lai'] = xr.open_dataarray(lai_file).chunk(chunk_size2) + + # Get mask path + for root, _, files in os.walk(landsat_dir): + for name in files: + if fnmatch(name, '*_QA_PIXEL.TIF'): + mask_path = os.path.join(root, name) + + # Open mask + mask = xr.open_dataarray(mask_path).chunk(chunk_size2).squeeze(dim = 'band', drop = True).astype(np.uint16) + + # Mask image + input_image = input_image.where(mask == mask_val, other = np.nan) + + # Create encoding dictionnary + for variable in list(input_image.keys()): + # Write encoding dict + encod = {} + encod['dtype'] = 'f4' + if '_FillValue' in input_image[variable].attrs: + del input_image[variable].attrs['_FillValue'] + encod['_FillValue'] = np.nan + # TODO: check if compression affects reading speed + encod['zlib'] = True + encod['complevel'] = 1 + input_image[variable].encoding.update(encod) + + # Write crs + input_image.rio.write_crs(crs, inplace = True) + + # Create save name and save image + save_name = os.path.join(output_dir, 'LandSat_' + passage_datetime.strftime(format = '%Y-%m-%d') + '_optical.nc') + input_image.to_netcdf(save_name) + + return save_name + + +# For testing +if __name__ == '__main__': + from time import time # to time code excecution + + # Get start time of script + t0 = time() + + out_dir = '/mnt/e/SPARSE/test' + l8_dir = '/mnt/e/TEST/LANDSAT/LC08_L2SP_198031_20210413_20210423_02_T1' + lai_path = '/mnt/e/SPARSE/test/LC08_L2SP_198031_20210413_20210423_02_T1_LAI.nc' + + out_name = open_landsat_product(l8_dir, out_dir, lai_path) + + print(out_name) + + # Print runtime + print('\nExcecution time:', time() - t0, 's', end = '\n\n') \ No newline at end of file diff --git a/src/weather.py b/src/weather.py new file mode 100644 index 0000000000000000000000000000000000000000..89370d5c71aae57a729f05af62658f9048d8a95e --- /dev/null +++ b/src/weather.py @@ -0,0 +1,408 @@ +# -*- coding: UTF-8 -*- +# Python +""" +21-08-2024 +@author: jeremy auclair + +Generate weather inputs for pySPARSE from ERA5-Land data. +""" + +import os # for path exploration +import numpy as np # for math on arrays +import xarray as xr # to manage nc files +from rasterio.enums import Resampling # reprojection algorithms +from shapely.geometry import box # create polygon from bounding box +from geopandas import GeoDataFrame # create geodataframe +from datetime import datetime, timedelta # to manage dates +from fnmatch import fnmatch # to match srings +import re # to look for characters in a string +import xml.etree.ElementTree as ET # read xml files +from psutil import cpu_count # to get number of physical cores available +from psutil import virtual_memory # to check available ram + +# CDS API external library +import cdsapi # to download cds data + + +def product_str_to_datetime(product_name: str, xml_file: str) -> datetime: + """ + product_str_to_datetime returns a ``datetime`` object for the date of the given product. + Works for LandSat products. + + Arguments + ========= + + 1. product_name: ``str`` + name or path of the product + + Returns + ======= + + 1. product_date: ``datetime.date`` + datetime.date object, date of the product + """ + + # Look for time of passage in xml file + tree = ET.parse(xml_file) + root = tree.getroot() + + # Assuming the passage time is stored in an element named 'passage_time' + passage_time_str = root.find('.//SCENE_CENTER_TIME').text + + # Search for a date pattern (yyyymmdd) in the product name or path + try: + match = re.search('\d{4}\d{2}\d{2}', product_name) + format = '%Y%m%d/%H:%M:%S' + datetime_object = datetime.strptime(match[0] + '/' + passage_time_str[:8], format) + return datetime_object + except TypeError: + pass + + # Search for a date pattern (yymmdd) in the product name or path + try: + match = re.search('\d{2}\d{2}\d{2}', product_name) + format = '%Y%m%d/%H:%M:%S' + datetime_object = datetime.strptime(match[0] + '/' + passage_time_str[:8], format) + return datetime_object + except TypeError: + pass + + +def convert_bbox_crs(in_bbox: list[float, float, float, float], in_crs: str, out_crs: int = 4326) -> list[float, float, float, float]: + """ + Convert bounding box projection. + + Arguments + ========= + + 1. in_bbox: ``list[float, float, float, float]`` + input bounding boc + 2. in_crs: ``str`` + input crs + 3. out_crs: ``int`` ``default = 4326`` + output crs + + Returns + ======= + + 1. bbox: ``list[float, float, float, float]`` + reprojected bounding box + """ + + # Create a square polygon from the bounding box + polygon = box(*in_bbox) + + # Create a GeoDataFrame + gdf = GeoDataFrame({'geometry': [polygon]}, crs = in_crs).to_crs(out_crs) + + # Get bounding box in lat/lon + bbox = gdf.total_bounds + + return bbox + + +def era5_enclosing_shp_aera(bbox: list[float], pas: float) -> tuple[float, float, float, float]: + """ + Find the four coordinates including the boxbound scene + to agree with gridsize resolution + system projection: WGS84 lat/lon degree + + Arguments + ========= + + 1. bbox: ``List[float]`` + bounding box of the demanded area + list of floats: [lat north, lon west, lat south, lon east] in degree WGS84 + 2. pas: ``float`` + gridsize + + Returns + ======= + + 1. era5_area: ``Tuple[float, float, float, float]`` + coordinates list corresponding to N,W,S,E corners of the grid in decimal degree + + .. note:: + + gdal coordinates reference upper left corner of pixel, ERA5 coordinates refere to center of grid. To resolve this difference an offset of pas/2 is applied + + """ + + lat_max, lon_min, lat_min, lon_max = bbox[3], bbox[0], bbox[1], bbox[2] + # North + era5_lat_max = round((lat_max // pas + 2) * pas, 2) + # West + era5_lon_min = round((lon_min // pas) * pas, 2) + # South + era5_lat_min = round((lat_min // pas) * pas, 2) + # Est + era5_lon_max = round((lon_max // pas + 2) * pas, 2) + + era5_area = [era5_lat_max, era5_lon_min, era5_lat_min, era5_lon_max] + + return era5_area # [N,W,S,E] + + +def calculate_mem_size(time: int, y: int, x: int, data_vars: int, dtype: np.dtype) -> int: + """ + Calculate memory size of reprojected dataset in MiB. + + Arguments + ========= + + 1. time: ``int`` + length of time dimension + 2. y: ``int`` + length of y dimension (equivalent of latitude) + 3. x: ``int`` + mength of x dimension (equivalent of longitude) + 4. data_vars: ``int`` + number of data variables + 5. dtype: ``np.dtype`` + numpy data type + + Returns + ======= + + 1. mem_size: ``int`` + memory size of output dataset in MiB + """ + + return int((time * y * x * data_vars * np.dtype(dtype).itemsize) / (1024**2)) + + +def ea_calc(T: np.ndarray) -> np.ndarray: + """ + comments + Actual vapour pressure (ea or es) derived from dewpoint temperature or temperature. + + Arguments + ========= + + 1. T: ``np.ndarray`` + Temperature in degree celsius (float). + + Returns + ======= + + 1. e_a: ``np.ndarray`` + the actual Vapour pressure in Kpa + """ + + return 0.6108 * np.exp(17.27 * T / (T + 237.15)) + + +def call_era5landhourly(variables: str, passage_datetime: datetime, bbox: list[float, float, float, float], out_dir: str, gridsize: float = 0.1) -> None: + """ + Call a list of variables from CDS ERA5-land Reanalysis on an area, for a given dates with two hours. + + Arguments + ========= + + 1. variables: ``list[str]`` + variables to download. + https: // con f l u e nce.ecmwf.int/display/CKB/ERA5-Land%3A+data+documentation#ERA5Land:datadocumentation-parameterlistingParameterlistings + 2. passage_datetime: ``datetime`` + date and time of satellite passage in datetime format + 3. bbox: ``list[float]`` + bounding box [lat north, lon west, lat south, lon east] in degree WGS84 + 4. out_dir : ``str``, + absolute directory for output netcdf file. + 5. gridsize : ``float`` ``default = 0.1`` + spatial resolution in degree. The default is 0.1. + + Returns + ======= + + 1. output_filename: ``str`` + output filename + """ + + # full path name of the output file + output_filename = os.path.join(out_dir, 'ERA5Land_' + passage_datetime.strftime(format = '%Y-%m-%d') + '_weather.nc') + + # Get previous and next hour of satellite passage + previous_hour = passage_datetime.replace(minute = 0, second = 0, microsecond = 0) + next_hour = previous_hour + timedelta(hours = 1) + + # Get modified bbox + print(bbox) + area = era5_enclosing_shp_aera(bbox, gridsize) + print(area) + + # Check if file already exists + if os.path.isfile(output_filename): + print('\n', output_filename, 'already exist !\n') + else: + # cds api request + c = cdsapi.Client(timeout = 300) + try: + c.retrieve('reanalysis-era5-single-levels', + { + 'product_type': 'reanalysis', + 'format': 'netcdf', + 'variable': variables, + 'year': passage_datetime.strftime(format = '%Y'), + 'month': passage_datetime.strftime(format = '%m'), + 'day': passage_datetime.strftime(format = '%d'), + 'time': [previous_hour.strftime(format = '%H:%M'), next_hour.strftime(format = '%H:%M')], + 'area': area, + 'grid': [gridsize, gridsize], + }, + output_filename) + print('\n', output_filename, ' downloaded !\n') + + except: + print('\nRequest', variables, ' failed !\n') + + return output_filename + + +def create_weather_dataset(era5land_file: str, passage_datetime: datetime, landsat_img_file: str, mem_limit: int = None, nb_threads: int = None) -> str: + """ + Create high resolution weather dataset from ERA5 Land low resolution + raw weather variables and LandSat data at time of passage. + + Arguments + ========= + + 1. era5land_file: ``str`` + raw weather dataset path + 2. passage_datetime: ``datetime`` + datetime of landsat passage + 3. landsat_img_file: ``str`` + path to landsat band file + 4. mem_limit: ``int`` ``default = None`` + memory limit for reprojection (optional) + 5. nb_threads: ``int`` ``default = None`` + max number of threads to use (optional) + + Returns + ======= + + 1. save_name: ``str`` + output savename of reprojected weather data + """ + + # Open raw weather + raw_weather_dataset = xr.open_dataset(era5land_file) + + # Write input crs + raw_weather_dataset.rio.write_crs(4326, inplace = True) + + # Calculate temporary weather variables for SPARSE + raw_weather_dataset['ea'] = ea_calc(raw_weather_dataset.t2m - 273.15) + raw_weather_dataset['es'] = ea_calc(raw_weather_dataset.d2m - 273.15) + + # Calculate weather variables for SPARSE + weather_dataset = raw_weather_dataset[['ssrd', 't2m']] + weather_dataset['ua'] = np.sqrt(raw_weather_dataset.u10 ** 2 + raw_weather_dataset.v10 ** 2) + weather_dataset['rh'] = np.clip(100.*(raw_weather_dataset.es / raw_weather_dataset.ea), a_min = 0, a_max = 100) + + # Interpolate weather variables to satellite passage + weather_dataset = weather_dataset.interp(time = passage_datetime, method = 'linear').astype(np.float32) + + # Open landsat band to reproject weather dataset to LandSat grid + landsat_band = xr.open_dataarray(landsat_img_file) + + # Get variables + variables = [key for key in weather_dataset.keys() if key != 'spatial_ref'] + + # Get metadata + target_crs = landsat_band.rio.crs + spatial_ref = landsat_band.spatial_ref.load() + + # Manage ressources + if not mem_limit: + mem_limit = min([calculate_mem_size(1, len(landsat_band.y), len(landsat_band.x), len(variables), np.float32), 0.8 * virtual_memory().available / (1024**2)]) + if not nb_threads: + nb_threads = min([cpu_count(logical = True), len(os.sched_getaffinity(0))]) + + # Reproject + reprojected_weather_dataset = weather_dataset.rio.reproject(target_crs, transform = landsat_band.rio.transform(), shape = (landsat_band.rio.height, landsat_band.rio.width), resampling = Resampling.bilinear, num_threads = nb_threads, warp_mem_limit = mem_limit, nodata = np.nan) + + # Take reference spatial_ref + reprojected_weather_dataset['spatial_ref'] = spatial_ref + + # Create encoding dictionnary + for variable in list(reprojected_weather_dataset.keys()): + # Write encoding dict + encod = {} + encod['dtype'] = 'f4' + if '_FillValue' in reprojected_weather_dataset[variable].attrs: + del reprojected_weather_dataset[variable].attrs['_FillValue'] + encod['_FillValue'] = np.nan + # TODO: check if compression affects reading speed + encod['zlib'] = True + encod['complevel'] = 1 + reprojected_weather_dataset[variable].encoding.update(encod) + + # Create save name + save_name = era5land_file[:-3] + '_HR.nc' + + # Save file + reprojected_weather_dataset.to_netcdf(save_name) + + return save_name + + +def get_ERA5Land_data(landsat_dir: str, out_dir: str) -> str: + """ + Download ERA5 Land data, interpolate and reproject it according to + LandSat data. + + Arguments + ========= + + 1. landsat_dir: ``str`` + directory to landsat data + 2. out_dir: ``str`` + output directory for weather data + + Returns + ======= + + 1. weather_dataset_path: ``str`` + output path of reprojected weather dataset + """ + + # Get xml file path + for root, _, files in os.walk(landsat_dir): + for name in files: + if fnmatch(name, '*_MTL.xml'): + xml_file = os.path.join(root, name) + + # Get product name and passage datetime + product_name = os.path.basename(landsat_dir) + passage_datetime = product_str_to_datetime(product_name, xml_file) + + # Get bounding box + ## Get a LandSat band + for root, _, files in os.walk(landsat_dir): + for name in files: + if fnmatch(name, '*_SR_B2.TIF'): + B2_band = os.path.join(root, name) + + bounds = xr.open_dataarray(B2_band).rio.bounds() + crs = xr.open_dataarray(B2_band).rio.crs + + # Convert bounding box projection + bbox = convert_bbox_crs(bounds, crs, out_crs = 4326) + + # Call ERA5 download function + variables = ['2m_temperature', '2m_dewpoint_temperature', 'surface_solar_radiation_downwards', '10m_u_component_of_wind','10m_v_component_of_wind'] + output_file = call_era5landhourly(variables, passage_datetime, bbox, out_dir) + + # Convert output_files in a single dataset for SPARSE + weather_dataset_path = create_weather_dataset(output_file, passage_datetime, B2_band) + + return weather_dataset_path + + +# For testing +if __name__ == '__main__': + + out_dir = '/mnt/e/SPARSE/test' + l8_dir = '/mnt/e/TEST/LANDSAT/LC08_L2SP_198031_20210413_20210423_02_T1' + + get_ERA5Land_data(l8_dir, out_dir) \ No newline at end of file