diff --git a/sen2chain/__init__.py b/sen2chain/__init__.py index edcd026ac40ab1e73d134163ad53b862b234c49e..83cd50323fe9fb2ce950a214f32d892e9bce4028 100644 --- a/sen2chain/__init__.py +++ b/sen2chain/__init__.py @@ -1,6 +1,6 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 -# Copyright (C) 2018 Jeremy Commins <jebins@openmailbox.org> +# Copyright (C) 2018 Jeremy Commins <jebins@laposte.net> # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -21,18 +21,42 @@ This module lists all externally useful classes and functions. from .config import Config from .tiles import Tile -from .products import L1cProduct, L2aProduct, OldCloudMaskProduct, NewCloudMaskProduct, IndiceProduct +from .products import ( + L1cProduct, + L2aProduct, + OldCloudMaskProduct, + NewCloudMaskProduct, + IndiceProduct, +) from .library import Library from .data_request import DataRequest from .indices import IndicesCollection from .download_and_process import DownloadAndProcess from .time_series import TimeSeries from .automatization import Automatization -from .utils import format_word, grouper, datetime_to_str, str_to_datetime, human_size_decimal, human_size, get_current_Sen2Cor_version -from .geo_utils import serialise_tiles_index, get_processed_indices_vect, crop_product_by_shp -from .multi_processing import l2a_multiprocessing, cld_version_probability_iterations_reprocessing_multiprocessing, idx_multiprocessing +from .utils import ( + format_word, + grouper, + datetime_to_str, + str_to_datetime, + human_size_decimal, + human_size, + get_current_Sen2Cor_version, +) +from .geo_utils import ( + serialise_tiles_index, + get_processed_indices_vect, + crop_product_by_shp, +) +from .multi_processing import ( + l2a_multiprocessing, + cld_version_probability_iterations_reprocessing_multiprocessing, + idx_multiprocessing, +) from .tileset import TileSet from .jobs import Jobs, Job __version__ = "0.7.0" -__author__ = "Jérémy Commins <jebins@openmailbox.org> & Impact <pascal.mouquet@ird.fr>" +__author__ = ( + "Jérémy Commins <jebins@laposte.net> & Impact <pascal.mouquet@ird.fr>" +) diff --git a/sen2chain/automatization.py b/sen2chain/automatization.py index 6487bf64962d7d1b7f0d108ac19f1f77dbc8fe09..af6bd365edd19837913faa320c2ba415f0dfbcdc 100644 --- a/sen2chain/automatization.py +++ b/sen2chain/automatization.py @@ -1,14 +1,17 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 """ -Module for automatized downloading, processing and time series computing of new Sentinel-2 images. +Module for automatized downloading, processing and time series computing of new +Sentinel-2 images. """ + import time import logging import pandas as pd import numpy as np from pathlib import Path from datetime import datetime, timedelta + # type annotations from typing import Sequence, List, Set, Dict, Union, Tuple @@ -26,11 +29,12 @@ logging.basicConfig(level=logging.INFO) class Automatization: """Class to automate the downloading and processing of Sentinel-2 images. - At first launch it will scan L1C tiles in the library, create a new CSV file - in ``~/sen2chain/config/tiles_to_watch.csv``, and update the file. - If the CSV file already exists, it will read it and download and process new images for - the tiles listed in that file. + At first launch it will scan L1C tiles in the library, create a new CSV + file in ``~/sen2chain/config/tiles_to_watch.csv``, and update the file. 
+ If the CSV file already exists, it will read it and download and process + new images for the tiles listed in that file. """ + _csv_path = Config().tiles_to_watch def __init__(self) -> None: @@ -54,32 +58,39 @@ class Automatization: def _create_df(self) -> None: """Creates a pandas dataframe.""" - self._df = pd.DataFrame(index=Library().l1c, - columns=["start_date", - "last_date", - "ignore", - "tags"]) + self._df = pd.DataFrame( + index=Library().l1c, + columns=["start_date", "last_date", "ignore", "tags"], + ) self._df.index.name = "tile" def _read_csv(self) -> None: """Reads the csv file.""" - self._df = pd.read_csv(self._csv_path, - sep=",", - converters={"tile": str.strip, - "start_date": str.strip, - "last_date": str.strip, - "ignore": str.strip, - "tags": str.strip}, - index_col="tile", - na_values="") - - self._df["start_date"] = pd.to_datetime(self._df["start_date"], format="%Y-%m-%d") - self._df["last_date"] = pd.to_datetime(self._df["last_date"], format="%Y-%m-%d") + self._df = pd.read_csv( + self._csv_path, + sep=",", + converters={ + "tile": str.strip, + "start_date": str.strip, + "last_date": str.strip, + "ignore": str.strip, + "tags": str.strip, + }, + index_col="tile", + na_values="", + ) + + self._df["start_date"] = pd.to_datetime( + self._df["start_date"], format="%Y-%m-%d" + ) + self._df["last_date"] = pd.to_datetime( + self._df["last_date"], format="%Y-%m-%d" + ) # bug sur replace: self._df.replace(r"^\s*$", np.nan, regex=True, inplace=True) # -> workaround: # for c in self._df.select_dtypes(include=["object"]).columns: - # self._df[c] = self._df[c].replace(r"^\s*$", np.nan, regex=True, inplace=True) + # self._df[c] = self._df[c].replace(r"^\s*$", np.nan, regex=True, inplace=True) def _save_csv(self) -> None: """Saves the dataframe to CSV.""" @@ -166,13 +177,17 @@ class Automatization: # Don't overload the server with useless requests : # Sentinel-2 revisit time is 5 days if request_date: - if not Automatization._ndays_since_date(request_date, revisit_period): + if not Automatization._ndays_since_date( + request_date, revisit_period + ): logger.info("Too early to check {}".format(tile)) continue logger.info("Checking tile: {}".format(tile)) - request = DataRequest(start_date=request_date if request_date else None, - end_date=None) + request = DataRequest( + start_date=request_date if request_date else None, + end_date=None, + ) request.from_tiles([tile]) self._products_list["hubs"].update(request.products_list["hubs"]) @@ -183,15 +198,16 @@ class Automatization: """Returns ignored tiles.""" return self._df[self._df["ignore"].notna()].index.values - def run(self, - tiles: List[str] = None, - process_products: bool = False, - indices_list: List[str] = None, - nodata_clouds: bool = True, - quicklook: bool = True, - hubs_limit: Dict[str, int] = None, - revisit_period: int = 2, - ) -> None: + def run( + self, + tiles: List[str] = None, + process_products: bool = False, + indices_list: List[str] = None, + nodata_clouds: bool = True, + quicklook: bool = True, + hubs_limit: Dict[str, int] = None, + revisit_period: int = 2, + ) -> None: """ Runs automatization. @@ -200,7 +216,8 @@ class Automatization: :param indices_list: list of valid indices names that will be processed. :param nodata_clouds: mask indices output rasters with a cloud-mask. :param quicklook: creates a quicklook for each indice processed. - :param revisit_period: number of days, since last date, to check again for new images. 
+ :param revisit_period: number of days, since last date, to check again + for new images. """ logger.info("Running automatization") logger.info("Ignored tiles: {}".format(self._get_ignored_tiles(self))) @@ -215,18 +232,20 @@ class Automatization: self._get_tiles_to_update(tiles_list=tiles) self._get_products_list(revisit_period) if any(self._products_list.values()): - prods = DownloadAndProcess(identifiers=self._products_list, - hubs_limit=hubs_limit, - aws_limit=2, - process_products=process_products, - max_processes=3, - indices_list=indices_list, - nodata_clouds=nodata_clouds, - quicklook=quicklook) + prods = DownloadAndProcess( + identifiers=self._products_list, + hubs_limit=hubs_limit, + aws_limit=2, + process_products=process_products, + max_processes=3, + indices_list=indices_list, + nodata_clouds=nodata_clouds, + quicklook=quicklook, + ) failed = prods.failed_products if failed: print(failed) - # When working on a local network storage, pause the process in + # When working on a local network storage, pause the process in # order to let the file to be checked by the filesystem (lags). time.sleep(2) self._update_df() @@ -261,9 +280,10 @@ class Automatization: class TimeSeriesAutomatization: """Time series automatization. - Scans vectors files in the Time Series folder and computes a time - series extraction for each of the files. + Scans vectors files in the Time Series folder and computes a time series + extraction for each of the files. """ + _time_series_path = Path(Config().get("time_series_path")) def __init__(self) -> None: @@ -272,9 +292,15 @@ class TimeSeriesAutomatization: def _list_vectors_files(self) -> None: """Scans vectors files found in the TIME_SERIES folder.""" - valid_types = ("*.geojson", "*.gpkg", "*.shp", ) # type: Tuple[str, ...] + valid_types = ( + "*.geojson", + "*.gpkg", + "*.shp", + ) # type: Tuple[str, ...] for valid_type in valid_types: - self._vectors_files.extend(list(self._time_series_path.glob(valid_type))) + self._vectors_files.extend( + list(self._time_series_path.glob(valid_type)) + ) def run(self, indices: Sequence[str] = ("NDVI",)) -> None: """Computes time series extraction for each indice and for each @@ -284,8 +310,10 @@ class TimeSeriesAutomatization: """ for vectors_file in self._vectors_files: logger.info("Processing: {}".format(vectors_file.name)) - ts = TimeSeries(date_min=None, - date_max=None, - vectors_file=str(vectors_file), - indices=indices) + ts = TimeSeries( + date_min=None, + date_max=None, + vectors_file=str(vectors_file), + indices=indices, + ) ts.to_csv() diff --git a/sen2chain/cloud_mask.py b/sen2chain/cloud_mask.py index 131c05b0c8a2bef1a01712bc68224a26d771e5af..24334dd09c650b6382e84979c8100b20ef934ba2 100755 --- a/sen2chain/cloud_mask.py +++ b/sen2chain/cloud_mask.py @@ -1,16 +1,24 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 """ Module for computing cloud-mask from a sen2cor clouds classification raster. 
""" +import os import logging import pathlib from pathlib import Path import pyproj -#~ import otbApplication + import shapely.geometry -from shapely.geometry import box, Polygon, MultiPolygon, GeometryCollection, mapping, shape +from shapely.geometry import ( + box, + Polygon, + MultiPolygon, + GeometryCollection, + mapping, + shape, +) from shapely.ops import transform import rasterio from rasterio.features import shapes, rasterize @@ -19,9 +27,6 @@ from itertools import compress from scipy import ndimage from typing import Sequence, List, Dict, Union from osgeo import gdal -# import gdal -import os -# type annotations from .config import Config @@ -35,8 +40,9 @@ def max_dtype_value(dtype: np.dtype): def mask_footprint( - raster_band: Union[str, pathlib.PosixPath], footprint: List[float], - out_path: Union[str, pathlib.PosixPath] = "./footprint_masked.tif" + raster_band: Union[str, pathlib.PosixPath], + footprint: List[float], + out_path: Union[str, pathlib.PosixPath] = "./footprint_masked.tif", ) -> None: """Masks a raster band with a footprint. @@ -49,33 +55,46 @@ def mask_footprint( band = src.read(1) fp_poly = Polygon(footprint) - projection = lambda x, y: pyproj.transform(pyproj.Proj(init='epsg:4326'), - pyproj.Proj(profile["crs"].to_string()), x, y - ) + # FIXME: use def instead of lambda assignment + projection = lambda x, y: pyproj.transform( + pyproj.Proj(init="epsg:4326"), + pyproj.Proj(profile["crs"].to_string()), + x, + y, + ) fp_poly_reproj = transform(projection, fp_poly) - fp_band = rasterize(shapes=[mapping(fp_poly_reproj)], - all_touched=True, - dtype=np.uint8, - out_shape=(profile["width"], profile["height"]), - transform=profile["affine"] - ) + fp_band = rasterize( + shapes=[mapping(fp_poly_reproj)], + all_touched=True, + dtype=np.uint8, + out_shape=(profile["width"], profile["height"]), + transform=profile["affine"], + ) nodata_value = max_dtype_value(profile["dtype"]) - band_masked = np.where(fp_band == 1, band, nodata_value).astype(np.uint16) - - profile.update(driver="Gtiff", - nodata=nodata_value, - tiled=False, - transform=profile["affine"]) - - with rasterio.open(str(out_path), 'w', **profile) as out: + band_masked = np.where(fp_band == 1, band, nodata_value).astype( + np.uint16 + ) + + profile.update( + driver="Gtiff", + nodata=nodata_value, + tiled=False, + transform=profile["affine"], + ) + + with rasterio.open(str(out_path), "w", **profile) as out: out.write(band_masked, 1) def katana( - geometry: Union[shapely.geometry.polygon.Polygon, shapely.geometry.multipolygon.MultiPolygon], - threshold: int = 2500, count: int = 0 + geometry: Union[ + shapely.geometry.polygon.Polygon, + shapely.geometry.multipolygon.MultiPolygon, + ], + threshold: int = 2500, + count: int = 0, ) -> List[shapely.geometry.polygon.Polygon]: """ Splits a Polygon into two parts across it's shortest dimension @@ -87,7 +106,8 @@ def katana( """ # Credits: Joshua Arnott # Licence: BSD 2-clause - # https://snorfalorpagus.net/blog/2016/03/13/splitting-large-polygons-for-faster-intersections/ + # 'https://snorfalorpagus.net/blog/2016/03/13/splitting-large-polygons-for- + # faster-intersections/' logger.debug("Splitting polygons.") bounds = geometry.bounds @@ -110,7 +130,10 @@ def katana( part_2 = box(bounds[0] + width / 2, bounds[1], bounds[2], bounds[3]) result = [] - for each in (part_1, part_2,): + for each in ( + part_1, + part_2, + ): intersec = geometry.intersection(each) if not isinstance(intersec, GeometryCollection): intersec = [intersec] @@ -132,8 +155,11 @@ def katana( def 
erosion_dilatation(
-    features: Sequence[Dict], threshold: int = 2500,
-    erosion: int = -20, dilatation: int = 100) -> Dict:
+    features: Sequence[Dict],
+    threshold: int = 2500,
+    erosion: int = -20,
+    dilatation: int = 100,
+) -> Dict:
     """
     Erosion then dilatation.
@@ -150,15 +176,25 @@ def erosion_dilatation(
             eroded = shape(part).buffer(float(erosion))
             new_geom = eroded if int(eroded.area) else "POLYGON EMPTY"

-        if str(new_geom) != "POLYGON EMPTY":  # small geometries are empty after erosion
-            yield ({"properties": feat["properties"],
-                    "geometry": mapping(new_geom.buffer(float(dilatation)))
-                    })
+        if (
+            str(new_geom) != "POLYGON EMPTY"
+        ):  # small geometries are empty after erosion
+            yield (
+                {
+                    "properties": feat["properties"],
+                    "geometry": mapping(
+                        new_geom.buffer(float(dilatation))
+                    ),
+                }
+            )


 def create_cloud_mask(
-    cloud_mask: Union[str, pathlib.PosixPath], out_path="./cloud_mask.tif",
-    buffering: bool = True, erosion: int = None, dilatation: int = None
+    cloud_mask: Union[str, pathlib.PosixPath],
+    out_path="./cloud_mask.tif",
+    buffering: bool = True,
+    erosion: int = None,
+    dilatation: int = None,
 ) -> None:
     """
     create cloud mask
@@ -191,38 +227,49 @@ def create_cloud_mask(
         # print("cloud-less array")
         band_norm01_masked = np.ma.masked_where(band_norm01 == 1, band_norm01).mask

-    with rasterio.open(str(out_path), 'w',
-                       driver="Gtiff",
-                       height=profile["height"],
-                       width=profile["width"],
-                       count=1,
-                       dtype=np.uint8,
-                       transform=profile["affine"],
-                       crs=profile["crs"],
-                       tiled=False) as out:
+    with rasterio.open(
+        str(out_path),
+        "w",
+        driver="Gtiff",
+        height=profile["height"],
+        width=profile["width"],
+        count=1,
+        dtype=np.uint8,
+        transform=profile["affine"],
+        crs=profile["crs"],
+        tiled=False,
+    ) as out:

         # with erosion-dilatation
         if buffering:
             logger.info("Erosion-dilatation.")

             # vectorization
-            vectors = ({'properties': {'raster_val': v}, 'geometry': s}
-                       for i, (s, v) in enumerate(shapes(band_norm01,
-                                                         mask=band_norm01_masked,
-                                                         transform=profile["affine"])))
+            vectors = (
+                {"properties": {"raster_val": v}, "geometry": s}
+                for i, (s, v) in enumerate(
+                    shapes(
+                        band_norm01,
+                        mask=band_norm01_masked,
+                        transform=profile["affine"],
+                    )
+                )
+            )

             # buffering
-            buffers = erosion_dilatation(vectors,
-                                         erosion=erosion,
-                                         dilatation=dilatation)
+            buffers = erosion_dilatation(
+                vectors, erosion=erosion, dilatation=dilatation
+            )

             shapes_values = ((f["geometry"], 1) for f in buffers)

             # rasterisation
-            burned = rasterize(shapes=shapes_values,
-                               all_touched=False,
-                               dtype=np.uint8,
-                               out_shape=(profile["width"], profile["height"]),
-                               transform=profile["affine"])
+            burned = rasterize(
+                shapes=shapes_values,
+                all_touched=False,
+                dtype=np.uint8,
+                out_shape=(profile["width"], profile["height"]),
+                transform=profile["affine"],
+            )

             # write band
             out.write(burned, 1)
@@ -234,26 +281,12 @@

     # return str(Path(str(out_path)).absolute)

-    # cascaded union
-    # from shapely.ops import cascaded_union
-    # print("cascaded union")
-    # dissolved = cascaded_union( [shape(f["geometry"]) for f in buffers] )
-    # shapes = ((mapping(g), 1) for i, g in enumerate(dissolved)) # with cascaded union
-
-    ###### TEST ######
-    # with fiona.open(str("vectors.shp"), "w",
-    #                 driver="ESRI Shapefile",
-    #                 crs=profile["crs"],
-    #                 schema={"geometry":"Polygon", "properties": {"raster_val": "int"}}) as dst:
-    #     dst.writerecords(vectors)
-    # sys.exit(0)
-    ##################
-

 def create_cloud_mask_v2(
-        cloud_mask: Union[str, pathlib.PosixPath],
-        
out_path="./cloud_mask_v2.jp2", - erosion: int = 1, dilatation: int = 5 + cloud_mask: Union[str, pathlib.PosixPath], + out_path="./cloud_mask_v2.jp2", + erosion: int = 1, + dilatation: int = 5, ) -> None: """ create cloud mask @@ -272,37 +305,48 @@ def create_cloud_mask_v2( cld_profile = cld_src.profile cld_bin = np.where(cld > CLD_seuil, 1, 0).astype(np.uint8) cld_erode = ndimage.binary_erosion(cld_bin).astype(cld_bin.dtype) - kernel = np.array([[0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0], - [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0], - [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0]]) - cld_dilate = ndimage.binary_dilation(cld_erode, kernel).astype(cld_erode.dtype) - - cld_profile.update(driver="Gtiff", - compress="NONE", - tiled=False, - dtype=np.int8, - transform=cld_src.transform, - count=1) - - cld_profile.pop('tiled', None) - cld_profile.pop('nodata', None) + kernel = np.array( + [ + [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0], + [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0], + [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0], + ] + ) + cld_dilate = ndimage.binary_dilation(cld_erode, kernel).astype( + cld_erode.dtype + ) + + cld_profile.update( + driver="Gtiff", + compress="NONE", + tiled=False, + dtype=np.int8, + transform=cld_src.transform, + count=1, + ) + + cld_profile.pop("tiled", None) + cld_profile.pop("nodata", None) with rasterio.Env(GDAL_CACHEMAX=512) as env: with rasterio.open(out_dilate, "w", **cld_profile) as dst: dst.write(cld_dilate.astype(np.int8), 1) - - # Save to JP2000 + + # Save to JP2000 src_ds = gdal.Open(out_dilate) driver = gdal.GetDriverByName("JP2OpenJPEG") - dst_ds = driver.CreateCopy(str(out_path), src_ds, - options=["CODEC=JP2", "QUALITY=100", "REVERSIBLE=YES", "YCBCR420=NO"]) + dst_ds = driver.CreateCopy( + str(out_path), + src_ds, + options=["CODEC=JP2", "QUALITY=100", "REVERSIBLE=YES", "YCBCR420=NO"], + ) dst_ds = None src_ds = None @@ -311,16 +355,17 @@ def create_cloud_mask_v2( def create_cloud_mask_b11( - cloud_mask: Union[str, pathlib.PosixPath], - b11_path: Union[str, pathlib.PosixPath], - out_path="./cloud_mask_b11.jp2", - dilatation: int = 5) -> None: + cloud_mask: Union[str, pathlib.PosixPath], + b11_path: Union[str, pathlib.PosixPath], + out_path="./cloud_mask_b11.jp2", + dilatation: int = 5, +) -> None: """ - Marking cloud mask v2 with B11 values to reduce overdetection of clouds for some specific water bodies - uint8 + Marking cloud mask v2 with B11 values to reduce overdetection of clouds for + some specific water bodies (uint8). - :param cloud_mask: path to the cloud mask raster (sen2chain) - :param b11_path: path to the l2a b11 + :param cloud_mask: path to the cloud mask raster (sen2chain). + :param b11_path: path to the l2a b11. :param out_path: path to the output. :param erosion: size of the outer buffer in px. :param dilatation: size of the inner buffer in px. 
@@ -333,126 +378,154 @@ def create_cloud_mask_b11( with rasterio.open(str(b11_path)) as b11_src: b11 = b11_src.read(1).astype(np.int16) b11_bin = np.where(b11 < b11_seuil, 1, 0).astype(np.uint8) - - kernel = np.array([[0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0], - [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0], - [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0]]) + + kernel = np.array( + [ + [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0], + [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0], + [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0], + ] + ) b11_dilate = ndimage.binary_dilation(b11_bin, kernel).astype(b11_bin.dtype) - + with rasterio.open(str(cloud_mask)) as cld_src: cld = cld_src.read(1).astype(np.int16) cld_profile = cld_src.profile cld_mskd = ((cld == 1) * (b11_dilate == 0)).astype(np.uint8) - - kernel = np.array([[0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0], - [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], - [0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], - [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], - [0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0]]) - cld_mskd_dilate = ndimage.binary_dilation(cld_mskd, kernel).astype(cld_mskd.dtype) - - cld_profile.update(driver="Gtiff", - compress="NONE", - tiled=False, - dtype=np.int8, - transform=cld_src.transform, - count=1) - - cld_profile.pop('tiled', None) - cld_profile.pop('nodata', None) + + kernel = np.array( + [ + [0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], + [0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], + [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0], + ] + ) + cld_mskd_dilate = ndimage.binary_dilation(cld_mskd, kernel).astype( + cld_mskd.dtype + ) + + cld_profile.update( + driver="Gtiff", + compress="NONE", + tiled=False, + dtype=np.int8, + transform=cld_src.transform, + count=1, + ) + + cld_profile.pop("tiled", None) + cld_profile.pop("nodata", None) with rasterio.Env(GDAL_CACHEMAX=512) as env: with rasterio.open(out_mask, "w", **cld_profile) as dst: dst.write(cld_mskd_dilate.astype(np.int8), 1) - - # Save to JP2000 + + # Save to JP2000 src_ds = gdal.Open(out_mask) driver = gdal.GetDriverByName("JP2OpenJPEG") - dst_ds = driver.CreateCopy(str(out_path), src_ds, - options=["CODEC=JP2", "QUALITY=100", "REVERSIBLE=YES", "YCBCR420=NO"]) + dst_ds = driver.CreateCopy( + str(out_path), + src_ds, + options=["CODEC=JP2", 
"QUALITY=100", "REVERSIBLE=YES", "YCBCR420=NO"], + ) dst_ds = None src_ds = None os.remove(out_mask) logger.info("Done: {}".format(out_path.name)) -def create_cloud_mask_v003(cloud_mask: Union[str, pathlib.PosixPath], - out_path="./cm003.jp2", - probability: int = 1, - iterations: int = 5 - ) -> None: + +def create_cloud_mask_v003( + cloud_mask: Union[str, pathlib.PosixPath], + out_path="./cm003.jp2", + probability: int = 1, + iterations: int = 5, +) -> None: """ - create cloud mask version cm003. This cloudmask uses a simple thresholding and dilatations - over the 20m cloud_probability band from Sen2Cor. The threshold value and number of dilatation cycles - can be manually modified by the user. Default values 1% and 5 cycles. + create cloud mask version cm003. This cloudmask uses a simple thresholding + and dilatations over the 20m cloud_probability band from Sen2Cor. The + threshold value and number of dilatation cycles can be manually modified by + the user. Default values 1% and 5 cycles. :param cloud_mask: path to the 20m cloud mask raster. :param out_path: path to the output file. - :param probability: threshold in percent for the 20m cloud_probability band binarisation. + :param probability: threshold in percent for the 20m cloud_probability band + binarisation. :param iterations: number of dilatation cylces to apply. """ - + out_temp_path = Path(Config().get("temp_path")) out_temp = str(out_temp_path / (out_path.stem + "_tmp_cm003.tif")) - + with rasterio.open(str(cloud_mask)) as cld_src: cld_profile = cld_src.profile cld = cld_src.read(1).astype(np.int8) - + cld = np.where(cld >= probability, 1, 0) - - kernel = np.array([[0, 1, 1, 1, 0, 0], - [1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1], - [0, 1, 1, 1, 1, 0]]) - - cld_dilated = ndimage.binary_dilation(cld, kernel, iterations = iterations) - cld_profile.update(driver="Gtiff", - compress="DEFLATE", - tiled=False, - dtype=np.int8) - + + kernel = np.array( + [ + [0, 1, 1, 1, 0, 0], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 1, 0], + ] + ) + + cld_dilated = ndimage.binary_dilation(cld, kernel, iterations=iterations) + cld_profile.update( + driver="Gtiff", compress="DEFLATE", tiled=False, dtype=np.int8 + ) + with rasterio.Env(GDAL_CACHEMAX=512) as env: with rasterio.open(str(out_temp), "w", **cld_profile) as dst: dst.write(cld_dilated.astype(np.int8), 1) - - # Save to JP2000 + + # Save to JP2000 src_ds = gdal.Open(out_temp) driver = gdal.GetDriverByName("JP2OpenJPEG") - dst_ds = driver.CreateCopy(str(out_path), src_ds, - options=["CODEC=JP2", "QUALITY=100", "REVERSIBLE=YES", "YCBCR420=NO"]) + dst_ds = driver.CreateCopy( + str(out_path), + src_ds, + options=["CODEC=JP2", "QUALITY=100", "REVERSIBLE=YES", "YCBCR420=NO"], + ) dst_ds = None src_ds = None os.remove(out_temp) logger.info("Done: {}".format(out_path.name)) -def create_cloud_mask_v004(scl_path: Union[str, pathlib.PosixPath], - out_path="./cm004.jp2", - iterations: int = 5, - cld_shad: bool = True, - cld_med_prob: bool = True, - cld_hi_prob: bool = True, - thin_cir: bool = True, - ) -> None: + +def create_cloud_mask_v004( + scl_path: Union[str, pathlib.PosixPath], + out_path="./cm004.jp2", + iterations: int = 5, + cld_shad: bool = True, + cld_med_prob: bool = True, + cld_hi_prob: bool = True, + thin_cir: bool = True, +) -> None: """ - create cloud mask version cm004. This cloudmask uses 20m resolution SCL image from Sen2Cor - The number of dilatation cycles can be manually modified by the user. Default value: 5 cycles. 
+ create cloud mask version cm004. This cloudmask uses 20m resolution SCL + image from Sen2Cor. The number of dilatation cycles can be manually + modified by the user. Default value: 5 cycles. :param scl_path: path to the Sen2Cor 20m scene classification raster. :param out_path: path to the output file. :param iterations: number of dilatation cylces to apply. @@ -461,44 +534,55 @@ def create_cloud_mask_v004(scl_path: Union[str, pathlib.PosixPath], :param cld_hi_prob: usage of the CLOUD_HIGH_PROBABILITY (9) :param thin_cir: usage of the THIN_CIRRUS (10) """ - + out_temp_path = Path(Config().get("temp_path")) out_temp = str(out_temp_path / (out_path.stem + "_tmp_cm004.tif")) - + with rasterio.open(str(scl_path)) as scl_src: scl_profile = scl_src.profile scl = scl_src.read(1).astype(np.int8) - - list_values = list(compress([3, 8, 9, 10], [cld_shad, cld_med_prob, cld_hi_prob, thin_cir])) + + list_values = list( + compress( + [3, 8, 9, 10], [cld_shad, cld_med_prob, cld_hi_prob, thin_cir] + ) + ) cld = np.isin(scl, list_values) - + if iterations > 0: - kernel = np.array([[0, 1, 1, 1, 0, 0], - [1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1], - [0, 1, 1, 1, 1, 0]]) - cld_dilated = ndimage.binary_dilation(cld, kernel, iterations = iterations) + kernel = np.array( + [ + [0, 1, 1, 1, 0, 0], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 1, 0], + ] + ) + cld_dilated = ndimage.binary_dilation( + cld, kernel, iterations=iterations + ) else: cld_dilated = cld - - scl_profile.update(driver="Gtiff", - compress="DEFLATE", - tiled=False, - dtype=np.int8) - + + scl_profile.update( + driver="Gtiff", compress="DEFLATE", tiled=False, dtype=np.int8 + ) + with rasterio.Env(GDAL_CACHEMAX=512) as env: with rasterio.open(str(out_temp), "w", **scl_profile) as dst: dst.write(cld_dilated.astype(np.int8), 1) - - # Save to JP2000 + + # Save to JP2000 src_ds = gdal.Open(out_temp) driver = gdal.GetDriverByName("JP2OpenJPEG") - dst_ds = driver.CreateCopy(str(out_path), src_ds, - options=["CODEC=JP2", "QUALITY=100", "REVERSIBLE=YES", "YCBCR420=NO"]) + dst_ds = driver.CreateCopy( + str(out_path), + src_ds, + options=["CODEC=JP2", "QUALITY=100", "REVERSIBLE=YES", "YCBCR420=NO"], + ) dst_ds = None src_ds = None os.remove(out_temp) - logger.info("Done: {}".format(out_path.name)) - + logger.info("Done: {}".format(out_path.name)) diff --git a/sen2chain/colormap.py b/sen2chain/colormap.py index 7f0692b09809a64c295f4ddd85bb874636058028..46902ab12d3803318ae7b74970ae6169d31ec6ae 100644 --- a/sen2chain/colormap.py +++ b/sen2chain/colormap.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 """ Module for computing colormaps from single band rasters. @@ -9,6 +9,7 @@ import pathlib import csv from pathlib import Path from typing import Any + # import colorcet as cc import rasterio from rasterio.warp import calculate_default_transform, reproject, Resampling @@ -48,8 +49,7 @@ logging.basicConfig(level=logging.INFO) def matplotlib_colormap_to_rgb( - cmap: matplotlib.colors.LinearSegmentedColormap, - revers=False + cmap: matplotlib.colors.LinearSegmentedColormap, revers=False ) -> Dict[int, Tuple[int, int, int]]: """ Returns a matplotlib colormap as a dictionnary of RGB values. @@ -58,16 +58,23 @@ def matplotlib_colormap_to_rgb( :param cmap: matplotlib colormap. :param revers: if true, reverse the colormap. 
""" + def float_to_integer(flt): """Returns a normalised ([0-1]) value in the [0-255] range.""" - return int(round(flt*255)) + return int(round(flt * 255)) cmap_list = [cmap(i) for i in range(0, 256)] # type: List[Any] if revers: cmap_list = reversed(cmap_list) - cmap_dict = {i: (float_to_integer(v[0]), float_to_integer(v[1]), float_to_integer(v[2])) - for i, v in enumerate(cmap_list)} # type: Dict[int, Tuple[int, int, int]] + cmap_dict = { + i: ( + float_to_integer(v[0]), + float_to_integer(v[1]), + float_to_integer(v[2]), + ) + for i, v in enumerate(cmap_list) + } # type: Dict[int, Tuple[int, int, int]] return cmap_dict @@ -87,19 +94,21 @@ def get_tsv_colormap(lut_path: str) -> Dict[int, Tuple[int, int, int]]: reader = csv.reader(csv_file, delimiter=" ") colormap_dict = dict() # type: Dict[int, Tuple[int, int, int]] for row in reader: - color = tuple([int(row[1]), int(row[2]), int(row[3])]) # type: Tuple[int, int, int] + color = tuple( + [int(row[1]), int(row[2]), int(row[3])] + ) # type: Tuple[int, int, int] colormap_dict[int(row[0])] = color return colormap_dict def create_colormap( - raster: Union[str, pathlib.PosixPath], - lut_dict: Dict[int, Tuple[int, int, int]], - cloud_mask: str = None, - out_path: Union[str, pathlib.PosixPath] = "./colormap.tif", - out_crs: str = None, - clouds_color: str = "black", - out_resolution: Tuple[int, int] = (100, 100) + raster: Union[str, pathlib.PosixPath], + lut_dict: Dict[int, Tuple[int, int, int]], + cloud_mask: str = None, + out_path: Union[str, pathlib.PosixPath] = "./colormap.tif", + out_crs: str = None, + clouds_color: str = "black", + out_resolution: Tuple[int, int] = (100, 100), ) -> str: """ Colormapping of a single-band raster with a look-up table passed as @@ -119,8 +128,10 @@ def create_colormap( # cloud mask if not Path(str(cloud_mask)).is_file(): cloud_mask = None - logger.error("Cloud mask path wrong or not provided \ - \nClouds will not be colored") + logger.error( + "Cloud mask path wrong or not provided \ + \nClouds will not be colored" + ) # clouds color if clouds_color.lower() == "black": @@ -130,8 +141,10 @@ def create_colormap( cld_val = 255 # lut_dict[0] = (255, 255, 255) else: - logger.warning('Available clouds colors: "black" or "white" \ - \nApplying default: black') + logger.warning( + 'Available clouds colors: "black" or "white" \ + \nApplying default: black' + ) lut_dict[0] = (0, 0, 0) lut_dict[255] = (255, 255, 255) @@ -152,20 +165,24 @@ def create_colormap( clouds_band = cld_src.read(1) # resample cloud_mask to raster grid cld_reproj = np.empty(raster_band.shape, dtype=np.uint8) - reproject(source=clouds_band, - destination=cld_reproj, - src_transform=cld_src.transform, - src_crs=cld_src.crs, - dst_transform=src.transform, - dst_crs=src.crs, - resampling=Resampling.nearest) + reproject( + source=clouds_band, + destination=cld_reproj, + src_transform=cld_src.transform, + src_crs=cld_src.crs, + dst_transform=src.transform, + dst_crs=src.crs, + resampling=Resampling.nearest, + ) # clouds raster_band = np.where(cld_reproj == 0, raster_band, 16383) # band_mask_borders = np.where(raster_band != 32767, raster_band, -10000) cmap = np.where(raster_band == 16383, -10000, raster_band) - cmap = (128 * (cmap/10000 + 1) * ((cmap+10000) > 0)).astype(np.uint8) + cmap = (128 * (cmap / 10000 + 1) * ((cmap + 10000) > 0)).astype( + np.uint8 + ) cmap = np.where(cld_reproj == 1, cld_val, cmap) # compute default transform, width and height to fit the out resolution dst_transform, dst_width, dst_height = calculate_default_transform( @@ 
-174,17 +191,20 @@ def create_colormap( src.width, src.height, *src.bounds, - resolution=out_resolution) + resolution=out_resolution + ) out_crs = src.crs if not out_crs else out_crs - profile.update(nodata=0, - driver="Gtiff", - # compress="DEFLATE", - dtype=np.uint8, - transform=dst_transform, - width=dst_width, - height=dst_height, - crs=out_crs) + profile.update( + nodata=0, + driver="Gtiff", + # compress="DEFLATE", + dtype=np.uint8, + transform=dst_transform, + width=dst_width, + height=dst_height, + crs=out_crs, + ) # write colormap to out_path with rasterio.open(str(out_path), "w", **profile) as dst: dst.write(cmap, 1) @@ -192,15 +212,16 @@ def create_colormap( return str(Path(str(out_path)).absolute) + def create_rvb( - raster: Union[str, pathlib.PosixPath], - lut_dict: Dict[int, Tuple[int, int, int]], - cloud_mask: str = None, - out_path: Union[str, pathlib.PosixPath] = "./colormap_rvb.tif", - out_crs: str = None, - clouds_color: str = "black", - out_resolution: Tuple[int, int] = (100, 100), - stretch: Tuple[float, float] = (-10000, 10000) + raster: Union[str, pathlib.PosixPath], + lut_dict: Dict[int, Tuple[int, int, int]], + cloud_mask: str = None, + out_path: Union[str, pathlib.PosixPath] = "./colormap_rvb.tif", + out_crs: str = None, + clouds_color: str = "black", + out_resolution: Tuple[int, int] = (100, 100), + stretch: Tuple[float, float] = (-10000, 10000), ) -> str: """ Colormapping of a single-band raster with a look-up table passed as @@ -220,8 +241,10 @@ def create_rvb( # cloud mask if not Path(str(cloud_mask)).is_file(): cloud_mask = None - logger.error("Cloud mask path wrong or not provided \ - \nClouds will not be colored") + logger.error( + "Cloud mask path wrong or not provided \ + \nClouds will not be colored" + ) # clouds color if clouds_color.lower() == "black": @@ -231,8 +254,10 @@ def create_rvb( cld_val = 255 # lut_dict[0] = (255, 255, 255) else: - logger.warning('Available clouds colors: "black" or "white" \ - \nApplying default: black') + logger.warning( + 'Available clouds colors: "black" or "white" \ + \nApplying default: black' + ) lut_dict[0] = (0, 0, 0) lut_dict[255] = (255, 255, 255) @@ -245,27 +270,43 @@ def create_rvb( print("Invalid CRS: {}\nUsing source raster CRS".format(e)) fact = int(1000 // out_resolution[0]) - fact_cld = int(1000 // (2*out_resolution[0])) - + fact_cld = int(1000 // (2 * out_resolution[0])) + with rasterio.open(str(raster)) as src: - raster_band = src.read(1, out_shape=(1, int(src.height // fact), int(src.height // fact))) + raster_band = src.read( + 1, out_shape=(1, int(src.height // fact), int(src.height // fact)) + ) profile = src.profile if cloud_mask: with rasterio.open(str(cloud_mask)) as cld_src: - cld_reproj = cld_src.read(1, out_shape=(1, int(cld_src.height // fact_cld), - int(cld_src.height // fact_cld))) + cld_reproj = cld_src.read( + 1, + out_shape=( + 1, + int(cld_src.height // fact_cld), + int(cld_src.height // fact_cld), + ), + ) cld_band = cld_src.read(1) raster_band = np.where(cld_reproj == 0, raster_band, 16383) # band_mask_borders = np.where(raster_band != 32767, raster_band, -10000) - #~ cmap = np.where(raster_band == 16383, -10000, raster_band) - #~ cmap = (128 * (cmap/10000 + 1) * ((cmap+10000) > 0)).astype(np.uint8) + # cmap = np.where(raster_band == 16383, -10000, raster_band) + # cmap = (128 * (cmap/10000 + 1) * ((cmap+10000) > 0)).astype(np.uint8) cmap = np.where(raster_band == 16383, stretch[0], raster_band) - cmap = np.clip((255 * (cmap - stretch[0]).astype(np.float) / (stretch[1] - stretch[0])), 0, 
255).astype(np.uint8) - + cmap = np.clip( + ( + 255 + * (cmap - stretch[0]).astype(np.float) + / (stretch[1] - stretch[0]) + ), + 0, + 255, + ).astype(np.uint8) + cmap = np.where(cld_reproj == 1, cld_val, cmap) # compute default transform, width and height to fit the out resolution dst_transform, dst_width, dst_height = calculate_default_transform( @@ -274,34 +315,39 @@ def create_rvb( src.width, src.height, *src.bounds, - resolution=out_resolution) + resolution=out_resolution + ) out_crs = src.crs if not out_crs else out_crs - profile.update(nodata=0, - tiled=False, - driver="Gtiff", - dtype=np.uint8, - transform=dst_transform, - width=dst_width, - height=dst_height, - crs=out_crs, - count=3) + profile.update( + nodata=0, + tiled=False, + driver="Gtiff", + dtype=np.uint8, + transform=dst_transform, + width=dst_width, + height=dst_height, + crs=out_crs, + count=3, + ) with rasterio.open(str(out_path), "w", **profile) as dst: for k in range(3): cmap_temp = np.copy(cmap) - for idx in np.nditer(cmap_temp, op_flags=['readwrite']): + for idx in np.nditer(cmap_temp, op_flags=["readwrite"]): idx[...] = lut_dict[int(idx)][k] - dst.write(cmap_temp.astype(np.uint8), k+1) + dst.write(cmap_temp.astype(np.uint8), k + 1) return str(Path(str(out_path)).absolute) -def create_l2a_ql(b02: Union[str, pathlib.PosixPath], - b03: Union[str, pathlib.PosixPath], - b04: Union[str, pathlib.PosixPath], - out_path: Union[str, pathlib.PosixPath] = "./L2A_QL.tif", - out_resolution: Tuple[int, int] = (100, 100), - jpg = False, - ) -> str: + +def create_l2a_ql( + b02: Union[str, pathlib.PosixPath], + b03: Union[str, pathlib.PosixPath], + b04: Union[str, pathlib.PosixPath], + out_path: Union[str, pathlib.PosixPath] = "./L2A_QL.tif", + out_resolution: Tuple[int, int] = (100, 100), + jpg=False, +) -> str: """ Creating a color RVB quicklook from 3 single band files passed as arguments :param b02: path to B raster @@ -322,46 +368,55 @@ def create_l2a_ql(b02: Union[str, pathlib.PosixPath], src.width, src.height, *src.bounds, - resolution=out_resolution) - + resolution=out_resolution + ) + if out_path.suffix == ".jpg" or jpg: - profile.update(nodata=0, - driver="JPEG", - dtype=np.uint8, - transform=dst_transform, - width=dst_width, - height=dst_height, - tiled=False, - count=len(file_list)) - profile.pop('tiled', None) - profile.pop('blockxsize', None) - profile.pop('blockysize', None) + profile.update( + nodata=0, + driver="JPEG", + dtype=np.uint8, + transform=dst_transform, + width=dst_width, + height=dst_height, + tiled=False, + count=len(file_list), + ) + profile.pop("tiled", None) + profile.pop("blockxsize", None) + profile.pop("blockysize", None) else: - profile.update(nodata=0, - driver="Gtiff", - dtype=np.uint8, - transform=dst_transform, - width=dst_width, - height=dst_height, - tiled=False, - count=len(file_list)) - - with rasterio.open(str(out_path), 'w', **profile) as dst: + profile.update( + nodata=0, + driver="Gtiff", + dtype=np.uint8, + transform=dst_transform, + width=dst_width, + height=dst_height, + tiled=False, + count=len(file_list), + ) + + with rasterio.open(str(out_path), "w", **profile) as dst: for id, layer in enumerate(file_list, start=1): with rasterio.open(layer) as src1: raster_band = src1.read(1) raster_band = np.where(raster_band > 2000, 2000, raster_band) - raster_band = np.where(raster_band > 1, (raster_band/2000)*255, raster_band) + raster_band = np.where( + raster_band > 1, (raster_band / 2000) * 255, raster_band + ) raster_band = raster_band.astype(np.uint8) dst.write_band(id, raster_band) 
return str(Path(str(out_path)).absolute) -def create_l1c_ql(tci: Union[str, pathlib.PosixPath], - out_path: Union[str, pathlib.PosixPath] = "./L1C_QL.tif", - out_resolution: Tuple[int, int] = (100, 100), - jpg = False, - ) -> str: + +def create_l1c_ql( + tci: Union[str, pathlib.PosixPath], + out_path: Union[str, pathlib.PosixPath] = "./L1C_QL.tif", + out_resolution: Tuple[int, int] = (100, 100), + jpg=False, +) -> str: """ Creating a color RVB quicklook from tci 3 bands file passed as argument :param tci: path to tci raster @@ -373,56 +428,70 @@ def create_l1c_ql(tci: Union[str, pathlib.PosixPath], with rasterio.open(str(tci)) as src: profile = src.profile - + fact = int(1000 // out_resolution[0]) - + dst_transform, dst_width, dst_height = calculate_default_transform( src.crs, src.crs, src.width, src.height, *src.bounds, - resolution=out_resolution) - + resolution=out_resolution + ) + if out_path.suffix == ".jpg" or jpg: - profile.update(nodata=0, - driver="JPEG", - dtype=np.uint8, - transform=dst_transform, - width=dst_width, - height=dst_height, - tiled=False, - count=3) - profile.pop('tiled', None) - profile.pop('blockxsize', None) - profile.pop('blockysize', None) - profile.pop('interleave', None) + profile.update( + nodata=0, + driver="JPEG", + dtype=np.uint8, + transform=dst_transform, + width=dst_width, + height=dst_height, + tiled=False, + count=3, + ) + profile.pop("tiled", None) + profile.pop("blockxsize", None) + profile.pop("blockysize", None) + profile.pop("interleave", None) else: - profile.update(nodata=0, - driver="Gtiff", - dtype=np.uint8, - transform=dst_transform, - width=dst_width, - height=dst_height, - tiled=False, - count=3) - - with rasterio.open(str(out_path), 'w', **profile) as dst: - #~ with rasterio.open(str(tci)) as src1: - for band_id in range(1,4): + profile.update( + nodata=0, + driver="Gtiff", + dtype=np.uint8, + transform=dst_transform, + width=dst_width, + height=dst_height, + tiled=False, + count=3, + ) + + with rasterio.open(str(out_path), "w", **profile) as dst: + # with rasterio.open(str(tci)) as src1: + for band_id in range(1, 4): logger.info(band_id) - raster_band = src.read(band_id, out_shape=(1, int(src.height // fact), int(src.height // fact))).astype(np.uint8) - #~ raster_band = np.where(raster_band > 2000, 2000, raster_band) - #~ raster_band = np.where(raster_band > 1, (raster_band/2000)*255, raster_band) - #~ raster_band = raster_band.astype(np.uint8) + raster_band = src.read( + band_id, + out_shape=( + 1, + int(src.height // fact), + int(src.height // fact), + ), + ).astype(np.uint8) + # raster_band = np.where(raster_band > 2000, 2000, raster_band) + # raster_band = np.where(raster_band > 1, (raster_band/2000)*255, raster_band) + # raster_band = raster_band.astype(np.uint8) dst.write_band(band_id, raster_band) return str(Path(str(out_path)).absolute) -def create_l1c_ql_v2(tci: Union[str, pathlib.PosixPath], - out_path: Union[str, pathlib.PosixPath] = "./L1C_QL.tif", - out_resolution: Tuple[int, int] = (100, 100), - jpg = False, - ) -> str: + +def create_l1c_ql_v2( + tci: Union[str, pathlib.PosixPath], + out_path: Union[str, pathlib.PosixPath] = "./L1C_QL.tif", + out_resolution: Tuple[int, int] = (100, 100), + jpg=False, +) -> str: """ Creating a color RVB quicklook from tci 3 bands file passed as argument :param tci: path to tci raster @@ -431,56 +500,69 @@ def create_l1c_ql_v2(tci: Union[str, pathlib.PosixPath], """ with rasterio.open(str(tci)) as src: profile = src.profile - + fact = int(out_resolution[0] // 10) - + dst_transform, 
dst_width, dst_height = calculate_default_transform( src.crs, src.crs, src.width, src.height, *src.bounds, - resolution = out_resolution) - - logger.info("creating L1C QL - {}m/px - {}px".format(out_resolution[0], dst_width)) - + resolution=out_resolution + ) + + logger.info( + "creating L1C QL - {}m/px - {}px".format( + out_resolution[0], dst_width + ) + ) + if out_path.suffix == ".jpg" or jpg: - profile.update(nodata=0, - driver="JPEG", - dtype=np.uint8, - transform=dst_transform, - width=dst_width, - height=dst_height, - tiled=False, - count=3) - profile.pop('tiled', None) - profile.pop('blockxsize', None) - profile.pop('blockysize', None) - profile.pop('interleave', None) + profile.update( + nodata=0, + driver="JPEG", + dtype=np.uint8, + transform=dst_transform, + width=dst_width, + height=dst_height, + tiled=False, + count=3, + ) + profile.pop("tiled", None) + profile.pop("blockxsize", None) + profile.pop("blockysize", None) + profile.pop("interleave", None) else: - profile.update(nodata=0, - driver="Gtiff", - dtype=np.uint8, - transform=dst_transform, - width=dst_width, - height=dst_height, - tiled=False, - count=3) - with rasterio.open(str(out_path), 'w', **profile) as dst: - raster_band = src.read(out_shape=(src.count, - dst_width, - dst_height, - ), - resampling=Resampling.nearest - ).astype(np.uint8) + profile.update( + nodata=0, + driver="Gtiff", + dtype=np.uint8, + transform=dst_transform, + width=dst_width, + height=dst_height, + tiled=False, + count=3, + ) + with rasterio.open(str(out_path), "w", **profile) as dst: + raster_band = src.read( + out_shape=( + src.count, + dst_width, + dst_height, + ), + resampling=Resampling.nearest, + ).astype(np.uint8) dst.write(raster_band) return str(Path(str(out_path)).absolute) -def create_l2a_ql_v2(tci: Union[str, pathlib.PosixPath], - out_path: Union[str, pathlib.PosixPath] = "./L2A_QL.tif", - out_resolution: Tuple[int, int] = (100, 100), - jpg = False, - ) -> str: + +def create_l2a_ql_v2( + tci: Union[str, pathlib.PosixPath], + out_path: Union[str, pathlib.PosixPath] = "./L2A_QL.tif", + out_resolution: Tuple[int, int] = (100, 100), + jpg=False, +) -> str: """ Creating a color RVB quicklook from tci 3 bands file passed as argument :param tci: path to tci raster @@ -489,49 +571,59 @@ def create_l2a_ql_v2(tci: Union[str, pathlib.PosixPath], """ with rasterio.open(str(tci)) as src: profile = src.profile - + fact = int(out_resolution[0] // 10) - + dst_transform, dst_width, dst_height = calculate_default_transform( src.crs, src.crs, src.width, src.height, *src.bounds, - resolution = out_resolution) - - logger.info("creating L2A QL - {}m/px - {}px".format(out_resolution[0], dst_width)) - + resolution=out_resolution + ) + + logger.info( + "creating L2A QL - {}m/px - {}px".format( + out_resolution[0], dst_width + ) + ) + if out_path.suffix == ".jpg" or jpg: - profile.update(nodata=0, - driver="JPEG", - dtype=np.uint8, - transform=dst_transform, - width=dst_width, - height=dst_height, - tiled=False, - count=3) - profile.pop('tiled', None) - profile.pop('blockxsize', None) - profile.pop('blockysize', None) - profile.pop('interleave', None) + profile.update( + nodata=0, + driver="JPEG", + dtype=np.uint8, + transform=dst_transform, + width=dst_width, + height=dst_height, + tiled=False, + count=3, + ) + profile.pop("tiled", None) + profile.pop("blockxsize", None) + profile.pop("blockysize", None) + profile.pop("interleave", None) else: - profile.update(nodata=0, - driver="Gtiff", - dtype=np.uint8, - transform=dst_transform, - width=dst_width, - 
height=dst_height, - tiled=False, - count=3) - with rasterio.open(str(out_path), 'w', **profile) as dst: - raster_band = src.read(out_shape=(src.count, - dst_width, - dst_height, - ), - resampling=Resampling.nearest - ).astype(np.uint8) + profile.update( + nodata=0, + driver="Gtiff", + dtype=np.uint8, + transform=dst_transform, + width=dst_width, + height=dst_height, + tiled=False, + count=3, + ) + with rasterio.open(str(out_path), "w", **profile) as dst: + raster_band = src.read( + out_shape=( + src.count, + dst_width, + dst_height, + ), + resampling=Resampling.nearest, + ).astype(np.uint8) dst.write(raster_band) - return str(Path(str(out_path)).absolute) - + return str(Path(str(out_path)).absolute) diff --git a/sen2chain/config.py b/sen2chain/config.py index 3454613b2e5b03375876a20b9bceefa5e5d4c0b5..d44d7c6feeb42420d9c13efe2ae0f03ea98c544c 100644 --- a/sen2chain/config.py +++ b/sen2chain/config.py @@ -1,7 +1,8 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 """ -Module for collecting configuration data from ``~/sen2chain_data/config/config.cfg`` +Module for collecting configuration data from +``~/sen2chain_data/config/config.cfg`` """ import os @@ -16,12 +17,12 @@ logging.basicConfig(level=logging.INFO) ROOT = Path(os.path.realpath(__file__)).parent.parent SHARED_DATA = dict( - tiles_index = ROOT / "sen2chain" / "data" / "tiles_index.gpkg", - tiles_index_dict = ROOT / "sen2chain" / "data" / "tiles_index_dict.p", - peps_download = ROOT / "sen2chain" / "peps_download3.py", - sen2chain_meta = ROOT / "sen2chain" / "data" / "sen2chain_info.xml", - raw_job_cfg = ROOT / "sen2chain" / "data" / "job_ini.cfg", - ) + tiles_index=ROOT / "sen2chain" / "data" / "tiles_index.gpkg", + tiles_index_dict=ROOT / "sen2chain" / "data" / "tiles_index_dict.p", + peps_download=ROOT / "sen2chain" / "peps_download3.py", + sen2chain_meta=ROOT / "sen2chain" / "data" / "sen2chain_info.xml", + raw_job_cfg=ROOT / "sen2chain" / "data" / "job_ini.cfg", +) class Config: @@ -32,6 +33,7 @@ class Config: Usage:: >>> Config().get("l1c_path") """ + # TODO: Implement the Config class as a singleton. 
_USER_DIR = Path.home() / "sen2chain_data" @@ -44,35 +46,44 @@ class Config: def __init__(self) -> None: self._config_params = ConfigParser() - self._config_params["DATA PATHS"] = {"temp_path": "", - "l1c_path": "", - "l1c_archive_path": "", - "l2a_path": "", - "l2a_archive_path": "", - "indices_path": "", - "time_series_path": "", - "temporal_summaries_path": "", - "cloudmasks_path": "", - } + self._config_params["DATA PATHS"] = { + "temp_path": "", + "l1c_path": "", + "l1c_archive_path": "", + "l2a_path": "", + "l2a_archive_path": "", + "indices_path": "", + "time_series_path": "", + "temporal_summaries_path": "", + "cloudmasks_path": "", + } self._config_params["SEN2COR PATH"] = {"sen2cor_bashrc_path": ""} - self._config_params["HUBS LOGINS"] = {"scihub_id": "", - "scihub_pwd": "", - "peps_config_path": ""} - self._config_params["PROXY SETTINGS"] = {"proxy_http_url": "", - "proxy_https_url": ""} - self._config_params["SEN2CHAIN VERSIONS"] = {"sen2chain_processing_version": "xx.xx"} - - self._config_params["LOG PATH"] = {"log_path": str(self._USER_DIR / "logs")} - + self._config_params["HUBS LOGINS"] = { + "scihub_id": "", + "scihub_pwd": "", + "peps_config_path": "", + } + self._config_params["PROXY SETTINGS"] = { + "proxy_http_url": "", + "proxy_https_url": "", + } + self._config_params["SEN2CHAIN VERSIONS"] = { + "sen2chain_processing_version": "xx.xx" + } + + self._config_params["LOG PATH"] = { + "log_path": str(self._USER_DIR / "logs") + } + if self._CONFIG_FILE.exists(): self._config_params.read(str(self._CONFIG_FILE)) self._config_params_disk = ConfigParser() self._config_params_disk.read(str(self._CONFIG_FILE)) - if self._config_params_disk != self._config_params: + if self._config_params_disk != self._config_params: self._create_config() else: self._create_config() - + self.config_dict = dict() for section in self._config_params.sections(): @@ -87,9 +98,9 @@ class Config: self._CONFIG_DIR.mkdir(exist_ok=True) self._DEFAULT_DATA_DIR.mkdir(exist_ok=True) self._JOBS_DIR.mkdir(exist_ok=True) - Path(self._config_params["LOG PATH"]["log_path"]).mkdir(exist_ok = True) - #~ (self.__JOBS_DIR / "logs").mkdir(exist_ok=True) - + Path(self._config_params["LOG PATH"]["log_path"]).mkdir(exist_ok=True) + # ~ (self.__JOBS_DIR / "logs").mkdir(exist_ok=True) + with open(str(self._CONFIG_FILE), "w") as cfg_file: self._config_params.write(cfg_file) @@ -98,6 +109,7 @@ class Config: Checks if data paths are provided and valids. If not, create default folders in sen2chain_data/DATA and update the configuration file. 
""" + def update_config(section, key, val): """ Update a setting in config.ini @@ -113,16 +125,25 @@ class Config: if value.rstrip() == "" or not Path(value).exists(): - default_value = self._DEFAULT_DATA_DIR / path.replace("_path", "").upper() + default_value = ( + self._DEFAULT_DATA_DIR / path.replace("_path", "").upper() + ) default_value.mkdir(parents=True, exist_ok=True) update_config("DATA PATHS", path, str(default_value)) - logger.info("{}: using default at {}".format(path, str(default_value))) + logger.info( + "{}: using default at {}".format(path, str(default_value)) + ) sen2cor_bashrc_path_value = self.config_dict["sen2cor_bashrc_path"] - if sen2cor_bashrc_path_value.rstrip() == "" or not Path(sen2cor_bashrc_path_value).exists(): - logging.error("Make sure the path to the sen2cor Bashrc file is valid.") + if ( + sen2cor_bashrc_path_value.rstrip() == "" + or not Path(sen2cor_bashrc_path_value).exists() + ): + logging.error( + "Make sure the path to the sen2cor Bashrc file is valid." + ) raise ValueError("Invalid sen2cor Bashrc") def get(self, param: str) -> str: diff --git a/sen2chain/data_request.py b/sen2chain/data_request.py index 32508e745251cbf944765abc4d9091b06aff96a3..c718d5d3a42d78c2c1280dcd046cc62cc5bb2d5f 100644 --- a/sen2chain/data_request.py +++ b/sen2chain/data_request.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 """ Module for obtaining data from the scihub API. @@ -20,13 +20,19 @@ from shapely.wkt import loads from shapely.ops import cascaded_union from pprint import pprint import itertools + # type annotations from typing import List, Set, Dict, Tuple, Union from .config import Config, SHARED_DATA -#from geo_utils import get_tiles_from_point, get_tiles_from_bbox, get_tiles_from_file + from .utils import grouper, str_to_datetime -from .geo_utils import get_tiles_from_point, get_tiles_from_bbox, get_tiles_from_file, serialise_tiles_index +from .geo_utils import ( + get_tiles_from_point, + get_tiles_from_bbox, + get_tiles_from_file, + serialise_tiles_index, +) # présentation problème # # il n'est pas possible avec l'API du scihub de faire une recherche @@ -48,15 +54,21 @@ class DataRequest: :param start_date: query's start date (YYYY-MM-DD). :param end_date: query's end date(YYYY-MM-DD) or datetime objet. :param land_only: keep only tiles that contain land or datetime object. - :param cloud_cover_percentage: cloud cover percentage range (min, max) from 0 to 100. + :param cloud_cover_percentage: cloud cover percentage range (min, max) + from 0 to 100. Usage: - >>> data_request.DataRequest(start_date="2018-01-10", end_date="2018-01-31, - land_only=True).from_tiles["40KCB", "40KEC"] + >>> data_request.DataRequest( + start_date="2018-01-10", + end_date="2018-01-31, + land_only=True).from_tiles["40KCB", "40KEC"] """ + # Since "requester pays" was enabled on AWS for the Sentienl-2 L1C dataset - # (products are no longer free), downloading non-tiled products on AWS does'nt work anymore. - # Therefore, it's useless to make a complex request to separate non-tiled and tiled products. + # (products are no longer free), downloading non-tiled products on AWS + # does'nt work anymore. + # Therefore, it's useless to make a complex request to separate non-tiled + # and tiled products. # This class needs to be refactored. 
# Proxy settings @@ -64,17 +76,19 @@ class DataRequest: proxy_https_url = Config().get("proxy_https_url").strip() def __init__( - self, - start_date: Union[str, datetime] = None, - end_date: Union[str, datetime] = None, - land_only: bool = False, - cloud_cover_percentage: Tuple[int, int] = None + self, + start_date: Union[str, datetime] = None, + end_date: Union[str, datetime] = None, + land_only: bool = False, + cloud_cover_percentage: Tuple[int, int] = None, ) -> None: if start_date is None: # default start_date : first sentinel2 acquisition self.start_date = str_to_datetime("2015-06-29", "ymd") - logger.info("Start date not provided, using {}.".format(self.start_date)) + logger.info( + "Start date not provided, using {}.".format(self.start_date) + ) else: if not isinstance(start_date, datetime): start_date = str_to_datetime(start_date, "ymd") @@ -92,12 +106,21 @@ class DataRequest: self.tiles_to_keep = None self.tiles_to_keep_geom = dict() self.products_list = {"aws": {}, "hubs": {}} - self.cloudcoverpercentage = cloud_cover_percentage if cloud_cover_percentage else (0,100) - self.api = SentinelAPI(Config().get("scihub_id"), Config().get("scihub_pwd"), "https://apihub.copernicus.eu/apihub/") + self.cloudcoverpercentage = ( + cloud_cover_percentage if cloud_cover_percentage else (0, 100) + ) + self.api = SentinelAPI( + Config().get("scihub_id"), + Config().get("scihub_pwd"), + "https://apihub.copernicus.eu/apihub/", + ) # Set proxy settings to the Requests session if self.proxy_http_url or self.proxy_https_url: - proxies = {"http": self.proxy_http_url, "https": self.proxy_https_url} + proxies = { + "http": self.proxy_http_url, + "https": self.proxy_https_url, + } self.api.session.proxies = proxies def _get_tiles_geom(self) -> None: @@ -122,7 +145,9 @@ class DataRequest: :param : tiles: list of valid tiles names. """ - self.tiles_to_keep = [re.sub("^T", "", tile.upper()) for tile in set(tiles)] + self.tiles_to_keep = [ + re.sub("^T", "", tile.upper()) for tile in set(tiles) + ] self._get_tiles_geom() self._make_request() return self.products_list @@ -133,15 +158,15 @@ class DataRequest: :param lon: longitude. :param lat: latitude. """ - self.tiles_to_keep = get_tiles_from_point(lon, lat, land_only=self.land_only) + self.tiles_to_keep = get_tiles_from_point( + lon, lat, land_only=self.land_only + ) self._get_tiles_geom() self.make_request() return self.products_list def from_bbox( - self, - lon_min: float, lat_min: float, - lon_max: float, lat_max: float + self, lon_min: float, lat_min: float, lon_max: float, lat_max: float ) -> Dict[str, Dict]: """akes request from a bbox. @@ -150,7 +175,9 @@ class DataRequest: :param lon_max: longitude. :param lat_max: latitude. """ - self.tiles_to_keep = get_tiles_from_bbox(lon_min, lat_min, lon_max, lat_max, land_only=self.land_only) + self.tiles_to_keep = get_tiles_from_bbox( + lon_min, lat_min, lon_max, lat_max, land_only=self.land_only + ) self._get_tiles_geom() self._make_request() return self.products_list @@ -160,10 +187,12 @@ class DataRequest: :param : tiles: list of valid tiles names. 
""" - geom_tiles = get_tiles_from_file(vectors_file, land_only=self.land_only) - self.tiles_to_keep = list(set(itertools.chain.from_iterable( - geom_tiles.values() - ))) + geom_tiles = get_tiles_from_file( + vectors_file, land_only=self.land_only + ) + self.tiles_to_keep = list( + set(itertools.chain.from_iterable(geom_tiles.values())) + ) self._get_tiles_geom() self._make_request() return self.products_list @@ -172,14 +201,18 @@ class DataRequest: """Will call the right request method depending on products""" logger.debug("_make_request") - logger.info("Requesting images ranging from {} to {}".format(self.start_date, self.end_date)) + logger.info( + "Requesting images ranging from {} to {}".format( + self.start_date, self.end_date + ) + ) if self.tiles_to_keep is None: raise ValueError("Query tiles not provided") # reset products_list # should the products_list be updated or erased for each new request ? - self.products_list = {"aws":{}, "hubs":{}} + self.products_list = {"aws": {}, "hubs": {}} tileddate = str_to_datetime("2016-11-01", "ymd") @@ -201,12 +234,15 @@ class DataRequest: # query by group of 3 tiles, otherwise getting error message # "Request URI too long" from scihub - for tiles_to_keep_triplet, tiles_to_keep_triplet_geom \ - in zip(grouper(self.tiles_to_keep, 3), - grouper(self.tiles_to_keep_geom.values(), 3)): + for tiles_to_keep_triplet, tiles_to_keep_triplet_geom in zip( + grouper(self.tiles_to_keep, 3), + grouper(self.tiles_to_keep_geom.values(), 3), + ): tiles_to_keep = [tile for tile in tiles_to_keep_triplet if tile] - tiles_to_keep_geom = [geom for geom in tiles_to_keep_triplet_geom if geom] + tiles_to_keep_geom = [ + geom for geom in tiles_to_keep_triplet_geom if geom + ] print(tiles_to_keep) @@ -217,12 +253,13 @@ class DataRequest: # scihub request products = OrderedDict() products = self.api.query( - query_geom, - date=(self.start_date, self.end_date), - order_by="+endposition", - platformname="Sentinel-2", - producttype="S2MSI1C", - cloudcoverpercentage=self.cloudcoverpercentage) + query_geom, + date=(self.start_date, self.end_date), + order_by="+endposition", + platformname="Sentinel-2", + producttype="S2MSI1C", + cloudcoverpercentage=self.cloudcoverpercentage, + ) # save products list as a pandas dataframe products_df = self.api.to_dataframe(products) @@ -231,7 +268,9 @@ class DataRequest: return # a products dictionnay for each server (AWS vs hubs) # fill each dictionnary depending on the acquisition date - for index, row in products_df[["title", "beginposition", "footprint"]].iterrows(): + for index, row in products_df[ + ["title", "beginposition", "footprint"] + ].iterrows(): # start date of the tiled S2 collection on the scihub server tileddate = str_to_datetime("2016-11-01", "ymd") @@ -241,34 +280,55 @@ class DataRequest: for tile_name, tile_geom in self.tiles_to_keep_geom.items(): # in case of duplicates on the server - if img_title not in self.products_list["hubs"].keys() and img_title not in self.products_list["aws"].keys(): + if ( + img_title not in self.products_list["hubs"].keys() + and img_title not in self.products_list["aws"].keys() + ): # tiled products are downloaded on hubs if re.match(r".*_T[0-9]{2}[A-Z]{3}_.*", img_title): if tile_name in img_title: - self.products_list["hubs"][img_title] = {"date": img_date, - "tile": tile_name} + self.products_list["hubs"][img_title] = { + "date": img_date, + "tile": tile_name, + } continue else: continue - # non-tiled products will be downloaded on aws + # non-tiled products will be downloaded on aws else: 
if tile_geom.intersects(img_footprint): - self.products_list["aws"][img_title] = {"date": img_date, - "tile": tile_name} + self.products_list["aws"][img_title] = { + "date": img_date, + "tile": tile_name, + } # pprint dicts in chronological order print("\nFrom AWS") - pprint(list(OrderedDict(sorted(self.products_list["aws"].items(), - key=lambda t: t[1]["date"])))) + pprint( + list( + OrderedDict( + sorted( + self.products_list["aws"].items(), + key=lambda t: t[1]["date"], + ) + ) + ) + ) print("\nFrom hubs") - pprint(list(OrderedDict(sorted(self.products_list["hubs"].items(), - key=lambda t: t[1]["date"])))) - - -### REQUÊTE POUR PRODUITS TUILÉS (moins lourde) - + pprint( + list( + OrderedDict( + sorted( + self.products_list["hubs"].items(), + key=lambda t: t[1]["date"], + ) + ) + ) + ) + + # Tiled products request (lighter) def _make_request_tiled_only(self) -> None: """Scihub API request using sentinelsat. This method is called if products are a mix of tiled and non-tiled products.""" @@ -276,7 +336,6 @@ class DataRequest: logger.debug("_make_request_tiled_only") print("Sentinel2 tiles:\n", self.tiles_to_keep) - products_from_hubs = dict() products_from_aws = dict() @@ -293,15 +352,16 @@ class DataRequest: print("Ignoring water-only tiles:", water_tiles) query_kwargs = { - 'platformname': 'Sentinel-2', - 'producttype': 'S2MSI1C', - 'cloudcoverpercentage': self.cloudcoverpercentage, - 'date': (self.start_date, self.end_date)} + "platformname": "Sentinel-2", + "producttype": "S2MSI1C", + "cloudcoverpercentage": self.cloudcoverpercentage, + "date": (self.start_date, self.end_date), + } products = OrderedDict() for tile in self.tiles_to_keep: kw = query_kwargs.copy() - kw['filename'] = '*_T{}_*'.format(tile) + kw["filename"] = "*_T{}_*".format(tile) pp = self.api.query(**kw) products.update(pp) @@ -315,58 +375,20 @@ class DataRequest: for index, row in products_df[["title", "beginposition"]].iterrows(): img_title = row[0] img_date = row[1].to_pydatetime() - self.products_list["hubs"][img_title] = {"date": img_date, - "tile": re.findall("_T([0-9]{2}[A-Z]{3})_", img_title)[0]} + self.products_list["hubs"][img_title] = { + "date": img_date, + "tile": re.findall("_T([0-9]{2}[A-Z]{3})_", img_title)[0], + } # pprint dicts in chronological order print("\nFrom hubs") - pprint(list(OrderedDict(sorted(self.products_list["hubs"].items(), - key=lambda t: t[1]["date"])))) - - - - - -#instance_start_date = '2018-07-01' -#instance_end_date = '2018-07-16' -#instance_start_date, instance_end_date = '20160123', '20160424' - -#x = 55.5331 -#y = -21.1331 -#bbox = (54.66796875, -21.647217065387817, 58.4033203125, -19.652934210612436) - - -#data = DataRequest(start_date=instance_start_date, - #end_date=instance_end_date, - #land_only=False).from_tiles("35LLD", "40KCB", "38KQV", "34HBJ") - - -#req = DataRequest(start_date=instance_start_date, - #end_date=instance_end_date, - #land_only=False) - -#print("#######") -#pprint(req.from_tiles("35LLD")) -#print("-------") -#pprint(req.from_tiles("40KCB", "38KQV", "34HBJ")) - -#data = DataRequest(start_date=instance_start_date, - #end_date=instance_end_date, - #land_only=False).from_point(lon=x, lat=y) - - -#data = DataRequest(start_date=instance_start_date, - #end_date=instance_end_date, - #land_only=True).from_bbox(54.66796875, -21.647217065387817, 58.4033203125, -19.652934210612436) - - -#data = DataRequest(start_date=instance_start_date, - #end_date=instance_end_date, - 
#land_only=True).from_file("/home/seas-oi/Documents/NDVI-MODIS-SENTINEL/SIG/sites_mada/34_sites_sentinelles.shp") - #land_only=True).from_file("/home/seas-oi/Téléchargements/map.geojson") - - - - - - + pprint( + list( + OrderedDict( + sorted( + self.products_list["hubs"].items(), + key=lambda t: t[1]["date"], + ) + ) + ) + ) diff --git a/sen2chain/download_and_process.py b/sen2chain/download_and_process.py index 06d69ffdf253c018aed1f7b53c574f833323e9ea..99532dcc383ee04526209f409133de912dbf2ba5 100644 --- a/sen2chain/download_and_process.py +++ b/sen2chain/download_and_process.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 """ Module for downloading and processing Sentinel-2 images in parallel. @@ -15,6 +15,7 @@ from collections import defaultdict from sentinelsat import SentinelAPI from sentinelhub import AwsProductRequest from pprint import pprint + # type annotations from typing import Dict, List, Tuple @@ -30,6 +31,7 @@ logger = logging.getLogger(__name__) logging.basicConfig(level=logging.DEBUG) +# FIXME: AWS download no longer available def download_aws(identifier: str, tile: str, hub: str) -> Tuple[str, bool]: """Downloads L1C safe from AWS using sentinelhub package. @@ -42,21 +44,28 @@ def download_aws(identifier: str, tile: str, hub: str) -> Tuple[str, bool]: temp_container.create_temp_folder() # downloaded = True - if not temp_container.l1c.path.exists() and not temp_container.l1c.in_library: - product_request = AwsProductRequest(product_id=identifier, - tile_list=["T"+tile], - data_folder=str(temp_container.temp_path), - safe_format=True) + if ( + not temp_container.l1c.path.exists() + and not temp_container.l1c.in_library + ): + product_request = AwsProductRequest( + product_id=identifier, + tile_list=["T" + tile], + data_folder=str(temp_container.temp_path), + safe_format=True, + ) product_request.save_data() - #archive_l1c(identifier, tile, hub) + # archive_l1c(identifier, tile, hub) # if not temp_container.l1c.path.exists(): - # downloaded = False + # downloaded = False return identifier, hub -def download_peps(identifier: str, tile: str, hub: str) -> Tuple[str, str, str, bool]: +def download_peps( + identifier: str, tile: str, hub: str +) -> Tuple[str, str, str, bool]: """Downloads L1C safe zip file from PEPS using peps_downloader.py. 
:param identifier: @@ -69,20 +78,28 @@ def download_peps(identifier: str, tile: str, hub: str) -> Tuple[str, str, str, temp_container.create_temp_folder() downloaded = True - if not temp_container.l1c.path.exists() and not temp_container.l1c.in_library: + if ( + not temp_container.l1c.path.exists() + and not temp_container.l1c.in_library + ): try: command = "python3 {peps_download_script} -a {peps_download_config} -c S2ST -i {identifier}".format( peps_download_script=peps_download_script, peps_download_config=str(peps_download_config), - identifier=identifier) - - process = subprocess.run(command.split(), - cwd=str(temp_container.temp_path), - check=True, - stderr=subprocess.PIPE) + identifier=identifier, + ) + + process = subprocess.run( + command.split(), + cwd=str(temp_container.temp_path), + check=True, + stderr=subprocess.PIPE, + ) except subprocess.CalledProcessError as e: logger.error("download_peps - peps_download FAILED: {}".format(e)) - logger.warning("File does not exist on PEPS yet: {}".format(identifier)) + logger.warning( + "File does not exist on PEPS yet: {}".format(identifier) + ) try: temp_container.unzip_l1c() @@ -99,7 +116,9 @@ def download_peps(identifier: str, tile: str, hub: str) -> Tuple[str, str, str, return identifier, tile, hub, downloaded -def download_scihub(identifier: str, tile: str, hub: str) -> Tuple[str, str, str, bool]: +def download_scihub( + identifier: str, tile: str, hub: str +) -> Tuple[str, str, str, bool]: """Downloads L1C safe zip file from Scihub using sentinelsat package. :param identifier: @@ -112,19 +131,33 @@ def download_scihub(identifier: str, tile: str, hub: str) -> Tuple[str, str, str temp_container.create_temp_folder() downloaded = True - if not temp_container.l1c.path.exists() and not temp_container.l1c.in_library: - api = SentinelAPI(Config().get("scihub_id"), Config().get("scihub_pwd"), "https://apihub.copernicus.eu/apihub/") + if ( + not temp_container.l1c.path.exists() + and not temp_container.l1c.in_library + ): + api = SentinelAPI( + Config().get("scihub_id"), + Config().get("scihub_pwd"), + "https://apihub.copernicus.eu/apihub/", + ) # Set proxy settings to the Requests session - if DownloadAndProcess.proxy_http_url or DownloadAndProcess.proxy_https_url: - proxies = {"http": DownloadAndProcess.proxy_http_url, - "https": DownloadAndProcess.proxy_https_url} + if ( + DownloadAndProcess.proxy_http_url + or DownloadAndProcess.proxy_https_url + ): + proxies = { + "http": DownloadAndProcess.proxy_http_url, + "https": DownloadAndProcess.proxy_https_url, + } api.session.proxies = proxies - product_request = api.query(identifier='*{}*'.format(identifier)) - api.download_all(product_request, directory_path=str(temp_container.temp_path)) + product_request = api.query(identifier="*{}*".format(identifier)) + api.download_all( + product_request, directory_path=str(temp_container.temp_path) + ) try: temp_container.unzip_l1c() - #archive_l1c(identifier, tile, hub) + # archive_l1c(identifier, tile, hub) except Exception as e: logger.debug("{}".format(e)) temp_container.delete_temp_folder() @@ -134,10 +167,14 @@ def download_scihub(identifier: str, tile: str, hub: str) -> Tuple[str, str, str return identifier, tile, hub, downloaded +# FIXME: docs def processing( - identifier: str, tile: str, hub: str, - indices_list: List[str], - nodata_clouds: bool, quicklook: bool + identifier: str, + tile: str, + hub: str, + indices_list: List[str], + nodata_clouds: bool, + quicklook: bool, ) -> Tuple[str, str, str]: """Process each L1C downloaded. 
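# Hedged outline of the chain processing() applies to each product, using
# only the calls visible in the next hunk (Sen2Cor L2A processing, cloud
# masking, then indices):
#
#     l1c_product = L1cProduct(identifier, tile)
#     l1c_product.process_l2a()
#     l2a_identifier = identifier.replace("L1C_", "L2A_").replace("_USER_", "_OPER_")
#     l2a_product = L2aProduct(l2a_identifier, tile)
#     l2a_product.process_cloud_mask()
#     l2a_product.process_indices(indices_list, nodata_clouds, quicklook)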
@@ -152,7 +189,9 @@ def processing(
     l1c_product = L1cProduct(identifier, tile)
     l1c_product.process_l2a()

-    l2a_identifier = identifier.replace("L1C_","L2A_").replace("_USER_", "_OPER_")
+    l2a_identifier = identifier.replace("L1C_", "L2A_").replace(
+        "_USER_", "_OPER_"
+    )
     l2a_product = L2aProduct(l2a_identifier, tile)
     l2a_product.process_cloud_mask()
     l2a_product.process_indices(indices_list, nodata_clouds, quicklook)
@@ -161,8 +200,7 @@


 def archive_l1c(identifier, tile, hub):
-    """
-    """
+    """Archives a downloaded L1C product."""
     temp = TempContainer(identifier)
     l1c_prod = L1cProduct(identifier, tile, path=temp.temp_path)
     l1c_prod.archive()
@@ -183,18 +221,24 @@ class DownloadAndProcess:
     :param nodata_clouds: mask indices output rasters with a cloud-mask.
     :param quicklook: creates a quicklook for each indice processed.
     """
+
     # TODO: make most of methods private.
+    # TODO: translate remaining french docs and comments.

     # Proxy settings
     proxy_http_url = Config().get("proxy_http_url").strip()
     proxy_https_url = Config().get("proxy_https_url").strip()

     def __init__(
-            self,
-            identifiers: Dict[str, dict],
-            hubs_limit: dict = None, aws_limit: int = None,
-            process_products: bool = False, max_processes: int = 2, indices_list: list = [],
-            nodata_clouds: bool = False, quicklook: bool = False
+        self,
+        identifiers: Dict[str, dict],
+        hubs_limit: dict = None,
+        aws_limit: int = None,
+        process_products: bool = False,
+        max_processes: int = 2,
+        indices_list: list = [],
+        nodata_clouds: bool = False,
+        quicklook: bool = False,
     ) -> None:

         if not isinstance(identifiers, dict):
@@ -203,7 +247,7 @@ class DownloadAndProcess:
         self.identifiers = identifiers

         if hubs_limit is None:
-            self.hubs_limit={"peps":3, "scihub":2}
+            self.hubs_limit = {"peps": 3, "scihub": 2}
             logger.debug("hubs_limit set to: {}".format(self.hubs_limit))
         else:
             self.hubs_limit = hubs_limit
@@ -214,7 +258,11 @@ class DownloadAndProcess:
         else:
             if aws_limit > 3:
                 self.aws_limit = 3
-                logger.warning("aws limit too high, using default: {}".format(self.aws_limit))
+                logger.warning(
+                    "aws limit too high, using default: {}".format(
+                        self.aws_limit
+                    )
+                )

         if not isinstance(process_products, bool):
             raise ValueError("process_product must be either True or False")
@@ -222,9 +270,15 @@ class DownloadAndProcess:
         self.process_products = process_products

         if max_processes >= 1:
-            self.max_processes = max_processes + 1  # + 1 pour process principal
+            self.max_processes = (
+                max_processes + 1
+            )  # + 1 for the main process
             if max_processes > 2:
-                logger.warning("max_processes set to: {}, don't run out of memory!".format(max_processes))
+                logger.warning(
+                    "max_processes set to: {}, don't run out of memory!".format(
+                        max_processes
+                    )
+                )
         else:
             raise ValueError("max_processes must be an unsigned number > 0.")

@@ -232,7 +286,9 @@ class DownloadAndProcess:
             self.indices_list = []
         else:
             if not isinstance(indices_list, list):
-                raise ValueError("indices_list must be a list of indices names")
+                raise ValueError(
+                    "indices_list must be a list of indices names"
+                )
             self.indices_list = indices_list

         if not isinstance(nodata_clouds, bool):
@@ -250,11 +306,13 @@ class DownloadAndProcess:

         # executors
         self.threads_executor = ThreadPoolExecutor(
-                max_workers=sum(self.hubs_limit.values())
-        )
-        self.processes_executor = ProcessPoolExecutor(
-                max_workers=self.max_processes
-        ) if self.process_products else None
+            max_workers=sum(self.hubs_limit.values())
+        )
+        self.processes_executor = (
+            ProcessPoolExecutor(max_workers=self.max_processes)
+            if self.process_products
+            else None
+        )

         # failed downloads
        self._products_attempts = defaultdict(lambda: 0)
@@ -282,10 +340,13 @@ class DownloadAndProcess:
                 logger.debug("tasks < limit")
                 item = await self.queue.get()
                 tile = self.identifiers["hubs"][item]["tile"]
-                task = asyncio.ensure_future(self.downloader_hubs(item, tile, hub))
+                task = asyncio.ensure_future(
+                    self.downloader_hubs(item, tile, hub)
+                )
                 tasks.append(task)
-                done, pending = await asyncio.wait(tasks,
-                                                   return_when=asyncio.FIRST_COMPLETED)
+                done, pending = await asyncio.wait(
+                    tasks, return_when=asyncio.FIRST_COMPLETED
+                )
                 for each in done:
                     tasks.remove(each)
@@ -296,41 +357,48 @@ class DownloadAndProcess:
             await asyncio.gather(*tasks)
             return

-    async def downloader_hubs(self,
-                              identifier: str, tile: str, hub: str) -> Tuple[str, str, str]:
+    async def downloader_hubs(
+        self, identifier: str, tile: str, hub: str
+    ) -> Tuple[str, str, str]:
         """Coroutine starting the peps and scihub download coroutines:
         calls the function that performs the actual download.
         """
         if hub == "scihub":
             logger.info("--> downloading {} from {}".format(identifier, hub))
-            fut = self.loop.run_in_executor(self.threads_executor,
-                                            functools.partial(download_scihub,
-                                                              identifier,
-                                                              tile,
-                                                              hub))
+            fut = self.loop.run_in_executor(
+                self.threads_executor,
+                functools.partial(download_scihub, identifier, tile, hub),
+            )
         elif hub == "peps":
             logger.info("--> downloading {} from {}".format(identifier, hub))
-            fut = self.loop.run_in_executor(self.threads_executor,
-                                            functools.partial(download_peps,
-                                                              identifier,
-                                                              tile,
-                                                              hub))
+            fut = self.loop.run_in_executor(
+                self.threads_executor,
+                functools.partial(download_peps, identifier, tile, hub),
+            )
         await fut

         # if download was successful, process the file
         if fut.result()[3]:
-            logger.info("--> --> {} downloaded from {}".format(identifier, hub))
-            arch = asyncio.ensure_future(self.archive_l1c(identifier, tile, hub))
+            logger.info(
+                "--> --> {} downloaded from {}".format(identifier, hub)
+            )
+            arch = asyncio.ensure_future(
+                self.archive_l1c(identifier, tile, hub)
+            )
             await arch
             if self.process_products:
-                fut = asyncio.ensure_future(self.process(identifier, tile, hub))
+                fut = asyncio.ensure_future(
+                    self.process(identifier, tile, hub)
+                )
                 await fut

         # if download failed, try again on another hub if first retry
         elif not fut.result()[3] and self._products_attempts[identifier] < 3:
             self._products_attempts[identifier] += 1
-            logger.info("{} download failed, will try again".format(identifier))
+            logger.info(
+                "{} download failed, will try again".format(identifier)
+            )
            # increase the number of seconds to wait with the number of attempts
             # will retry in 2, 4 then 6 minutes before giving up
             seconds_to_wait = self._products_attempts[identifier] * 120
@@ -344,34 +412,33 @@ class DownloadAndProcess:

         return (identifier, tile, hub)

-    async def downloader_aws(self,
-                             identifier: str, tile: str, hub: str) -> Tuple[str, str, str]:
-        """coroutine appelant la coroutine de téléchargement sur aws
-
-        """
+    # FIXME: AWS download no longer available
+    async def downloader_aws(
+        self, identifier: str, tile: str, hub: str
+    ) -> Tuple[str, str, str]:
+        """Coroutine calling the AWS download coroutine."""
         logger.debug("downloader_aws {}".format(identifier))
         async with asyncio.Semaphore(self.aws_limit):
-            fut = self.loop.run_in_executor(self.threads_executor,
-                                            functools.partial(download_aws,
-                                                              identifier,
-                                                              tile,
-                                                              hub))
+            fut = self.loop.run_in_executor(
+                self.threads_executor,
+                functools.partial(download_aws, identifier, tile, hub),
+            )
             await fut
-            logger.info("--> --> {} downloaded from {}".format(identifier, hub))
+            logger.info(
+                "--> --> {} downloaded from {}".format(identifier, hub)
+            )

         return (identifier, tile, hub)

-    async def archive_l1c(self,
-                          identifier: str, tile: str, hub: str) -> Tuple[str, str, str]:
-        """
-        """
-        #fut = self.loop.run_in_executor(self.processes_executor,
-        fut = self.loop.run_in_executor(None,
-                                        functools.partial(archive_l1c,
-                                                          identifier,
-                                                          tile,
-                                                          hub))
+    async def archive_l1c(
+        self, identifier: str, tile: str, hub: str
+    ) -> Tuple[str, str, str]:
+        """Coroutine archiving a downloaded L1C product via the default executor."""
+        # fut = self.loop.run_in_executor(self.processes_executor,
+        fut = self.loop.run_in_executor(
+            None, functools.partial(archive_l1c, identifier, tile, hub)
+        )
         if fut.cancelled():
             return fut
         await fut
@@ -379,8 +446,9 @@ class DownloadAndProcess:

         return (identifier, tile, hub)

-    async def process(self,
-                      identifier: str, tile: str, hub: str) -> Tuple[str, str, str]:
+    async def process(
+        self, identifier: str, tile: str, hub: str
+    ) -> Tuple[str, str, str]:
         """
         Coroutine calling the function that processes the downloaded file
         for scihub and peps

         """
         logger.debug("process {}".format(identifier))

-        #async with asyncio.Semaphore(self.max_processes):
+        # async with asyncio.Semaphore(self.max_processes):
         logger.info("--> --> --> processing {}".format(identifier))
-        fut = self.loop.run_in_executor(self.processes_executor,
-                                        functools.partial(processing,
-                                                          identifier,
-                                                          tile,
-                                                          hub,
-                                                          self.indices_list,
-                                                          self.nodata_clouds,
-                                                          self.quicklook))
+        fut = self.loop.run_in_executor(
+            self.processes_executor,
+            functools.partial(
+                processing,
+                identifier,
+                tile,
+                hub,
+                self.indices_list,
+                self.nodata_clouds,
+                self.quicklook,
+            ),
+        )
         if fut.cancelled():
             return fut
         await fut
@@ -407,7 +479,10 @@ class DownloadAndProcess:

         return identifier, tile, hub

-    async def download_process_aws(self, identifier: str) -> Tuple[str, str, str]:
+    # FIXME: AWS download no longer available
+    async def download_process_aws(
+        self, identifier: str
+    ) -> Tuple[str, str, str]:
         """
         Coroutine for downloading and processing products from AWS.
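# The coroutines in this class share one pattern (hedged sketch): blocking
# work is wrapped in functools.partial, pushed onto an executor from the
# event loop, then awaited:
#
#     fut = self.loop.run_in_executor(
#         self.threads_executor,  # or self.processes_executor / None
#         functools.partial(download_scihub, identifier, tile, hub),
#     )
#     await fut
#     identifier, tile, hub, downloaded = fut.result()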
@@ -416,11 +491,13 @@ class DownloadAndProcess:
         logger.info("download_process_aws")
         tile = self.identifiers["aws"][identifier]["tile"]
-        downloads = await asyncio.ensure_future(self.downloader_aws(identifier, tile, "aws"))
+        downloads = await asyncio.ensure_future(
+            self.downloader_aws(identifier, tile, "aws")
+        )
         if self.process_products:
             fut = self.process(*downloads)
-            #if fut.cancelled():
-                #return fut
+            # if fut.cancelled():
+            # return fut
             await fut
         return downloads

@@ -432,19 +509,21 @@ class DownloadAndProcess:
         identifiers_aws = self.identifiers["aws"]
         identifiers_hubs = self.identifiers["hubs"]

-        print("Tiled: ",len(identifiers_hubs))
+        print("Tiled: ", len(identifiers_hubs))
         print("Non tiled: ", len(identifiers_aws))

         tasks = []
         # start the hub proxies
         for hub, limit in self.hubs_limit.items():
             if limit > 0:
-                tasks.append(asyncio.ensure_future(self.proxy_pool(hub, limit)))
+                tasks.append(
+                    asyncio.ensure_future(self.proxy_pool(hub, limit))
+                )

         # fill the queue
         if identifiers_hubs:
             for identifier, data in identifiers_hubs.items():
-                if TempContainer(identifier).l1c.in_library :
+                if TempContainer(identifier).l1c.in_library:
                     continue
                 await self.queue.put(identifier)

@@ -453,8 +532,10 @@ class DownloadAndProcess:

         # start the AWS downloads from the right list
         if identifiers_aws:
-            aws = [asyncio.ensure_future(self.download_process_aws(identifier))
-                   for (identifier, data) in identifiers_aws.items()]
+            aws = [
+                asyncio.ensure_future(self.download_process_aws(identifier))
+                for (identifier, data) in identifiers_aws.items()
+            ]
             await asyncio.gather(*aws)

         # shutting down executors
@@ -479,8 +560,16 @@ class DownloadAndProcess:
         try:
             self.loop.run_until_complete(self.main())
             end_time = datetime.now() - start_time
-            total_products = len(self.identifiers["aws"]) + len(self.identifiers["hubs"]) - len(self.failed_products)
-            logger.info("Downloaded and processed {} file(s) in {}".format(total_products, end_time))
+            total_products = (
+                len(self.identifiers["aws"])
+                + len(self.identifiers["hubs"])
+                - len(self.failed_products)
+            )
+            logger.info(
+                "Downloaded and processed {} file(s) in {}".format(
+                    total_products, end_time
+                )
+            )
         finally:
             logger.debug("closing loop")
             # self.loop.close()
diff --git a/sen2chain/geo_utils.py b/sen2chain/geo_utils.py
index e0b4d2cc33348cec5861c9211fed288d0bda26a3..52ddd60cdf810254b167a65c9681e9ed48cb4bfb 100644
--- a/sen2chain/geo_utils.py
+++ b/sen2chain/geo_utils.py
@@ -1,4 +1,5 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
+
 """
 Module for manipulating the Sentinel-2 tiles index geopackage file.
""" @@ -8,6 +9,7 @@ import pickle import fiona from osgeo import ogr from shapely.geometry import shape, Point, box + # type annotation from typing import List, Set, Dict, Optional, Union from pathlib import Path @@ -36,17 +38,16 @@ def serialise_tiles_index() -> None: with fiona.open(str(TILES_INDEX), "r") as tiles_index: for tile in tiles_index: name = tile["properties"]["Name"] - geom = shape(tile['geometry']) + geom = shape(tile["geometry"]) tiles_dict[name] = geom with open(str(TILES_INDEX_DICT), "wb") as pfile: pickle.dump(tiles_dict, pfile) -def get_tiles_from_point(lon: float, - lat: float, - land_only: bool = False - ) -> Set[str]: +def get_tiles_from_point( + lon: float, lat: float, land_only: bool = False +) -> Set[str]: """ Returns tiles containing a point (longitude, latitude) @@ -56,22 +57,27 @@ def get_tiles_from_point(lon: float, """ intersected_tiles = set() - with fiona.open(str(TILES_INDEX), 'r') as tiles_index: + with fiona.open(str(TILES_INDEX), "r") as tiles_index: point_geom = Point(lon, lat) if land_only: - tiles_index = filter(lambda feat: feat["properties"]["land"] == "True", tiles_index) + tiles_index = filter( + lambda feat: feat["properties"]["land"] == "True", tiles_index + ) for tile in tiles_index: - tile_geom = shape(tile['geometry']) + tile_geom = shape(tile["geometry"]) if tile_geom.contains(point_geom): intersected_tiles.add(tile["properties"]["Name"]) return intersected_tiles -def get_tiles_from_bbox(lon_min: float, lat_min: float, - lon_max: float, lat_max: float, - land_only=False - ) -> Set[str]: +def get_tiles_from_bbox( + lon_min: float, + lat_min: float, + lon_max: float, + lat_max: float, + land_only=False, +) -> Set[str]: """ Returns S2 tiles intersecting a bbox. @@ -83,13 +89,15 @@ def get_tiles_from_bbox(lon_min: float, lat_min: float, """ intersected_tiles = set() - with fiona.open(str(TILES_INDEX), 'r') as tiles_index: + with fiona.open(str(TILES_INDEX), "r") as tiles_index: bbox_geom = box(lon_min, lat_min, lon_max, lat_max) if land_only: - tiles_index = filter(lambda feat: feat["properties"]["land"] == "True", tiles_index) + tiles_index = filter( + lambda feat: feat["properties"]["land"] == "True", tiles_index + ) for tile in tiles_index: - tile_geom = shape(tile['geometry']) + tile_geom = shape(tile["geometry"]) if tile_geom.intersects(bbox_geom): intersected_tiles.add(tile["properties"]["Name"]) @@ -97,8 +105,7 @@ def get_tiles_from_bbox(lon_min: float, lat_min: float, def get_tiles_from_file( - vectors_file: str, - land_only: bool = False + vectors_file: str, land_only: bool = False ) -> Dict[str, Optional[List[str]]]: """ Returns the intersected tiles for each feature id in the vectors file. 
@@ -130,15 +137,15 @@ def get_tiles_from_file(
                 tile = filtered_t.GetField("Name")
                 contained_in_tiles.append(tile)

-        intersected_tiles_dict[str(feat_id)] = contained_in_tiles if contained_in_tiles else None
+        intersected_tiles_dict[str(feat_id)] = (
+            contained_in_tiles if contained_in_tiles else None
+        )
         feat_id += 1

     return intersected_tiles_dict


-def get_processed_indices_vect(
-        out_folder: str = None
-    ):
+def get_processed_indices_vect(out_folder: str = None):
     """
     Creates vector files of the tiles with processed indices

     :param out_folder: output folder
     """
     if not out_folder:
         out_folder = Path(Config().get("temp_path"))
     out_shapefile = str(Path(out_folder) / "tiles_indices_shp.shp")
-    out_shapefile_complement = str(Path(out_folder) / "tiles_indices_complement_shp.shp")
+    out_shapefile_complement = str(
+        Path(out_folder) / "tiles_indices_complement_shp.shp"
+    )
     out_shapefile_total = str(Path(out_folder) / "tiles_indices_total_shp.shp")

     drv_gpkg = ogr.GetDriverByName("GPKG")
     input_layer_ds = drv_gpkg.Open(str(TILES_INDEX), 0)
     input_layer_lyr = input_layer_ds.GetLayer(0)
-
-    #~ ndvi_index = Library().ndvi
-    #~ ndwigao_index = Library().ndwigao
-    #~ ndwimcf_index = Library().ndwimcf
-    #~ mndwi_index = Library().mndwi
-    #~ indices_index = set().union(ndvi_index, ndwigao_index, ndwimcf_index, mndwi_index)
-
+
+    # ~ ndvi_index = Library().ndvi
+    # ~ ndwigao_index = Library().ndwigao
+    # ~ ndwimcf_index = Library().ndwimcf
+    # ~ mndwi_index = Library().mndwi
+    # ~ indices_index = set().union(ndvi_index, ndwigao_index, ndwimcf_index, mndwi_index)
+
     lib = Library()
-    indices_index = {t for c in (getattr(lib, toto) for toto in ['l2a', 'ndvi', 'ndwigao', 'ndwimcf', 'mndwi', 'ndre', 'ireci']) for t in c}
-    total_index = {t for c in (getattr(lib, toto) for toto in ['l1c', 'l2a', 'ndvi', 'ndwigao', 'ndwimcf', 'mndwi', 'ndre', 'ireci']) for t in c}
-    complement_index = total_index - indices_index
-
+    indices_index = {
+        t
+        for c in (
+            getattr(lib, product)
+            for product in [
+                "l2a",
+                "ndvi",
+                "ndwigao",
+                "ndwimcf",
+                "mndwi",
+                "ndre",
+                "ireci",
+            ]
+        )
+        for t in c
+    }
+    total_index = {
+        t
+        for c in (
+            getattr(lib, product)
+            for product in [
+                "l1c",
+                "l2a",
+                "ndvi",
+                "ndwigao",
+                "ndwimcf",
+                "mndwi",
+                "ndre",
+                "ireci",
+            ]
+        )
+        for t in c
+    }
+    complement_index = total_index - indices_index
+
     tile_count = {}
     for key in total_index:
-        #~ logger.info(key)
-        tile_count[key]={"l1c": 0, "l2a": 0, 'ndvi': 0, 'ndwigao': 0, 'ndwimcf': 0, 'mndwi': 0, 'ndre': 0, 'ireci': 0}
+        # ~ logger.info(key)
+        tile_count[key] = {
+            "l1c": 0,
+            "l2a": 0,
+            "ndvi": 0,
+            "ndwigao": 0,
+            "ndwimcf": 0,
+            "mndwi": 0,
+            "ndre": 0,
+            "ireci": 0,
+        }
         tile = Tile(key)
-        for p in ['l1c', 'l2a']:
+        for p in ["l1c", "l2a"]:
             tile_count[key][p] = len(getattr(tile, p))
-        for p in ['ndvi', 'ndwigao', 'ndwimcf', 'mndwi', 'ndre', 'ireci']:
-            try:
+        for p in ["ndvi", "ndwigao", "ndwimcf", "mndwi", "ndre", "ireci"]:
+            try:
                 tile_count[key][p] = len(getattr(tile, p).masks.cm001)
             except:
                 pass
-
+
     # drop tiles whose indices are all zero
-    indices_index_nonull = {k: v for k, v in tile_count.items() if sum(v[indice] for indice in [z for z in v.keys() if z in ['ndvi', 'ndwigao', 'ndwimcf', 'mndwi', 'ndre', 'ireci']])}
+    indices_index_nonull = {
+        k: v
+        for k, v in tile_count.items()
+        if sum(
+            v[indice]
+            for indice in [
+                z
+                for z in v.keys()
+                if z
+                in ["ndvi", "ndwigao", "ndwimcf", "mndwi", "ndre", "ireci"]
+            ]
+        )
+    }

     # Indices layer
-    query_str = 'or '.join(['"{}" = "{}"'.format("Name", idx) for idx in indices_index_nonull.keys()])
+    query_str = "or ".join(
+        [
+            '"{}" = "{}"'.format("Name", idx)
+            for idx in indices_index_nonull.keys()
+        ]
+    )
     input_layer_lyr.SetAttributeFilter(query_str)
-    driver = ogr.GetDriverByName('ESRI Shapefile')
+    driver = ogr.GetDriverByName("ESRI Shapefile")
     out_ds = driver.CreateDataSource(out_shapefile)
-    out_layer = out_ds.CopyLayer(input_layer_lyr, 'tuiles')
-    for name in ["L1C", "L1C_F", "L1C_L", \
-                 "L2A", "L2A_F", "L2A_L", \
-                 "NDVI", "NDVI_F", "NDVI_L", \
-                 "NDWIGAO", "NDWIGAO_F", "NDWIGAO_L", \
-                 "NDWIMCF", "NDWIMCF_F", "NDWIMCF_L", \
-                 "MNDWI", "MNDWI_F", "MNDWI_L",
-                 "NDRE", "NDRE_F", "NDRE_L",
-                 "IRECI", "IRECI_F", "IRECI_L",
-                 ]:
+    out_layer = out_ds.CopyLayer(input_layer_lyr, "tuiles")
+    for name in [
+        "L1C",
+        "L1C_F",
+        "L1C_L",
+        "L2A",
+        "L2A_F",
+        "L2A_L",
+        "NDVI",
+        "NDVI_F",
+        "NDVI_L",
+        "NDWIGAO",
+        "NDWIGAO_F",
+        "NDWIGAO_L",
+        "NDWIMCF",
+        "NDWIMCF_F",
+        "NDWIMCF_L",
+        "MNDWI",
+        "MNDWI_F",
+        "MNDWI_L",
+        "NDRE",
+        "NDRE_F",
+        "NDRE_L",
+        "IRECI",
+        "IRECI_F",
+        "IRECI_L",
+    ]:
         field_name = ogr.FieldDefn(name, ogr.OFTString)
         field_name.SetWidth(10)
         out_layer.CreateField(field_name)
-
+
     def fill_fields(layer):
         for feat in layer:
             tile_name = feat.GetField("Name")
             tile = Tile(tile_name)
-
-            feat.SetField("L1C", tile_count[tile_name]['l1c'])
+
+            feat.SetField("L1C", tile_count[tile_name]["l1c"])
             try:
-                feat.SetField("L1C_F", tile.l1c.first.date.strftime("%d/%m/%Y"))
+                feat.SetField(
+                    "L1C_F", tile.l1c.first.date.strftime("%d/%m/%Y")
+                )
                 feat.SetField("L1C_L", tile.l1c.last.date.strftime("%d/%m/%Y"))
             except:
                 feat.SetField("L1C_F", 0)
                 feat.SetField("L1C_L", 0)
-
-            feat.SetField("L2A", tile_count[tile_name]['l2a'])
+
+            feat.SetField("L2A", tile_count[tile_name]["l2a"])
             try:
-                feat.SetField("L2A_F", tile.l2a.first.date.strftime("%d/%m/%Y"))
+                feat.SetField(
+                    "L2A_F", tile.l2a.first.date.strftime("%d/%m/%Y")
+                )
                 feat.SetField("L2A_L", tile.l2a.last.date.strftime("%d/%m/%Y"))
             except:
                 feat.SetField("L2A_F", 0)
                 feat.SetField("L2A_L", 0)
-
-            feat.SetField("NDVI", tile_count[tile_name]['ndvi'])
+
+            feat.SetField("NDVI", tile_count[tile_name]["ndvi"])
             try:
-                feat.SetField("NDVI_F", tile.ndvi.masks.cm001.first.date.strftime("%d/%m/%Y"))
-                feat.SetField("NDVI_L", tile.ndvi.masks.cm001.last.date.strftime("%d/%m/%Y"))
+                feat.SetField(
+                    "NDVI_F",
+                    tile.ndvi.masks.cm001.first.date.strftime("%d/%m/%Y"),
+                )
+                feat.SetField(
+                    "NDVI_L",
+                    tile.ndvi.masks.cm001.last.date.strftime("%d/%m/%Y"),
+                )
             except:
                 feat.SetField("NDVI_F", 0)
                 feat.SetField("NDVI_L", 0)
-
-            feat.SetField("NDWIGAO", tile_count[tile_name]['ndwigao'])
+
+            feat.SetField("NDWIGAO", tile_count[tile_name]["ndwigao"])
             try:
-                feat.SetField("NDWIGAO_F", tile.ndwigao.masks.cm001.first.date.strftime("%d/%m/%Y"))
-                feat.SetField("NDWIGAO_L", tile.ndwigao.masks.cm001.last.date.strftime("%d/%m/%Y"))
+                feat.SetField(
+                    "NDWIGAO_F",
+                    tile.ndwigao.masks.cm001.first.date.strftime("%d/%m/%Y"),
+                )
+                feat.SetField(
+                    "NDWIGAO_L",
+                    tile.ndwigao.masks.cm001.last.date.strftime("%d/%m/%Y"),
+                )
             except:
                 feat.SetField("NDWIGAO_F", 0)
                 feat.SetField("NDWIGAO_L", 0)
-
-            feat.SetField("NDWIMCF", tile_count[tile_name]['ndwimcf'])
+
+            feat.SetField("NDWIMCF", tile_count[tile_name]["ndwimcf"])
             try:
-                feat.SetField("NDWIMCF_F", tile.ndwimcf.masks.cm001.first.date.strftime("%d/%m/%Y"))
-                feat.SetField("NDWIMCF_L", tile.ndwimcf.masks.cm001.last.date.strftime("%d/%m/%Y"))
+                feat.SetField(
+                    "NDWIMCF_F",
+                    tile.ndwimcf.masks.cm001.first.date.strftime("%d/%m/%Y"),
+                )
+                feat.SetField(
+                    "NDWIMCF_L",
+                    tile.ndwimcf.masks.cm001.last.date.strftime("%d/%m/%Y"),
+                )
             except:
                 feat.SetField("NDWIMCF_F", 0)
                 feat.SetField("NDWIMCF_L", 0)
-
-            feat.SetField("MNDWI", tile_count[tile_name]['mndwi'])
+
+            feat.SetField("MNDWI", tile_count[tile_name]["mndwi"])
             try:
-                feat.SetField("MNDWI_F", tile.mndwi.masks.cm001.first.date.strftime("%d/%m/%Y"))
-                feat.SetField("MNDWI_L", tile.mndwi.masks.cm001.last.date.strftime("%d/%m/%Y"))
+                feat.SetField(
+                    "MNDWI_F",
+                    tile.mndwi.masks.cm001.first.date.strftime("%d/%m/%Y"),
+                )
+                feat.SetField(
+                    "MNDWI_L",
+                    tile.mndwi.masks.cm001.last.date.strftime("%d/%m/%Y"),
+                )
             except:
                 feat.SetField("MNDWI_F", 0)
-                feat.SetField("MNDWI_L", 0)
-
-            feat.SetField("NDRE", tile_count[tile_name]['ndre'])
+                feat.SetField("MNDWI_L", 0)
+
+            feat.SetField("NDRE", tile_count[tile_name]["ndre"])
             try:
-                feat.SetField("NDRE_F", tile.ndre.masks.cm001.first.date.strftime("%d/%m/%Y"))
-                feat.SetField("NDRE_L", tile.ndre.masks.cm001.last.date.strftime("%d/%m/%Y"))
+                feat.SetField(
+                    "NDRE_F",
+                    tile.ndre.masks.cm001.first.date.strftime("%d/%m/%Y"),
+                )
+                feat.SetField(
+                    "NDRE_L",
+                    tile.ndre.masks.cm001.last.date.strftime("%d/%m/%Y"),
+                )
             except:
                 feat.SetField("NDRE_F", 0)
                 feat.SetField("NDRE_L", 0)
-
-            feat.SetField("IRECI", tile_count[tile_name]['ireci'])
+
+            feat.SetField("IRECI", tile_count[tile_name]["ireci"])
             try:
-                feat.SetField("IRECI_F", tile.ireci.masks.cm001.first.date.strftime("%d/%m/%Y"))
-                feat.SetField("IRECI_L", tile.ireci.masks.cm001.last.date.strftime("%d/%m/%Y"))
+                feat.SetField(
+                    "IRECI_F",
+                    tile.ireci.masks.cm001.first.date.strftime("%d/%m/%Y"),
+                )
+                feat.SetField(
+                    "IRECI_L",
+                    tile.ireci.masks.cm001.last.date.strftime("%d/%m/%Y"),
+                )
             except:
                 feat.SetField("IRECI_F", 0)
                 feat.SetField("IRECI_L", 0)
@@ -273,43 +396,85 @@ def get_processed_indices_vect(
     fill_fields(out_layer)
     out_layer = None
     del out_layer, out_ds
-
+
     # Full layer
-    query_str = 'or '.join(['"{}" = "{}"'.format("Name", idx) for idx in tile_count.keys()])
+    query_str = "or ".join(
+        ['"{}" = "{}"'.format("Name", idx) for idx in tile_count.keys()]
+    )
     input_layer_lyr.SetAttributeFilter(query_str)
-    driver = ogr.GetDriverByName('ESRI Shapefile')
+    driver = ogr.GetDriverByName("ESRI Shapefile")
     out_ds = driver.CreateDataSource(out_shapefile_total)
-    out_layer = out_ds.CopyLayer(input_layer_lyr, 'tuiles')
-    for name in ["L1C", "L1C_F", "L1C_L", \
-                 "L2A", "L2A_F", "L2A_L", \
-                 "NDVI", "NDVI_F", "NDVI_L", \
-                 "NDWIGAO", "NDWIGAO_F", "NDWIGAO_L", \
-                 "NDWIMCF", "NDWIMCF_F", "NDWIMCF_L", \
-                 "MNDWI", "MNDWI_F", "MNDWI_L",
-                 "NDRE", "NDRE_F", "NDRE_L",
-                 "IRECI", "IRECI_F", "IRECI_L",
-                 ]:
+    out_layer = out_ds.CopyLayer(input_layer_lyr, "tuiles")
+    for name in [
+        "L1C",
+        "L1C_F",
+        "L1C_L",
+        "L2A",
+        "L2A_F",
+        "L2A_L",
+        "NDVI",
+        "NDVI_F",
+        "NDVI_L",
+        "NDWIGAO",
+        "NDWIGAO_F",
+        "NDWIGAO_L",
+        "NDWIMCF",
+        "NDWIMCF_F",
+        "NDWIMCF_L",
+        "MNDWI",
+        "MNDWI_F",
+        "MNDWI_L",
+        "NDRE",
+        "NDRE_F",
+        "NDRE_L",
+        "IRECI",
+        "IRECI_F",
+        "IRECI_L",
+    ]:
         field_name = ogr.FieldDefn(name, ogr.OFTString)
         field_name.SetWidth(10)
         out_layer.CreateField(field_name)
     fill_fields(out_layer)
     out_layer = None
     del out_layer, out_ds
-
+
     # Complementary layer
-    query_str = 'or '.join(['"{}" = "{}"'.format("Name", idx) for idx in (tile_count.keys()-indices_index_nonull.keys())])
+    query_str = "or ".join(
+        [
+            '"{}" = "{}"'.format("Name", idx)
+            for idx in (tile_count.keys() - indices_index_nonull.keys())
+        ]
+    )
     input_layer_lyr.SetAttributeFilter(query_str)
-    driver = ogr.GetDriverByName('ESRI Shapefile')
+    driver = ogr.GetDriverByName("ESRI Shapefile")
    out_ds = driver.CreateDataSource(out_shapefile_complement)
-    out_layer = out_ds.CopyLayer(input_layer_lyr, 'tuiles')
-    for name in ["L1C", "L1C_F", "L1C_L", \
-                 "L2A", "L2A_F", "L2A_L", \
-                 "NDVI", "NDVI_F", "NDVI_L", \
-                 "NDWIGAO", "NDWIGAO_F", "NDWIGAO_L", \
-                 "NDWIMCF", "NDWIMCF_F", "NDWIMCF_L", \
-                 "MNDWI", "MNDWI_F", "MNDWI_L",
-                 "NDRE", "NDRE_F", "NDRE_L",
-                 "IRECI", "IRECI_F", "IRECI_L"]:
+    out_layer = out_ds.CopyLayer(input_layer_lyr, "tuiles")
+    for name in [
+        "L1C",
+        "L1C_F",
+        "L1C_L",
+        "L2A",
+        "L2A_F",
+        "L2A_L",
+        "NDVI",
+        "NDVI_F",
+        "NDVI_L",
+        "NDWIGAO",
+        "NDWIGAO_F",
+        "NDWIGAO_L",
+        "NDWIMCF",
+        "NDWIMCF_F",
+        "NDWIMCF_L",
+        "MNDWI",
+        "MNDWI_F",
+        "MNDWI_L",
+        "NDRE",
+        "NDRE_F",
+        "NDRE_L",
+        "IRECI",
+        "IRECI_F",
+        "IRECI_L",
+    ]:
         field_name = ogr.FieldDefn(name, ogr.OFTString)
         field_name.SetWidth(10)
         out_layer.CreateField(field_name)
@@ -317,14 +482,15 @@ def get_processed_indices_vect(
     out_layer = None
     del input_layer_ds, input_layer_lyr, out_layer, out_ds

+
 def crop_product_by_shp(
-        raster_path: Union[str, Path] = None,
-        vector_path: Union[str, Path] = None,
-        buff: int = 0,
-        out_folder: str = None
-    ):
+    raster_path: Union[str, Path] = None,
+    vector_path: Union[str, Path] = None,
+    buff: int = 0,
+    out_folder: str = None,
+):
     """
-    Returns a cropped raster
+    Crops a raster to a vector file extent (optional buffer) and writes
+    the result to disk
     :param raster_path: path to input raster file
     :param vector_path: path to input shp
     :param buff: buffer size around shp (in raster units, i.e. m)
     :param out_folder: output folder
     """
     if not out_folder:
         out_folder = Path(Config().get("temp_path"))
-
+
     crop_extent = gpd.read_file(str(vector_path))
     raster = rasterio.open(str(raster_path))
     crop_extent_new_proj = crop_extent.to_crs(raster.crs)
-    extent_geojson = mapping(crop_extent_new_proj['geometry'][0].buffer(buff))
-
-    out_img, out_transform = mask(dataset=raster, shapes=[extent_geojson], crop=True)
-
+    extent_geojson = mapping(crop_extent_new_proj["geometry"][0].buffer(buff))
+
+    out_img, out_transform = mask(
+        dataset=raster, shapes=[extent_geojson], crop=True
+    )
+
     out_meta = raster.meta.copy()
-    out_meta.update({"driver": "GTiff",
-                     "compress": "DEFLATE",
-                     "height": out_img.shape[1],
-                     "width": out_img.shape[2],
-                     "transform": out_transform})
-
-    with rasterio.open(out_folder / (Path(raster_path).stem + "_cropped-" + str(buff) + "m.tif"), "w", **out_meta) as dest:
+    out_meta.update(
+        {
+            "driver": "GTiff",
+            "compress": "DEFLATE",
+            "height": out_img.shape[1],
+            "width": out_img.shape[2],
+            "transform": out_transform,
+        }
+    )
+
+    with rasterio.open(
+        out_folder
+        / (Path(raster_path).stem + "_cropped-" + str(buff) + "m.tif"),
+        "w",
+        **out_meta
+    ) as dest:
         dest.write(out_img)
-
diff --git a/sen2chain/indices.py b/sen2chain/indices.py
index db933d8ec7f927a07e21432f9f15c6a358567dfe..4a17247a2b1bf3b36ff20018ceab91041feb0f0d 100644
--- a/sen2chain/indices.py
+++ b/sen2chain/indices.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8

 """
 Module for defining radiometric indices.
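# The normalized-difference classes below (Ndvi, NdwiMcf, NdwiGao, Mndwi,
# Ndre) all delegate to create_raw_ndr(), which presumably computes the
# usual normalized difference, sketched here with NDVI's band pair
# (B08 = NIR, B04 = red, both at 10 m):
#
#     ndvi = (b08 - b04) / (b08 + b04)   # values in [-1, 1]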
@@ -13,10 +13,20 @@ import os # type annotations from typing import Union, List -from .indices_functions import (create_raw_ndr, create_raw_ireci, - #~ create_raw_ndvi, create_raw_ndre, create_raw_ndwigao, create_raw_ndwimcf, create_raw_mndwi - create_raw_bigr, create_raw_birnir, create_raw_bibg, - create_masked_indice, index_tiff_2_jp2) +from .indices_functions import ( + create_raw_ndr, + create_raw_ireci, + # create_raw_ndvi, + # create_raw_ndre, + # create_raw_ndwigao, + # create_raw_ndwimcf, + # create_raw_mndwi, + create_raw_bigr, + create_raw_birnir, + create_raw_bibg, + create_masked_indice, + index_tiff_2_jp2, +) from .colormap import matplotlib_colormap_to_rgb, create_colormap, create_rvb logger = logging.getLogger(__name__) @@ -35,6 +45,7 @@ class Indice(metaclass=ABCMeta): process_indice(out_path, nodata_clouds, quicklook): """ + @property @abstractmethod def name(self): @@ -61,6 +72,7 @@ class Ndvi(Indice): NIR: band 08 (10m) VIR: band 04 (10m) """ + name = "NDVI" filename_template = "{product_identifier}_NDVI{ext}" ext = ".jp2" @@ -69,25 +81,30 @@ class Ndvi(Indice): def __init__(self, l2a_product_object, cm_product_object): if (l2a_product_object or cm_product_object) is None: - raise ValueError("A L2aProduct and NewCloudMask objects must be provided") + raise ValueError( + "A L2aProduct and NewCloudMask objects must be provided" + ) else: self.l2a_product = l2a_product_object self.cm_product = cm_product_object self.out_path = None - self.indice_stem = self.filename_template.format(product_identifier=self.l2a_product.identifier, ext="") + self.indice_stem = self.filename_template.format( + product_identifier=self.l2a_product.identifier, ext="" + ) self.indice_filename = self.indice_stem + self.ext self.indice_raw = self.indice_stem + self.ext_raw - def process_indice(self, - out_path: pathlib.PosixPath, - reprocess: bool = False, - nodata_clouds: bool = False, - quicklook: bool = False - ) -> None: - """ - process NDVI + def process_indice( + self, + out_path: pathlib.PosixPath, + reprocess: bool = False, + nodata_clouds: bool = False, + quicklook: bool = False, + ) -> None: + """ + process NDVI :param out_path: :param reprocess: :param nodata_clouds: @@ -97,54 +114,71 @@ class Ndvi(Indice): if (out_path / self.indice_filename).exists() and not reprocess: logger.info("{} already exists".format(self.indice_filename)) else: - create_raw_ndr(b1_path=self.l2a_product.b08_10m, - b2_path=self.l2a_product.b04_10m, - out_path=(out_path / self.indice_raw)) - index_tiff_2_jp2(img_path=(out_path / self.indice_raw), - out_path=(out_path / self.indice_filename), - quality = 20) + create_raw_ndr( + b1_path=self.l2a_product.b08_10m, + b2_path=self.l2a_product.b04_10m, + out_path=(out_path / self.indice_raw), + ) + index_tiff_2_jp2( + img_path=(out_path / self.indice_raw), + out_path=(out_path / self.indice_filename), + quality=20, + ) if nodata_clouds: if not self.cm_product.path.exists(): logger.info("Cloudmask does not exist, indice not masked") raise ValueError("Cloud mask does not exist") - masked_indice_filename = self.indice_stem + "_" + self.cm_product.suffix + self.ext - masked_indice_raw = self.indice_stem + "_" + self.cm_product.suffix + self.ext_raw - + masked_indice_filename = ( + self.indice_stem + "_" + self.cm_product.suffix + self.ext + ) + masked_indice_raw = ( + self.indice_stem + "_" + self.cm_product.suffix + self.ext_raw + ) + if (out_path / masked_indice_filename).exists() and not reprocess: logger.info("{} already exists".format(masked_indice_filename)) else: if 
(out_path / self.indice_raw).exists(): - ndvi_name = (out_path / self.indice_raw) + ndvi_name = out_path / self.indice_raw else: - ndvi_name = (out_path / self.indice_filename) - create_masked_indice(indice_path=ndvi_name, - cloud_mask_path=self.cm_product.path, - out_path=(out_path / masked_indice_raw)) - index_tiff_2_jp2(img_path=(out_path / masked_indice_raw), - out_path=(out_path / masked_indice_filename), - quality = 20) + ndvi_name = out_path / self.indice_filename + create_masked_indice( + indice_path=ndvi_name, + cloud_mask_path=self.cm_product.path, + out_path=(out_path / masked_indice_raw), + ) + index_tiff_2_jp2( + img_path=(out_path / masked_indice_raw), + out_path=(out_path / masked_indice_filename), + quality=20, + ) os.remove(str(out_path / masked_indice_raw)) - + try: os.remove(str(out_path / self.indice_raw)) logger.info("Removing {}".format(self.indice_raw)) except: pass - + if quicklook: cmap = matplotlib_colormap_to_rgb(self.colormap, revers=False) - quicklook_filename = self.indice_stem + "_" + self.cm_product.suffix + "_QL.tif" + quicklook_filename = ( + self.indice_stem + "_" + self.cm_product.suffix + "_QL.tif" + ) if (self.out_path / quicklook_filename).exists() and not reprocess: logger.info("{} already exists".format(quicklook_filename)) else: logger.info("creating quicklook") - create_rvb(raster=(self.out_path / self.indice_filename), - cloud_mask=self.cm_product.path, - lut_dict=cmap, clouds_color="white", - out_path=(self.out_path / quicklook_filename)) + create_rvb( + raster=(self.out_path / self.indice_filename), + cloud_mask=self.cm_product.path, + lut_dict=cmap, + clouds_color="white", + out_path=(self.out_path / quicklook_filename), + ) class NdwiMcf(Indice): @@ -154,86 +188,109 @@ class NdwiMcf(Indice): GREEN: band 03 NIR: band 08 """ + name = "NDWIMCF" filename_template = "{product_identifier}_NDWIMCF{ext}" ext = ".jp2" ext_raw = ".tif" - colormap = cm.colors.LinearSegmentedColormap.from_list("", ["green", "white", "blue"]) - + colormap = cm.colors.LinearSegmentedColormap.from_list( + "", ["green", "white", "blue"] + ) def __init__(self, l2a_product_object, cm_product_object): if (l2a_product_object or cm_product_object) is None: - raise ValueError("A L2aProduct and NewCloudMask objects must be provided") + raise ValueError( + "A L2aProduct and NewCloudMask objects must be provided" + ) else: self.l2a_product = l2a_product_object self.cm_product = cm_product_object self.out_path = None - self.indice_stem = self.filename_template.format(product_identifier=self.l2a_product.identifier, ext="") + self.indice_stem = self.filename_template.format( + product_identifier=self.l2a_product.identifier, ext="" + ) self.indice_filename = self.indice_stem + self.ext self.indice_raw = self.indice_stem + self.ext_raw def process_indice( - self, - out_path: pathlib.PosixPath, - reprocess: bool = False, - nodata_clouds: bool = False, - quicklook: bool = False + self, + out_path: pathlib.PosixPath, + reprocess: bool = False, + nodata_clouds: bool = False, + quicklook: bool = False, ) -> None: - """ process """ + """process""" self.out_path = out_path if (out_path / self.indice_filename).exists() and not reprocess: logger.info("{} already exists".format(self.indice_filename)) else: - create_raw_ndr(b1_path=self.l2a_product.b03_10m, - b2_path=self.l2a_product.b08_10m, - out_path=(out_path / self.indice_raw)) - index_tiff_2_jp2(img_path=(out_path / self.indice_raw), - out_path=(out_path / self.indice_filename), - quality = 20) + create_raw_ndr( + 
b1_path=self.l2a_product.b03_10m, + b2_path=self.l2a_product.b08_10m, + out_path=(out_path / self.indice_raw), + ) + index_tiff_2_jp2( + img_path=(out_path / self.indice_raw), + out_path=(out_path / self.indice_filename), + quality=20, + ) if nodata_clouds: if not self.cm_product.path.exists(): logger.info("Cloudmask does not exist, indice not masked") raise ValueError("Cloud mask does not exist") - masked_indice_filename = self.indice_stem + "_" + self.cm_product.suffix + self.ext - masked_indice_raw = self.indice_stem + "_" + self.cm_product.suffix + self.ext_raw - + masked_indice_filename = ( + self.indice_stem + "_" + self.cm_product.suffix + self.ext + ) + masked_indice_raw = ( + self.indice_stem + "_" + self.cm_product.suffix + self.ext_raw + ) + if (out_path / masked_indice_filename).exists() and not reprocess: logger.info("{} already exists".format(masked_indice_filename)) else: if (out_path / self.indice_raw).exists(): - ndwimcf_path = (out_path / self.indice_raw) + ndwimcf_path = out_path / self.indice_raw else: - ndwimcf_path = (out_path / self.indice_filename) - create_masked_indice(indice_path=ndwimcf_path, - cloud_mask_path=self.cm_product.path, - out_path=(out_path / masked_indice_raw)) - index_tiff_2_jp2(img_path=(out_path / masked_indice_raw), - out_path=(out_path / masked_indice_filename), - quality = 20) + ndwimcf_path = out_path / self.indice_filename + create_masked_indice( + indice_path=ndwimcf_path, + cloud_mask_path=self.cm_product.path, + out_path=(out_path / masked_indice_raw), + ) + index_tiff_2_jp2( + img_path=(out_path / masked_indice_raw), + out_path=(out_path / masked_indice_filename), + quality=20, + ) os.remove(str(out_path / masked_indice_raw)) - + try: os.remove(str(out_path / self.indice_raw)) logger.info("Removing {}".format(self.indice_raw)) except: pass - + if quicklook: cmap = matplotlib_colormap_to_rgb(self.colormap, revers=False) - quicklook_filename = self.indice_stem + "_" + self.cm_product.suffix + "_QL.tif" - + quicklook_filename = ( + self.indice_stem + "_" + self.cm_product.suffix + "_QL.tif" + ) + if (self.out_path / quicklook_filename).exists() and not reprocess: logger.info("{} already exists".format(quicklook_filename)) else: logger.info("creating quicklook") - create_rvb(raster=(self.out_path / self.indice_filename), - cloud_mask=self.cm_product.path, - lut_dict=cmap, clouds_color="white", - out_path=(self.out_path / quicklook_filename)) + create_rvb( + raster=(self.out_path / self.indice_filename), + cloud_mask=self.cm_product.path, + lut_dict=cmap, + clouds_color="white", + out_path=(self.out_path / quicklook_filename), + ) class NdwiGao(Indice): @@ -243,6 +300,7 @@ class NdwiGao(Indice): NIR: band 08 SWIR: band 11 """ + name = "NDWIGAO" filename_template = "{product_identifier}_NDWIGAO{ext}" ext = ".jp2" @@ -251,65 +309,81 @@ class NdwiGao(Indice): def __init__(self, l2a_product_object, cm_product_object): if (l2a_product_object or cm_product_object) is None: - raise ValueError("A L2aProduct and NewCloudMask objects must be provided") + raise ValueError( + "A L2aProduct and NewCloudMask objects must be provided" + ) else: self.l2a_product = l2a_product_object self.cm_product = cm_product_object self.out_path = None - self.indice_stem = self.filename_template.format(product_identifier=self.l2a_product.identifier, ext="") + self.indice_stem = self.filename_template.format( + product_identifier=self.l2a_product.identifier, ext="" + ) self.indice_filename = self.indice_stem + self.ext self.indice_raw = self.indice_stem + self.ext_raw def 
process_indice( - self, - out_path: pathlib.PosixPath, - reprocess: bool = False, - nodata_clouds: bool = False, - quicklook: bool = False + self, + out_path: pathlib.PosixPath, + reprocess: bool = False, + nodata_clouds: bool = False, + quicklook: bool = False, ) -> None: - """ process """ + """process""" self.out_path = out_path if (out_path / self.indice_filename).exists() and not reprocess: logger.info("{} already exists".format(self.indice_filename)) else: - create_raw_ndr(b1_path=self.l2a_product.b08_10m, - b2_path=self.l2a_product.b11_20m, - out_path=(out_path / self.indice_raw)) - index_tiff_2_jp2(img_path=(out_path / self.indice_raw), - out_path=(out_path / self.indice_filename), - quality = 20) + create_raw_ndr( + b1_path=self.l2a_product.b08_10m, + b2_path=self.l2a_product.b11_20m, + out_path=(out_path / self.indice_raw), + ) + index_tiff_2_jp2( + img_path=(out_path / self.indice_raw), + out_path=(out_path / self.indice_filename), + quality=20, + ) if nodata_clouds: if not self.cm_product.path.exists(): logger.info("Cloudmask does not exist, indice not masked") raise ValueError("Cloud mask does not exist") - masked_indice_filename = self.indice_stem + "_" + self.cm_product.suffix + self.ext - masked_indice_raw = self.indice_stem + "_" + self.cm_product.suffix + self.ext_raw - + masked_indice_filename = ( + self.indice_stem + "_" + self.cm_product.suffix + self.ext + ) + masked_indice_raw = ( + self.indice_stem + "_" + self.cm_product.suffix + self.ext_raw + ) + if (out_path / masked_indice_filename).exists() and not reprocess: logger.info("{} already exists".format(masked_indice_filename)) else: if (out_path / self.indice_raw).exists(): - ndwigao_name = (out_path / self.indice_raw) + ndwigao_name = out_path / self.indice_raw else: - ndwigao_name = (out_path / self.indice_filename) - create_masked_indice(indice_path=ndwigao_name, - cloud_mask_path=self.cm_product.path, - out_path=(out_path / masked_indice_raw)) - index_tiff_2_jp2(img_path=(out_path / masked_indice_raw), - out_path=(out_path / masked_indice_filename), - quality = 20) + ndwigao_name = out_path / self.indice_filename + create_masked_indice( + indice_path=ndwigao_name, + cloud_mask_path=self.cm_product.path, + out_path=(out_path / masked_indice_raw), + ) + index_tiff_2_jp2( + img_path=(out_path / masked_indice_raw), + out_path=(out_path / masked_indice_filename), + quality=20, + ) os.remove(str(out_path / masked_indice_raw)) - + try: os.remove(str(out_path / self.indice_raw)) logger.info("Removing {}".format(self.indice_raw)) except: pass - + if quicklook: cmap = matplotlib_colormap_to_rgb(self.colormap, revers=False) @@ -318,10 +392,13 @@ class NdwiGao(Indice): logger.info("{} already exists".format(quicklook_filename)) else: logger.info("creating quicklook") - create_rvb(raster=(self.out_path / self.indice_filename), - cloud_mask=self.cm_product.path, - lut_dict=cmap, clouds_color="white", - out_path=(self.out_path / quicklook_filename)) + create_rvb( + raster=(self.out_path / self.indice_filename), + cloud_mask=self.cm_product.path, + lut_dict=cmap, + clouds_color="white", + out_path=(self.out_path / quicklook_filename), + ) class Mndwi(Indice): @@ -331,6 +408,7 @@ class Mndwi(Indice): GREEN: band 03 SWIR: band 11 """ + name = "MNDWI" filename_template = "{product_identifier}_MNDWI{ext}" ext = ".jp2" @@ -339,77 +417,98 @@ class Mndwi(Indice): def __init__(self, l2a_product_object, cm_product_object): if (l2a_product_object or cm_product_object) is None: - raise ValueError("A L2aProduct and NewCloudMask objects 
must be provided") + raise ValueError( + "A L2aProduct and NewCloudMask objects must be provided" + ) else: self.l2a_product = l2a_product_object self.cm_product = cm_product_object self.out_path = None - self.indice_stem = self.filename_template.format(product_identifier=self.l2a_product.identifier, ext="") + self.indice_stem = self.filename_template.format( + product_identifier=self.l2a_product.identifier, ext="" + ) self.indice_filename = self.indice_stem + self.ext self.indice_raw = self.indice_stem + self.ext_raw def process_indice( - self, - out_path: pathlib.PosixPath, - reprocess: bool = False, - nodata_clouds: bool = False, - quicklook: bool = False + self, + out_path: pathlib.PosixPath, + reprocess: bool = False, + nodata_clouds: bool = False, + quicklook: bool = False, ) -> None: - """ process """ + """process""" self.out_path = out_path if (out_path / self.indice_filename).exists() and not reprocess: logger.info("{} already exists".format(self.indice_filename)) else: - create_raw_ndr(b1_path=self.l2a_product.b03_10m, - b2_path=self.l2a_product.b11_20m, - out_path=(out_path / self.indice_raw)) - index_tiff_2_jp2(img_path=(out_path / self.indice_raw), - out_path=(out_path / self.indice_filename), - quality = 20) + create_raw_ndr( + b1_path=self.l2a_product.b03_10m, + b2_path=self.l2a_product.b11_20m, + out_path=(out_path / self.indice_raw), + ) + index_tiff_2_jp2( + img_path=(out_path / self.indice_raw), + out_path=(out_path / self.indice_filename), + quality=20, + ) if nodata_clouds: if not self.cm_product.path.exists(): logger.info("Cloudmask does not exist, indice not masked") raise ValueError("Cloud mask does not exist") - masked_indice_filename = self.indice_stem + "_" + self.cm_product.suffix + self.ext - masked_indice_raw = self.indice_stem + "_" + self.cm_product.suffix + self.ext_raw + masked_indice_filename = ( + self.indice_stem + "_" + self.cm_product.suffix + self.ext + ) + masked_indice_raw = ( + self.indice_stem + "_" + self.cm_product.suffix + self.ext_raw + ) if (out_path / masked_indice_filename).exists() and not reprocess: logger.info("{} already exists".format(masked_indice_filename)) else: if (out_path / self.indice_raw).exists(): - mndwi_name = (out_path / self.indice_raw) + mndwi_name = out_path / self.indice_raw else: - mndwi_name = (out_path / self.indice_filename) - create_masked_indice(indice_path=mndwi_name, - cloud_mask_path=self.cm_product.path, - out_path=(out_path / masked_indice_raw)) - index_tiff_2_jp2(img_path=(out_path / masked_indice_raw), - out_path=(out_path / masked_indice_filename), - quality = 20) + mndwi_name = out_path / self.indice_filename + create_masked_indice( + indice_path=mndwi_name, + cloud_mask_path=self.cm_product.path, + out_path=(out_path / masked_indice_raw), + ) + index_tiff_2_jp2( + img_path=(out_path / masked_indice_raw), + out_path=(out_path / masked_indice_filename), + quality=20, + ) os.remove(str(out_path / masked_indice_raw)) - + try: os.remove(str(out_path / self.indice_raw)) logger.info("Removing {}".format(self.indice_raw)) except: pass - + if quicklook: cmap = matplotlib_colormap_to_rgb(self.colormap, revers=False) - quicklook_filename = self.indice_stem + "_" + self.cm_product.suffix + "_QL.tif" + quicklook_filename = ( + self.indice_stem + "_" + self.cm_product.suffix + "_QL.tif" + ) if (self.out_path / quicklook_filename).exists() and not reprocess: logger.info("{} already exists".format(quicklook_filename)) else: logger.info("creating quicklook") - create_rvb(raster=(self.out_path / self.indice_filename), - 
cloud_mask=self.cm_product.path, - lut_dict=cmap, clouds_color="white", - out_path=(self.out_path / quicklook_filename)) + create_rvb( + raster=(self.out_path / self.indice_filename), + cloud_mask=self.cm_product.path, + lut_dict=cmap, + clouds_color="white", + out_path=(self.out_path / quicklook_filename), + ) class Ndre(Indice): @@ -419,6 +518,7 @@ class Ndre(Indice): NIR: band 08 REDEDGE: band 05 """ + name = "NDRE" filename_template = "{product_identifier}_NDRE{ext}" ext = ".jp2" @@ -427,58 +527,75 @@ class Ndre(Indice): def __init__(self, l2a_product_object, cm_product_object): if (l2a_product_object or cm_product_object) is None: - raise ValueError("A L2aProduct and NewCloudMask objects must be provided") + raise ValueError( + "A L2aProduct and NewCloudMask objects must be provided" + ) else: self.l2a_product = l2a_product_object self.cm_product = cm_product_object - + self.out_path = None - self.indice_stem = self.filename_template.format(product_identifier=self.l2a_product.identifier, ext="") + self.indice_stem = self.filename_template.format( + product_identifier=self.l2a_product.identifier, ext="" + ) self.indice_filename = self.indice_stem + self.ext self.indice_raw = self.indice_stem + self.ext_raw - def process_indice(self, - out_path: pathlib.PosixPath, - reprocess: bool = False, - nodata_clouds: bool = False, - quicklook: bool = False - ) -> None: - """ process """ + def process_indice( + self, + out_path: pathlib.PosixPath, + reprocess: bool = False, + nodata_clouds: bool = False, + quicklook: bool = False, + ) -> None: + """process""" self.out_path = out_path if (out_path / self.indice_filename).exists() and not reprocess: logger.info("{} already exists".format(self.indice_filename)) else: - create_raw_ndr(b1_path=self.l2a_product.b08_10m, - b2_path=self.l2a_product.b05_20m, - out_path=(out_path / self.indice_raw)) - index_tiff_2_jp2(img_path=(out_path / self.indice_raw), - out_path=(out_path / self.indice_filename), - quality = 30) + create_raw_ndr( + b1_path=self.l2a_product.b08_10m, + b2_path=self.l2a_product.b05_20m, + out_path=(out_path / self.indice_raw), + ) + index_tiff_2_jp2( + img_path=(out_path / self.indice_raw), + out_path=(out_path / self.indice_filename), + quality=30, + ) if nodata_clouds: if not self.cm_product.path.exists(): logger.info("Cloudmask does not exist, indice not masked") raise ValueError("Cloud mask does not exist") - masked_indice_filename = self.indice_stem + "_" + self.cm_product.suffix + self.ext - masked_indice_raw = self.indice_stem + "_" + self.cm_product.suffix + self.ext_raw + masked_indice_filename = ( + self.indice_stem + "_" + self.cm_product.suffix + self.ext + ) + masked_indice_raw = ( + self.indice_stem + "_" + self.cm_product.suffix + self.ext_raw + ) if (out_path / masked_indice_filename).exists() and not reprocess: logger.info("{} already exists".format(masked_indice_filename)) else: if (out_path / self.indice_raw).exists(): - ndre_name = (out_path / self.indice_raw) + ndre_name = out_path / self.indice_raw else: - ndre_name = (out_path / self.indice_filename) - create_masked_indice(indice_path=ndre_name, - cloud_mask_path=self.cm_product.path, - out_path=(out_path / masked_indice_raw)) - index_tiff_2_jp2(img_path=(out_path / masked_indice_raw), - out_path=(out_path / masked_indice_filename), - quality = 30) + ndre_name = out_path / self.indice_filename + create_masked_indice( + indice_path=ndre_name, + cloud_mask_path=self.cm_product.path, + out_path=(out_path / masked_indice_raw), + ) + index_tiff_2_jp2( + 
img_path=(out_path / masked_indice_raw), + out_path=(out_path / masked_indice_filename), + quality=30, + ) os.remove(str(out_path / masked_indice_raw)) - + try: os.remove(str(out_path / self.indice_raw)) logger.info("Removing {}".format(self.indice_raw)) @@ -486,15 +603,20 @@ class Ndre(Indice): pass if quicklook: cmap = matplotlib_colormap_to_rgb(self.colormap, revers=False) - quicklook_filename = self.indice_stem + "_" + self.cm_product.suffix + "_QL.tif" + quicklook_filename = ( + self.indice_stem + "_" + self.cm_product.suffix + "_QL.tif" + ) if (self.out_path / quicklook_filename).exists() and not reprocess: logger.info("{} already exists".format(quicklook_filename)) else: logger.info("creating quicklook") - create_rvb(raster=(self.out_path / self.indice_filename), - cloud_mask=self.cm_product.path, - lut_dict=cmap, clouds_color="white", - out_path=(self.out_path / quicklook_filename)) + create_rvb( + raster=(self.out_path / self.indice_filename), + cloud_mask=self.cm_product.path, + lut_dict=cmap, + clouds_color="white", + out_path=(self.out_path / quicklook_filename), + ) class IRECI(Indice): @@ -505,68 +627,86 @@ class IRECI(Indice): RE1: band 705nm (B5 - 20m) RE2: band 740nm (B6 - 20m) """ + name = "IRECI" filename_template = "{product_identifier}_IRECI{ext}" ext = ".jp2" ext_raw = ".tif" colormap = cm.Spectral - + def __init__(self, l2a_product_object, cm_product_object): if (l2a_product_object or cm_product_object) is None: - raise ValueError("A L2aProduct and NewCloudMask objects must be provided") + raise ValueError( + "A L2aProduct and NewCloudMask objects must be provided" + ) else: self.l2a_product = l2a_product_object self.cm_product = cm_product_object - + self.out_path = None - self.indice_stem = self.filename_template.format(product_identifier=self.l2a_product.identifier, ext="") + self.indice_stem = self.filename_template.format( + product_identifier=self.l2a_product.identifier, ext="" + ) self.indice_filename = self.indice_stem + self.ext self.indice_raw = self.indice_stem + self.ext_raw - def process_indice(self, - out_path: pathlib.PosixPath, - reprocess: bool = False, - nodata_clouds: bool = False, - quicklook: bool = False - ) -> None: - """ process """ + def process_indice( + self, + out_path: pathlib.PosixPath, + reprocess: bool = False, + nodata_clouds: bool = False, + quicklook: bool = False, + ) -> None: + """process""" self.out_path = out_path if (out_path / self.indice_filename).exists() and not reprocess: logger.info("{} already exists".format(self.indice_filename)) else: - create_raw_ireci(b1_path = self.l2a_product.b07_20m, - b2_path = self.l2a_product.b04_10m, - b3_path = self.l2a_product.b05_20m, - b4_path = self.l2a_product.b06_20m, - out_path=(out_path / self.indice_raw)) - index_tiff_2_jp2(img_path=(out_path / self.indice_raw), - out_path=(out_path / self.indice_filename), - quality = 30) + create_raw_ireci( + b1_path=self.l2a_product.b07_20m, + b2_path=self.l2a_product.b04_10m, + b3_path=self.l2a_product.b05_20m, + b4_path=self.l2a_product.b06_20m, + out_path=(out_path / self.indice_raw), + ) + index_tiff_2_jp2( + img_path=(out_path / self.indice_raw), + out_path=(out_path / self.indice_filename), + quality=30, + ) if nodata_clouds: if not self.cm_product.path.exists(): logger.info("Cloudmask does not exist, indice not masked") raise ValueError("Cloud mask does not exist") - masked_indice_filename = self.indice_stem + "_" + self.cm_product.suffix + self.ext - masked_indice_raw = self.indice_stem + "_" + self.cm_product.suffix + self.ext_raw + 
masked_indice_filename = ( + self.indice_stem + "_" + self.cm_product.suffix + self.ext + ) + masked_indice_raw = ( + self.indice_stem + "_" + self.cm_product.suffix + self.ext_raw + ) if (out_path / masked_indice_filename).exists() and not reprocess: logger.info("{} already exists".format(masked_indice_filename)) else: if (out_path / self.indice_raw).exists(): - ndre_name = (out_path / self.indice_raw) + ndre_name = out_path / self.indice_raw else: - ndre_name = (out_path / self.indice_filename) - create_masked_indice(indice_path=ndre_name, - cloud_mask_path=self.cm_product.path, - out_path=(out_path / masked_indice_raw)) - index_tiff_2_jp2(img_path=(out_path / masked_indice_raw), - out_path=(out_path / masked_indice_filename), - quality = 30) + ndre_name = out_path / self.indice_filename + create_masked_indice( + indice_path=ndre_name, + cloud_mask_path=self.cm_product.path, + out_path=(out_path / masked_indice_raw), + ) + index_tiff_2_jp2( + img_path=(out_path / masked_indice_raw), + out_path=(out_path / masked_indice_filename), + quality=30, + ) os.remove(str(out_path / masked_indice_raw)) - + try: os.remove(str(out_path / self.indice_raw)) logger.info("Removing {}".format(self.indice_raw)) @@ -574,15 +714,20 @@ class IRECI(Indice): pass if quicklook: cmap = matplotlib_colormap_to_rgb(self.colormap, revers=False) - quicklook_filename = self.indice_stem + "_" + self.cm_product.suffix + "_QL.tif" + quicklook_filename = ( + self.indice_stem + "_" + self.cm_product.suffix + "_QL.tif" + ) if (self.out_path / quicklook_filename).exists() and not reprocess: logger.info("{} already exists".format(quicklook_filename)) else: logger.info("creating quicklook") - create_rvb(raster=(self.out_path / self.indice_filename), - cloud_mask=self.cm_product.path, - lut_dict=cmap, clouds_color="white", - out_path=(self.out_path / quicklook_filename)) + create_rvb( + raster=(self.out_path / self.indice_filename), + cloud_mask=self.cm_product.path, + lut_dict=cmap, + clouds_color="white", + out_path=(self.out_path / quicklook_filename), + ) class BIGR(Indice): @@ -592,6 +737,7 @@ class BIGR(Indice): GREEN: band 03 RED: band 04 """ + name = "BIGR" filename_template = "{product_identifier}_BIGR{ext}" ext = ".jp2" @@ -600,7 +746,9 @@ class BIGR(Indice): def __init__(self, l2a_product_object, cm_product_object): if (l2a_product_object or cm_product_object) is None: - raise ValueError("A L2aProduct and NewCloudMask objects must be provided") + raise ValueError( + "A L2aProduct and NewCloudMask objects must be provided" + ) else: self.l2a_product = l2a_product_object self.cm_product = cm_product_object @@ -609,69 +757,88 @@ class BIGR(Indice): self.out_path = None # filenames - self.indice_stem = self.filename_template.format(product_identifier=self.l2a_product.identifier, ext="") + self.indice_stem = self.filename_template.format( + product_identifier=self.l2a_product.identifier, ext="" + ) self.indice_filename = self.indice_stem + self.ext self.indice_raw = self.indice_stem + self.ext_raw def process_indice( - self, - out_path: pathlib.PosixPath, - reprocess: bool = False, - nodata_clouds: bool = False, - quicklook: bool = False + self, + out_path: pathlib.PosixPath, + reprocess: bool = False, + nodata_clouds: bool = False, + quicklook: bool = False, ) -> None: - """ process """ + """process""" self.out_path = out_path if (out_path / self.indice_filename).exists() and not reprocess: logger.info("{} already exists".format(self.indice_filename)) else: - create_raw_bigr(red_path=self.l2a_product.b04_10m, - 
green_path=self.l2a_product.b03_10m, - out_path=(out_path / self.indice_raw)) - index_tiff_2_jp2(img_path=(out_path / self.indice_raw), - out_path=(out_path / self.indice_filename)) + create_raw_bigr( + red_path=self.l2a_product.b04_10m, + green_path=self.l2a_product.b03_10m, + out_path=(out_path / self.indice_raw), + ) + index_tiff_2_jp2( + img_path=(out_path / self.indice_raw), + out_path=(out_path / self.indice_filename), + ) if nodata_clouds: if not self.cm_product.path.exists(): logger.info("Cloudmask does not exist, indice not masked") raise ValueError("Cloud mask does not exist") - masked_indice_filename = self.indice_stem + "_" + self.cm_product.suffix + self.ext - masked_indice_raw = self.indice_stem + "_" + self.cm_product.suffix + self.ext_raw + masked_indice_filename = ( + self.indice_stem + "_" + self.cm_product.suffix + self.ext + ) + masked_indice_raw = ( + self.indice_stem + "_" + self.cm_product.suffix + self.ext_raw + ) if (out_path / masked_indice_filename).exists() and not reprocess: logger.info("{} already exists".format(masked_indice_filename)) else: if (out_path / self.indice_raw).exists(): - bigr_name = (out_path / self.indice_raw) + bigr_name = out_path / self.indice_raw else: - bigr_name = (out_path / self.indice_filename) - create_masked_indice(indice_path=bigr_name, - cloud_mask_path=self.cm_product.path, - out_path=(out_path / masked_indice_raw)) - index_tiff_2_jp2(img_path=(out_path / masked_indice_raw), - out_path=(out_path / masked_indice_filename)) + bigr_name = out_path / self.indice_filename + create_masked_indice( + indice_path=bigr_name, + cloud_mask_path=self.cm_product.path, + out_path=(out_path / masked_indice_raw), + ) + index_tiff_2_jp2( + img_path=(out_path / masked_indice_raw), + out_path=(out_path / masked_indice_filename), + ) os.remove(str(out_path / masked_indice_raw)) - + try: os.remove(str(out_path / self.indice_raw)) logger.info("Removing {}".format(self.indice_raw)) except: pass - + if quicklook: cmap = matplotlib_colormap_to_rgb(self.colormap, revers=False) - quicklook_filename = self.indice_stem + "_" + self.cm_product.suffix + "_QL.tif" + quicklook_filename = ( + self.indice_stem + "_" + self.cm_product.suffix + "_QL.tif" + ) if (self.out_path / quicklook_filename).exists() and not reprocess: logger.info("{} already exists".format(quicklook_filename)) else: logger.info("creating quicklook") - create_rvb(raster=(self.out_path / self.indice_filename), - cloud_mask=self.cm_product.path, - lut_dict=cmap, clouds_color="white", - out_path=(self.out_path / quicklook_filename), - stretch=(0,2500)) + create_rvb( + raster=(self.out_path / self.indice_filename), + cloud_mask=self.cm_product.path, + lut_dict=cmap, + clouds_color="white", + out_path=(self.out_path / quicklook_filename), + stretch=(0, 2500), + ) class BIRNIR(Indice): @@ -681,6 +848,7 @@ class BIRNIR(Indice): NIR: band 08 RED: band 04 """ + name = "BIRNIR" filename_template = "{product_identifier}_BIRNIR{ext}" ext = ".jp2" @@ -689,7 +857,9 @@ class BIRNIR(Indice): def __init__(self, l2a_product_object, cm_product_object): if (l2a_product_object or cm_product_object) is None: - raise ValueError("A L2aProduct and NewCloudMask objects must be provided") + raise ValueError( + "A L2aProduct and NewCloudMask objects must be provided" + ) else: self.l2a_product = l2a_product_object self.cm_product = cm_product_object @@ -698,69 +868,88 @@ class BIRNIR(Indice): self.out_path = None # filenames - self.indice_stem = self.filename_template.format(product_identifier=self.l2a_product.identifier, 
ext="") + self.indice_stem = self.filename_template.format( + product_identifier=self.l2a_product.identifier, ext="" + ) self.indice_filename = self.indice_stem + self.ext self.indice_raw = self.indice_stem + self.ext_raw def process_indice( - self, - out_path: pathlib.PosixPath, - reprocess: bool = False, - nodata_clouds: bool = False, - quicklook: bool = False + self, + out_path: pathlib.PosixPath, + reprocess: bool = False, + nodata_clouds: bool = False, + quicklook: bool = False, ) -> None: - """ process """ + """process""" self.out_path = out_path if (out_path / self.indice_filename).exists() and not reprocess: logger.info("{} already exists".format(self.indice_filename)) else: - create_raw_birnir(red_path=self.l2a_product.b04_10m, - nir_path=self.l2a_product.b08_10m, - out_path=(out_path / self.indice_raw)) - index_tiff_2_jp2(img_path=(out_path / self.indice_raw), - out_path=(out_path / self.indice_filename)) + create_raw_birnir( + red_path=self.l2a_product.b04_10m, + nir_path=self.l2a_product.b08_10m, + out_path=(out_path / self.indice_raw), + ) + index_tiff_2_jp2( + img_path=(out_path / self.indice_raw), + out_path=(out_path / self.indice_filename), + ) if nodata_clouds: if not self.cm_product.path.exists(): logger.info("Cloudmask does not exist, indice not masked") raise ValueError("Cloud mask does not exist") - masked_indice_filename = self.indice_stem + "_" + self.cm_product.suffix + self.ext - masked_indice_raw = self.indice_stem + "_" + self.cm_product.suffix + self.ext_raw + masked_indice_filename = ( + self.indice_stem + "_" + self.cm_product.suffix + self.ext + ) + masked_indice_raw = ( + self.indice_stem + "_" + self.cm_product.suffix + self.ext_raw + ) if (out_path / masked_indice_filename).exists() and not reprocess: logger.info("{} already exists".format(masked_indice_filename)) else: if (out_path / self.indice_raw).exists(): - birnir_name = (out_path / self.indice_raw) + birnir_name = out_path / self.indice_raw else: - birnir_name = (out_path / self.indice_filename) - create_masked_indice(indice_path=birnir_name, - cloud_mask_path=self.cm_product.path, - out_path=(out_path / masked_indice_raw)) - index_tiff_2_jp2(img_path=(out_path / masked_indice_raw), - out_path=(out_path / masked_indice_filename)) + birnir_name = out_path / self.indice_filename + create_masked_indice( + indice_path=birnir_name, + cloud_mask_path=self.cm_product.path, + out_path=(out_path / masked_indice_raw), + ) + index_tiff_2_jp2( + img_path=(out_path / masked_indice_raw), + out_path=(out_path / masked_indice_filename), + ) os.remove(str(out_path / masked_indice_raw)) - + try: os.remove(str(out_path / self.indice_raw)) logger.info("Removing {}".format(self.indice_raw)) except: pass - + if quicklook: cmap = matplotlib_colormap_to_rgb(self.colormap, revers=False) - quicklook_filename = self.indice_stem + "_" + self.cm_product.suffix + "_QL.tif" + quicklook_filename = ( + self.indice_stem + "_" + self.cm_product.suffix + "_QL.tif" + ) if (self.out_path / quicklook_filename).exists() and not reprocess: logger.info("{} already exists".format(quicklook_filename)) else: logger.info("creating quicklook") - create_rvb(raster=(self.out_path / self.indice_filename), - cloud_mask=self.cm_product.path, - lut_dict=cmap, clouds_color="white", - out_path=(self.out_path / quicklook_filename), - stretch=(0,5000)) + create_rvb( + raster=(self.out_path / self.indice_filename), + cloud_mask=self.cm_product.path, + lut_dict=cmap, + clouds_color="white", + out_path=(self.out_path / quicklook_filename), + stretch=(0, 
5000), + ) class BIBG(Indice): @@ -770,6 +959,7 @@ class BIBG(Indice): BLUE: band 02 GREEN: band 03 """ + name = "BIBG" filename_template = "{product_identifier}_BIBG{ext}" ext = ".jp2" @@ -778,7 +968,9 @@ class BIBG(Indice): def __init__(self, l2a_product_object, cm_product_object): if (l2a_product_object or cm_product_object) is None: - raise ValueError("A L2aProduct and NewCloudMask objects must be provided") + raise ValueError( + "A L2aProduct and NewCloudMask objects must be provided" + ) else: self.l2a_product = l2a_product_object self.cm_product = cm_product_object @@ -787,46 +979,60 @@ class BIBG(Indice): self.out_path = None # filenames - self.indice_stem = self.filename_template.format(product_identifier=self.l2a_product.identifier, ext="") + self.indice_stem = self.filename_template.format( + product_identifier=self.l2a_product.identifier, ext="" + ) self.indice_filename = self.indice_stem + self.ext self.indice_raw = self.indice_stem + self.ext_raw def process_indice( - self, - out_path: pathlib.PosixPath, - reprocess: bool = False, - nodata_clouds: bool = False, - quicklook: bool = False + self, + out_path: pathlib.PosixPath, + reprocess: bool = False, + nodata_clouds: bool = False, + quicklook: bool = False, ) -> None: - """ process """ + """process""" self.out_path = out_path if (out_path / self.indice_filename).exists() and not reprocess: logger.info("{} already exists".format(self.indice_filename)) else: - create_raw_bibg(blue_path=self.l2a_product.b02_10m, - green_path=self.l2a_product.b03_10m, - out_path=(out_path / self.indice_raw)) - index_tiff_2_jp2(img_path=(out_path / self.indice_raw), - out_path=(out_path / self.indice_filename)) + create_raw_bibg( + blue_path=self.l2a_product.b02_10m, + green_path=self.l2a_product.b03_10m, + out_path=(out_path / self.indice_raw), + ) + index_tiff_2_jp2( + img_path=(out_path / self.indice_raw), + out_path=(out_path / self.indice_filename), + ) if nodata_clouds: if not self.cm_product.path.exists(): logger.info("Cloudmask does not exist, indice not masked") raise ValueError("Cloud mask does not exist") - masked_indice_filename = self.indice_stem + "_" + self.cm_product.suffix + self.ext - masked_indice_raw = self.indice_stem + "_" + self.cm_product.suffix + self.ext_raw + masked_indice_filename = ( + self.indice_stem + "_" + self.cm_product.suffix + self.ext + ) + masked_indice_raw = ( + self.indice_stem + "_" + self.cm_product.suffix + self.ext_raw + ) if (out_path / masked_indice_filename).exists() and not reprocess: logger.info("{} already exists".format(masked_indice_filename)) else: if (out_path / self.indice_raw).exists(): - bibg_name = (out_path / self.indice_raw) + bibg_name = out_path / self.indice_raw else: - bibg_name = (out_path / self.indice_filename) - create_masked_indice(indice_path=bibg_name, - cloud_mask_path=self.cm_product.path, - out_path=(out_path / masked_indice_raw)) - index_tiff_2_jp2(img_path=(out_path / masked_indice_raw), - out_path=(out_path / masked_indice_filename)) + bibg_name = out_path / self.indice_filename + create_masked_indice( + indice_path=bibg_name, + cloud_mask_path=self.cm_product.path, + out_path=(out_path / masked_indice_raw), + ) + index_tiff_2_jp2( + img_path=(out_path / masked_indice_raw), + out_path=(out_path / masked_indice_filename), + ) os.remove(str(out_path / masked_indice_raw)) try: os.remove(str(out_path / self.indice_raw)) @@ -835,22 +1041,26 @@ class BIBG(Indice): pass if quicklook: cmap = matplotlib_colormap_to_rgb(self.colormap, revers=False) - quicklook_filename = 
self.indice_stem + "_" + self.cm_product.suffix + "_QL.tif" + quicklook_filename = ( + self.indice_stem + "_" + self.cm_product.suffix + "_QL.tif" + ) if (self.out_path / quicklook_filename).exists() and not reprocess: logger.info("{} already exists".format(quicklook_filename)) else: logger.info("creating quicklook") - create_rvb(raster=(self.out_path / self.indice_filename), - cloud_mask=self.cm_product.path, - lut_dict=cmap, clouds_color="white", - out_path=(self.out_path / quicklook_filename), - stretch=(0,2500)) + create_rvb( + raster=(self.out_path / self.indice_filename), + cloud_mask=self.cm_product.path, + lut_dict=cmap, + clouds_color="white", + out_path=(self.out_path / quicklook_filename), + stretch=(0, 2500), + ) class IndicesCollectionMeta(type): - """Adds special methods to IndicesCollection class. + """Adds special methods to IndicesCollection class.""" - """ def __iter__(cls): return iter(cls._indices_classes) @@ -895,6 +1105,9 @@ class IndicesCollection(metaclass=IndicesCollectionMeta): >>> IndicesCollection.list """ + # TODO: Implement class as a singleton. - _indices_classes = {cls.__dict__["name"]: cls for cls in Indice.__subclasses__()} + _indices_classes = { + cls.__dict__["name"]: cls for cls in Indice.__subclasses__() + } diff --git a/sen2chain/indices_functions.py b/sen2chain/indices_functions.py index bd9689a3f566005d38da79bf68110fc7f8ae8992..ea9919a01f2b03b9aba21099dd8a436935558745 100644 --- a/sen2chain/indices_functions.py +++ b/sen2chain/indices_functions.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 """ This module contains functions to compute radiometric indices. @@ -7,92 +7,96 @@ This module contains functions to compute radiometric indices. import logging import pathlib from pathlib import Path -#~ import otbApplication + +# import otbApplication import rasterio from rasterio.warp import reproject, Resampling import numpy as np from typing import Union from osgeo import gdal + # import gdal logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) -#~ def create_raw_ndvi(nir_path: Union[str, pathlib.PosixPath], - #~ vir_path: Union[str, pathlib.PosixPath], - #~ out_path: Union[str, pathlib.PosixPath]="./raw_ndvi.tif" - #~ ) -> pathlib.PosixPath: - #~ """ - #~ Creates a NDVI raster from NIR and VIR rasters. - - #~ :param nir_path: path to the NIR raster. - #~ :param vir_path: path to the VIR raster. - #~ :param out_path: path to the output raster. 
- #~ """ - #~ logger.info("creating raw NDVI (tiff - int16)") - - #~ with rasterio.open(str(nir_path)) as nir_src, \ - #~ rasterio.open(str(vir_path)) as vir_src: - #~ nir_profile = nir_src.profile - #~ np.seterr(divide='ignore', invalid='ignore') # ignore warnings when dividing by zero - #~ nir = nir_src.read(1).astype(np.float32) - #~ vir = vir_src.read(1).astype(np.float32) - #~ ndvi = ((nir - vir) / (nir + vir)*10000).astype(np.int16) - #~ ndvi_masked = np.where(nir != 0, ndvi, 32767) - - #~ nir_profile.update(driver="Gtiff", - #~ compress="DEFLATE", - #~ tiled=False, - #~ dtype=np.int16, - #~ nodata=32767, - #~ transform=nir_src.transform) - #~ nir_profile.pop('tiled', None) - #~ with rasterio.Env(GDAL_CACHEMAX=512) as env: - #~ with rasterio.open(str(out_path), "w", **nir_profile) as dst: - #~ dst.write(ndvi_masked, 1) - #~ return Path(str(out_path)).absolute - -#~ def create_raw_ndwimcf(nir_path: Union[str, pathlib.PosixPath], - #~ green_path: Union[str, pathlib.PosixPath], - #~ out_path: Union[str, pathlib.PosixPath]="./raw_ndwimcf.tif") -> pathlib.PosixPath: - #~ """ - #~ Creates a NDWI (McFeeters) raster from GREEN and NIR rasters. - - #~ :param nir_path: path to the NIR raster. - #~ :param green_path: path to the GREEN raster. - #~ :param out_path: path to the output raster. - #~ """ - #~ logger.info("creating raw NDWIMCF (tiff - int16)") - - #~ with rasterio.open(str(nir_path)) as nir_src, \ - #~ rasterio.open(str(green_path)) as green_src: - - #~ nir_profile = nir_src.profile - - #~ nir = nir_src.read(1).astype(np.float32) - #~ green = green_src.read(1).astype(np.float32) - - #~ np.seterr(divide='ignore', invalid='ignore') # ignore warnings when dividing by zero - #~ ndwimcf = ((green - nir) / (green + nir)*10000).astype(np.int16) - - #~ ndwimcf_masked = np.where(nir != 0, ndwimcf, 32767) - - #~ nir_profile.update(driver="Gtiff", - #~ compress="DEFLATE", - #~ tiled=False, - #~ dtype=np.int16, - #~ nodata=32767, - #~ transform=nir_src.transform) - #~ nir_profile.pop('tiled', None) - #~ with rasterio.Env(GDAL_CACHEMAX=512) as env: - #~ with rasterio.open(str(out_path), "w", **nir_profile) as dst: - #~ dst.write(ndwimcf_masked, 1) - #~ return Path(str(out_path)).absolute - -def create_raw_ndr(b1_path: Union[str, pathlib.PosixPath], - b2_path: Union[str, pathlib.PosixPath], - out_path: Union[str, pathlib.PosixPath]="./raw_ndr.tif" - ) -> pathlib.PosixPath: +# def create_raw_ndvi(nir_path: Union[str, pathlib.PosixPath], +# vir_path: Union[str, pathlib.PosixPath], +# out_path: Union[str, pathlib.PosixPath]="./raw_ndvi.tif" +# ) -> pathlib.PosixPath: +# """ +# Creates a NDVI raster from NIR and VIR rasters. + +# :param nir_path: path to the NIR raster. +# :param vir_path: path to the VIR raster. +# :param out_path: path to the output raster. 
+# """ +# logger.info("creating raw NDVI (tiff - int16)") + +# with rasterio.open(str(nir_path)) as nir_src, \ +# rasterio.open(str(vir_path)) as vir_src: +# nir_profile = nir_src.profile +# np.seterr(divide='ignore', invalid='ignore') # ignore warnings when dividing by zero +# nir = nir_src.read(1).astype(np.float32) +# vir = vir_src.read(1).astype(np.float32) +# ndvi = ((nir - vir) / (nir + vir)*10000).astype(np.int16) +# ndvi_masked = np.where(nir != 0, ndvi, 32767) + +# nir_profile.update(driver="Gtiff", +# compress="DEFLATE", +# tiled=False, +# dtype=np.int16, +# nodata=32767, +# transform=nir_src.transform) +# nir_profile.pop('tiled', None) +# with rasterio.Env(GDAL_CACHEMAX=512) as env: +# with rasterio.open(str(out_path), "w", **nir_profile) as dst: +# dst.write(ndvi_masked, 1) +# return Path(str(out_path)).absolute + +# def create_raw_ndwimcf(nir_path: Union[str, pathlib.PosixPath], +# green_path: Union[str, pathlib.PosixPath], +# out_path: Union[str, pathlib.PosixPath]="./raw_ndwimcf.tif") -> pathlib.PosixPath: +# """ +# Creates a NDWI (McFeeters) raster from GREEN and NIR rasters. + +# :param nir_path: path to the NIR raster. +# :param green_path: path to the GREEN raster. +# :param out_path: path to the output raster. +# """ +# logger.info("creating raw NDWIMCF (tiff - int16)") + +# with rasterio.open(str(nir_path)) as nir_src, \ +# rasterio.open(str(green_path)) as green_src: + +# nir_profile = nir_src.profile + +# nir = nir_src.read(1).astype(np.float32) +# green = green_src.read(1).astype(np.float32) + +# np.seterr(divide='ignore', invalid='ignore') # ignore warnings when dividing by zero +# ndwimcf = ((green - nir) / (green + nir)*10000).astype(np.int16) + +# ndwimcf_masked = np.where(nir != 0, ndwimcf, 32767) + +# nir_profile.update(driver="Gtiff", +# compress="DEFLATE", +# tiled=False, +# dtype=np.int16, +# nodata=32767, +# transform=nir_src.transform) +# nir_profile.pop('tiled', None) +# with rasterio.Env(GDAL_CACHEMAX=512) as env: +# with rasterio.open(str(out_path), "w", **nir_profile) as dst: +# dst.write(ndwimcf_masked, 1) +# return Path(str(out_path)).absolute + + +def create_raw_ndr( + b1_path: Union[str, pathlib.PosixPath], + b2_path: Union[str, pathlib.PosixPath], + out_path: Union[str, pathlib.PosixPath] = "./raw_ndr.tif", +) -> pathlib.PosixPath: """ Creates a generic normalized difference ratio raster from B1 and B2 rasters. NDR = (B1 - B2) / (B1 + B2) @@ -100,182 +104,204 @@ def create_raw_ndr(b1_path: Union[str, pathlib.PosixPath], :param b2_path: path to the B2 raster. :param out_path: path to the output raster. 
""" - logger.info("creating raw generic NDR ({}, {})".format(Path(b1_path).name, Path(b2_path).name)) - - with rasterio.open(str(b1_path)) as b1_src, \ - rasterio.open(str(b2_path)) as b2_src: + logger.info( + "creating raw generic NDR ({}, {})".format( + Path(b1_path).name, Path(b2_path).name + ) + ) + + with rasterio.open(str(b1_path)) as b1_src, rasterio.open( + str(b2_path) + ) as b2_src: b1_profile = b1_src.profile - np.seterr(divide='ignore', invalid='ignore') # ignore warnings when dividing by zero - b1 = b1_src.read(1, - out_shape=(1, max(b1_src.height, b2_src.height), max(b1_src.width, b2_src.width)), - resampling=Resampling.bilinear)\ - .astype(np.float32) - b2 = b2_src.read(1, - out_shape=(1, max(b1_src.height, b2_src.height), max(b1_src.width, b2_src.width)), - resampling=Resampling.bilinear)\ - .astype(np.float32) - ndr = ((b1 - b2) / (b1 + b2)*10000).astype(np.int16) + np.seterr( + divide="ignore", invalid="ignore" + ) # ignore warnings when dividing by zero + b1 = b1_src.read( + 1, + out_shape=( + 1, + max(b1_src.height, b2_src.height), + max(b1_src.width, b2_src.width), + ), + resampling=Resampling.bilinear, + ).astype(np.float32) + b2 = b2_src.read( + 1, + out_shape=( + 1, + max(b1_src.height, b2_src.height), + max(b1_src.width, b2_src.width), + ), + resampling=Resampling.bilinear, + ).astype(np.float32) + ndr = ((b1 - b2) / (b1 + b2) * 10000).astype(np.int16) ndr_masked = np.where(b1 != 0, ndr, 32767) - b1_profile.update(driver="Gtiff", - compress="DEFLATE", - tiled=False, - dtype=np.int16, - nodata=32767, - transform=b1_src.transform) - b1_profile.pop('tiled', None) + b1_profile.update( + driver="Gtiff", + compress="DEFLATE", + tiled=False, + dtype=np.int16, + nodata=32767, + transform=b1_src.transform, + ) + b1_profile.pop("tiled", None) with rasterio.Env(GDAL_CACHEMAX=512) as env: with rasterio.open(str(out_path), "w", **b1_profile) as dst: dst.write(ndr_masked, 1) return Path(str(out_path)).absolute -#~ def create_raw_ndwigao(nir_path: Union[str, pathlib.PosixPath], - #~ swir_path: Union[str, pathlib.PosixPath], - #~ out_path: Union[str, pathlib.PosixPath]="./raw_ndwigao.tif") -> pathlib.PosixPath: - #~ """ - #~ Creates a NDWI raster from NIR and SWIR rasters. - - #~ :param nir_path: path to the NIR raster. - #~ :param swir_path: path to the SWIR raster. - #~ :param out_path: path to the output raster. 
- #~ """ - #~ logger.info("creating raw NDWIGAO (tiff - int16)") - - #~ with rasterio.open(str(nir_path)) as nir_src, \ - #~ rasterio.open(str(swir_path)) as swir_src: - - #~ nir_profile = nir_src.profile - - #~ nir = nir_src.read(1).astype(np.float32) - #~ swir = swir_src.read(1).astype(np.float32) - - #~ swir_reproj = np.empty(nir.shape, dtype=np.float32) - #~ reproject(source=swir, - #~ destination=swir_reproj, - #~ src_transform=swir_src.transform, - #~ src_crs=swir_src.crs, - #~ dst_transform=nir_src.transform, - #~ dst_crs=nir_src.crs, - #~ resampling=Resampling.bilinear) - - #~ np.seterr(divide='ignore', invalid='ignore') # ignore warnings when dividing by zero - #~ ndwi = ((nir - swir_reproj) / (nir + swir_reproj)*10000).astype(np.int16) - - #~ ndwi_masked = np.where(nir != 0, ndwi, 32767) - - #~ nir_profile.update(driver="Gtiff", - #~ compress="DEFLATE", - #~ tiled=False, - #~ dtype=np.int16, - #~ nodata=32767, - #~ transform=nir_src.transform) - #~ nir_profile.pop('tiled', None) - #~ with rasterio.Env(GDAL_CACHEMAX=512) as env: - #~ with rasterio.open(str(out_path), "w", **nir_profile) as dst: - #~ dst.write(ndwi_masked, 1) - #~ return Path(str(out_path)).absolute - -#~ def create_raw_mndwi(green_path: Union[str, pathlib.PosixPath], - #~ swir_path: Union[str, pathlib.PosixPath], - #~ out_path: Union[str, pathlib.PosixPath]="./raw_mndwi.tif") -> pathlib.PosixPath: - #~ """ - #~ Creates a MNDWI raster from GREEN and SWIR rasters. - - #~ :param green_path: path to the GREEN raster. - #~ :param swir_path: path to the SWIR raster. - #~ :param out_path: path to the output raster. - #~ """ - #~ logger.info("creating raw MNDWI (tiff - int16)") - - #~ with rasterio.open(str(green_path)) as green_src, \ - #~ rasterio.open(str(swir_path)) as swir_src: - - #~ green_profile = green_src.profile - #~ # swir_profile = swir_src.profile - - #~ green = green_src.read(1).astype(np.float32) - #~ swir = swir_src.read(1).astype(np.float32) - - #~ # reproject swir band (20m) to nir band resolution (10m) - #~ swir_reproj = np.empty(green.shape, dtype=np.float32) - #~ reproject(source=swir, - #~ destination=swir_reproj, - #~ src_transform=swir_src.transform, - #~ src_crs=swir_src.crs, - #~ dst_transform=green_src.transform, - #~ dst_crs=green_src.crs, - #~ resampling=Resampling.bilinear) - - #~ np.seterr(divide='ignore', invalid='ignore') # ignore warnings when dividing by zero - #~ ndwi = ((green - swir_reproj) / (green + swir_reproj)*10000).astype(np.int16) - - #~ ndwi_masked = np.where(green != 0, ndwi, 32767) - - #~ green_profile.update(driver="Gtiff", - #~ compress="DEFLATE", - #~ tiled=False, - #~ dtype=np.int16, - #~ nodata=32767, - #~ transform=green_src.transform) - #~ green_profile.pop('tiled', None) - #~ with rasterio.Env(GDAL_CACHEMAX=512) as env: - #~ with rasterio.open(str(out_path), "w", **green_profile) as dst: - #~ dst.write(ndwi_masked, 1) - #~ return Path(str(out_path)).absolute - -#~ def create_raw_ndre(nir_path: Union[str, pathlib.PosixPath], - #~ redge_path: Union[str, pathlib.PosixPath], - #~ out_path: Union[str, pathlib.PosixPath]="./raw_ndre.tif") -> pathlib.PosixPath: - #~ """ - #~ Creates a NDRE raster from NIR and RED EDGE rasters. - - #~ :param nir_path: path to the NIR raster. - #~ :param redge_path: path to the RED EDGE raster. - #~ :param out_path: path to the output raster. 
- #~ """ - #~ logger.info("creating raw NDRE (tiff - int16)") - - #~ with rasterio.open(str(nir_path)) as nir_src, \ - #~ rasterio.open(str(redge_path)) as redge_src: - - #~ nir_profile = nir_src.profile - - #~ nir = nir_src.read(1).astype(np.float32) - #~ redge = redge_src.read(1).astype(np.float32) - - #~ # reproject redge band (20m) to nir band resolution (10m) - #~ redge_reproj = np.empty(nir.shape, dtype=np.float32) - #~ reproject(source=redge, - #~ destination=redge_reproj, - #~ src_transform=redge_src.transform, - #~ src_crs=redge_src.crs, - #~ dst_transform=nir_src.transform, - #~ dst_crs=nir_src.crs, - #~ resampling=Resampling.bilinear) - - #~ np.seterr(divide='ignore', invalid='ignore') # ignore warnings when dividing by zero - #~ ndre = ((nir - redge_reproj) / (nir + redge_reproj)*10000).astype(np.int16) - - #~ ndre_masked = np.where(nir != 0, ndre, 32767) - - #~ nir_profile.update(driver="Gtiff", - #~ compress="DEFLATE", - #~ tiled=False, - #~ dtype=np.int16, - #~ nodata=32767, - #~ transform=nir_src.transform) - #~ nir_profile.pop('tiled', None) - #~ with rasterio.Env(GDAL_CACHEMAX=512) as env: - #~ with rasterio.open(str(out_path), "w", **nir_profile) as dst: - #~ dst.write(ndre_masked, 1) - #~ return Path(str(out_path)).absolute - -def create_raw_ireci(b1_path: Union[str, pathlib.PosixPath], - b2_path: Union[str, pathlib.PosixPath], - b3_path: Union[str, pathlib.PosixPath], - b4_path: Union[str, pathlib.PosixPath], - out_path: Union[str, pathlib.PosixPath]="./raw_ireci.tif", - ) -> pathlib.PosixPath: + +# def create_raw_ndwigao(nir_path: Union[str, pathlib.PosixPath], +# swir_path: Union[str, pathlib.PosixPath], +# out_path: Union[str, pathlib.PosixPath]="./raw_ndwigao.tif") -> pathlib.PosixPath: +# """ +# Creates a NDWI raster from NIR and SWIR rasters. + +# :param nir_path: path to the NIR raster. +# :param swir_path: path to the SWIR raster. +# :param out_path: path to the output raster. +# """ +# logger.info("creating raw NDWIGAO (tiff - int16)") + +# with rasterio.open(str(nir_path)) as nir_src, \ +# rasterio.open(str(swir_path)) as swir_src: + +# nir_profile = nir_src.profile + +# nir = nir_src.read(1).astype(np.float32) +# swir = swir_src.read(1).astype(np.float32) + +# swir_reproj = np.empty(nir.shape, dtype=np.float32) +# reproject(source=swir, +# destination=swir_reproj, +# src_transform=swir_src.transform, +# src_crs=swir_src.crs, +# dst_transform=nir_src.transform, +# dst_crs=nir_src.crs, +# resampling=Resampling.bilinear) + +# np.seterr(divide='ignore', invalid='ignore') # ignore warnings when dividing by zero +# ndwi = ((nir - swir_reproj) / (nir + swir_reproj)*10000).astype(np.int16) + +# ndwi_masked = np.where(nir != 0, ndwi, 32767) + +# nir_profile.update(driver="Gtiff", +# compress="DEFLATE", +# tiled=False, +# dtype=np.int16, +# nodata=32767, +# transform=nir_src.transform) +# nir_profile.pop('tiled', None) +# with rasterio.Env(GDAL_CACHEMAX=512) as env: +# with rasterio.open(str(out_path), "w", **nir_profile) as dst: +# dst.write(ndwi_masked, 1) +# return Path(str(out_path)).absolute + +# def create_raw_mndwi(green_path: Union[str, pathlib.PosixPath], +# swir_path: Union[str, pathlib.PosixPath], +# out_path: Union[str, pathlib.PosixPath]="./raw_mndwi.tif") -> pathlib.PosixPath: +# """ +# Creates a MNDWI raster from GREEN and SWIR rasters. + +# :param green_path: path to the GREEN raster. +# :param swir_path: path to the SWIR raster. +# :param out_path: path to the output raster. 
+# """ +# logger.info("creating raw MNDWI (tiff - int16)") + +# with rasterio.open(str(green_path)) as green_src, \ +# rasterio.open(str(swir_path)) as swir_src: + +# green_profile = green_src.profile +# # swir_profile = swir_src.profile + +# green = green_src.read(1).astype(np.float32) +# swir = swir_src.read(1).astype(np.float32) + +# # reproject swir band (20m) to nir band resolution (10m) +# swir_reproj = np.empty(green.shape, dtype=np.float32) +# reproject(source=swir, +# destination=swir_reproj, +# src_transform=swir_src.transform, +# src_crs=swir_src.crs, +# dst_transform=green_src.transform, +# dst_crs=green_src.crs, +# resampling=Resampling.bilinear) + +# np.seterr(divide='ignore', invalid='ignore') # ignore warnings when dividing by zero +# ndwi = ((green - swir_reproj) / (green + swir_reproj)*10000).astype(np.int16) + +# ndwi_masked = np.where(green != 0, ndwi, 32767) + +# green_profile.update(driver="Gtiff", +# compress="DEFLATE", +# tiled=False, +# dtype=np.int16, +# nodata=32767, +# transform=green_src.transform) +# green_profile.pop('tiled', None) +# with rasterio.Env(GDAL_CACHEMAX=512) as env: +# with rasterio.open(str(out_path), "w", **green_profile) as dst: +# dst.write(ndwi_masked, 1) +# return Path(str(out_path)).absolute + +# def create_raw_ndre(nir_path: Union[str, pathlib.PosixPath], +# redge_path: Union[str, pathlib.PosixPath], +# out_path: Union[str, pathlib.PosixPath]="./raw_ndre.tif") -> pathlib.PosixPath: +# """ +# Creates a NDRE raster from NIR and RED EDGE rasters. + +# :param nir_path: path to the NIR raster. +# :param redge_path: path to the RED EDGE raster. +# :param out_path: path to the output raster. +# """ +# logger.info("creating raw NDRE (tiff - int16)") + +# with rasterio.open(str(nir_path)) as nir_src, \ +# rasterio.open(str(redge_path)) as redge_src: + +# nir_profile = nir_src.profile + +# nir = nir_src.read(1).astype(np.float32) +# redge = redge_src.read(1).astype(np.float32) + +# # reproject redge band (20m) to nir band resolution (10m) +# redge_reproj = np.empty(nir.shape, dtype=np.float32) +# reproject(source=redge, +# destination=redge_reproj, +# src_transform=redge_src.transform, +# src_crs=redge_src.crs, +# dst_transform=nir_src.transform, +# dst_crs=nir_src.crs, +# resampling=Resampling.bilinear) + +# np.seterr(divide='ignore', invalid='ignore') # ignore warnings when dividing by zero +# ndre = ((nir - redge_reproj) / (nir + redge_reproj)*10000).astype(np.int16) + +# ndre_masked = np.where(nir != 0, ndre, 32767) + +# nir_profile.update(driver="Gtiff", +# compress="DEFLATE", +# tiled=False, +# dtype=np.int16, +# nodata=32767, +# transform=nir_src.transform) +# nir_profile.pop('tiled', None) +# with rasterio.Env(GDAL_CACHEMAX=512) as env: +# with rasterio.open(str(out_path), "w", **nir_profile) as dst: +# dst.write(ndre_masked, 1) +# return Path(str(out_path)).absolute + + +def create_raw_ireci( + b1_path: Union[str, pathlib.PosixPath], + b2_path: Union[str, pathlib.PosixPath], + b3_path: Union[str, pathlib.PosixPath], + b4_path: Union[str, pathlib.PosixPath], + out_path: Union[str, pathlib.PosixPath] = "./raw_ireci.tif", +) -> pathlib.PosixPath: """ Creates an IRECI raster from NIR, RED and RED EDGE rasters. @@ -286,56 +312,84 @@ def create_raw_ireci(b1_path: Union[str, pathlib.PosixPath], :param out_path: path to the output raster. 
""" logger.info("creating raw IRECI (tiff - int16)") - - with rasterio.open(str(b1_path)) as b1_src, \ - rasterio.open(str(b2_path)) as b2_src, \ - rasterio.open(str(b3_path)) as b3_src, \ - rasterio.open(str(b4_path)) as b4_src: + + with rasterio.open(str(b1_path)) as b1_src, rasterio.open( + str(b2_path) + ) as b2_src, rasterio.open(str(b3_path)) as b3_src, rasterio.open( + str(b4_path) + ) as b4_src: b2_profile = b2_src.profile - np.seterr(divide='ignore', invalid='ignore') # ignore warnings when dividing by zero - b1 = b1_src.read(1, - out_shape=(1, - max(b1_src.height, b2_src.height, b3_src.height, b4_src.height), - max(b1_src.width, b2_src.width, b3_src.width, b4_src.width)), - resampling=Resampling.bilinear)\ - .astype(np.float32) - b2 = b2_src.read(1, - out_shape=(1, - max(b1_src.height, b2_src.height, b3_src.height, b4_src.height), - max(b1_src.width, b2_src.width, b3_src.width, b4_src.width)), - resampling=Resampling.bilinear)\ - .astype(np.float32) - b3 = b3_src.read(1, - out_shape=(1, - max(b1_src.height, b2_src.height, b3_src.height, b4_src.height), - max(b1_src.width, b2_src.width, b3_src.width, b4_src.width)), - resampling=Resampling.bilinear)\ - .astype(np.float32) - b4 = b4_src.read(1, - out_shape=(1, - max(b1_src.height, b2_src.height, b3_src.height, b4_src.height), - max(b1_src.width, b2_src.width, b3_src.width, b4_src.width)), - resampling=Resampling.bilinear)\ - .astype(np.float32) - + np.seterr( + divide="ignore", invalid="ignore" + ) # ignore warnings when dividing by zero + b1 = b1_src.read( + 1, + out_shape=( + 1, + max( + b1_src.height, b2_src.height, b3_src.height, b4_src.height + ), + max(b1_src.width, b2_src.width, b3_src.width, b4_src.width), + ), + resampling=Resampling.bilinear, + ).astype(np.float32) + b2 = b2_src.read( + 1, + out_shape=( + 1, + max( + b1_src.height, b2_src.height, b3_src.height, b4_src.height + ), + max(b1_src.width, b2_src.width, b3_src.width, b4_src.width), + ), + resampling=Resampling.bilinear, + ).astype(np.float32) + b3 = b3_src.read( + 1, + out_shape=( + 1, + max( + b1_src.height, b2_src.height, b3_src.height, b4_src.height + ), + max(b1_src.width, b2_src.width, b3_src.width, b4_src.width), + ), + resampling=Resampling.bilinear, + ).astype(np.float32) + b4 = b4_src.read( + 1, + out_shape=( + 1, + max( + b1_src.height, b2_src.height, b3_src.height, b4_src.height + ), + max(b1_src.width, b2_src.width, b3_src.width, b4_src.width), + ), + resampling=Resampling.bilinear, + ).astype(np.float32) + ireci = (b4 * (b1 - b2) / b3).astype(np.int16) ireci_masked = np.where(b1 != 0, ireci, 32767) - b2_profile.update(driver="Gtiff", - compress="DEFLATE", - tiled=False, - dtype=np.int16, - nodata=32767, - transform=b2_src.transform) - b2_profile.pop('tiled', None) + b2_profile.update( + driver="Gtiff", + compress="DEFLATE", + tiled=False, + dtype=np.int16, + nodata=32767, + transform=b2_src.transform, + ) + b2_profile.pop("tiled", None) with rasterio.Env(GDAL_CACHEMAX=512) as env: with rasterio.open(str(out_path), "w", **b2_profile) as dst: dst.write(ireci_masked, 1) return Path(str(out_path)).absolute -def create_raw_bigr(red_path: Union[str, pathlib.PosixPath], - green_path: Union[str, pathlib.PosixPath], - out_path: Union[str, pathlib.PosixPath]="./raw_bigr.tif") -> pathlib.PosixPath: + +def create_raw_bigr( + red_path: Union[str, pathlib.PosixPath], + green_path: Union[str, pathlib.PosixPath], + out_path: Union[str, pathlib.PosixPath] = "./raw_bigr.tif", +) -> pathlib.PosixPath: """ Creates a BI (Green, Red) raster from GREEN and RED rasters. 
@@ -345,30 +399,38 @@ def create_raw_bigr(red_path: Union[str, pathlib.PosixPath], """ logger.info("creating raw BIGR (tiff - int16)") - with rasterio.open(str(red_path)) as red_src, \ - rasterio.open(str(green_path)) as green_src: + with rasterio.open(str(red_path)) as red_src, rasterio.open( + str(green_path) + ) as green_src: red_profile = red_src.profile red = red_src.read(1).astype(np.float32) green = green_src.read(1).astype(np.float32) - np.seterr(divide='ignore', invalid='ignore') # ignore warnings when dividing by zero - bigr = ((((green)**2 + (red)**2)/2)**0.5).astype(np.int16) + np.seterr( + divide="ignore", invalid="ignore" + ) # ignore warnings when dividing by zero + bigr = ((((green) ** 2 + (red) ** 2) / 2) ** 0.5).astype(np.int16) bigr_masked = np.where(red != 0, bigr, 32767) - red_profile.update(driver="Gtiff", - compress="DEFLATE", - tiled=False, - dtype=np.int16, - nodata=32767, - transform=red_src.transform) - red_profile.pop('tiled', None) + red_profile.update( + driver="Gtiff", + compress="DEFLATE", + tiled=False, + dtype=np.int16, + nodata=32767, + transform=red_src.transform, + ) + red_profile.pop("tiled", None) with rasterio.Env(GDAL_CACHEMAX=512) as env: with rasterio.open(str(out_path), "w", **red_profile) as dst: dst.write(bigr_masked, 1) return Path(str(out_path)).absolute -def create_raw_birnir(red_path: Union[str, pathlib.PosixPath], - nir_path: Union[str, pathlib.PosixPath], - out_path: Union[str, pathlib.PosixPath]="./raw_birnir.tif") -> pathlib.PosixPath: + +def create_raw_birnir( + red_path: Union[str, pathlib.PosixPath], + nir_path: Union[str, pathlib.PosixPath], + out_path: Union[str, pathlib.PosixPath] = "./raw_birnir.tif", +) -> pathlib.PosixPath: """ Creates a BI (Red, NIR) raster from RED and NIR rasters. @@ -378,31 +440,38 @@ def create_raw_birnir(red_path: Union[str, pathlib.PosixPath], """ logger.info("creating raw BIRNIR (tiff - int16)") - with rasterio.open(str(red_path)) as red_src, \ - rasterio.open(str(nir_path)) as nir_src: + with rasterio.open(str(red_path)) as red_src, rasterio.open( + str(nir_path) + ) as nir_src: red_profile = red_src.profile red = red_src.read(1).astype(np.float32) nir = nir_src.read(1).astype(np.float32) - np.seterr(divide='ignore', invalid='ignore') # ignore warnings when dividing by zero - birnir = ((((nir)**2 + (red)**2)/2)**0.5).astype(np.int16) + np.seterr( + divide="ignore", invalid="ignore" + ) # ignore warnings when dividing by zero + birnir = ((((nir) ** 2 + (red) ** 2) / 2) ** 0.5).astype(np.int16) birnir_masked = np.where(red != 0, birnir, 32767) - red_profile.update(driver="Gtiff", - compress="DEFLATE", - tiled=False, - dtype=np.int16, - nodata=32767, - transform=red_src.transform) - red_profile.pop('tiled', None) + red_profile.update( + driver="Gtiff", + compress="DEFLATE", + tiled=False, + dtype=np.int16, + nodata=32767, + transform=red_src.transform, + ) + red_profile.pop("tiled", None) with rasterio.Env(GDAL_CACHEMAX=512) as env: with rasterio.open(str(out_path), "w", **red_profile) as dst: dst.write(birnir_masked, 1) return Path(str(out_path)).absolute -def create_raw_bibg(blue_path: Union[str, pathlib.PosixPath], - green_path: Union[str, pathlib.PosixPath], - out_path: Union[str, pathlib.PosixPath]="./raw_bibg.tif") -> pathlib.PosixPath: +def create_raw_bibg( + blue_path: Union[str, pathlib.PosixPath], + green_path: Union[str, pathlib.PosixPath], + out_path: Union[str, pathlib.PosixPath] = "./raw_bibg.tif", +) -> pathlib.PosixPath: """ Creates a BI (Blue, Green) raster from BLUE and GREEN rasters. 
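The three brightness-index variants (BIGR and BIRNIR above, BIBG just below) share a single core computation: the root mean square of two 10 m reflectance bands, BI = sqrt((b1² + b2²) / 2). Because Sentinel-2 L2A reflectances are scaled to roughly 0-10000, the result already fits in int16 without the ×10000 rescaling that the normalized-difference functions apply. A standalone sketch of that core, with an invented function name:

import numpy as np

def brightness_index(band_a: np.ndarray, band_b: np.ndarray) -> np.ndarray:
    # Root mean square of two reflectance bands, as in
    # create_raw_bigr / create_raw_birnir / create_raw_bibg.
    a = band_a.astype(np.float32)
    b = band_b.astype(np.float32)
    return np.sqrt((a ** 2 + b ** 2) / 2).astype(np.int16)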
@@ -412,32 +481,39 @@ def create_raw_bibg(blue_path: Union[str, pathlib.PosixPath], """ logger.info("creating raw BIBG (tiff - int16)") - with rasterio.open(str(blue_path)) as blue_src, \ - rasterio.open(str(green_path)) as green_src: + with rasterio.open(str(blue_path)) as blue_src, rasterio.open( + str(green_path) + ) as green_src: blue_profile = blue_src.profile blue = blue_src.read(1).astype(np.float32) green = green_src.read(1).astype(np.float32) - np.seterr(divide='ignore', invalid='ignore') # ignore warnings when dividing by zero - bibg = ((((green)**2 + (blue)**2)/2)**0.5).astype(np.int16) + np.seterr( + divide="ignore", invalid="ignore" + ) # ignore warnings when dividing by zero + bibg = ((((green) ** 2 + (blue) ** 2) / 2) ** 0.5).astype(np.int16) bibg_masked = np.where(blue != 0, bibg, 32767) - blue_profile.update(driver="Gtiff", - compress="DEFLATE", - tiled=False, - dtype=np.int16, - nodata=32767, - transform=blue_src.transform) - blue_profile.pop('tiled', None) + blue_profile.update( + driver="Gtiff", + compress="DEFLATE", + tiled=False, + dtype=np.int16, + nodata=32767, + transform=blue_src.transform, + ) + blue_profile.pop("tiled", None) with rasterio.Env(GDAL_CACHEMAX=512) as env: with rasterio.open(str(out_path), "w", **blue_profile) as dst: dst.write(bibg_masked, 1) return Path(str(out_path)).absolute + # A faire -def create_raw_bi(b1_path: Union[str, pathlib.PosixPath], - b2_path: Union[str, pathlib.PosixPath], - out_path: Union[str, pathlib.PosixPath]="./raw_bi.tif", - ) -> pathlib.PosixPath: +def create_raw_bi( + b1_path: Union[str, pathlib.PosixPath], + b2_path: Union[str, pathlib.PosixPath], + out_path: Union[str, pathlib.PosixPath] = "./raw_bi.tif", +) -> pathlib.PosixPath: """ Creates a BI (Green, Red) raster from GREEN and RED rasters. 
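The generic create_raw_bi reformatted below is still flagged "# A faire" (to do) in the source: it reuses the BIGR body but is meant to accept an arbitrary band pair through its b1_path/b2_path parameters. Assuming those parameters are wired into the rasterio.open calls (see the hunk that follows), a call could look like this; the file names and output path are hypothetical, and the argument order is immaterial because the root mean square is symmetric:

from sen2chain.indices_functions import create_raw_bi

create_raw_bi(
    b1_path="T40KCB_20220204T063121_B03_10m.jp2",  # hypothetical green band
    b2_path="T40KCB_20220204T063121_B04_10m.jp2",  # hypothetical red band
    out_path="/tmp/raw_bi.tif",
)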
@@ -447,32 +523,38 @@ """ logger.info("creating raw BIGR (tiff - int16)") - with rasterio.open(str(red_path)) as red_src, \ - rasterio.open(str(green_path)) as green_src: + with rasterio.open(str(b1_path)) as red_src, rasterio.open( + str(b2_path) + ) as green_src: red_profile = red_src.profile red = red_src.read(1).astype(np.float32) green = green_src.read(1).astype(np.float32) - np.seterr(divide='ignore', invalid='ignore') # ignore warnings when dividing by zero - bigr = ((((green)**2 + (red)**2)/2)**0.5).astype(np.int16) + np.seterr( + divide="ignore", invalid="ignore" + ) # ignore warnings when dividing by zero + bigr = ((((green) ** 2 + (red) ** 2) / 2) ** 0.5).astype(np.int16) bigr_masked = np.where(red != 0, bigr, 32767) - red_profile.update(driver="Gtiff", - compress="DEFLATE", - tiled=False, - dtype=np.int16, - nodata=32767, - transform=red_src.transform) - red_profile.pop('tiled', None) + red_profile.update( + driver="Gtiff", + compress="DEFLATE", + tiled=False, + dtype=np.int16, + nodata=32767, + transform=red_src.transform, + ) + red_profile.pop("tiled", None) with rasterio.Env(GDAL_CACHEMAX=512) as env: with rasterio.open(str(out_path), "w", **red_profile) as dst: dst.write(bigr_masked, 1) return Path(str(out_path)).absolute + def create_masked_indice( - indice_path: Union[str, pathlib.PosixPath], - cloud_mask_path: Union[str, pathlib.PosixPath], - out_path: Union[str, pathlib.PosixPath]="./masked_indice.tif" - ) -> pathlib.PosixPath: + indice_path: Union[str, pathlib.PosixPath], + cloud_mask_path: Union[str, pathlib.PosixPath], + out_path: Union[str, pathlib.PosixPath] = "./masked_indice.tif", +) -> pathlib.PosixPath: """ Masks an indice raster with a cloud mask. @@ -484,32 +566,39 @@ logger.info("Cloud-masking indice (int16)") - with rasterio.open(str(indice_path)) as indice_src, \ - rasterio.open(str(cloud_mask_path)) as cld_src: + with rasterio.open(str(indice_path)) as indice_src, rasterio.open( + str(cloud_mask_path) + ) as cld_src: profile = indice_src.profile raw_indice = indice_src.read(1) cld = cld_src.read(1) # reproject cloud_mask to ndvi resolution cld_reproj = np.empty(raw_indice.shape, dtype=np.uint8) - reproject(source=cld, - destination=cld_reproj, - src_transform=cld_src.transform, - src_crs=cld_src.crs, - dst_transform=indice_src.transform, - dst_crs=indice_src.crs, - resampling=Resampling.nearest) -# indice_borders_mask = np.where(raw_indice > 0, raw_indice, 32767) + reproject( + source=cld, + destination=cld_reproj, + src_transform=cld_src.transform, + src_crs=cld_src.crs, + dst_transform=indice_src.transform, + dst_crs=indice_src.crs, + resampling=Resampling.nearest, + ) + # indice_borders_mask = np.where(raw_indice > 0, raw_indice, 32767) indice_cloud_mask = np.where(cld_reproj == 0, raw_indice, 32767) if indice_path.suffix == ".jp2": - indice_cloud_mask = np.where(indice_cloud_mask == 16383, 32767, indice_cloud_mask) - profile.update(driver="Gtiff", - compress="DEFLATE", - tiled=False, - dtype=np.int16, - nodata=32767, - transform=indice_src.transform) -# profile.pop('tiled', None) + indice_cloud_mask = np.where( + indice_cloud_mask == 16383, 32767, indice_cloud_mask + ) + profile.update( + driver="Gtiff", + compress="DEFLATE", + tiled=False, + dtype=np.int16, + nodata=32767, + transform=indice_src.transform, + ) + # profile.pop('tiled', None) with rasterio.Env(GDAL_CACHEMAX=512) as env: with rasterio.open(str(out_path), "w", **profile) as dst: @@ -517,9 +606,12 @@ 
def create_masked_indice( return str(Path(str(out_path)).absolute) -def index_tiff_2_jp2(img_path: Union[str, pathlib.PosixPath], - out_path: Union[str, pathlib.PosixPath]="./indice_2_jp2.jp2", - quality: int = 20) -> pathlib.PosixPath: + +def index_tiff_2_jp2( + img_path: Union[str, pathlib.PosixPath], + out_path: Union[str, pathlib.PosixPath] = "./indice_2_jp2.jp2", + quality: int = 20, +) -> pathlib.PosixPath: """ Convert a indice file from TIF to JP2. :param out_path: path to the output raster. @@ -528,8 +620,12 @@ def index_tiff_2_jp2(img_path: Union[str, pathlib.PosixPath], src_ds = gdal.Open(str(img_path)) driver = gdal.GetDriverByName("JP2OpenJPEG") for i in range(src_ds.RasterCount): - src_ds.GetRasterBand(i+1).SetNoDataValue(float(16383)) - dst_ds = driver.CreateCopy(str(out_path), src_ds, options=['NBITS=15', 'CODEC=JP2', 'QUALITY=' + str(quality)]) + src_ds.GetRasterBand(i + 1).SetNoDataValue(float(16383)) + dst_ds = driver.CreateCopy( + str(out_path), + src_ds, + options=["NBITS=15", "CODEC=JP2", "QUALITY=" + str(quality)], + ) dst_ds = None src_ds = None return str(Path(str(out_path)).absolute) diff --git a/sen2chain/jobs.py b/sen2chain/jobs.py index dfc10b7725690b202ad340bc1754bd1070078a17..c8fa7dde18dad78963f26355df1080379e4b4363 100644 --- a/sen2chain/jobs.py +++ b/sen2chain/jobs.py @@ -1,8 +1,9 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 """ Module for managing sen2chain processing jobs """ + import logging from pathlib import Path import pandas as pd @@ -24,12 +25,11 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) - -class Jobs(): +class Jobs: """ Class to manage created jobs """ - + def __init__(self): self._jobs_all = list(Config()._JOBS_DIR.glob("job_*.cfg")) self._jobs_all.extend(list(Config()._JOBS_DIR.glob("job_*.py"))) @@ -38,7 +38,7 @@ class Jobs(): self._py_scripts_exist = [] self._logs = [] self._timing = [] - self._cron_status = [] + self._cron_status = [] self._cron_timing = [] for jid in self._jid_set: logger.disabled = True @@ -50,31 +50,31 @@ class Jobs(): self._cron_status.append(j.cron_status) self._cron_timing.append(j.cron_timing) logger.disabled = False - - + def __repr__(self): - return repr(pd.DataFrame(OrderedDict([("job_id", list(self._jid_set)), - ("config_file", self._config_files_exist), - ("python_script", self._py_scripts_exist), - ("logging", self._logs), - ("timing", self._timing), - ("cron_status", self._cron_status), - ("cron_timing", self._cron_timing), - - ]))) - - ###### >>> job.render().split(' ')[0:4] - - - - - + return repr( + pd.DataFrame( + OrderedDict( + [ + ("job_id", list(self._jid_set)), + ("config_file", self._config_files_exist), + ("python_script", self._py_scripts_exist), + ("logging", self._logs), + ("timing", self._timing), + ("cron_status", self._cron_status), + ("cron_timing", self._cron_timing), + ] + ) + ) + ) + + # >>> job.render().split(' ')[0:4] + @property def list(self): return [t.stem[4:] for t in self._jobs_all] - - def remove(self, - jid: str = None): + + def remove(self, jid: str = None): if jid in self.list: logger.disabled = True j = Job(jid) @@ -89,14 +89,14 @@ class Jobs(): else: logger.info("Job {} not found, doing nothing...".format(jid)) - -class Job(): + +class Job: """ Class to manage job """ - #~ logger.propagate = False - def __init__(self, - jid: str): + + # logger.propagate = False + def __init__(self, jid: str): self._config_path = Config()._JOBS_DIR / ("job_" + jid + ".cfg") self._python_script_path = Config()._JOBS_DIR / ("job_" + jid + ".py") self.jid = jid 
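Taken together, the jobs.py hunks sketch a small file-backed task manager: each job id maps to a job_<jid>.cfg (and optionally a job_<jid>.py) under Config()._JOBS_DIR, and the tasks themselves live in a pandas DataFrame with one row per tile and date range, using the columns of first_row shown below. A rough usage sketch of that API; the job id is invented, and creating a brand-new job this way assumes init() supplies the defaults as in the hunk that follows:

import pandas as pd
from sen2chain.jobs import Job, Jobs

job = Job("nightly")  # invented job id, backed by job_nightly.cfg
job.task_add(
    pd.DataFrame(
        {
            "tile": ["40KCB"],
            "date_min": ["2022-02-01"],
            "date_max": ["2022-02-28"],
            "max_clouds": [80],
            "l1c": [True],
            "l2a": [True],
            "cloudmask": [False],
            "indices": [False],
            "remove": [False],
        }
    )
)
job.task_edit(0, max_clouds=60)  # update one cell of task 0
job.save()                       # header lines plus a CSV dump of the tasks
print(Jobs().list)               # ids of every saved job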
@@ -108,89 +108,100 @@ class Job(): self.init() self._cron = CronTab(user=True) self.cron_status = self.get_cron_status() - + self._log_folder_path = Path(Config().get("log_path")) / ("job_" + jid) - + def __repr__(self): return repr(self.tasks) - + def init(self): raw_job_cfg = SHARED_DATA.get("raw_job_cfg") self.read(raw_job_cfg) - first_row = OrderedDict([("tile", ["40KCB"]), - ("date_min", ["2022-02-04"]), - ("date_max", ["2022-02-04"]), - ("max_clouds", [100]), - ("l1c", [False]), - ("l2a", [False]), - ("cloudmask", [False]), - ("indices", [False]), - ("remove", [False]), - ]) + first_row = OrderedDict( + [ + ("tile", ["40KCB"]), + ("date_min", ["2022-02-04"]), + ("date_max", ["2022-02-04"]), + ("max_clouds", [100]), + ("l1c", [False]), + ("l2a", [False]), + ("cloudmask", [False]), + ("indices", [False]), + ("remove", [False]), + ] + ) self.tasks = pd.DataFrame(first_row) - - def task_add(self, - row: dict = None): + + def task_add(self, row: dict = None): if not row: logger.info("No row provided, using default") - row = pd.DataFrame({"tile": ["40KCB"], - "date_min": ["2022-02-04"], - "date_max": ["2022-02-04"], - "max_clouds": [100], - "l1c": [False], - "l2a": [False], - "cloudmask": [False], - "indices": [False], - "remove": [False], - }) - #~ self.tasks = self.tasks.append(row, ignore_index=True) - self.tasks = pd.concat([self.tasks, row], ignore_index = True)[self.tasks.columns] - + row = pd.DataFrame( + { + "tile": ["40KCB"], + "date_min": ["2022-02-04"], + "date_max": ["2022-02-04"], + "max_clouds": [100], + "l1c": [False], + "l2a": [False], + "cloudmask": [False], + "indices": [False], + "remove": [False], + } + ) + # self.tasks = self.tasks.append(row, ignore_index=True) + self.tasks = pd.concat([self.tasks, row], ignore_index=True)[ + self.tasks.columns + ] + logger.info("\n{}".format(self.tasks)) - - def task_edit(self, - task_id: int = None, - **kwargs): + + def task_edit(self, task_id: int = None, **kwargs): if task_id is None: - logger.info("Please provide task_number to edit, if no task in job, create_task first") + logger.info( + "Please provide task_number to edit, if no task in job, create_task first" + ) else: if task_id in self.tasks.index: for arg, val in kwargs.items(): if arg in self.tasks.columns: self.tasks.loc[task_id, arg] = val - logger.info("Line {}: {} updated to {}".format(task_id, arg, val)) + logger.info( + "Line {}: {} updated to {}".format( + task_id, arg, val + ) + ) else: logger.info("{} not found".format(arg)) - logger.info("\n{}".format(self.tasks)) + logger.info("\n{}".format(self.tasks)) else: logger.info("Task_number not found") - - def task_remove(self, - task_id: int = None): + + def task_remove(self, task_id: int = None): if task_id is None: logger.info("Please provide task_number to remove") else: if task_id in self.tasks.index: - self.tasks.drop(labels = task_id, axis=0, inplace = True) - self.tasks.reset_index(inplace = True, drop = True) + self.tasks.drop(labels=task_id, axis=0, inplace=True) + self.tasks.reset_index(inplace=True, drop=True) logger.info("\n{}".format(self.tasks)) else: logger.info("Task_number not found") - + def save(self): # save task to disk - with open(str(self._config_path), 'w') as ict: - header = '\n'.join( - ['logs = ' + str(self.logs), - 'timing = ' + self.timing, - '', - ] + with open(str(self._config_path), "w") as ict: + header = "\n".join( + [ + "logs = " + str(self.logs), + "timing = " + self.timing, + "", + ] ) for line in header: ict.write(line) - #~ self.tasks.to_csv(ict) - self.tasks.to_csv(ict, index = 
False, sep=';') - + # self.tasks.to_csv(ict) + self.tasks.to_csv(ict, index=False, sep=";") + def get_cron_status(self): iter = list(self._cron.find_comment("sen2chain_job_" + self.jid)) if iter: @@ -215,12 +226,17 @@ class Job(): lines.append('"""\n') lines.append("\n") lines.append("from sen2chain import Job\n") - lines.append('Job("' + self.jid + '").run(clean_before = False, clean_after = False)\n') - with open(str(self._python_script_path), 'w') as f: + lines.append( + 'Job("' + + self.jid + + '").run(clean_before = False, clean_after = False)\n' + ) + with open(str(self._python_script_path), "w") as f: f.writelines(lines) - - def cron_enable(self, - ): + + def cron_enable( + self, + ): # enable job in cron self.save() self.create_python_script() @@ -232,19 +248,21 @@ class Job(): job.setall(self.timing) job.enable() else: - job = self._cron.new(command="/usr/bin/python3 " + str(self._python_script_path), comment="sen2chain_job_" + self.jid) + job = self._cron.new( + command="/usr/bin/python3 " + str(self._python_script_path), + comment="sen2chain_job_" + self.jid, + ) if self.timing: job.setall(self.timing) else: job.setall("0 0 * * *") job.enable() logger.info("Enabling job...") - - #~ logger.info("Time: {}".format(job.time)) + + # logger.info("Time: {}".format(job.time)) self._cron.write() - #~ new.enable(False) + # new.enable(False) self.get_cron_status() - def cron_disable(self): # disable / commenting job in cron @@ -265,38 +283,54 @@ class Job(): self._cron.remove(job) self._cron.write() self.get_cron_status() - - - def read(self, - path): + + def read(self, path): parser = ConfigParser(allow_no_value=True) with open(str(path)) as stream: - parser.read_string("[top]\n" + stream.read()) # This line does the trick. - self.logs = bool(distutils.util.strtobool(parser['top']['logs'])) - self.timing = parser['top']['timing'] - - self.tasks = pd.read_csv(path, sep = ';', na_values="", na_filter=False, comment='#', dtype = str, header = 2) - #~ if "logs" not in self.tasks: - #~ self.tasks["logs"] = False - + parser.read_string( + "[top]\n" + stream.read() + ) # This line does the trick. 
+ self.logs = bool(distutils.util.strtobool(parser["top"]["logs"])) + self.timing = parser["top"]["timing"] + + self.tasks = pd.read_csv( + path, + sep=";", + na_values="", + na_filter=False, + comment="#", + dtype=str, + header=2, + ) + # if "logs" not in self.tasks: + # self.tasks["logs"] = False + for index, row in self.tasks.iterrows(): if not row.date_min: - #~ self.tasks.at[index, "start_time"] = (datetime.datetime.now()-datetime.timedelta(days=delta_t)).strftime('%Y-%m-%d') - self.tasks.at[index, "date_min"] = datetime.datetime.strptime("2015-01-01", "%Y-%m-%d").strftime('%Y-%m-%d') + # self.tasks.at[index, "start_time"] = (datetime.datetime.now()-datetime.timedelta(days=delta_t)).strftime('%Y-%m-%d') + self.tasks.at[index, "date_min"] = datetime.datetime.strptime( + "2015-01-01", "%Y-%m-%d" + ).strftime("%Y-%m-%d") if not row.date_max: - self.tasks.at[index, "date_max"] = (datetime.datetime.now()+datetime.timedelta(days=1)).strftime('%Y-%m-%d') + self.tasks.at[index, "date_max"] = ( + datetime.datetime.now() + datetime.timedelta(days=1) + ).strftime("%Y-%m-%d") if not row.max_clouds: self.tasks.at[index, "max_clouds"] = 100 if not row.indices == "False": if row.indices == "All": - self.tasks.at[index, "indices"] = IndicesCollection.available_indices + self.tasks.at[ + index, "indices" + ] = IndicesCollection.available_indices else: - self.tasks.at[index, "indices"] = str(row.indices).split("/") + self.tasks.at[index, "indices"] = str(row.indices).split( + "/" + ) if not row.cloudmask == "False": - self.tasks.at[index, "cloudmask"] = self.get_cm_version(row.cloudmask) - - - + self.tasks.at[index, "cloudmask"] = self.get_cm_version( + row.cloudmask + ) + @staticmethod def get_cm_version(identifier) -> str: """Returns cloudmask version from a cloud mask identifier string. 
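
# ---------------------------------------------------------------------------
# Editor's note (not part of the patch): read() above relies on two tricks
# worth spelling out. The job file has no section header, so a dummy "[top]"
# section is prepended for ConfigParser, and the task table that follows the
# two key lines is parsed separately with pandas (header=2, sep=";"). A
# job_*.cfg therefore looks roughly like this (values illustrative):
#
#     logs = False
#     timing = 0 0 * * *
#     tile;date_min;date_max;max_clouds;l1c;l2a;cloudmask;indices;remove
#     40KCB;2022-02-04;2022-02-04;100;False;False;False;False;False
#
# Self-contained demonstration of the section-injection trick:

from configparser import ConfigParser

raw = "logs = False\ntiming = 0 0 * * *\n"  # section-less key lines
parser = ConfigParser(allow_no_value=True)
parser.read_string("[top]\n" + raw)         # inject the dummy section
print(parser["top"]["timing"])              # -> 0 0 * * *
# ---------------------------------------------------------------------------
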
@@ -308,42 +342,54 @@ class Job():
         except:
             pass
         try:
-            pat = re.compile(r"(?P<cm_version>CM003)" + \
-                             "-PRB(?P<probability>.*)" + \
-                             "-ITER(?P<iterations>.*)")
+            pat = re.compile(
+                r"(?P<cm_version>CM003)"
+                + "-PRB(?P<probability>.*)"
+                + "-ITER(?P<iterations>.*)"
+            )
             return pat.match(identifier).groupdict()
         except:
             pass
         try:
-            pat = re.compile(r"(?P<cm_version>CM004)" + \
-                             "-CSH(?P<cld_shad>.*)" + \
-                             "-CMP(?P<cld_med_prob>.*)" + \
-                             "-CHP(?P<cld_hi_prob>.*)" + \
-                             "-TCI(?P<thin_cir>.*)" + \
-                             "-ITER(?P<iterations>.*)")
+            pat = re.compile(
+                r"(?P<cm_version>CM004)"
+                + "-CSH(?P<cld_shad>.*)"
+                + "-CMP(?P<cld_med_prob>.*)"
+                + "-CHP(?P<cld_hi_prob>.*)"
+                + "-TCI(?P<thin_cir>.*)"
+                + "-ITER(?P<iterations>.*)"
+            )
             return pat.match(identifier).groupdict()
         except:
             pass
-        
-    def run(self,
-            nb_proc: int = 12,
-            clean_before: bool = False,
-            clean_after: bool = False):
-        
+
+    def run(
+        self,
+        nb_proc: int = 12,
+        clean_before: bool = False,
+        clean_after: bool = False,
+    ):
+
         if self.logs:
-            self._log_folder_path.mkdir(exist_ok = True)
-            self._log_file = self._log_folder_path / ("job_" + self.jid + "_run_"+ datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + ".log")
+            self._log_folder_path.mkdir(exist_ok=True)
+            self._log_file = self._log_folder_path / (
+                "job_"
+                + self.jid
+                + "_run_"
+                + datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+                + ".log"
+            )
             f = open(str(self._log_file), "w")
             f.write("Start: {}\n\n".format(datetime.datetime.now()))
             f.write(repr(self) + "\n")
             f.flush()
-        
+
         if not self.tasks.empty:
-            
+
             # Download
             # todo
             # keep list of downloaded l1c products
-            
+
             # Cleaning
             if clean_before:
                 logger.info("Cleaning Tiles")
@@ -351,106 +397,170 @@ class Job():
                 for index, row in self.tasks.iterrows():
                     clean_list.append(row.tile)
                 lib = Library()
-                pb_before = lib.clean(clean_list, remove=True, remove_indice_tif=True)
+                pb_before = lib.clean(
+                    clean_list, remove=True, remove_indice_tif=True
+                )
                 # lib.clean(clean_list, remove=False)
 
             # Processing L1C to L2A
-            logger.info("Computing l2a") 
-            l1c_process_list = [] 
+            logger.info("Computing l2a")
+            l1c_process_list = []
             for index, row in self.tasks.iterrows():
                 if not row.l2a == False:
                     t = Tile(row.tile)
-                    l1c_to_process = list(p.identifier for p in t.l2a_missings.filter_dates(date_min = row.date_min, date_max = row.date_max))
+                    l1c_to_process = list(
+                        p.identifier
+                        for p in t.l2a_missings.filter_dates(
+                            date_min=row.date_min, date_max=row.date_max
+                        )
+                    )
                     l1c_process_list.append(l1c_to_process)
-                    logger.info("ajout {}: {} l1c files".format(row.tile, len(l1c_to_process)))
+                    logger.info(
+                        "added {}: {} l1c files".format(
+                            row.tile, len(l1c_to_process)
+                        )
+                    )
             l1c_process_list = list(chain.from_iterable(l1c_process_list))
-            logger.info("l1c Sen2Cor process list ({} files): {}".format(len(l1c_process_list), l1c_process_list))
+            logger.info(
+                "l1c Sen2Cor process list ({} files): {}".format(
+                    len(l1c_process_list), l1c_process_list
+                )
+            )
             l2a_res = False
             if l1c_process_list:
-                l2a_res = l2a_multiprocessing(l1c_process_list, nb_proc = nb_proc)
-                #~ logger.info("je multiprocess les l1c en l2a")
+                l2a_res = l2a_multiprocessing(
+                    l1c_process_list, nb_proc=nb_proc
+                )
+                # logger.info("multiprocessing the l1c to l2a")
             if self.logs:
                 f.write("\nL1C processing: {}\n".format(l2a_res))
-                f.write("l1c_process_list: \n" + "\n".join(l1c_process_list) + "\n")
-            
+                f.write(
+                    "l1c_process_list: \n"
+                    + "\n".join(l1c_process_list)
+                    + "\n"
+                )
+
             # Remove L1C
             # todo
             # remove list of downloaded l1c products
-            
+
             # Processing L2A (clouds)
             
logger.info("Computing cloudmasks") reprocess = False - cld_l2a_process_list = [] + cld_l2a_process_list = [] for index, row in self.tasks.iterrows(): - #~ if not bool(distutils.util.strtobool(str(row.cloudmask))): + # if not bool(distutils.util.strtobool(str(row.cloudmask))): if not (row.cloudmask == "False" or not row.cloudmask): t = Tile(row.tile) - l2a_to_process = [p.identifier for p in t.cloudmasks_missing().filter_dates(date_min = row.date_min, date_max = row.date_max)] + l2a_to_process = [ + p.identifier + for p in t.cloudmasks_missing().filter_dates( + date_min=row.date_min, date_max=row.date_max + ) + ] for j in l2a_to_process: - l2a_cm_details = [j, - row.cloudmask["cm_version"], - row.cloudmask["probability"], - row.cloudmask["iterations"], - row.cloudmask["cld_shad"], - row.cloudmask["cld_med_prob"], - row.cloudmask["cld_hi_prob"], - row.cloudmask["thin_cir"], - reprocess, - ] + l2a_cm_details = [ + j, + row.cloudmask["cm_version"], + row.cloudmask["probability"], + row.cloudmask["iterations"], + row.cloudmask["cld_shad"], + row.cloudmask["cld_med_prob"], + row.cloudmask["cld_hi_prob"], + row.cloudmask["thin_cir"], + reprocess, + ] cld_l2a_process_list.append(l2a_cm_details) - logger.info("ajout {}: {} l2a products".format(row.tile, len(l2a_to_process))) - logger.info("l2a cloudmasks process list ({} products): {}".format(len(cld_l2a_process_list), cld_l2a_process_list)) + logger.info( + "ajout {}: {} l2a products".format( + row.tile, len(l2a_to_process) + ) + ) + logger.info( + "l2a cloudmasks process list ({} products): {}".format( + len(cld_l2a_process_list), cld_l2a_process_list + ) + ) cld_res = False if cld_l2a_process_list: - cld_res = cld_version_probability_iterations_reprocessing_multiprocessing(cld_l2a_process_list, nb_proc = nb_proc) + cld_res = cld_version_probability_iterations_reprocessing_multiprocessing( + cld_l2a_process_list, nb_proc=nb_proc + ) if self.logs: f.write("\nTraitement des clouds : {}\n".format(cld_res)) - f.write("cld_l2a_process_list: \n" + "\n".join(cld_l2a_process_list) + "\n") - + f.write( + "cld_l2a_process_list: \n" + + "\n".join(cld_l2a_process_list) + + "\n" + ) + # Traitement des L2A (indices) logger.info("Computing indices") nodata_clouds = True quicklook = False - indices_l2a_process_list = [] + indices_l2a_process_list = [] for index, row in self.tasks.iterrows(): if not (row.indices == "False" or not row.indices): t = Tile(row.tile) - #~ indices_list = row.indices.split("/") + # indices_list = row.indices.split("/") for i in row.indices: - #~ l2a_list = [p.identifier for p in t.missing_indices(i).filter_dates(date_min = row.start_time, date_max = row.end_time)] - l2a_to_process = [p.identifier for p in t.missing_indices(indice = i, - nodata_clouds = nodata_clouds, - ).filter_dates(date_min = row.date_min, date_max = row.date_max)] + # l2a_list = [p.identifier for p in t.missing_indices(i).filter_dates(date_min = row.start_time, date_max = row.end_time)] + l2a_to_process = [ + p.identifier + for p in t.missing_indices( + indice=i, + nodata_clouds=nodata_clouds, + ).filter_dates( + date_min=row.date_min, date_max=row.date_max + ) + ] for j in l2a_to_process: - l2a_ind_details = [j, - i, - reprocess, - nodata_clouds, - quicklook, - row.cloudmask["cm_version"], - row.cloudmask["probability"], - row.cloudmask["iterations"], - row.cloudmask["cld_shad"], - row.cloudmask["cld_med_prob"], - row.cloudmask["cld_hi_prob"], - row.cloudmask["thin_cir"], - ] + l2a_ind_details = [ + j, + i, + reprocess, + nodata_clouds, + quicklook, + 
row.cloudmask["cm_version"], + row.cloudmask["probability"], + row.cloudmask["iterations"], + row.cloudmask["cld_shad"], + row.cloudmask["cld_med_prob"], + row.cloudmask["cld_hi_prob"], + row.cloudmask["thin_cir"], + ] indices_l2a_process_list.append(l2a_ind_details) - logger.info("ajout {} - {}: {} l2a products".format(row.tile, i, len(l2a_to_process))) - logger.info("l2a indices process list ({} products): {}".format(len(indices_l2a_process_list), indices_l2a_process_list)) + logger.info( + "ajout {} - {}: {} l2a products".format( + row.tile, i, len(l2a_to_process) + ) + ) + logger.info( + "l2a indices process list ({} products): {}".format( + len(indices_l2a_process_list), indices_l2a_process_list + ) + ) indices_res = False if indices_l2a_process_list: - indices_res = idx_multiprocessing(indices_l2a_process_list, nb_proc = nb_proc) + indices_res = idx_multiprocessing( + indices_l2a_process_list, nb_proc=nb_proc + ) if self.logs: - f.write("\nTraitement des indices: {}\n".format(indices_res)) - f.write("indices_l2a_process_list: \n" + "\n".join(indices_l2a_process_list) + "\n") - + f.write( + "\nTraitement des indices: {}\n".format(indices_res) + ) + f.write( + "indices_l2a_process_list: \n" + + "\n".join(indices_l2a_process_list) + + "\n" + ) + # Remove L2A # todo - + else: logger.info("No task defined for this job, doing nothing") - + if self.logs: f.write("\nFin : {}\n".format(datetime.datetime.now())) f.close() diff --git a/sen2chain/library.py b/sen2chain/library.py index 9fbf94b70f3e21e69e4b223442067991b389f45b..72d2e679d1550593927ce0e74be8a98b7bf53a30 100644 --- a/sen2chain/library.py +++ b/sen2chain/library.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 """ Module for managing products and tiles in the library and temp folders. @@ -11,6 +11,7 @@ import pathlib from itertools import chain from pathlib import Path + # type annotations from typing import List, Dict @@ -18,7 +19,11 @@ from .config import Config, SHARED_DATA from .products import L1cProduct from .tiles import Tile from .utils import human_size, human_size_decimal -from .multi_processing import l2a_multiprocessing, cld_version_probability_iterations_reprocessing_multiprocessing, idx_multiprocessing +from .multi_processing import ( + l2a_multiprocessing, + cld_version_probability_iterations_reprocessing_multiprocessing, + idx_multiprocessing, +) s2_tiles_index = SHARED_DATA.get("tiles_index") @@ -27,9 +32,8 @@ logger = logging.getLogger(__name__) class Library: - """Class for listing L1C, L2A and indices tiles in the library folders. + """Class for listing L1C, L2A and indices tiles in the library folders.""" - """ _temp_path = Path(Config().get("temp_path")) _l1c_path = Path(Config().get("l1c_path")) _l2a_path = Path(Config().get("l2a_path")) @@ -41,7 +45,9 @@ class Library: self._indices = dict() for indice in [f.name for f in self._indices_path.glob("*")]: - self.__dict__[indice.lower()] = [f.name for f in self._indices_path.glob("{}/*".format(indice))] + self.__dict__[indice.lower()] = [ + f.name for f in self._indices_path.glob("{}/*".format(indice)) + ] @property def l1c(self) -> List[str]: @@ -57,18 +63,24 @@ class Library: def indices(self) -> Dict[str, pathlib.PosixPath]: """Returns tiles in the indices library folder.""" return self._indices - - def clean(self, - clean_list: list = [], - remove_indice_tif: bool = False, - remove: bool = False): - """Function to clean corrupted files during processing errors from whole Library - or selected Tiles. Calls the clean_lib function from Tile class. 
- - :param clean_list: list of tiles to be cleaned. If not provided, will process all l1c tiles. - :param remove_indice_tif: bool. If True will remove present TIFF files present in indice folders. Default value False. - :param remove: bool. If True will effectively remove corrupted files, if False will just list identified problems. Default value False. - + + def clean( + self, + clean_list: list = [], + remove_indice_tif: bool = False, + remove: bool = False, + ): + """Function to clean corrupted files during processing errors from + whole Library or selected Tiles. Calls the clean_lib function from Tile + class. + + :param clean_list: list of tiles to be cleaned. If not provided, will + process all l1c tiles. + :param remove_indice_tif: bool. If True will remove present TIFF files + present in indice folders. Default value False. + :param remove: bool. If True will effectively remove corrupted files, + if False will just list identified problems. Default value False. + """ nb_id = 0 nb_rm = 0 @@ -78,103 +90,125 @@ class Library: for t in clean_list: try: til = Tile(t) - counts = til.clean_lib(remove=remove, remove_indice_tif=remove_indice_tif) + counts = til.clean_lib( + remove=remove, remove_indice_tif=remove_indice_tif + ) nb_id += counts["identified_problems"] nb_rm += counts["removed_problems"] if counts["problems"]: prob_id.append(counts["problems"]) - logger.info("{} - identified problems:{} - removed problems:{}".format(til, counts["identified_problems"], counts["removed_problems"])) + logger.info( + "{} - identified problems:{} - removed problems:{}".format( + til, + counts["identified_problems"], + counts["removed_problems"], + ) + ) except: pass - return {"identified_problems": nb_id, "removed_problems": nb_rm, "problems": prob_id} - - def archive_l1c(self, - archive_list: list = [], - force: bool = False, - size_only: bool = False, - ): + return { + "identified_problems": nb_id, + "removed_problems": nb_rm, + "problems": prob_id, + } + + def archive_l1c( + self, + archive_list: list = [], + force: bool = False, + size_only: bool = False, + ): """ Function to archive l1c products from library folder to l1c_archive_path. Calls the archive_l1c function from Tile class, see there for details. - - :param archive_list: list of tiles to archive. If not provided, will process all l1c tiles. + + :param archive_list: list of tiles to archive. If not provided, will + process all l1c tiles. :param size_only: if True, only gives sizes - """ - - total_size = 0 + """ + + total_size = 0 if not archive_list: archive_list = self.l1c for t in archive_list: try: logger.info(t) til = Tile(t) - size_tile = til.archive_l1c(size_only = size_only, force = force) + size_tile = til.archive_l1c(size_only=size_only, force=force) if size_only: total_size += size_tile except: pass - logger.info("Total l1c size to move: {}".format(human_size_decimal(total_size))) - - - - def archive_l2a(self, - tile_list: list = [], - size_only: bool = False, - ): - + logger.info( + "Total l1c size to move: {}".format(human_size_decimal(total_size)) + ) + + def archive_l2a( + self, + tile_list: list = [], + size_only: bool = False, + ): + """ Function to archive l2a products from library folder to l2a_archive_path. Calls the archive_l2a function from Tile class, see there for details. - - :param tile_list: list of tiles to archive. If not provided, will process all l2a tiles. - :param size_only: if True, only gives sizes - """ - - total_size = 0 + + :param tile_list: list of tiles to archive. 
If not provided, will + process all l2a tiles. + :param size_only: if True, only gives sizes. + """ + + total_size = 0 if not tile_list: tile_list = self.l2a for t in tile_list: try: logger.info(t) til = Tile(t) - size_tile = til.archive_l2a(size_only = size_only) + size_tile = til.archive_l2a(size_only=size_only) if size_only: total_size += size_tile except: logger.info("Problem while archiving") - logger.info("Total l2a size to move: {}".format(human_size_decimal(total_size))) - - def archive_all(self, - archive_list: list = [], - size_only: bool = False, - force: bool = False, - ): - total_size = 0 + logger.info( + "Total l2a size to move: {}".format(human_size_decimal(total_size)) + ) + + def archive_all( + self, + archive_list: list = [], + size_only: bool = False, + force: bool = False, + ): + total_size = 0 if not archive_list: archive_list = self.l1c for t in archive_list: try: logger.info(t) til = Tile(t) - size_tile = til.archive_all(size_only = size_only, force = force) + size_tile = til.archive_all(size_only=size_only, force=force) total_size += size_tile except: pass - logger.info("Total size to move: {}".format(human_size_decimal(total_size))) - - def compute_ql(self, - tile_list: list = [], - product_list: list = [], - resolution: int = 750, - jpg: bool = True, - ): + logger.info( + "Total size to move: {}".format(human_size_decimal(total_size)) + ) + + def compute_ql( + self, + tile_list: list = [], + product_list: list = [], + resolution: int = 750, + jpg: bool = True, + ): """ Produce: - - l1c and/or l2a quicklooks - - for provided tile_list or all the library tiles + - l1c and/or l2a quicklooks + - for provided tile_list or all the library tiles - at specified resolution - in jpg or tiff format - """ + """ if not tile_list: tile_list = list(set(self.l1c + self.l2a)) for tile in tile_list: @@ -190,12 +224,12 @@ class Library: except: pass - #~ def update_latest_ql(self): - #~ """ - #~ Produce or update the latest quicklook for the L2A library tiles - #~ """ - #~ for tile in self.l2a: - #~ Tile(tile).update_latest_ql() + # def update_latest_ql(self): + # """ + # Produce or update the latest quicklook for the L2A library tiles + # """ + # for tile in self.l2a: + # Tile(tile).update_latest_ql() def update_old_cloudmasks(self): """ @@ -203,7 +237,7 @@ class Library: """ for tile in self.l2a: Tile(tile).update_old_cloudmasks() - + def remove_very_old_cloudmasks(self): """ Remove very old cloudmasks, matching pattern : *_CLOUD_MASK.tif for the L2A library tiles @@ -225,126 +259,184 @@ class Library: """ for tile in {f.name for f in list(self._indices_path.glob("*/*/"))}: Tile(tile).update_old_indices() - + def init_md(self): """ - Initiate sen2chain metadata for all tile products (l2a, cloudmasks, indices (raw, masked, ql)) + Initiate sen2chain metadata for all tile products (l2a, cloudmasks, indices (raw, masked, ql)) for the L1C, L2A and Indice library tiles """ - for tile in set([val for sublist in [k for k in [getattr(self,t) for t in self.__dict__]] for val in sublist]): - Tile(tile).init_md() - - def compute_l2a(self, - tile_list: list = [], - date_min: str = None, - date_max: str = None, - nb_proc: int = 4, - ): - l1c_process_list = [] + for tile in set( + [ + val + for sublist in [ + k for k in [getattr(self, t) for t in self.__dict__] + ] + for val in sublist + ] + ): + Tile(tile).init_md() + + def compute_l2a( + self, + tile_list: list = [], + date_min: str = None, + date_max: str = None, + nb_proc: int = 4, + ): + l1c_process_list = [] for tile in tile_list: t = 
Tile(tile) - l1c_process_list.append(list(p.identifier for p in t.l2a_missings.filter_dates(date_min = date_min, date_max = date_max))) + l1c_process_list.append( + list( + p.identifier + for p in t.l2a_missings.filter_dates( + date_min=date_min, date_max=date_max + ) + ) + ) l1c_process_list = list(chain.from_iterable(l1c_process_list)) - logger.info("l1c_process_list ({} files): \n{}".format(len(l1c_process_list), l1c_process_list)) + logger.info( + "l1c_process_list ({} files): \n{}".format( + len(l1c_process_list), l1c_process_list + ) + ) if l1c_process_list: l2a_res = l2a_multiprocessing(l1c_process_list, nb_proc=nb_proc) - - def compute_cloudmasks(self, - tile_list: list = [], - cm_version: str = "cm001", - probability: int = 1, - iterations: int = 5, - cld_shad: bool = True, - cld_med_prob: bool = True, - cld_hi_prob: bool = True, - thin_cir: bool = True, - reprocess: bool = False, - date_min: str = None, - date_max: str = None, - nb_proc: int = 4, - ): - cld_l2a_process_list = [] + + def compute_cloudmasks( + self, + tile_list: list = [], + cm_version: str = "cm001", + probability: int = 1, + iterations: int = 5, + cld_shad: bool = True, + cld_med_prob: bool = True, + cld_hi_prob: bool = True, + thin_cir: bool = True, + reprocess: bool = False, + date_min: str = None, + date_max: str = None, + nb_proc: int = 4, + ): + cld_l2a_process_list = [] for tile in tile_list: t = Tile(tile) if not reprocess: - l2a_list = [p.identifier for p in t.cloudmasks_missing(cm_version = cm_version, - probability = probability, - iterations = iterations, - cld_shad = cld_shad, - cld_med_prob = cld_med_prob, - cld_hi_prob = cld_hi_prob, - thin_cir = thin_cir, - )\ - .filter_dates(date_min = date_min, date_max = date_max)] + l2a_list = [ + p.identifier + for p in t.cloudmasks_missing( + cm_version=cm_version, + probability=probability, + iterations=iterations, + cld_shad=cld_shad, + cld_med_prob=cld_med_prob, + cld_hi_prob=cld_hi_prob, + thin_cir=thin_cir, + ).filter_dates(date_min=date_min, date_max=date_max) + ] else: - l2a_list = [p.identifier for p in t.l2a.filter_dates(date_min = date_min, date_max = date_max)] + l2a_list = [ + p.identifier + for p in t.l2a.filter_dates( + date_min=date_min, date_max=date_max + ) + ] for j in l2a_list: - cld_l2a_process_list.append([j, - cm_version, - probability, - iterations, - cld_shad, - cld_med_prob, - cld_hi_prob, - thin_cir, - reprocess, - ]) - logger.info("cld_l2a_process_list ({} files): \n{}".format(len(cld_l2a_process_list), cld_l2a_process_list)) + cld_l2a_process_list.append( + [ + j, + cm_version, + probability, + iterations, + cld_shad, + cld_med_prob, + cld_hi_prob, + thin_cir, + reprocess, + ] + ) + logger.info( + "cld_l2a_process_list ({} files): \n{}".format( + len(cld_l2a_process_list), cld_l2a_process_list + ) + ) if cld_l2a_process_list: - cld_res = cld_version_probability_iterations_reprocessing_multiprocessing(cld_l2a_process_list, nb_proc=nb_proc) - - def compute_indices(self, - tile_list: list = [], - indice_list: list = [], - reprocess: bool = False, - nodata_clouds: bool = True, - quicklook: bool = False, - cm_version: list = "cm001", - probability: int = 1, - iterations: int = 5, - cld_shad: bool = True, - cld_med_prob: bool = True, - cld_hi_prob: bool = True, - thin_cir: bool = True, - date_min: str = None, - date_max: str = None, - nb_proc: int = 4): - indices_l2a_process_list = [] + cld_res = cld_version_probability_iterations_reprocessing_multiprocessing( + cld_l2a_process_list, nb_proc=nb_proc + ) + + def compute_indices( + self, + 
tile_list: list = [], + indice_list: list = [], + reprocess: bool = False, + nodata_clouds: bool = True, + quicklook: bool = False, + cm_version: list = "cm001", + probability: int = 1, + iterations: int = 5, + cld_shad: bool = True, + cld_med_prob: bool = True, + cld_hi_prob: bool = True, + thin_cir: bool = True, + date_min: str = None, + date_max: str = None, + nb_proc: int = 4, + ): + indices_l2a_process_list = [] for tile in tile_list: t = Tile(tile) for i in indice_list: if not reprocess: - l2a_list = [p.identifier for p in t.missing_indices(i, - nodata_clouds = nodata_clouds, - cm_version = cm_version, - probability = probability, - iterations = iterations, - cld_shad = cld_shad, - cld_med_prob = cld_med_prob, - cld_hi_prob = cld_hi_prob, - thin_cir = thin_cir, - ).filter_dates(date_min = date_min, date_max = date_max)] + l2a_list = [ + p.identifier + for p in t.missing_indices( + i, + nodata_clouds=nodata_clouds, + cm_version=cm_version, + probability=probability, + iterations=iterations, + cld_shad=cld_shad, + cld_med_prob=cld_med_prob, + cld_hi_prob=cld_hi_prob, + thin_cir=thin_cir, + ).filter_dates(date_min=date_min, date_max=date_max) + ] else: - l2a_list = [p.identifier for p in t.l2a.filter_dates(date_min = date_min, date_max = date_max)] - + l2a_list = [ + p.identifier + for p in t.l2a.filter_dates( + date_min=date_min, date_max=date_max + ) + ] + for j in l2a_list: - indices_l2a_process_list.append([j, - i, - reprocess, - nodata_clouds, - quicklook, - cm_version, - probability, - iterations, - cld_shad, - cld_med_prob, - cld_hi_prob, - thin_cir, - ]) - logger.info("indices_l2a_process_list ({} files): \n{}".format(len(indices_l2a_process_list), indices_l2a_process_list)) + indices_l2a_process_list.append( + [ + j, + i, + reprocess, + nodata_clouds, + quicklook, + cm_version, + probability, + iterations, + cld_shad, + cld_med_prob, + cld_hi_prob, + thin_cir, + ] + ) + logger.info( + "indices_l2a_process_list ({} files): \n{}".format( + len(indices_l2a_process_list), indices_l2a_process_list + ) + ) if indices_l2a_process_list: - indices_res = idx_multiprocessing(indices_l2a_process_list, nb_proc=nb_proc) + indices_res = idx_multiprocessing( + indices_l2a_process_list, nb_proc=nb_proc + ) class TempContainer: @@ -353,13 +445,16 @@ class TempContainer: :param l1c_identifier: L1C product's identifier. :param tile: product's tile name. 
""" + _temp_path = Path(Config().get("temp_path")) def __init__(self, l1c_identifier=None, tile: str = None): self.tile = tile self.temp_id = l1c_identifier + "_TEMP" self.temp_path = self._temp_path / self.temp_id - self.l1c = L1cProduct(identifier=l1c_identifier, tile=tile, path=self.temp_path) + self.l1c = L1cProduct( + identifier=l1c_identifier, tile=tile, path=self.temp_path + ) def create_temp_folder(self) -> "TempContainer": """Creates the L1C product's temporary folder in the TEMP library folder.""" @@ -375,9 +470,13 @@ class TempContainer: """Unzips the downloaded L1C zip file in the product's temporary folder.""" zip_file = self.temp_path / (self.l1c.identifier + ".zip") if zip_file.exists(): - logger.info("{} : Unzipping L1C archive".format(self.l1c.identifier)) + logger.info( + "{} : Unzipping L1C archive".format(self.l1c.identifier) + ) shutil.unpack_archive(str(zip_file), str(self.temp_path)) - sorted(zip_file.parent.glob("*.SAFE"))[0].rename(zip_file.parent / (zip_file.stem + ".SAFE")) + sorted(zip_file.parent.glob("*.SAFE"))[0].rename( + zip_file.parent / (zip_file.stem + ".SAFE") + ) os.remove(str(zip_file)) return self @@ -388,7 +487,9 @@ class TempContainer: try: shutil.rmtree(str(self.temp_path)) except FileNotFoundError: - logger.warning("Temp folder does not exist: {}".format(self.temp_path)) + logger.warning( + "Temp folder does not exist: {}".format(self.temp_path) + ) else: logger.error("Temp folder contains a SAFE product.") return self diff --git a/sen2chain/multi_processing.py b/sen2chain/multi_processing.py index f619dfa892318db3b3f83f3f504579fea62c7452..b794ee27b25a3455d9380e25431537ab6b52044f 100644 --- a/sen2chain/multi_processing.py +++ b/sen2chain/multi_processing.py @@ -1,11 +1,12 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 import multiprocessing, subprocess import os from time import sleep import logging from functools import partial -#~ import psutil + +# import psutil from .products import L1cProduct, L2aProduct @@ -13,61 +14,72 @@ logger = logging.getLogger("Multiprocessing") logging.basicConfig(level=logging.INFO) logger.setLevel(logging.INFO) + def multi(product): try: fwd = os.path.dirname(os.path.realpath(__file__)) logger.info("Processing {}".format(product)) - cmd= ["setsid", "/usr/bin/python3", fwd + "/multiprocess_l2a.py", product] + cmd = [ + "setsid", + "/usr/bin/python3", + fwd + "/multiprocess_l2a.py", + product, + ] proc = subprocess.Popen(cmd) l1c = L1cProduct(product) - l2a_identifier = l1c.identifier.replace("L1C_", "L2A_").replace("_OPER_", "_USER_") + l2a_identifier = l1c.identifier.replace("L1C_", "L2A_").replace( + "_OPER_", "_USER_" + ) l2a_prod = L2aProduct(l2a_identifier) - while not(l2a_prod.in_library): + while not (l2a_prod.in_library): sleep(5) logger.info("End {}".format(product)) except: logger.info("Plante {}".format(product)) pass - + + def l2a_multiprocessing(process_list, nb_proc=4): """ """ - nb_proc = max(min(len(os.sched_getaffinity(0))-1, nb_proc), 1) + nb_proc = max(min(len(os.sched_getaffinity(0)) - 1, nb_proc), 1) pool = multiprocessing.Pool(nb_proc) results = [pool.map(multi, process_list)] pool.close() pool.join() return True -#~ def multi_cldidx(indice_list, l2a_identifier): - #~ l2a = L2aProduct(l2a_identifier) - #~ l2a.process_cloud_mask_v2() - #~ l2a.process_indices(indice_list, True, True) - -#~ def cldidx_multiprocessing(process_list, indice_list=["NDVI", "NDWIGAO", "NDWIMCF"], nb_proc=4): - #~ """ """ - #~ nb_proc = max(min(len(os.sched_getaffinity(0))-1, nb_proc), 1) - #~ pool = 
multiprocessing.Pool(nb_proc) - #~ results = [pool.map(partial(multi_cldidx, indice_list), process_list)] - #~ pool.close() - #~ pool.join() - #~ return True - -#~ def multi_cld(l2a_identifier): - #~ l2a = L2aProduct(l2a_identifier) - #~ try: - #~ l2a.process_cloud_mask_v2() - #~ except: - #~ pass - -#~ def cld_multiprocessing(process_list, nb_proc=4): - #~ """ """ - #~ nb_proc = max(min(len(os.sched_getaffinity(0))-1, nb_proc), 1) - #~ pool = multiprocessing.Pool(nb_proc) - #~ results = [pool.map(multi_cld, process_list)] - #~ pool.close() - #~ pool.join() - #~ return True + +# def multi_cldidx(indice_list, l2a_identifier): +# l2a = L2aProduct(l2a_identifier) +# l2a.process_cloud_mask_v2() +# l2a.process_indices(indice_list, True, True) + +# def cldidx_multiprocessing(process_list, indice_list=["NDVI", "NDWIGAO", "NDWIMCF"], nb_proc=4): +# """ """ +# nb_proc = max(min(len(os.sched_getaffinity(0))-1, nb_proc), 1) +# pool = multiprocessing.Pool(nb_proc) +# results = [pool.map(partial(multi_cldidx, indice_list), process_list)] +# pool.close() +# pool.join() +# return True + +# def multi_cld(l2a_identifier): +# l2a = L2aProduct(l2a_identifier) +# try: +# l2a.process_cloud_mask_v2() +# except: +# pass + +# def cld_multiprocessing(process_list, nb_proc=4): +# """ """ +# nb_proc = max(min(len(os.sched_getaffinity(0))-1, nb_proc), 1) +# pool = multiprocessing.Pool(nb_proc) +# results = [pool.map(multi_cld, process_list)] +# pool.close() +# pool.join() +# return True + def multi_cld_ver_pro_iter_repro(l2a_ver_pro_iter_repro): l2a = L2aProduct(l2a_ver_pro_iter_repro[0]) @@ -80,33 +92,39 @@ def multi_cld_ver_pro_iter_repro(l2a_ver_pro_iter_repro): thin_cir = l2a_ver_pro_iter_repro[7] reprocess = l2a_ver_pro_iter_repro[8] try: - l2a.compute_cloud_mask(cm_version = cm_version, - probability = probability, - iterations = iterations, - cld_shad = cld_shad, - cld_med_prob = cld_med_prob, - cld_hi_prob = cld_hi_prob, - thin_cir = thin_cir, - reprocess = reprocess) + l2a.compute_cloud_mask( + cm_version=cm_version, + probability=probability, + iterations=iterations, + cld_shad=cld_shad, + cld_med_prob=cld_med_prob, + cld_hi_prob=cld_hi_prob, + thin_cir=thin_cir, + reprocess=reprocess, + ) except: pass -def cld_version_probability_iterations_reprocessing_multiprocessing(process_list, nb_proc=4): + +def cld_version_probability_iterations_reprocessing_multiprocessing( + process_list, nb_proc=4 +): """ """ - nb_proc = max(min(len(os.sched_getaffinity(0))-1, nb_proc), 1) + nb_proc = max(min(len(os.sched_getaffinity(0)) - 1, nb_proc), 1) pool = multiprocessing.Pool(nb_proc) results = [pool.map(multi_cld_ver_pro_iter_repro, process_list)] pool.close() pool.join() return True - + + def multi_idx(l2a_id_idx): l2a_identifier = l2a_id_idx[0] indice = l2a_id_idx[1] reprocess = l2a_id_idx[2] nodata_clouds = l2a_id_idx[3] - quicklook = l2a_id_idx[4] - cm_version = l2a_id_idx[5] + quicklook = l2a_id_idx[4] + cm_version = l2a_id_idx[5] probability = l2a_id_idx[6] iterations = l2a_id_idx[7] cld_shad = l2a_id_idx[8] @@ -115,24 +133,26 @@ def multi_idx(l2a_id_idx): thin_cir = l2a_id_idx[11] l2a = L2aProduct(l2a_identifier) try: - l2a.compute_indice(indice = indice, - reprocess = reprocess, - nodata_clouds = nodata_clouds, - quicklook = quicklook, - cm_version = cm_version, - probability = probability, - iterations = iterations, - cld_shad = cld_shad, - cld_med_prob = cld_med_prob, - cld_hi_prob = cld_hi_prob, - thin_cir = thin_cir, - ) + l2a.compute_indice( + indice=indice, + reprocess=reprocess, + 
nodata_clouds=nodata_clouds,
+            quicklook=quicklook,
+            cm_version=cm_version,
+            probability=probability,
+            iterations=iterations,
+            cld_shad=cld_shad,
+            cld_med_prob=cld_med_prob,
+            cld_hi_prob=cld_hi_prob,
+            thin_cir=thin_cir,
+        )
     except:
         pass
 
+
 def idx_multiprocessing(process_list, nb_proc=4):
     """ """
-    nb_proc = max(min(len(os.sched_getaffinity(0))-1, nb_proc), 1)
+    nb_proc = max(min(len(os.sched_getaffinity(0)) - 1, nb_proc), 1)
     pool = multiprocessing.Pool(nb_proc)
     results = [pool.map(multi_idx, process_list)]
     pool.close()
diff --git a/sen2chain/multiprocess_l2a.py b/sen2chain/multiprocess_l2a.py
index da7409e89f49e1cefce562f8544227d5071c29cd..db74f4202c3ec6692feed3071da037dc893cd7a1 100644
--- a/sen2chain/multiprocess_l2a.py
+++ b/sen2chain/multiprocess_l2a.py
@@ -1,5 +1,7 @@
 # -*- coding:utf-8 -*-
 
+# FIXME: delete file ?
+
 """
 This python script should be run from terminal for multiprocessing l1c-> l2a
 """
diff --git a/sen2chain/parallel_processing.py b/sen2chain/parallel_processing.py
index 6de8294311d5b49ad158691a075895c3e91c8371..3bc92a5108715c9dabad7d55e32660fc07154e68 100644
--- a/sen2chain/parallel_processing.py
+++ b/sen2chain/parallel_processing.py
@@ -1,17 +1,23 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 
 import concurrent.futures
 import urllib.request
 import time
 
-#~ from .tiles import Tile
-#~ from .products import L1cProduct, L2aProduct
+# from .tiles import Tile
+# from .products import L1cProduct, L2aProduct
 from sen2chain import Tile, L1cProduct, L2aProduct
 
 
-def process(l1c_identifier,
-            reprocess_l2a=False, reprocess_cloud_mask=False,
-            indices_list=[], reprocess_indices=False, nodata_clouds=False, quicklook=False):
+def process(
+    l1c_identifier,
+    reprocess_l2a=False,
+    reprocess_cloud_mask=False,
+    indices_list=[],
+    reprocess_indices=False,
+    nodata_clouds=False,
+    quicklook=False,
+):
     """Process a list of products in parallel.
 
     
:param l1c_identifier: @@ -27,47 +33,66 @@ def process(l1c_identifier, L1cProduct(l1c_identifier).process_l2a(reprocess=reprocess_l2a) l2a_identifier = l1c_identifier.replace("1C_", "2A_") - L2aProduct(l2a_identifier).process_cloud_mask(reprocess=reprocess_cloud_mask) - L2aProduct(l2a_identifier).process_indices(indices_list=indices_list, - nodata_clouds=nodata_clouds, - quicklook=quicklook, - reprocess=reprocess_indices, - ) + L2aProduct(l2a_identifier).process_cloud_mask( + reprocess=reprocess_cloud_mask + ) + L2aProduct(l2a_identifier).process_indices( + indices_list=indices_list, + nodata_clouds=nodata_clouds, + quicklook=quicklook, + reprocess=reprocess_indices, + ) except Exception as e: return "FAILED", e return "success", None - -def parallel_processing(identifiers_list, max_workers=3, - reprocess_l2a=False, reprocess_cloud_mask=False, - indices_list=[], reprocess_indices=False, - nodata_clouds=False, quicklook=False): +def parallel_processing( + identifiers_list, + max_workers=3, + reprocess_l2a=False, + reprocess_cloud_mask=False, + indices_list=[], + reprocess_indices=False, + nodata_clouds=False, + quicklook=False, +): """ """ prods = [L1cProduct(p.identifier) for p in identifiers_list] - with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor: - future_to_mot = {executor.submit(process, p.identifier, **kwargs): p for p in prods} + with concurrent.futures.ProcessPoolExecutor( + max_workers=max_workers + ) as executor: + future_to_mot = { + executor.submit(process, p.identifier, **kwargs): p for p in prods + } for future in concurrent.futures.as_completed(future_to_mot): identifier = future_to_mot[future] issue = future.result() - print("{} processing: {}. Errors: {}.".format(identifier, issue[0], issue[1])) + print( + "{} processing: {}. Errors: {}.".format( + identifier, issue[0], issue[1] + ) + ) if __name__ == "__main__": - kwargs = {"reprocess_l2a": False, - "reprocess_cloud_mask": False, - "indices_list": ["NDVI", "NDWIGAO", "NDWIMCP"], - "nodata_clouds": True, - "quicklook": True, - "reprocess_indices": False} + kwargs = { + "reprocess_l2a": False, + "reprocess_cloud_mask": False, + "indices_list": ["NDVI", "NDWIGAO", "NDWIMCP"], + "nodata_clouds": True, + "quicklook": True, + "reprocess_indices": False, + } identifiers_list = Tile("38LPM").l1c - parallel_processing(identifiers_list=identifiers_list, max_workers=8, **kwargs) - + parallel_processing( + identifiers_list=identifiers_list, max_workers=8, **kwargs + ) -#parallel(identifiers_list, process_indices +# parallel(identifiers_list, process_indices diff --git a/sen2chain/products.py b/sen2chain/products.py index f93bff57706fd160c1a850635f9b04b731cb227b..5159cc160a9f77a5b3028c099e26d541d1c49659 100755 --- a/sen2chain/products.py +++ b/sen2chain/products.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 """ Module for managing products and tiles in the library and temp folders. 
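
# ---------------------------------------------------------------------------
# Editor's note (not part of the patch): a self-contained reduction of the
# executor pattern used in parallel_processing.py above. Futures are mapped
# back to their inputs so results can be reported as they complete; the
# worker and items here are illustrative stand-ins for process() and the
# L1C identifiers.

import concurrent.futures


def work(item: str) -> str:
    return item.upper()


if __name__ == "__main__":
    items = ["a", "b", "c"]
    with concurrent.futures.ProcessPoolExecutor(max_workers=2) as executor:
        future_to_item = {executor.submit(work, i): i for i in items}
        for future in concurrent.futures.as_completed(future_to_item):
            print("{} -> {}".format(future_to_item[future], future.result()))
# ---------------------------------------------------------------------------
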
@@ -11,6 +11,7 @@ import re
 import os
 from pathlib import Path
 
+
 # type annotations
 from typing import List, Tuple, Optional
 
@@ -18,9 +19,20 @@ from .utils import grouper, setPermissions, get_current_Sen2Cor_version
 from .config import Config, SHARED_DATA
 from .xmlparser import MetadataParser, Sen2ChainMetadataParser
 from .sen2cor import process_sen2cor
-from .cloud_mask import create_cloud_mask, create_cloud_mask_v2, create_cloud_mask_b11, create_cloud_mask_v003, create_cloud_mask_v004
+from .cloud_mask import (
+    create_cloud_mask,
+    create_cloud_mask_v2,
+    create_cloud_mask_b11,
+    create_cloud_mask_v003,
+    create_cloud_mask_v004,
+)
 from .indices import IndicesCollection
-from .colormap import create_l2a_ql, create_l1c_ql, create_l1c_ql_v2, create_l2a_ql_v2
+from .colormap import (
+    create_l2a_ql,
+    create_l1c_ql,
+    create_l1c_ql_v2,
+    create_l2a_ql_v2,
+)
 
 s2_tiles_index = SHARED_DATA.get("tiles_index")
 
@@ -29,13 +41,14 @@ logger = logging.getLogger(__name__)
 
 
 class Product:
-    """ Base product template class. This class is designed to be
+    """Base product template class. This class is designed to be
     instantiated only by sub-classes.
 
     :param identifier: L1C product's identifier or SAFE filename.
     :param tile: tile's name.
     :param path: parent's SAFE folder path.
     """
+
     _library_path = Path("")
     _metadata_filename_templates = ()
 
@@ -46,10 +59,10 @@ class Product:
         return object.__new__(cls)
 
     def __init__(
-            self,
-            identifier: str = None,
-            tile: str = None,
-            path: pathlib.PosixPath = None
+        self,
+        identifier: str = None,
+        tile: str = None,
+        path: pathlib.PosixPath = None,
     ) -> None:
         if identifier is None:
             raise ValueError("Product identifier is empty")
@@ -76,7 +89,9 @@ class Product:
 
         if path is None:
             self.path = self.library_path
-            logger.debug("No path specified, using library's: {}".format(self.path))
+            logger.debug(
+                "No path specified, using library's: {}".format(self.path)
+            )
         else:
             self.path = path / self.safe
 
@@ -101,10 +116,16 @@ class Product:
         if self._metadata_parser is None:
             try:
                 self._get_metadata_path()
-                self._metadata_parser = MetadataParser(metadata_path=self._metadata_path, tile=self.tile)
+                self._metadata_parser = MetadataParser(
+                    metadata_path=self._metadata_path, tile=self.tile
+                )
             except Exception as e:
                 logger.debug("{}".format(e))
-                logger.error("{}: could not load metadata: {}".format(self.identifier, self._metadata_path))
+                logger.error(
+                    "{}: could not load metadata: {}".format(
+                        self.identifier, self._metadata_path
+                    )
+                )
 
     def _get_metadata_path(self) -> None:
         """Searches and sets the metadata path of the product."""
@@ -112,7 +133,12 @@ class Product:
         if self._tiled:
             filename = self._tiled_metadata
         else:
-            filename = self.identifier.replace("_PRD_", "_MTD_").replace("_MSI", "_SAF") + ".xml"
+            filename = (
+                self.identifier.replace("_PRD_", "_MTD_").replace(
+                    "_MSI", "_SAF"
+                )
+                + ".xml"
+            )
         metadata_path = self.path / filename
         if not metadata_path.exists():
             self._metadata_path = None
@@ -165,13 +191,13 @@ class Product:
     :param tile: product's tile.
     :param path: parent folder. 
""" + _library_path = Path(Config().get("l1c_path")) _tiled_metadata = "MTD_MSIL1C.xml" - def __init__(self, - identifier: str = None, - tile: str = None, - path: str = None): + def __init__( + self, identifier: str = None, tile: str = None, path: str = None + ): super().__init__(identifier=identifier, tile=tile, path=path) @@ -179,10 +205,12 @@ class L1cProduct(Product): raise ValueError("Invalid L1C product name") def process_l2a(self, reprocess: bool = False) -> "L1cProduct": - """ process sen2cor """ + """process sen2cor""" logger.info("{}: processing L2A".format(self.identifier)) - l2a_identifier = self.identifier.replace("L1C_", "L2A_").replace("_OPER_", "_USER_") + l2a_identifier = self.identifier.replace("L1C_", "L2A_").replace( + "_OPER_", "_USER_" + ) l2a_path = self.path.parent / (l2a_identifier + ".SAFE") l2a_prod = L2aProduct(l2a_identifier, self.tile, l2a_path.parent) @@ -215,43 +243,49 @@ class L1cProduct(Product): l2a_prod.update_md() return self - def process_ql(self, - reprocess: bool = False, - out_path: pathlib.PosixPath = None, - out_resolution: Tuple[int, int] = (100, 100), - jpg = False, - ) -> "L1cProduct": - """ - """ + def process_ql( + self, + reprocess: bool = False, + out_path: pathlib.PosixPath = None, + out_resolution: Tuple[int, int] = (100, 100), + jpg=False, + ) -> "L1cProduct": + """ """ logger.info("{}: processing L1C Quicklook".format(self.identifier)) if jpg: - ql_filename = self.identifier + "_QL-"+ str(out_resolution[0]) + "m.jpg" + ql_filename = ( + self.identifier + "_QL-" + str(out_resolution[0]) + "m.jpg" + ) else: - ql_filename = self.identifier + "_QL-"+ str(out_resolution[0]) + "m.tif" - + ql_filename = ( + self.identifier + "_QL-" + str(out_resolution[0]) + "m.tif" + ) + if out_path is None: ql_folder = self.library_path.parent / "QL" ql_folder.mkdir(parents=True, exist_ok=True) ql_path = ql_folder / ql_filename else: ql_path = Path(out_path) - + if ql_path.exists() and not reprocess: logger.info("{} Quicklook already exists".format(self.identifier)) else: - #~ create_l1c_ql(tci = self.tci, - #~ out_path = ql_path, - #~ out_resolution = out_resolution, - #~ jpg = jpg) - create_l1c_ql_v2(tci = self.tci, - out_path = ql_path, - out_resolution = out_resolution, - jpg = jpg) + # create_l1c_ql(tci = self.tci, + # out_path = ql_path, + # out_resolution = out_resolution, + # jpg = jpg) + create_l1c_ql_v2( + tci=self.tci, + out_path=ql_path, + out_resolution=out_resolution, + jpg=jpg, + ) self.user_ql = ql_path return ql_path - + def remove(self): if self.path.is_symlink(): l1c_path = os.readlink(str(self.path)) @@ -260,10 +294,10 @@ class L1cProduct(Product): logger.info("Removing symlink: {}".format(self.path)) self.path.unlink() else: - #~ l1c_path = os.readlink(str(l1c.path)) + # l1c_path = os.readlink(str(l1c.path)) logger.info("Removing: {}".format(self.path)) shutil.rmtree(str(self.path)) - + # METADATA @property def product_start_time(self): @@ -382,15 +416,13 @@ class L2aProduct(Product): :param tile: tile's name. :param path: parent's SAFE folder path. 
""" + _library_path = Path(Config().get("l2a_path")) _indices_library_path = Path(Config().get("indices_path")) _tiled_metadata = "MTD_MSIL2A.xml" - + def __init__( - self, - identifier: str = None, - tile: str = None, - path: str = None + self, identifier: str = None, tile: str = None, path: str = None ) -> None: super().__init__(identifier=identifier, tile=tile, path=path) if not re.match(r".*L2A_.*", identifier): @@ -399,71 +431,80 @@ class L2aProduct(Product): self.indices_path = self._indices_library_path # user cloud mask - #~ self.user_cloud_mask = self.path.parent / (self.identifier + "_CLOUD_MASK.jp2") - #~ if not self.user_cloud_mask.exists(): - #~ self.user_cloud_mask = None - #~ # user cloud mask b11 - #~ self.user_cloud_mask_b11 = self.path.parent / (self.identifier + "_CLOUD_MASK_B11.jp2") - #~ if not self.user_cloud_mask_b11.exists(): - #~ self.user_cloud_mask_b11 = None + # self.user_cloud_mask = self.path.parent / (self.identifier + "_CLOUD_MASK.jp2") + # if not self.user_cloud_mask.exists(): + # self.user_cloud_mask = None + # # user cloud mask b11 + # self.user_cloud_mask_b11 = self.path.parent / (self.identifier + "_CLOUD_MASK_B11.jp2") + # if not self.user_cloud_mask_b11.exists(): + # self.user_cloud_mask_b11 = None # user QL self.user_ql = self.path.parent / (self.identifier + "_QL.tif") if not self.user_ql.exists(): self.user_ql = None - + # versions self._sen2chain_info_path = self.path / "sen2chain_info.xml" - if self._sen2chain_info_path.parent.exists() and not self._sen2chain_info_path.exists(): + if ( + self._sen2chain_info_path.parent.exists() + and not self._sen2chain_info_path.exists() + ): Sen2ChainMetadataParser(self._sen2chain_info_path).init_metadata() - - def process_ql(self, - reprocess: bool = False, - out_path: pathlib.PosixPath = None, - out_resolution: Tuple[int, int] = (100, 100), - jpg = False, - ) -> "L2aProduct": - """ - """ + + def process_ql( + self, + reprocess: bool = False, + out_path: pathlib.PosixPath = None, + out_resolution: Tuple[int, int] = (100, 100), + jpg=False, + ) -> "L2aProduct": + """ """ logger.info("{}: processing L2A Quicklook".format(self.identifier)) if jpg: - ql_filename = self.identifier + "_QL-"+ str(out_resolution[0]) + "m.jpg" + ql_filename = ( + self.identifier + "_QL-" + str(out_resolution[0]) + "m.jpg" + ) else: - ql_filename = self.identifier + "_QL-"+ str(out_resolution[0]) + "m.tif" - + ql_filename = ( + self.identifier + "_QL-" + str(out_resolution[0]) + "m.tif" + ) + if out_path is None: ql_folder = self.library_path.parent / "QL" ql_folder.mkdir(parents=True, exist_ok=True) ql_path = ql_folder / ql_filename else: ql_path = Path(out_path) - + if ql_path.exists() and not reprocess: logger.info("{} Quicklook already exists".format(self.identifier)) else: - #~ create_l2a_ql(b02 = self.b02_10m, - #~ b03 = self.b03_10m, - #~ b04 = self.b04_10m, - #~ out_path = ql_path, - #~ out_resolution = out_resolution, - #~ jpg = jpg) - - create_l2a_ql_v2(tci = self.tci_10m, - out_path = ql_path, - out_resolution = out_resolution, - jpg = jpg) - + # create_l2a_ql(b02 = self.b02_10m, + # b03 = self.b03_10m, + # b04 = self.b04_10m, + # out_path = ql_path, + # out_resolution = out_resolution, + # jpg = jpg) + + create_l2a_ql_v2( + tci=self.tci_10m, + out_path=ql_path, + out_resolution=out_resolution, + jpg=jpg, + ) + self.user_ql = ql_path return self - def process_cloud_mask(self, - buffering: bool = True, - reprocess: bool = False, - out_path: pathlib.PosixPath = None - ) -> "L2aProduct": - """ - """ + def process_cloud_mask( + 
self, + buffering: bool = True, + reprocess: bool = False, + out_path: pathlib.PosixPath = None, + ) -> "L2aProduct": + """ """ logger.info("{}: processing cloud_mask".format(self.identifier)) cloud_mask_filename = self.identifier + "_CLOUD_MASK.tif" @@ -478,26 +519,34 @@ class L2aProduct(Product): if cloud_mask_path.exists() and not reprocess: logger.info("{} cloud mask already exists".format(self.identifier)) else: - create_cloud_mask(self.cld_20m, - buffering=buffering, - erosion=-40, - dilatation=100, - out_path=cloud_mask_path) + create_cloud_mask( + self.cld_20m, + buffering=buffering, + erosion=-40, + dilatation=100, + out_path=cloud_mask_path, + ) self.user_cloud_mask = cloud_mask_path return self - def process_cloud_mask_v2(self, - buffering: bool = True, - reprocess: bool = False, - out_path_mask = None, - out_path_mask_b11 = None - ) -> "L2aProduct": - """ - """ + def process_cloud_mask_v2( + self, + buffering: bool = True, + reprocess: bool = False, + out_path_mask=None, + out_path_mask_b11=None, + ) -> "L2aProduct": + """ """ logger.info("{}: processing cloud_mask_v2".format(self.identifier)) cloud_mask_filename = self.identifier + "_CLOUD_MASK.jp2" - cloud_mask_b11_filename = str(Path(cloud_mask_filename).parent/Path(cloud_mask_filename).stem) + "_B11.jp2" + cloud_mask_b11_filename = ( + str( + Path(cloud_mask_filename).parent + / Path(cloud_mask_filename).stem + ) + + "_B11.jp2" + ) if out_path_mask is None: cloud_mask_folder = self.library_path.parent @@ -514,124 +563,172 @@ class L2aProduct(Product): cloud_mask_b11_path = Path(out_path_mask_b11) if cloud_mask_path.exists() and not reprocess: - logger.info("{} cloud mask v2 already exists".format(self.identifier)) + logger.info( + "{} cloud mask v2 already exists".format(self.identifier) + ) else: - create_cloud_mask_v2(self.cld_20m, - erosion=1, - dilatation=5, - out_path=cloud_mask_path) + create_cloud_mask_v2( + self.cld_20m, erosion=1, dilatation=5, out_path=cloud_mask_path + ) self.user_cloud_mask = cloud_mask_path if cloud_mask_b11_path.exists() and not reprocess: - logger.info("{} cloud mask v2 masked b11 already exists".format(self.identifier)) + logger.info( + "{} cloud mask v2 masked b11 already exists".format( + self.identifier + ) + ) else: - create_cloud_mask_b11(self.user_cloud_mask, - self.b11_20m, - dilatation=5, - out_path=cloud_mask_b11_path) + create_cloud_mask_b11( + self.user_cloud_mask, + self.b11_20m, + dilatation=5, + out_path=cloud_mask_b11_path, + ) self.user_cloud_mask_b11 = cloud_mask_b11_path return self - - - def compute_cloud_mask(self, - cm_version: str = "cm001", - probability: int = 1, - iterations: int = 5, - cld_shad: bool = True, - cld_med_prob: bool = True, - cld_hi_prob: bool = True, - thin_cir: bool = True, - reprocess: bool = False, - #~ out_path_mask = None, - #~ out_path_mask_b11 = None - ) -> "L2aProduct": - """ - """ + + def compute_cloud_mask( + self, + cm_version: str = "cm001", + probability: int = 1, + iterations: int = 5, + cld_shad: bool = True, + cld_med_prob: bool = True, + cld_hi_prob: bool = True, + thin_cir: bool = True, + reprocess: bool = False, + # out_path_mask = None, + # out_path_mask_b11 = None + ) -> "L2aProduct": + """ """ if cm_version == "cm003": - logger.info("Computing cloudmask version {}, probability {}%, iteration(s) {}: {}".format(cm_version, probability, iterations, self.identifier)) + logger.info( + "Computing cloudmask version {}, probability {}%, iteration(s) {}: {}".format( + cm_version, probability, iterations, self.identifier + ) + ) elif 
cm_version == "cm004": - logger.info("Computing cloudmask version {}, ITER {}, SHAD {}, MED-PRB {}, HI-PRB {}, CIRR {}: {}"\ - .format(cm_version, iterations, cld_shad, cld_med_prob, cld_hi_prob, thin_cir, self.identifier)) + logger.info( + "Computing cloudmask version {}, ITER {}, SHAD {}, MED-PRB {}, HI-PRB {}, CIRR {}: {}".format( + cm_version, + iterations, + cld_shad, + cld_med_prob, + cld_hi_prob, + thin_cir, + self.identifier, + ) + ) else: - logger.info("Computing cloudmask version {}: {}".format(cm_version, self.identifier)) - - cloudmask = NewCloudMaskProduct(l2a_identifier = self.identifier, - sen2chain_processing_version = self.sen2chain_processing_version, - cm_version = cm_version, - probability = probability, - iterations = iterations, - cld_shad = cld_shad, - cld_med_prob = cld_med_prob, - cld_hi_prob = cld_hi_prob, - thin_cir = thin_cir, - ) - + logger.info( + "Computing cloudmask version {}: {}".format( + cm_version, self.identifier + ) + ) + + cloudmask = NewCloudMaskProduct( + l2a_identifier=self.identifier, + sen2chain_processing_version=self.sen2chain_processing_version, + cm_version=cm_version, + probability=probability, + iterations=iterations, + cld_shad=cld_shad, + cld_med_prob=cld_med_prob, + cld_hi_prob=cld_hi_prob, + thin_cir=thin_cir, + ) + if cloudmask.path.exists() and not reprocess: - logger.info("{} cloud mask already computed".format(cloudmask.identifier)) + logger.info( + "{} cloud mask already computed".format(cloudmask.identifier) + ) else: if not cloudmask.path.parent.exists(): cloudmask.path.parent.mkdir(parents=True) if cm_version == "cm001": - if cloudmask.path.exists(): # in version 3.8 will be updated using missing_ok = True + if ( + cloudmask.path.exists() + ): # in version 3.8 will be updated using missing_ok = True cloudmask.path.unlink() cloudmask._info_path.unlink() - create_cloud_mask_v2(self.cld_20m, - erosion=1, - dilatation=5, - out_path=cloudmask.path) - #~ self.user_cloud_mask = cloudmask.updated_path + create_cloud_mask_v2( + self.cld_20m, + erosion=1, + dilatation=5, + out_path=cloudmask.path, + ) + # self.user_cloud_mask = cloudmask.updated_path elif cm_version == "cm002": - cloudmask_cm001 = NewCloudMaskProduct(l2a_identifier = self.identifier, - sen2chain_processing_version = self.sen2chain_processing_version, - cm_version = "cm001") + cloudmask_cm001 = NewCloudMaskProduct( + l2a_identifier=self.identifier, + sen2chain_processing_version=self.sen2chain_processing_version, + cm_version="cm001", + ) if cloudmask_cm001.path.exists(): - if cloudmask.path.exists(): # in version 3.8 will be updated using missing_ok = True + if ( + cloudmask.path.exists() + ): # in version 3.8 will be updated using missing_ok = True cloudmask.path.unlink() cloudmask._info_path.unlink() - create_cloud_mask_b11(cloudmask_cm001.path, - self.b11_20m, - dilatation=5, - out_path=Path(str(cloudmask_cm001.path).replace("CM001", "CM002-B11"))) + create_cloud_mask_b11( + cloudmask_cm001.path, + self.b11_20m, + dilatation=5, + out_path=Path( + str(cloudmask_cm001.path).replace( + "CM001", "CM002-B11" + ) + ), + ) else: - logger.info("No cloudmask version cm001 found, please compute this one first") + logger.info( + "No cloudmask version cm001 found, please compute this one first" + ) elif cm_version == "cm003": - if cloudmask.path.exists(): # in version 3.8 will be updated using missing_ok = True + if ( + cloudmask.path.exists() + ): # in version 3.8 will be updated using missing_ok = True cloudmask.path.unlink() cloudmask._info_path.unlink() - 
create_cloud_mask_v003(cloud_mask = self.msk_cldprb_20m, - out_path = cloudmask.path, - probability = probability, - iterations = iterations, - ) - - + create_cloud_mask_v003( + cloud_mask=self.msk_cldprb_20m, + out_path=cloudmask.path, + probability=probability, + iterations=iterations, + ) + elif cm_version == "cm004": - if cloudmask.path.exists(): # in version 3.8 will be updated using missing_ok = True + if ( + cloudmask.path.exists() + ): # in version 3.8 will be updated using missing_ok = True cloudmask.path.unlink() cloudmask._info_path.unlink() - create_cloud_mask_v004(scl_path = self.scl_20m, - out_path = cloudmask.path, - iterations = iterations, - cld_shad = cld_shad, - cld_med_prob = cld_med_prob, - cld_hi_prob = cld_hi_prob, - thin_cir = thin_cir, - ) + create_cloud_mask_v004( + scl_path=self.scl_20m, + out_path=cloudmask.path, + iterations=iterations, + cld_shad=cld_shad, + cld_med_prob=cld_med_prob, + cld_hi_prob=cld_hi_prob, + thin_cir=thin_cir, + ) else: logger.info("Wrong cloudmask version {}".format(cm_version)) cloudmask.init_md() - - #~ return self - - def process_indices(self, - indices_list: List[str] = [], - nodata_clouds: bool = False, - quicklook: bool = False, - reprocess: bool = False, - out_path: str = None - ) -> "L2aProduct": + + # return self + + def process_indices( + self, + indices_list: List[str] = [], + nodata_clouds: bool = False, + quicklook: bool = False, + reprocess: bool = False, + out_path: str = None, + ) -> "L2aProduct": """ Process indices. @@ -651,36 +748,52 @@ class L2aProduct(Product): logger.info("Processing {}: {}".format(indice, self.identifier)) indice_cls = IndicesCollection.get_indice_cls(indice) if indice_cls is None: - print("Indices available: {}".format(IndicesCollection.available_indices)) + print( + "Indices available: {}".format( + IndicesCollection.available_indices + ) + ) raise ValueError("Indice not defined") if out_path is None: # logger.info(self.identifier) - indice_path = self.indices_path / indice.upper() / self.tile / self.identifier + indice_path = ( + self.indices_path + / indice.upper() + / self.tile + / self.identifier + ) else: - indice_path = Path(out_path) / (self.identifier + "_INDICES") / indice.upper() + indice_path = ( + Path(out_path) + / (self.identifier + "_INDICES") + / indice.upper() + ) indice_path.mkdir(parents=True, exist_ok=True) indice_obj = indice_cls(self) - indice_obj.process_indice(out_path=indice_path, - nodata_clouds=nodata_clouds, - quicklook=quicklook, - reprocess=reprocess) + indice_obj.process_indice( + out_path=indice_path, + nodata_clouds=nodata_clouds, + quicklook=quicklook, + reprocess=reprocess, + ) return self - def compute_indice(self, - indice: str = None, - reprocess: bool = False, - nodata_clouds: bool = True, - quicklook: bool = False, - cm_version: str = "cm001", - probability: int = 1, - iterations: int = 5, - cld_shad: bool = True, - cld_med_prob: bool = True, - cld_hi_prob: bool = True, - thin_cir: bool = True, - out_path: str = None, - ) -> "L2aProduct": + def compute_indice( + self, + indice: str = None, + reprocess: bool = False, + nodata_clouds: bool = True, + quicklook: bool = False, + cm_version: str = "cm001", + probability: int = 1, + iterations: int = 5, + cld_shad: bool = True, + cld_med_prob: bool = True, + cld_hi_prob: bool = True, + thin_cir: bool = True, + out_path: str = None, + ) -> "L2aProduct": """ compute and mask indice specified cloudmask version :param indice: a valid indice name @@ -693,75 +806,102 @@ class L2aProduct(Product): """ 
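+        # Illustrative call only (sketch: the product identifier is elided
+        # and the indice name assumes NDVI is registered in IndicesCollection):
+        #     L2aProduct("S2B_MSIL2A_...").compute_indice(
+        #         indice="NDVI",
+        #         nodata_clouds=True,
+        #         cm_version="cm004",
+        #         quicklook=True,
+        #     )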
logger.info("Computing indice {}: {}".format(indice, self.identifier)) - #~ if not isinstance(indices_list, list): - #~ raise TypeError("Indices must be provided as a list.") + # if not isinstance(indices_list, list): + # raise TypeError("Indices must be provided as a list.") - #~ for indice in set(indices_list): - #~ logger.info("Processing {}: {}".format(indice, self.identifier)) + # for indice in set(indices_list): + # logger.info("Processing {}: {}".format(indice, self.identifier)) indice_cls = IndicesCollection.get_indice_cls(indice) if indice_cls is None: - print("Indices available: {}".format(IndicesCollection.available_indices)) + print( + "Indices available: {}".format( + IndicesCollection.available_indices + ) + ) raise ValueError("Indice not defined") if out_path is None: # logger.info(self.identifier) - indice_path = self.indices_path / indice.upper() / self.tile / self.identifier + indice_path = ( + self.indices_path + / indice.upper() + / self.tile + / self.identifier + ) else: - indice_path = Path(out_path) / (self.identifier + "_INDICES") / indice.upper() + indice_path = ( + Path(out_path) + / (self.identifier + "_INDICES") + / indice.upper() + ) indice_path.mkdir(parents=True, exist_ok=True) - - indice_obj = indice_cls(self, NewCloudMaskProduct(l2a_identifier = self.identifier, - cm_version = cm_version, - probability = probability, - iterations = iterations, - cld_shad = cld_shad, - cld_med_prob = cld_med_prob, - cld_hi_prob = cld_hi_prob, - thin_cir = thin_cir)) - - indice_obj.process_indice(out_path = indice_path, - nodata_clouds = nodata_clouds, - quicklook = quicklook, - reprocess = reprocess) + + indice_obj = indice_cls( + self, + NewCloudMaskProduct( + l2a_identifier=self.identifier, + cm_version=cm_version, + probability=probability, + iterations=iterations, + cld_shad=cld_shad, + cld_med_prob=cld_med_prob, + cld_hi_prob=cld_hi_prob, + thin_cir=thin_cir, + ), + ) + + indice_obj.process_indice( + out_path=indice_path, + nodata_clouds=nodata_clouds, + quicklook=quicklook, + reprocess=reprocess, + ) if nodata_clouds: - indice_raw = IndiceProduct(l2a_identifier = self.identifier, - indice = indice, - masked = not nodata_clouds, - cm_version = cm_version, - probability = probability, - iterations = iterations, - cld_shad = cld_shad, - cld_med_prob = cld_med_prob, - cld_hi_prob = cld_hi_prob, - thin_cir = thin_cir, - ) - - indice_masked = IndiceProduct(l2a_identifier = self.identifier, - indice = indice, - masked = nodata_clouds, - cm_version = cm_version, - probability = probability, - iterations = iterations, - cld_shad = cld_shad, - cld_med_prob = cld_med_prob, - cld_hi_prob = cld_hi_prob, - thin_cir = thin_cir, - ) + indice_raw = IndiceProduct( + l2a_identifier=self.identifier, + indice=indice, + masked=not nodata_clouds, + cm_version=cm_version, + probability=probability, + iterations=iterations, + cld_shad=cld_shad, + cld_med_prob=cld_med_prob, + cld_hi_prob=cld_hi_prob, + thin_cir=thin_cir, + ) + + indice_masked = IndiceProduct( + l2a_identifier=self.identifier, + indice=indice, + masked=nodata_clouds, + cm_version=cm_version, + probability=probability, + iterations=iterations, + cld_shad=cld_shad, + cld_med_prob=cld_med_prob, + cld_hi_prob=cld_hi_prob, + thin_cir=thin_cir, + ) if quicklook: - indice_masked_ql = IndiceProduct(identifier = indice_masked.identifier.replace(".jp2", "_QL.tif")) - + indice_masked_ql = IndiceProduct( + identifier=indice_masked.identifier.replace( + ".jp2", "_QL.tif" + ) + ) + else: - IndiceProduct(l2a_identifier = self.identifier, - 
indice = indice, - masked = nodata_clouds, - cm_version = cm_version, - probability = probability, - iterations = iterations, - cld_shad = cld_shad, - cld_med_prob = cld_med_prob, - cld_hi_prob = cld_hi_prob, - thin_cir = thin_cir, - ) + IndiceProduct( + l2a_identifier=self.identifier, + indice=indice, + masked=nodata_clouds, + cm_version=cm_version, + probability=probability, + iterations=iterations, + cld_shad=cld_shad, + cld_med_prob=cld_med_prob, + cld_hi_prob=cld_hi_prob, + thin_cir=thin_cir, + ) return self @@ -784,31 +924,33 @@ class L2aProduct(Product): # indice_filename = template.format(product_identifier=self.identifier, # ext=ext) # return (self.indices_path / indice / self.tile / indice_filename).exists() - - #~ @property - #~ def sen2cor_version(self): - #~ """ Used Sen2Cor version""" - #~ return Sen2ChainMetadataParser(self.identifier): - - - #~ sen2chain_info_path = self.path / "Sen2Chain_info.xml" - #~ if sen2chain_info_path.exists(): - #~ return "xml present" - #~ Sen2ChainMetadataParser(self.identifier) - - #~ else: - #~ return None - - def update_md(self, - sen2chain_version: str = None, - sen2chain_processing_version: str = None, - sen2cor_version: str = None, - ): - """ Set custom sen2chain, sen2chain_processing and sen2cor versions """ - Sen2ChainMetadataParser(self._sen2chain_info_path).set_metadata(sen2chain_version = sen2chain_version, - sen2chain_processing_version = sen2chain_processing_version, - sen2cor_version = sen2cor_version) - + + # @property + # def sen2cor_version(self): + # """ Used Sen2Cor version""" + # return Sen2ChainMetadataParser(self.identifier): + + # sen2chain_info_path = self.path / "Sen2Chain_info.xml" + # if sen2chain_info_path.exists(): + # return "xml present" + # Sen2ChainMetadataParser(self.identifier) + + # else: + # return None + + def update_md( + self, + sen2chain_version: str = None, + sen2chain_processing_version: str = None, + sen2cor_version: str = None, + ): + """Set custom sen2chain, sen2chain_processing and sen2cor versions""" + Sen2ChainMetadataParser(self._sen2chain_info_path).set_metadata( + sen2chain_version=sen2chain_version, + sen2chain_processing_version=sen2chain_processing_version, + sen2cor_version=sen2cor_version, + ) + def remove(self): if self.path.is_symlink(): l2a_path = os.readlink(str(self.path)) @@ -822,16 +964,22 @@ class L2aProduct(Product): @property def sen2chain_version(self): - return Sen2ChainMetadataParser(self._sen2chain_info_path).get_metadata_value('SEN2CHAIN_VERSION') - + return Sen2ChainMetadataParser( + self._sen2chain_info_path + ).get_metadata_value("SEN2CHAIN_VERSION") + @property def sen2chain_processing_version(self): - return Sen2ChainMetadataParser(self._sen2chain_info_path).get_metadata_value('SEN2CHAIN_PROCESSING_VERSION') - + return Sen2ChainMetadataParser( + self._sen2chain_info_path + ).get_metadata_value("SEN2CHAIN_PROCESSING_VERSION") + @property def sen2cor_version(self): - return Sen2ChainMetadataParser(self._sen2chain_info_path).get_metadata_value('SEN2COR_VERSION') - + return Sen2ChainMetadataParser( + self._sen2chain_info_path + ).get_metadata_value("SEN2COR_VERSION") + @property def generation_time(self): return self._get_metadata_value(key="GENERATION_TIME") @@ -855,7 +1003,9 @@ class L2aProduct(Product): @property def saturated_defective_pixel_percentage(self): - return self._get_metadata_value(key="SATURATED_DEFECTIVE_PIXEL_PERCENTAGE") + return self._get_metadata_value( + key="SATURATED_DEFECTIVE_PIXEL_PERCENTAGE" + ) @property def dark_features_percentage(self): @@ 
-1007,9 +1157,9 @@ class L2aProduct(Product): @property def msk_cldprb_20m(self): """new sen2cor cloud-mask's name""" - #~ try: - #~ return self._get_band_path(key="MSK_CLDPRB", res="20m") - #~ except: + # try: + # return self._get_band_path(key="MSK_CLDPRB", res="20m") + # except: return self._get_band_path(key="CLD", res="20m") @property @@ -1100,8 +1250,9 @@ class OldCloudMaskProduct: :param identifier: cloudmask filename. """ + _library_path = Path(Config().get("l2a_path")) - + def __init__(self, identifier: str = None) -> None: if identifier is None: raise ValueError("Product identifier is empty") @@ -1110,8 +1261,7 @@ class OldCloudMaskProduct: self.tile = self.get_tile(identifier) self.l2a = self.get_l2a(identifier) self.path = self._library_path / self.tile / self.identifier - - + @staticmethod def get_tile(identifier) -> str: """Returns tile name from a string. @@ -1119,56 +1269,74 @@ class OldCloudMaskProduct: :param string: string from which to extract the tile name. """ return re.findall("_T([0-9]{2}[A-Z]{3})_", identifier)[0] - + @staticmethod def get_l2a(identifier) -> str: """Returns l2a name from a old cloud mask identifier string. :param string: string from which to extract the l2a name. """ - return re.findall(r"(S2.+)_CLOUD_MASK.+jp2", identifier)[0] - - + return re.findall(r"(S2.+)_CLOUD_MASK.+jp2", identifier)[0] + + class NewCloudMaskProduct: """New cloud mask product class. :param identifier: cloudmask filename. """ + _library_path = Path(Config().get("cloudmasks_path")) - - def __init__(self, - identifier: str = None, - l2a_identifier: str = None, - sen2chain_processing_version: str = None, - cm_version: str = "cm001", - probability: int = 1, - iterations: int = 5, - cld_shad: bool = True, - cld_med_prob: bool = True, - cld_hi_prob: bool = True, - thin_cir: bool = True, - ) -> None: + + def __init__( + self, + identifier: str = None, + l2a_identifier: str = None, + sen2chain_processing_version: str = None, + cm_version: str = "cm001", + probability: int = 1, + iterations: int = 5, + cld_shad: bool = True, + cld_med_prob: bool = True, + cld_hi_prob: bool = True, + thin_cir: bool = True, + ) -> None: if not (identifier or l2a_identifier): raise ValueError("Product or L2a identifier cannot be empty") else: self.tile = self.get_tile(identifier or l2a_identifier) - self.l2a = (l2a_identifier or self.get_l2a(identifier)).replace(".SAFE", "") - self.suffix = [i for i in ["CM001", - "CM002-B11", - "CM003-PRB" + str(probability) + "-ITER" + str(iterations), - "CM004-CSH" + str(int(cld_shad)) + \ - "-CMP" + str(int(cld_med_prob)) + \ - "-CHP" + str(int(cld_hi_prob)) + \ - "-TCI" + str(int(thin_cir)) + \ - "-ITER" + str(iterations), - ] if cm_version.upper() in i][0] - self.identifier = identifier or self.l2a + "_" + self.suffix + ".jp2" - #~ self.cm_version, self.probability, self.iterations = self.get_cm_version(self.identifier) + self.l2a = (l2a_identifier or self.get_l2a(identifier)).replace( + ".SAFE", "" + ) + self.suffix = [ + i + for i in [ + "CM001", + "CM002-B11", + "CM003-PRB" + str(probability) + "-ITER" + str(iterations), + "CM004-CSH" + + str(int(cld_shad)) + + "-CMP" + + str(int(cld_med_prob)) + + "-CHP" + + str(int(cld_hi_prob)) + + "-TCI" + + str(int(thin_cir)) + + "-ITER" + + str(iterations), + ] + if cm_version.upper() in i + ][0] + self.identifier = ( + identifier or self.l2a + "_" + self.suffix + ".jp2" + ) + # self.cm_version, self.probability, self.iterations = self.get_cm_version(self.identifier) self.mask_info = self.get_cm_version(self.identifier) - - 
self.path = self._library_path / self.tile / self.l2a / self.identifier
+
+        self.path = (
+            self._library_path / self.tile / self.l2a / self.identifier
+        )
         self._info_path = self.path.parent / (self.path.stem + ".xml")
         self.init_md()
-
+
     @staticmethod
     def get_tile(identifier) -> str:
         """Returns tile name from a string.
@@ -1176,27 +1344,30 @@ class NewCloudMaskProduct:
         :param string: string from which to extract the tile name.
         """
         return re.findall("_T([0-9]{2}[A-Z]{3})_", identifier)[0]
-
+
     @staticmethod
     def get_identifier(l2a_identifier) -> str:
         """Returns l2a name from an old cloud mask identifier string.
         :param string: string from which to extract the l2a name.
         """
-        return re.findall(r"(S2.+)_CM.+jp2", identifier)[0]
-
-    #~ @staticmethod
-    #~ def get_l2a(identifier) -> str:
-    #~ """Returns l2a name from a old cloud mask identifier string.
-    #~ :param string: string from which to extract the l2a name.
-    #~ """
-    #~ return re.findall(r"(S2.+)_CM.+jp2", identifier)[0]
-
+        return re.findall(r"(S2.+)_CM.+jp2", l2a_identifier)[0]
+
+    # @staticmethod
+    # def get_l2a(identifier) -> str:
+    # """Returns l2a name from a old cloud mask identifier string.
+    # :param string: string from which to extract the l2a name.
+    # """
+    # return re.findall(r"(S2.+)_CM.+jp2", identifier)[0]
+
    @staticmethod
    def get_l2a(identifier) -> str:
        """Returns l2a name from an old cloud mask identifier string.
        :param string: string from which to extract the l2a name.
        """
-        return re.findall(r"(S2._.+_[0-9]{8}T[0-9]{6}_N[0-9]{4}_R[0-9]{3}_T[0-9]{2}[A-Z]{3}_[0-9]{8}T[0-9]{6})_.*", identifier)[0]
+        return re.findall(
+            r"(S2._.+_[0-9]{8}T[0-9]{6}_N[0-9]{4}_R[0-9]{3}_T[0-9]{2}[A-Z]{3}_[0-9]{8}T[0-9]{6})_.*",
+            identifier,
+        )[0]

    @staticmethod
    def get_cm_version(identifier) -> str:
@@ -1209,136 +1380,190 @@ class NewCloudMaskProduct:
         except:
             pass
         try:
-            pat = re.compile(r"S2.+_(?P<cm_version>CM003)" + \
-                             "-PRB(?P<probability>.*)" + \
-                             "-ITER(?P<iterations>.*)" + \
-                             ".jp2")
+            pat = re.compile(
+                r"S2.+_(?P<cm_version>CM003)"
+                + "-PRB(?P<probability>.*)"
+                + "-ITER(?P<iterations>.*)"
+                + ".jp2"
+            )
             return pat.match(identifier).groupdict()
         except:
             pass
         try:
-            pat = re.compile(r"S2.+_(?P<cm_version>CM004)" + \
-                             "-CSH(?P<cld_shad>.*)" + \
-                             "-CMP(?P<cld_med_prob>.*)" + \
-                             "-CHP(?P<cld_hi_prob>.*)" + \
-                             "-TCI(?P<thin_cir>.*)" + \
-                             "-ITER(?P<iterations>.*)" + \
-                             ".jp2")
+            pat = re.compile(
+                r"S2.+_(?P<cm_version>CM004)"
+                + "-CSH(?P<cld_shad>.*)"
+                + "-CMP(?P<cld_med_prob>.*)"
+                + "-CHP(?P<cld_hi_prob>.*)"
+                + "-TCI(?P<thin_cir>.*)"
+                + "-ITER(?P<iterations>.*)"
+                + ".jp2"
+            )
             return pat.match(identifier).groupdict()
         except:
             pass
-        #~ try:
-        #~ return re.findall(r"S2.+_(CM004)-CSH(.*)-CMP(.*)-CHP(.*)-TCI(.*)-ITER(.*)\.", identifier)[0]
-        #~ except:
-        #~ try:
-        #~ return [re.findall(r"S2.+_(CM003)-PRB(.*)-ITER(.*)\.", identifier)[0], None, None]
-        #~ except:
-        #~ try:
-        #~ return [re.findall(r"S2.+_(CM00[1-2]).+", identifier)[0], None, None]
-        #~ except:
-        #~ return [None, None, None]
-
+        # try:
+        #     return re.findall(r"S2.+_(CM004)-CSH(.*)-CMP(.*)-CHP(.*)-TCI(.*)-ITER(.*)\.", identifier)[0]
+        # except:
+        #     try:
+        #         return [re.findall(r"S2.+_(CM003)-PRB(.*)-ITER(.*)\.", identifier)[0], None, None]
+        #     except:
+        #         try:
+        #             return [re.findall(r"S2.+_(CM00[1-2]).+", identifier)[0], None, None]
+        #         except:
+        #             return [None, None, None]
+
    @property
    def sen2chain_version(self):
-        return Sen2ChainMetadataParser(self._info_path).get_metadata_value('SEN2CHAIN_VERSION')
-
+        return Sen2ChainMetadataParser(self._info_path).get_metadata_value(
+ "SEN2CHAIN_VERSION" + ) + @property def sen2chain_processing_version(self): - return Sen2ChainMetadataParser(self._info_path).get_metadata_value('SEN2CHAIN_PROCESSING_VERSION') - + return Sen2ChainMetadataParser(self._info_path).get_metadata_value( + "SEN2CHAIN_PROCESSING_VERSION" + ) + @property def sen2cor_version(self): - return Sen2ChainMetadataParser(self._info_path).get_metadata_value('SEN2COR_VERSION') - + return Sen2ChainMetadataParser(self._info_path).get_metadata_value( + "SEN2COR_VERSION" + ) + def init_md(self): - if self.path.exists() and not self._info_path.exists(): - l2a = L2aProduct(self.l2a) - if l2a._sen2chain_info_path.exists(): - Sen2ChainMetadataParser(self._info_path).set_metadata(sen2chain_version = l2a.sen2chain_version, - sen2chain_processing_version = l2a.sen2chain_processing_version, - sen2cor_version = l2a.sen2cor_version) - else: - Sen2ChainMetadataParser(self._info_path).init_metadata() - - def update_md(self, - sen2chain_version: str = None, - sen2chain_processing_version: str = None, - sen2cor_version: str = None, - ): - """ Set custom sen2chain, sen2chain_processing and sen2cor versions """ - Sen2ChainMetadataParser(self._info_path).set_metadata(sen2chain_version = sen2chain_version, - sen2chain_processing_version = sen2chain_processing_version, - sen2cor_version = sen2cor_version) + if self.path.exists() and not self._info_path.exists(): + l2a = L2aProduct(self.l2a) + if l2a._sen2chain_info_path.exists(): + Sen2ChainMetadataParser(self._info_path).set_metadata( + sen2chain_version=l2a.sen2chain_version, + sen2chain_processing_version=l2a.sen2chain_processing_version, + sen2cor_version=l2a.sen2cor_version, + ) + else: + Sen2ChainMetadataParser(self._info_path).init_metadata() + + def update_md( + self, + sen2chain_version: str = None, + sen2chain_processing_version: str = None, + sen2cor_version: str = None, + ): + """Set custom sen2chain, sen2chain_processing and sen2cor versions""" + Sen2ChainMetadataParser(self._info_path).set_metadata( + sen2chain_version=sen2chain_version, + sen2chain_processing_version=sen2chain_processing_version, + sen2cor_version=sen2cor_version, + ) class IndiceProduct: - """Indice product class. 
+ """Indice product class.""" - """ - #~ _library_path = Path(Config().get("cloudmasks_path")) + # _library_path = Path(Config().get("cloudmasks_path")) _indices_path = Path(Config().get("indices_path")) - - def __init__(self, - identifier: str = None, - l2a_identifier: str = None, - indice: str = None, - sen2chain_processing_version: str = None, - masked: bool = False, - cm_version: str = "cm001", - probability: int = 1, - iterations: int = 5, - cld_shad: bool = True, - cld_med_prob: bool = True, - cld_hi_prob: bool = True, - thin_cir: bool = True, - ) -> None: + + def __init__( + self, + identifier: str = None, + l2a_identifier: str = None, + indice: str = None, + sen2chain_processing_version: str = None, + masked: bool = False, + cm_version: str = "cm001", + probability: int = 1, + iterations: int = 5, + cld_shad: bool = True, + cld_med_prob: bool = True, + cld_hi_prob: bool = True, + thin_cir: bool = True, + ) -> None: if not (identifier or (l2a_identifier and indice)): - raise ValueError("Product or (L2a identifier and indice) cannot be empty") + raise ValueError( + "Product or (L2a identifier and indice) cannot be empty" + ) else: - self.tile = NewCloudMaskProduct.get_tile(identifier or l2a_identifier) - self.l2a = (l2a_identifier or NewCloudMaskProduct.get_l2a(identifier)).replace(".SAFE", "") - self.indice = (indice or identifier.replace(".", "_").split("_")[7]).upper() + self.tile = NewCloudMaskProduct.get_tile( + identifier or l2a_identifier + ) + self.l2a = ( + l2a_identifier or NewCloudMaskProduct.get_l2a(identifier) + ).replace(".SAFE", "") + self.indice = ( + indice or identifier.replace(".", "_").split("_")[7] + ).upper() self.masked = masked if self.masked: - #~ self.suffix = [i for i in ["CM001", "CM002-B11", "CM003-PRB" + str(probability) + "-ITER" + str(iterations)] if cm_version.upper() in i][0] - self.suffix = [i for i in ["CM001", - "CM002-B11", - "CM003-PRB" + str(probability) + "-ITER" + str(iterations), - "CM004-CSH" + str(int(cld_shad)) + \ - "-CMP" + str(int(cld_med_prob)) + \ - "-CHP" + str(int(cld_hi_prob)) + \ - "-TCI" + str(int(thin_cir)) + \ - "-ITER" + str(iterations), - ] if cm_version.upper() in i][0] - self.identifier = identifier or self.l2a + "_" + self.indice + "_" + self.suffix + ".jp2" - #~ self.cm_version, self.probability, self.iterations = NewCloudMaskProduct.get_cm_version(self.identifier) - self.mask_info = NewCloudMaskProduct.get_cm_version(self.identifier) + # self.suffix = [i for i in ["CM001", "CM002-B11", "CM003-PRB" + str(probability) + "-ITER" + str(iterations)] if cm_version.upper() in i][0] + self.suffix = [ + i + for i in [ + "CM001", + "CM002-B11", + "CM003-PRB" + + str(probability) + + "-ITER" + + str(iterations), + "CM004-CSH" + + str(int(cld_shad)) + + "-CMP" + + str(int(cld_med_prob)) + + "-CHP" + + str(int(cld_hi_prob)) + + "-TCI" + + str(int(thin_cir)) + + "-ITER" + + str(iterations), + ] + if cm_version.upper() in i + ][0] + self.identifier = ( + identifier + or self.l2a + + "_" + + self.indice + + "_" + + self.suffix + + ".jp2" + ) + # self.cm_version, self.probability, self.iterations = NewCloudMaskProduct.get_cm_version(self.identifier) + self.mask_info = NewCloudMaskProduct.get_cm_version( + self.identifier + ) else: self.suffix = None - self.identifier = identifier or self.l2a + "_" + self.indice + ".jp2" - #~ self.cm_version, self.probability, self.iterations = 3* [None] + self.identifier = ( + identifier or self.l2a + "_" + self.indice + ".jp2" + ) + # self.cm_version, self.probability, self.iterations = 3* [None] 
self.mask_info = None - self.path = self._indices_path / self.indice / self.tile / self.l2a / self.identifier - #~ self.cm_version = self.cm_version or cm_version - #~ self.probability = self.probability or probability - #~ self.iterations = self.iterations or iterations + self.path = ( + self._indices_path + / self.indice + / self.tile + / self.l2a + / self.identifier + ) + # self.cm_version = self.cm_version or cm_version + # self.probability = self.probability or probability + # self.iterations = self.iterations or iterations self._info_path = self.path.parent / (self.path.stem + ".xml") self.init_md() - + @staticmethod def get_indice(identifier) -> str: - """ - """ - return re.findall(r"S2.+_(.+)_.*jp2", identifier)[0] - + """ """ + return re.findall(r"S2.+_(.+)_.*jp2", identifier)[0] + def init_md(self): if self.path.exists() and not self._info_path.exists(): l2a = L2aProduct(self.l2a) if l2a._sen2chain_info_path.exists(): - Sen2ChainMetadataParser(self._info_path).set_metadata(sen2chain_version = l2a.sen2chain_version, - sen2chain_processing_version = l2a.sen2chain_processing_version, - sen2cor_version = l2a.sen2cor_version) + Sen2ChainMetadataParser(self._info_path).set_metadata( + sen2chain_version=l2a.sen2chain_version, + sen2chain_processing_version=l2a.sen2chain_processing_version, + sen2cor_version=l2a.sen2cor_version, + ) else: Sen2ChainMetadataParser(self._info_path).init_metadata() - diff --git a/sen2chain/sen2cor.py b/sen2chain/sen2cor.py index 5ff53a0b7ce8573189d4c439af771fc112c7c5fe..ccab997b4981054158a2f016dc1340fe985c4984 100644 --- a/sen2chain/sen2cor.py +++ b/sen2chain/sen2cor.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 """ Module for sen2cor processings. @@ -7,7 +7,7 @@ Module for sen2cor processings. import logging import pathlib import subprocess -#~ import re + from typing import Union from .config import Config @@ -19,10 +19,10 @@ logging.basicConfig(level=logging.INFO) def process_sen2cor( - l1c_product_path: Union[str, pathlib.PosixPath], - l2a_product_path: Union[str, pathlib.PosixPath], - pb: str = '99.99', - resolution: int = 10 + l1c_product_path: Union[str, pathlib.PosixPath], + l2a_product_path: Union[str, pathlib.PosixPath], + pb: str = "99.99", + resolution: int = 10, ) -> None: """Process a L1C SAFE folder using sen2cor thanks to a subprocess call. The sen2cor bashrc file must be specified in the sen2chain configuration. @@ -37,55 +37,66 @@ def process_sen2cor( # TODO: Add 60m resolution. 
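+    # Sen2Cor 02.05.05 handles a 10 m request in two successive runs: the
+    # branch below calls L2A_Process at 20 m first, then at 10 m (apparently
+    # because the 10 m output builds on the 20 m one), while versions
+    # 02.08.00 and newer are invoked once with an explicit
+    # --processing_baseline and --output_dir.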
sen2cor_bashrc_path = Config().get("sen2cor_bashrc_path") - #~ s2c_v = next(iter(re.findall('Sen2Cor-(\d{2}\.\d{2}\.\d{2})', str(sen2cor_bashrc_path))), None) + # ~ s2c_v = next(iter(re.findall('Sen2Cor-(\d{2}\.\d{2}\.\d{2})', str(sen2cor_bashrc_path))), None) s2c_v = get_current_Sen2Cor_version() - l2a_product_path_tmp = l2a_product_path.parent / (l2a_product_path.stem + '.tmp') - - if s2c_v == '02.05.05': - logger.info("sen2cor {} processing: {}".format(s2c_v, l1c_product_path)) + l2a_product_path_tmp = l2a_product_path.parent / ( + l2a_product_path.stem + ".tmp" + ) + + if s2c_v == "02.05.05": + logger.info( + "sen2cor {} processing: {}".format(s2c_v, l1c_product_path) + ) if resolution == 10: - + logger.info("sen2cor processing 20 m: {}".format(l1c_product_path)) command1 = "/bin/bash, -c, source {sen2cor_bashrc} && L2A_Process --resolution {res} {l1c_folder}".format( sen2cor_bashrc=str(sen2cor_bashrc_path), res=20, - l1c_folder=str(l1c_product_path) - ) - + l1c_folder=str(l1c_product_path), + ) + process1 = subprocess.run(command1.split(", ")) - + logger.info("sen2cor processing 10 m: {}".format(l1c_product_path)) command2 = "/bin/bash, -c, source {sen2cor_bashrc} && L2A_Process --resolution {res} {l1c_folder}".format( sen2cor_bashrc=str(sen2cor_bashrc_path), res=10, - l1c_folder=str(l1c_product_path) - ) + l1c_folder=str(l1c_product_path), + ) process2 = subprocess.run(command2.split(", ")) - + else: logger.debug("sen2cor processing: {}".format(l1c_product_path)) command = "/bin/bash, -c, source {sen2cor_bashrc} && L2A_Process --resolution {resolution} {l1c_folder}".format( sen2cor_bashrc=str(sen2cor_bashrc_path), resolution=resolution, - l1c_folder=str(l1c_product_path) - ) + l1c_folder=str(l1c_product_path), + ) process = subprocess.run(command.split(", ")) - - elif s2c_v in ['02.08.00','02.09.00', '02.10.01']: - logger.info("sen2cor {} processing: {}".format(s2c_v, l1c_product_path)) + + elif s2c_v in ["02.08.00", "02.09.00", "02.10.01"]: + logger.info( + "sen2cor {} processing: {}".format(s2c_v, l1c_product_path) + ) command = "/bin/bash, -c, source {sen2cor_bashrc} && L2A_Process --processing_baseline {processing_baseline} --output_dir {out_dir} {l1c_folder}".format( - sen2cor_bashrc = str(sen2cor_bashrc_path), - processing_baseline = pb, - out_dir = l2a_product_path_tmp, - l1c_folder = str(l1c_product_path) - ) + sen2cor_bashrc=str(sen2cor_bashrc_path), + processing_baseline=pb, + out_dir=l2a_product_path_tmp, + l1c_folder=str(l1c_product_path), + ) process = subprocess.run(command.split(", ")) - sorted(l2a_product_path_tmp.glob("*.SAFE"))[0].rename(l2a_product_path.parent / (l2a_product_path.stem + '.SAFE')) + sorted(l2a_product_path_tmp.glob("*.SAFE"))[0].rename( + l2a_product_path.parent / (l2a_product_path.stem + ".SAFE") + ) l2a_product_path_tmp.rmdir() - + elif s2c_v is not None: - logger.info('Sen2Cor version {} is not compatible with Sen2Chain'.format(s2c_v)) + logger.info( + "Sen2Cor version {} is not compatible with Sen2Chain".format(s2c_v) + ) - else: - logger.info('Could not determine sen2cor version from path, please check pattern "Sen2Cor-**.**.**" is in path') + logger.info( + 'Could not determine sen2cor version from path, please check pattern "Sen2Cor-**.**.**" is in path' + ) diff --git a/sen2chain/tiles.py b/sen2chain/tiles.py index e0053c56c636d338d30ca87aa71e2a0dd36c94b3..12f286c34702874867f76d50c616e8346875e266 100644 --- a/sen2chain/tiles.py +++ b/sen2chain/tiles.py @@ -1,8 +1,9 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 """ Module for managing products 
and tiles in the library and temp folders. """ + import pathlib import logging import re @@ -16,14 +17,25 @@ from pathlib import Path from collections import namedtuple from datetime import datetime from pprint import pformat + # type annotations from typing import List, Dict, Iterable from .config import Config, SHARED_DATA from .utils import str_to_datetime, human_size, getFolderSize from .indices import IndicesCollection -from .products import L1cProduct, L2aProduct, OldCloudMaskProduct, NewCloudMaskProduct, IndiceProduct -from .multi_processing import l2a_multiprocessing, cld_version_probability_iterations_reprocessing_multiprocessing, idx_multiprocessing +from .products import ( + L1cProduct, + L2aProduct, + OldCloudMaskProduct, + NewCloudMaskProduct, + IndiceProduct, +) +from .multi_processing import ( + l2a_multiprocessing, + cld_version_probability_iterations_reprocessing_multiprocessing, + idx_multiprocessing, +) logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -38,6 +50,7 @@ class ProductsList: namedtuple. """ + def __init__(self): self._dict = dict() self._time_index = dict() @@ -73,27 +86,33 @@ class ProductsList: return TileProduct(prod, max_date, self._dict[prod]["cloud_cover"]) def filter_dates( - self, - date_min: str = None, date_max: str = None + self, date_min: str = None, date_max: str = None ) -> "ProductsList": """Filters products list in a time range. :param date_min: oldest date. :param date_max: newest date. """ - min_date = str_to_datetime(date_min, "ymd") if date_min else self.first.date - max_date = str_to_datetime(date_max, "ymd") if date_max else self.last.date + min_date = ( + str_to_datetime(date_min, "ymd") if date_min else self.first.date + ) + max_date = ( + str_to_datetime(date_max, "ymd") if date_max else self.last.date + ) filtered = ProductsList() for k, v in self._dict.items(): if min_date.date() <= v["date"].date() <= max_date.date(): - filtered[k] = {"date": v["date"], "cloud_cover": v["cloud_cover"]} + filtered[k] = { + "date": v["date"], + "cloud_cover": v["cloud_cover"], + } return filtered def filter_clouds( - self, - cover_min: int = 0, cover_max: int = 100 + self, cover_min: int = 0, cover_max: int = 100 ) -> "ProductsList": """Filters products list. + :param cover_min: minimum cloud coverage. :param cover_max: maximum cloud coverage. 
""" @@ -101,9 +120,15 @@ class ProductsList: for k, v in self._dict.items(): if v["cloud_cover"]: if cover_min <= v["cloud_cover"] <= cover_max: - filtered[k] = {"date": v["date"], "cloud_cover": v["cloud_cover"]} + filtered[k] = { + "date": v["date"], + "cloud_cover": v["cloud_cover"], + } else: - filtered[k] = {"date": v["date"], "cloud_cover": v["cloud_cover"]} + filtered[k] = { + "date": v["date"], + "cloud_cover": v["cloud_cover"], + } return filtered def __len__(self) -> int: @@ -120,133 +145,176 @@ class ProductsList: def __getitem__(self, item) -> TileProduct: if item in self._dict: - return TileProduct(item, self._dict[item]["date"], self._dict[item]["cloud_cover"]) + return TileProduct( + item, self._dict[item]["date"], self._dict[item]["cloud_cover"] + ) else: raise KeyError("{} not found".format(item)) def __setitem__(self, item, value) -> None: - self._dict[item] = {"date": value["date"], "cloud_cover": value["cloud_cover"]} + self._dict[item] = { + "date": value["date"], + "cloud_cover": value["cloud_cover"], + } self._time_index[value["date"]] = item class CloudMaskList(ProductsList): - """Class for managing mask product list + """Class for managing mask product list""" - """ @property def cm001(self) -> "CloudMaskList": filtered = CloudMaskList() for k, v in self._dict.items(): - if ("_CM001" in k) : - filtered[k] = {"date": v["date"], "cloud_cover": v["cloud_cover"]} + if "_CM001" in k: + filtered[k] = { + "date": v["date"], + "cloud_cover": v["cloud_cover"], + } return filtered @property def cm002(self) -> "CloudMaskList": filtered = CloudMaskList() for k, v in self._dict.items(): - if ("_CM002" in k): - filtered[k] = {"date": v["date"], "cloud_cover": v["cloud_cover"]} + if "_CM002" in k: + filtered[k] = { + "date": v["date"], + "cloud_cover": v["cloud_cover"], + } return filtered @property def cm003(self) -> "CloudMaskList": filtered = CloudMaskList() for k, v in self._dict.items(): - if ("_CM003" in k): - filtered[k] = {"date": v["date"], "cloud_cover": v["cloud_cover"]} + if "_CM003" in k: + filtered[k] = { + "date": v["date"], + "cloud_cover": v["cloud_cover"], + } return filtered @property def cm004(self) -> "CloudMaskList": filtered = CloudMaskList() for k, v in self._dict.items(): - if ("_CM004" in k): - filtered[k] = {"date": v["date"], "cloud_cover": v["cloud_cover"]} + if "_CM004" in k: + filtered[k] = { + "date": v["date"], + "cloud_cover": v["cloud_cover"], + } return filtered - - def params(self, - probability: int = 1, - iterations: int = 5, - cld_shad: bool = True, - cld_med_prob: bool = True, - cld_hi_prob: bool = True, - thin_cir: bool = True, - ): + + def params( + self, + probability: int = 1, + iterations: int = 5, + cld_shad: bool = True, + cld_med_prob: bool = True, + cld_hi_prob: bool = True, + thin_cir: bool = True, + ): filtered = CloudMaskList() for k, v in self._dict.items(): if "_CM003" in k: - if "-PRB" + str(probability) + \ - "-ITER" + str(iterations) in k: - filtered[k] = {"date": v["date"], "cloud_cover": v["cloud_cover"]} + if "-PRB" + str(probability) + "-ITER" + str(iterations) in k: + filtered[k] = { + "date": v["date"], + "cloud_cover": v["cloud_cover"], + } elif "_CM004" in k: - if "-CSH" + str(int(cld_shad)) + \ - "-CMP" + str(int(cld_med_prob)) + \ - "-CHP" + str(int(cld_hi_prob)) + \ - "-TCI" + str(int(thin_cir)) + \ - "-ITER" + str(iterations) in k: - filtered[k] = {"date": v["date"], "cloud_cover": v["cloud_cover"]} + if ( + "-CSH" + + str(int(cld_shad)) + + "-CMP" + + str(int(cld_med_prob)) + + "-CHP" + + str(int(cld_hi_prob)) 
+ + "-TCI" + + str(int(thin_cir)) + + "-ITER" + + str(iterations) + in k + ): + filtered[k] = { + "date": v["date"], + "cloud_cover": v["cloud_cover"], + } else: - filtered[k] = {"date": v["date"], "cloud_cover": v["cloud_cover"]} - return filtered - - -#~ class IndicesList(ProductsList): - #~ """Class for managing indices products lists. - - #~ """ - #~ @property - #~ def raws(self) -> "ProductsList": - #~ filtered = ProductsList() - #~ for k, v in self._dict.items(): - #~ if not("MASK" in k) and not("QUICKLOOK" in k): - #~ filtered[k] = {"date": v["date"], "cloud_cover": v["cloud_cover"]} - #~ return filtered - - #~ @property - #~ def masks(self) -> "ProductsList": - #~ filtre = ProductsList() - #~ for k, v in self._dict.items(): - #~ if "MASK" in k: - #~ filtre[k] = {"date": v["date"], "cloud_cover": v["cloud_cover"]} - #~ return filtre - - #~ @property - #~ def quicklooks(self) -> "ProductsList": - #~ filtered = ProductsList() - #~ for k, v in self._dict.items(): - #~ if "QUICKLOOK" in k: - #~ filtered[k] = {"date": v["date"], "cloud_cover": v["cloud_cover"]} - #~ return filtered + filtered[k] = { + "date": v["date"], + "cloud_cover": v["cloud_cover"], + } + return filtered + + +# class IndicesList(ProductsList): +# """Class for managing indices products lists. + +# """ +# @property +# def raws(self) -> "ProductsList": +# filtered = ProductsList() +# for k, v in self._dict.items(): +# if not("MASK" in k) and not("QUICKLOOK" in k): +# filtered[k] = {"date": v["date"], "cloud_cover": v["cloud_cover"]} +# return filtered + +# @property +# def masks(self) -> "ProductsList": +# filtre = ProductsList() +# for k, v in self._dict.items(): +# if "MASK" in k: +# filtre[k] = {"date": v["date"], "cloud_cover": v["cloud_cover"]} +# return filtre + +# @property +# def quicklooks(self) -> "ProductsList": +# filtered = ProductsList() +# for k, v in self._dict.items(): +# if "QUICKLOOK" in k: +# filtered[k] = {"date": v["date"], "cloud_cover": v["cloud_cover"]} +# return filtered + class NewIndiceList(CloudMaskList): - """Class for managing indices products lists. + """Class for managing indices products lists.""" - """ @property def raws(self) -> "NewIndiceList": filtered = NewIndiceList() for k, v in self._dict.items(): - if not("_CM" in k): - filtered[k] = {"date": v["date"], "cloud_cover": v["cloud_cover"]} + if not ("_CM" in k): + filtered[k] = { + "date": v["date"], + "cloud_cover": v["cloud_cover"], + } return filtered @property def masks(self) -> "NewIndiceList": filtred = NewIndiceList() for k, v in self._dict.items(): - if ("_CM" in k) and not("_QL" in k): - filtred[k] = {"date": v["date"], "cloud_cover": v["cloud_cover"]} + if ("_CM" in k) and not ("_QL" in k): + filtred[k] = { + "date": v["date"], + "cloud_cover": v["cloud_cover"], + } return filtred - + @property def quicklooks(self) -> "NewIndiceList": filtered = NewIndiceList() for k, v in self._dict.items(): if "_QL" in k: - filtered[k] = {"date": v["date"], "cloud_cover": v["cloud_cover"]} + filtered[k] = { + "date": v["date"], + "cloud_cover": v["cloud_cover"], + } return filtered + class Tile: """Class for managing tiles in the library. 
@@ -255,27 +323,31 @@ class Tile: Usage: >>> Tile("40KCC") """ + def __init__(self, name: str): self.name = name - self._paths = {"l1c": Path(Config().get("l1c_path")) / name, - "l2a": Path(Config().get("l2a_path")) / name, - "indices": {}, - "cloudmasks": Path(Config().get("cloudmasks_path")) / name, - } + self._paths = { + "l1c": Path(Config().get("l1c_path")) / name, + "l2a": Path(Config().get("l2a_path")) / name, + "indices": {}, + "cloudmasks": Path(Config().get("cloudmasks_path")) / name, + } self._indices_path = Path(Config().get("indices_path")) self._cloudmasks_path = Path(Config().get("cloudmasks_path")) - - self._products = {"l1c": ProductsList(), - "l2a": ProductsList(), - "cloudmasks" : ProductsList(), - #~ "cloudmasks2" : ProductsList(), - "indices": dict()} + + self._products = { + "l1c": ProductsList(), + "l2a": ProductsList(), + "cloudmasks": ProductsList(), + # "cloudmasks2" : ProductsList(), + "indices": dict(), + } self._get_indices_paths() self._get_l1c_list() self._get_l2a_list() self._get_cloudmasks() - #~ self._get_indices_list() + # self._get_indices_list() self._get_new_indice_list() def _get_indices_paths(self) -> None: @@ -286,41 +358,61 @@ class Tile: def _get_l1c_list(self) -> None: """Scans L1C folder and adds products in a ProductsList.""" for f in self._paths["l1c"].glob("*L1C_*.SAFE"): - cloud_cover = L1cProduct(identifier=f.name, tile=self.name, path=None).cloud_coverage_assessment - self._products["l1c"][f.name] = {"date": Tile._get_date(f.name), "cloud_cover": float(cloud_cover)} + cloud_cover = L1cProduct( + identifier=f.name, tile=self.name, path=None + ).cloud_coverage_assessment + self._products["l1c"][f.name] = { + "date": Tile._get_date(f.name), + "cloud_cover": float(cloud_cover), + } def _get_l2a_list(self) -> None: """Scans L2A folder and adds products in a ProductsList.""" for f in self._paths["l2a"].glob("*L2A*.SAFE"): - l1c_name = f.name.replace("L2A_", "L1C_").replace("_USER_", "_OPER_") + l1c_name = f.name.replace("L2A_", "L1C_").replace( + "_USER_", "_OPER_" + ) try: date = self._products["l1c"][l1c_name].date cloud_cover = self._products["l1c"][l1c_name].cloud_cover except KeyError: date = Tile._get_date(f.name) - cloud_cover = L2aProduct(f.name, self.name).cloud_coverage_assessment - self._products["l2a"][f.name] = {"date": date, "cloud_cover": float(cloud_cover)} - - #~ def _get_cloudmasks(self) -> None: - #~ """Scans L2A folder for cloud masks and adds corresponding L2A products in a ProductsList.""" - #~ for f in self._paths["l2a"].glob("*L2A*_CLOUD_MASK.jp2"): - #~ l1c_name = f.name.replace("L2A_", "L1C_").replace("_USER_", "_OPER_").replace("_CLOUD_MASK.jp2", ".SAFE") - #~ try: - #~ date = self._products["l1c"][l1c_name].date - #~ cloud_cover = self._products["l1c"][l1c_name].cloud_cover - #~ except KeyError: - #~ date = Tile._get_date(f.name.replace("_CLOUD_MASK.jp2", ".SAFE")) - #~ cloud_cover = L2aProduct(f.name.replace("_CLOUD_MASK.jp2", ".SAFE"), self.name).cloud_coverage_assessment - #~ self._products["cloudmasks"][f.name.replace("_CLOUD_MASK.jp2", ".SAFE")] = {"date": date, "cloud_cover": float(cloud_cover)} + cloud_cover = L2aProduct( + f.name, self.name + ).cloud_coverage_assessment + self._products["l2a"][f.name] = { + "date": date, + "cloud_cover": float(cloud_cover), + } + + # def _get_cloudmasks(self) -> None: + # """Scans L2A folder for cloud masks and adds corresponding L2A products in a ProductsList.""" + # for f in self._paths["l2a"].glob("*L2A*_CLOUD_MASK.jp2"): + # l1c_name = f.name.replace("L2A_", 
"L1C_").replace("_USER_", "_OPER_").replace("_CLOUD_MASK.jp2", ".SAFE") + # try: + # date = self._products["l1c"][l1c_name].date + # cloud_cover = self._products["l1c"][l1c_name].cloud_cover + # except KeyError: + # date = Tile._get_date(f.name.replace("_CLOUD_MASK.jp2", ".SAFE")) + # cloud_cover = L2aProduct(f.name.replace("_CLOUD_MASK.jp2", ".SAFE"), self.name).cloud_coverage_assessment + # self._products["cloudmasks"][f.name.replace("_CLOUD_MASK.jp2", ".SAFE")] = {"date": date, "cloud_cover": float(cloud_cover)} def _get_cloudmasks(self) -> None: - """Scans cloudmasks folder for cloud masks and adds corresponding L2A products in a ProductsList.""" + """Scans cloudmasks folder for cloud masks and adds corresponding L2A + products in a ProductsList.""" self._products["cloudmasks"] = CloudMaskList() for f in self._paths["cloudmasks"].glob("*L2A*/*_CM*.jp2"): - l1c_name = f.parent.name.replace("L2A_", "L1C_").replace("_USER_", "_OPER_") + ".SAFE" + l1c_name = ( + f.parent.name.replace("L2A_", "L1C_").replace( + "_USER_", "_OPER_" + ) + + ".SAFE" + ) try: date = self._products["l1c"][l1c_name].date - cloud_cover = float(self._products["l1c"][l1c_name].cloud_cover) + cloud_cover = float( + self._products["l1c"][l1c_name].cloud_cover + ) except KeyError: date = Tile._get_date(f.parent.name) l2a = L2aProduct(f.parent.name, self.name) @@ -328,65 +420,79 @@ class Tile: cloud_cover = float(l2a.cloud_coverage_assessment) else: cloud_cover = None - self._products["cloudmasks"][f.name] = {"date": date, - "cloud_cover": cloud_cover, - #~ "version": re.findall(r"_(CM...)", f.name)[0] - } - - #~ def _get_indices_list(self) -> None: - #~ """Scans indices folders and adds products in a IndicesList.""" - #~ for indice, path in self._paths["indices"].items(): - #~ if path.is_dir(): - #~ self._products["indices"][indice] = IndicesList() - #~ indice_template = IndicesCollection.get_indice_cls(indice.upper()).filename_template - #~ indice_ext = IndicesCollection.get_indice_cls(indice.upper()).ext - #~ file_patterns = [indice_ext, 'QUICKLOOK.tif'] - #~ files_selected = [] - #~ for p in file_patterns: - #~ files_selected.extend(path.glob("*/*{}".format(p))) - #~ for f in files_selected: - #~ try: - #~ indice_pattern = re.sub("{.*?}", "", indice_template) - #~ remove_pattern = "{}.*".format(indice_pattern) - #~ l2a_name = re.sub(remove_pattern, '', f.name) + ".SAFE" - #~ date = self._products["l2a"][l2a_name].date - #~ cloud_cover = self._products["l2a"][l2a_name].cloud_cover - #~ except KeyError: - #~ date = Tile._get_date(f.name) - #~ cloud_cover = None - #~ self._products["indices"][indice][f.name] = {"date": date, "cloud_cover": cloud_cover} - #~ self.__dict__[indice] = self._products["indices"][indice] + self._products["cloudmasks"][f.name] = { + "date": date, + "cloud_cover": cloud_cover, + # "version": re.findall(r"_(CM...)", f.name)[0] + } + + # def _get_indices_list(self) -> None: + # """Scans indices folders and adds products in a IndicesList.""" + # for indice, path in self._paths["indices"].items(): + # if path.is_dir(): + # self._products["indices"][indice] = IndicesList() + # indice_template = IndicesCollection.get_indice_cls(indice.upper()).filename_template + # indice_ext = IndicesCollection.get_indice_cls(indice.upper()).ext + # file_patterns = [indice_ext, 'QUICKLOOK.tif'] + # files_selected = [] + # for p in file_patterns: + # files_selected.extend(path.glob("*/*{}".format(p))) + # for f in files_selected: + # try: + # indice_pattern = re.sub("{.*?}", "", indice_template) + # remove_pattern = 
"{}.*".format(indice_pattern) + # l2a_name = re.sub(remove_pattern, '', f.name) + ".SAFE" + # date = self._products["l2a"][l2a_name].date + # cloud_cover = self._products["l2a"][l2a_name].cloud_cover + # except KeyError: + # date = Tile._get_date(f.name) + # cloud_cover = None + # self._products["indices"][indice][f.name] = {"date": date, "cloud_cover": cloud_cover} + # self.__dict__[indice] = self._products["indices"][indice] def _get_new_indice_list(self) -> None: """Scans indice folders and adds products in a NewIndiceList.""" for indice, path in self._paths["indices"].items(): if path.is_dir(): self._products["indices"][indice] = NewIndiceList() - indice_template = IndicesCollection.get_indice_cls(indice.upper()).filename_template - indice_ext = IndicesCollection.get_indice_cls(indice.upper()).ext - #~ file_patterns = [indice_ext, '_QL.tif'] - file_patterns = [indice.upper() + indice_ext, - indice.upper() + "_CM*" + indice_ext, - indice.upper() + "_CM*" + "_QL.tif"] + indice_template = IndicesCollection.get_indice_cls( + indice.upper() + ).filename_template + indice_ext = IndicesCollection.get_indice_cls( + indice.upper() + ).ext + # file_patterns = [indice_ext, '_QL.tif'] + file_patterns = [ + indice.upper() + indice_ext, + indice.upper() + "_CM*" + indice_ext, + indice.upper() + "_CM*" + "_QL.tif", + ] files_selected = [] for p in file_patterns: files_selected.extend(path.glob("*/*{}".format(p))) for f in files_selected: indice_pattern = re.sub("{.*?}", "", indice_template) remove_pattern = "{}.*".format(indice_pattern) - l2a_name = re.sub(remove_pattern, '', f.name) + ".SAFE" + l2a_name = re.sub(remove_pattern, "", f.name) + ".SAFE" try: date = self._products["l2a"][l2a_name].date - cloud_cover = self._products["l2a"][l2a_name].cloud_cover + cloud_cover = self._products["l2a"][ + l2a_name + ].cloud_cover except KeyError: l1c_name = l2a_name.replace("L2A_", "L1C_") try: date = self._products["l1c"][l1c_name].date - cloud_cover = self._products["l1c"][l1c_name].cloud_cover + cloud_cover = self._products["l1c"][ + l1c_name + ].cloud_cover except KeyError: date = Tile._get_date(f.name) cloud_cover = None - self._products["indices"][indice][f.name] = {"date": date, "cloud_cover": cloud_cover} + self._products["indices"][indice][f.name] = { + "date": date, + "cloud_cover": cloud_cover, + } self.__dict__[indice] = self._products["indices"][indice] @staticmethod @@ -397,14 +503,13 @@ class Tile: """ if not re.match(r"^[0-9]{2}[A-Z]{3}$", tile_name): return False - with fiona.open(str(SHARED_DATA["tiles_index"]), 'r') as tiles_index: + with fiona.open(str(SHARED_DATA["tiles_index"]), "r") as tiles_index: tiles_names = [tile["properties"]["Name"] for tile in tiles_index] return tile_name in tiles_names @staticmethod def _get_date(product_name: str) -> datetime: - """Extracts the acquisition date of a products filename. 
- """ + """Extracts the acquisition date of a products filename.""" date_pattern = re.findall(r"[0-9]{8}T[0-9]{6}", product_name) if product_name.startswith("S2A_OPER"): date = date_pattern[1] @@ -417,7 +522,7 @@ class Tile: def __str__(self) -> str: return self.name - + @property def paths(self) -> Dict[str, pathlib.PosixPath]: """Returns all the paths related to a Tile object.""" @@ -433,10 +538,10 @@ class Tile: """Returns tile's L2A products as a ProductsList.""" return self._products["l2a"] - #~ @property - #~ def cloudmasks(self) -> "ProductsList": - #~ """Returns tile's cloud masks products as a ProductsList.""" - #~ return self._products["cloudmasks"] + # @property + # def cloudmasks(self) -> "ProductsList": + # """Returns tile's cloud masks products as a ProductsList.""" + # return self._products["cloudmasks"] @property def cloudmasks(self) -> "ProductsList": @@ -447,253 +552,346 @@ class Tile: def l1c_missings(self) -> "ProductsList": """Returns tile's L2A products that don't have a L1C as a ProductsList.""" prods_list = ProductsList() - missings_l1c_set = set(self.l2a.products) - {identifier.replace("L1C_", "L2A_").replace("__OPER__", "_USER_") - for identifier in self.l1c.products} + missings_l1c_set = set(self.l2a.products) - { + identifier.replace("L1C_", "L2A_").replace("__OPER__", "_USER_") + for identifier in self.l1c.products + } for prod in missings_l1c_set: - prods_list[prod] = {"date": self._products["l2a"][prod].date, - "cloud_cover": self._products["l2a"][prod].cloud_cover} + prods_list[prod] = { + "date": self._products["l2a"][prod].date, + "cloud_cover": self._products["l2a"][prod].cloud_cover, + } return prods_list @property def l2a_missings(self) -> "ProductsList": """Returns tile's L1C products that don't have a L2A as a ProductsList.""" prods_list = ProductsList() - missings_l2a_set = set(self.l1c.products) - {identifier.replace("L2A_", "L1C_").replace("_USER_", "_OPER_") - for identifier in self.l2a.products} + missings_l2a_set = set(self.l1c.products) - { + identifier.replace("L2A_", "L1C_").replace("_USER_", "_OPER_") + for identifier in self.l2a.products + } for prod in missings_l2a_set: - prods_list[prod] = {"date": self._products["l1c"][prod].date, - "cloud_cover": self._products["l1c"][prod].cloud_cover} + prods_list[prod] = { + "date": self._products["l1c"][prod].date, + "cloud_cover": self._products["l1c"][prod].cloud_cover, + } return prods_list - #~ @property - #~ def cloudmasks_missings(self) -> "ProductsList": - #~ """Returns tile's L2A products that don't have a cloud mask as a ProductsList.""" - #~ prods_list = ProductsList() - #~ missings_l2a_set = set(self.l2a.products) - {identifier for identifier in self.cloudmasks.products} - #~ for prod in missings_l2a_set: - #~ prods_list[prod] = {"date": self._products["l2a"][prod].date, - #~ "cloud_cover": self._products["l2a"][prod].cloud_cover} - #~ return prods_list - - def cloudmasks_missing(self, - cm_version: str = "cm001", - probability: int = 1, - iterations: int = 5, - cld_shad: bool = True, - cld_med_prob: bool = True, - cld_hi_prob: bool = True, - thin_cir: bool = True, - ) -> "ProductsList": - """Returns tile's L2A products that don't have a cloud mask as a ProductsList.""" + # @property + # def cloudmasks_missings(self) -> "ProductsList": + # """Returns tile's L2A products that don't have a cloud mask as a ProductsList.""" + # prods_list = ProductsList() + # missings_l2a_set = set(self.l2a.products) - {identifier for identifier in self.cloudmasks.products} + # for prod in missings_l2a_set: + 
#         prods_list[prod] = {"date": self._products["l2a"][prod].date,
+    #                             "cloud_cover": self._products["l2a"][prod].cloud_cover}
+    #     return prods_list
+
+    def cloudmasks_missing(
+        self,
+        cm_version: str = "cm001",
+        probability: int = 1,
+        iterations: int = 5,
+        cld_shad: bool = True,
+        cld_med_prob: bool = True,
+        cld_hi_prob: bool = True,
+        thin_cir: bool = True,
+    ) -> "ProductsList":
+        """Returns tile's L2A products that don't have a cloud mask as a
+        ProductsList."""
         prods_list = ProductsList()
-        missings_l2a_set = set(self.l2a.products) - {(re.findall(r"(S2.+)_CM.+.jp2", identifier)[0] + ".SAFE") \
-                                                     for identifier in getattr(self.cloudmasks, cm_version).\
-                                                     params(probability = probability,
-                                                            iterations = iterations,
-                                                            cld_shad = cld_shad,
-                                                            cld_med_prob = cld_med_prob,
-                                                            cld_hi_prob = cld_hi_prob,
-                                                            thin_cir = thin_cir,
-                                                            ).\
-                                                     products}
+        missings_l2a_set = set(self.l2a.products) - {
+            (re.findall(r"(S2.+)_CM.+.jp2", identifier)[0] + ".SAFE")
+            for identifier in getattr(self.cloudmasks, cm_version)
+            .params(
+                probability=probability,
+                iterations=iterations,
+                cld_shad=cld_shad,
+                cld_med_prob=cld_med_prob,
+                cld_hi_prob=cld_hi_prob,
+                thin_cir=thin_cir,
+            )
+            .products
+        }
         for prod in missings_l2a_set:
-            prods_list[prod] = {"date": self._products["l2a"][prod].date,
-                                "cloud_cover": self._products["l2a"][prod].cloud_cover}
+            prods_list[prod] = {
+                "date": self._products["l2a"][prod].date,
+                "cloud_cover": self._products["l2a"][prod].cloud_cover,
+            }
         return prods_list
-
+
    @property
    def info(self):
        logger.info("l1c: {}".format(len(self.l1c)))
        logger.info("l2a: {}".format(len(self.l2a)))
        logger.info("cloud_masks: {}".format(len(self.cloudmasks)))
        for indice, path in self._paths["indices"].items():
-            logger.info("{} (raw / masked): {} / {}".format(indice, len(getattr(self, indice).raws), len(getattr(self, indice).masks)))
-
+            logger.info(
+                "{} (raw / masked): {} / {}".format(
+                    indice,
+                    len(getattr(self, indice).raws),
+                    len(getattr(self, indice).masks),
+                )
+            )
+
    @property
    def size(self):
        try:
            local = getFolderSize(str(self.paths["l1c"]))
            total = getFolderSize(str(self.paths["l1c"]), True)
-            logger.info("l1c: {} (local: {} / archived: {})".format(human_size(total),
-                                                                    human_size(local),
-                                                                    human_size(total-local),
-                                                                    ))
+            logger.info(
+                "l1c: {} (local: {} / archived: {})".format(
+                    human_size(total),
+                    human_size(local),
+                    human_size(total - local),
+                )
+            )
        except:
            pass
        try:
            local = getFolderSize(str(self.paths["l2a"]))
            total = getFolderSize(str(self.paths["l2a"]), True)
-            logger.info("l2a: {} (local: {} / archived: {})".format(human_size(total),
-                                                                    human_size(local),
-                                                                    human_size(total-local),
-                                                                    ))
+            logger.info(
+                "l2a: {} (local: {} / archived: {})".format(
+                    human_size(total),
+                    human_size(local),
+                    human_size(total - local),
+                )
+            )
        except:
            pass
        for indice, path in self._paths["indices"].items():
-            logger.info("{}: {}".format(indice, human_size(getFolderSize(str(path), True))))
-
-    def missing_indices(self,
-                        indice: str,
-                        nodata_clouds: bool = False,
-                        cm_version: list = "cm001",
-                        probability: int = 1,
-                        iterations: int = 5,
-                        cld_shad: bool = True,
-                        cld_med_prob: bool = True,
-                        cld_hi_prob: bool = True,
-                        thin_cir: bool = True,
-                        ) -> "ProductsList":
-        """Returns tile's L2A products that don't have indices as a ProductsList."""
+            logger.info(
+                "{}: {}".format(
+                    indice, human_size(getFolderSize(str(path), True))
+                )
+            )
+
+    def missing_indices(
+        self,
+        indice: str,
+        nodata_clouds: bool = False,
+        cm_version: str = "cm001",
+        probability: int = 1,
+        iterations: int = 5,
+        cld_shad: bool = True,
+        cld_med_prob: bool = True,
+        cld_hi_prob: bool = True,
+        thin_cir: bool = True,
+    ) -> "ProductsList":
+        """
+        Returns tile's L2A products that don't have indices as a ProductsList.
+        """
        prodlist = ProductsList()
-
+
        try:
            if not nodata_clouds:
-                missings_indice_set = set(self.l2a.products) - {re.sub("_" + indice.upper() + ".+jp2", ".SAFE", identifier) \
-                                                                for identifier in getattr(getattr(self, indice.lower()), 'raws').products}
+                missings_indice_set = set(self.l2a.products) - {
+                    re.sub("_" + indice.upper() + ".+jp2", ".SAFE", identifier)
+                    for identifier in getattr(
+                        getattr(self, indice.lower()), "raws"
+                    ).products
+                }
            else:
-                missings_indice_set = set(self.l2a.products) - {re.sub("_" + indice.upper() + "_CM.+jp2", ".SAFE", identifier) \
-                                                                for identifier in getattr(getattr(getattr(self, indice.lower()), 'masks'), cm_version)\
-                                                                .params(probability = probability, iterations = iterations).products}
+                missings_indice_set = set(self.l2a.products) - {
+                    re.sub(
+                        "_" + indice.upper() + "_CM.+jp2", ".SAFE", identifier
+                    )
+                    for identifier in getattr(
+                        getattr(getattr(self, indice.lower()), "masks"),
+                        cm_version,
+                    )
+                    .params(probability=probability, iterations=iterations)
+                    .products
+                }
        except:
            logger.info("Problem finding missing indices, falling back to the full L2A list")
            missings_indice_set = set(self.l2a.products)
        for prod in missings_indice_set:
-            prodlist[prod] = {"date": self._products["l2a"][prod].date,
-                              "cloud_cover": self._products["l2a"][prod].cloud_cover}
+            prodlist[prod] = {
+                "date": self._products["l2a"][prod].date,
+                "cloud_cover": self._products["l2a"][prod].cloud_cover,
+            }
        return prodlist
-
-    def compute_l2a(self,
-                    reprocess: bool = False,
-                    p_60m_missing: bool = False,
-                    date_min: str = None,
-                    date_max: str = None,
-                    nb_proc: int = 4):
+
+    def compute_l2a(
+        self,
+        reprocess: bool = False,
+        p_60m_missing: bool = False,
+        date_min: str = None,
+        date_max: str = None,
+        nb_proc: int = 4,
+    ):
        """
        Computes all missing L2A products for L1C products between date_min
        and date_max. If reprocess is True, already processed products are
        computed again.
-
+
        """
        if reprocess:
            if p_60m_missing:
-                l2a_remove_list = [product.identifier for product in self.l2a.filter_dates(date_min = date_min, date_max = date_max) if not L2aProduct(product.identifier).b01_60m]
+                l2a_remove_list = [
+                    product.identifier
+                    for product in self.l2a.filter_dates(
+                        date_min=date_min, date_max=date_max
+                    )
+                    if not L2aProduct(product.identifier).b01_60m
+                ]
            else:
-                l2a_remove_list = [product.identifier for product in self.l2a.filter_dates(date_min = date_min, date_max = date_max)]
+                l2a_remove_list = [
+                    product.identifier
+                    for product in self.l2a.filter_dates(
+                        date_min=date_min, date_max=date_max
+                    )
+                ]
            if l2a_remove_list:
                self.remove_l2a(l2a_remove_list)
-        l1c_process_list = []
-        l1c_process_list.append(list(p.identifier for p in self.l2a_missings.filter_dates(date_min = date_min, date_max = date_max)))
+        l1c_process_list = []
+        l1c_process_list.append(
+            list(
+                p.identifier
+                for p in self.l2a_missings.filter_dates(
+                    date_min=date_min, date_max=date_max
+                )
+            )
+        )
        l1c_process_list = list(chain.from_iterable(l1c_process_list))
        if l1c_process_list:
-            logger.info("{} l1c products to process:".format(len(l1c_process_list)))
+            logger.info(
+                "{} l1c products to process:".format(len(l1c_process_list))
+            )
            logger.info("{}".format(l1c_process_list))
        else:
            logger.info("All l2a products already computed")
        l2a_res = False
        if l1c_process_list:
            l2a_res = l2a_multiprocessing(l1c_process_list, nb_proc=nb_proc)
-
-
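+    # Typical end-to-end sequence for a tile (sketch only: the dates and the
+    # indice name are placeholders; "40KCC" follows the usage example in the
+    # class docstring):
+    #     tile = Tile("40KCC")
+    #     tile.compute_l2a(date_min="2022-01-01", date_max="2022-12-31")
+    #     tile.compute_cloudmasks(cm_version="cm004")
+    #     tile.compute_indices(indices=["NDVI"], cm_version="cm004")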
- - #~ def compute_cloudmasks(self, - #~ version: str = "cm001", - #~ probability: int = 1, - #~ iterations: int = 5, - #~ date_min: str = None, - #~ date_max: str = None, - #~ nb_proc: int = 4): - #~ """ - #~ Compute all missing cloud masks for l2a products - #~ """ - - #~ cld_l2a_process_list = [] - #~ cld_l2a_process_list.append(list(p.identifier for p in self.cloudmasks_missing.filter_dates(date_min = date_min, date_max = date_max))) - #~ cld_l2a_process_list = list(chain.from_iterable(cld_l2a_process_list)) - #~ if cld_l2a_process_list: - #~ logger.info("{} l2a products to process:".format(len(cld_l2a_process_list))) - #~ logger.info("{}".format(cld_l2a_process_list)) - #~ else: - #~ logger.info("All cloud masks already computed") - #~ cld_res = False - #~ if cld_l2a_process_list: - #~ cld_res = cld_multiprocessing(cld_l2a_process_list, nb_proc=nb_proc) - - def compute_cloudmasks(self, - cm_version: str = "cm001", - probability: int = 1, - iterations: int = 5, - cld_shad: bool = True, - cld_med_prob: bool = True, - cld_hi_prob: bool = True, - thin_cir: bool = True, - reprocess: bool = False, - date_min: str = None, - date_max: str = None, - nb_proc: int = 4): - """ - Compute all (missing) cloud masks for l2a products - :param cm_version: version of cloudmask to compute. Can be either cm001, cm002, cm003, or cm004 - :param probability: only used by cm003: threshold probability of clouds to be considered - :param iterations: only used by cm003: number of iterations for dilatation process while computing cloudmask - :param reprocess: if False (default), only missing cloudmasks will be computed. if True already processed cloudmask will be computed again. - :param date_min: products before this date wont be processed. Defaut None = no limit - :param date_max: product after this date wont be processed. Defaut None = no limit - :param nb_proc: number of parallel process, limited to the number of proc of your PC (default 4) + + # def compute_cloudmasks(self, + # version: str = "cm001", + # probability: int = 1, + # iterations: int = 5, + # date_min: str = None, + # date_max: str = None, + # nb_proc: int = 4): + # """ + # Compute all missing cloud masks for l2a products + # """ + + # cld_l2a_process_list = [] + # cld_l2a_process_list.append(list(p.identifier for p in self.cloudmasks_missing.filter_dates(date_min = date_min, date_max = date_max))) + # cld_l2a_process_list = list(chain.from_iterable(cld_l2a_process_list)) + # if cld_l2a_process_list: + # logger.info("{} l2a products to process:".format(len(cld_l2a_process_list))) + # logger.info("{}".format(cld_l2a_process_list)) + # else: + # logger.info("All cloud masks already computed") + # cld_res = False + # if cld_l2a_process_list: + # cld_res = cld_multiprocessing(cld_l2a_process_list, nb_proc=nb_proc) + + def compute_cloudmasks( + self, + cm_version: str = "cm001", + probability: int = 1, + iterations: int = 5, + cld_shad: bool = True, + cld_med_prob: bool = True, + cld_hi_prob: bool = True, + thin_cir: bool = True, + reprocess: bool = False, + date_min: str = None, + date_max: str = None, + nb_proc: int = 4, + ): + """Compute all (missing) cloud masks for l2a products. + + :param cm_version: version of cloudmask to compute. Can be either cm001, + cm002, cm003, or cm004. + :param probability: only used by cm003: threshold probability of clouds + to be considered. + :param iterations: only used by cm003: number of iterations for + dilatation process while computing cloudmask. 
+        :param reprocess: if False (default), only missing cloudmasks will be
+            computed. If True, already processed cloudmasks will be computed
+            again.
+        :param date_min: products before this date won't be processed. Default:
+            None = no limit.
+        :param date_max: products after this date won't be processed. Default:
+            None = no limit.
+        :param nb_proc: number of parallel processes, limited to the number of
+            cores of your PC (default 4).
         """
-        
+
         if not reprocess:
-            cld_l2a_process_list = list([p.identifier,
-                                         cm_version,
-                                         probability,
-                                         iterations,
-                                         cld_shad,
-                                         cld_med_prob,
-                                         cld_hi_prob,
-                                         thin_cir,
-                                         reprocess] \
-                                        for p in self.cloudmasks_missing(cm_version = cm_version,
-                                                                         probability = probability,
-                                                                         iterations = iterations,
-                                                                         cld_shad = cld_shad,
-                                                                         cld_med_prob = cld_med_prob,
-                                                                         cld_hi_prob = cld_hi_prob,
-                                                                         thin_cir = thin_cir,
-                                                                         )\
-                                        .filter_dates(date_min = date_min, date_max = date_max))
+            cld_l2a_process_list = list(
+                [
+                    p.identifier,
+                    cm_version,
+                    probability,
+                    iterations,
+                    cld_shad,
+                    cld_med_prob,
+                    cld_hi_prob,
+                    thin_cir,
+                    reprocess,
+                ]
+                for p in self.cloudmasks_missing(
+                    cm_version=cm_version,
+                    probability=probability,
+                    iterations=iterations,
+                    cld_shad=cld_shad,
+                    cld_med_prob=cld_med_prob,
+                    cld_hi_prob=cld_hi_prob,
+                    thin_cir=thin_cir,
+                ).filter_dates(date_min=date_min, date_max=date_max)
+            )
         else:
-            cld_l2a_process_list = list([p.identifier,
-                                         cm_version,
-                                         probability,
-                                         iterations,
-                                         cld_shad,
-                                         cld_med_prob,
-                                         cld_hi_prob,
-                                         thin_cir,
-                                         reprocess] \
-                                        for p in self.l2a.filter_dates(date_min = date_min, date_max = date_max))
+            cld_l2a_process_list = list(
+                [
+                    p.identifier,
+                    cm_version,
+                    probability,
+                    iterations,
+                    cld_shad,
+                    cld_med_prob,
+                    cld_hi_prob,
+                    thin_cir,
+                    reprocess,
+                ]
+                for p in self.l2a.filter_dates(
+                    date_min=date_min, date_max=date_max
+                )
+            )
         if cld_l2a_process_list:
-            logger.info("{} l2a products to process:".format(len(cld_l2a_process_list)))
-            #~ logger.info("{}".format(cld_l2a_process_list))
-            cld_version_probability_iterations_reprocessing_multiprocessing(cld_l2a_process_list, nb_proc=nb_proc)
+            logger.info(
+                "{} l2a products to process:".format(len(cld_l2a_process_list))
+            )
+            # logger.info("{}".format(cld_l2a_process_list))
+            cld_version_probability_iterations_reprocessing_multiprocessing(
+                cld_l2a_process_list, nb_proc=nb_proc
+            )
         else:
             logger.info("All cloud masks already computed")
-            #~ return False
-
-    def compute_indices(self,
-                        indices: list = [],
-                        reprocess: bool = False,
-                        nodata_clouds: bool = True,
-                        quicklook: bool = False,
-                        cm_version: list = "cm001",
-                        probability: int = 1,
-                        iterations: int = 5,
-                        cld_shad: bool = True,
-                        cld_med_prob: bool = True,
-                        cld_hi_prob: bool = True,
-                        thin_cir: bool = True,
-                        date_min: str = None,
-                        date_max: str = None,
-                        nb_proc: int = 4):
+        # return False
+
+    def compute_indices(
+        self,
+        indices: list = [],
+        reprocess: bool = False,
+        nodata_clouds: bool = True,
+        quicklook: bool = False,
+        cm_version: list = "cm001",
+        probability: int = 1,
+        iterations: int = 5,
+        cld_shad: bool = True,
+        cld_med_prob: bool = True,
+        cld_hi_prob: bool = True,
+        thin_cir: bool = True,
+        date_min: str = None,
+        date_max: str = None,
+        nb_proc: int = 4,
+    ):
         """
         Compute all missing indices for l2a products
         - indices are given as a list
@@ -702,63 +900,83 @@ class Tile:
         """
         if not indices:
             indices = list(self._paths["indices"].keys())
-        #~ else:
+        # else:
         indices = [indice.upper() for indice in indices]
-        indices_l2a_process_list = []
+        indices_l2a_process_list = []
         for i in indices:
             if not reprocess:
-                l2a_list = [p.identifier for p in self.missing_indices(i,
-                                                                       nodata_clouds = nodata_clouds,
-                                                                       cm_version = cm_version,
-                                                                       probability = probability,
-                                                                       iterations = iterations,
-                                                                       cld_shad = cld_shad,
-                                                                       cld_med_prob = cld_med_prob,
-                                                                       cld_hi_prob = cld_hi_prob,
-                                                                       thin_cir = thin_cir,
-                                                                       ).filter_dates(date_min = date_min, date_max = date_max)]
+                l2a_list = [
+                    p.identifier
+                    for p in self.missing_indices(
+                        i,
+                        nodata_clouds=nodata_clouds,
+                        cm_version=cm_version,
+                        probability=probability,
+                        iterations=iterations,
+                        cld_shad=cld_shad,
+                        cld_med_prob=cld_med_prob,
+                        cld_hi_prob=cld_hi_prob,
+                        thin_cir=thin_cir,
+                    ).filter_dates(date_min=date_min, date_max=date_max)
+                ]
             else:
-                l2a_list = [p.identifier for p in self.l2a.filter_dates(date_min = date_min, date_max = date_max)]
-
-
+                l2a_list = [
+                    p.identifier
+                    for p in self.l2a.filter_dates(
+                        date_min=date_min, date_max=date_max
+                    )
+                ]
+
             for j in l2a_list:
-                indices_l2a_process_list.append([j,
-                                                 i,
-                                                 reprocess,
-                                                 nodata_clouds,
-                                                 quicklook,
-                                                 cm_version,
-                                                 probability,
-                                                 iterations,
-                                                 cld_shad,
-                                                 cld_med_prob,
-                                                 cld_hi_prob,
-                                                 thin_cir])
+                indices_l2a_process_list.append(
+                    [
+                        j,
+                        i,
+                        reprocess,
+                        nodata_clouds,
+                        quicklook,
+                        cm_version,
+                        probability,
+                        iterations,
+                        cld_shad,
+                        cld_med_prob,
+                        cld_hi_prob,
+                        thin_cir,
+                    ]
+                )
         if indices_l2a_process_list:
-            logger.info("{} indice products to process:".format(len(indices_l2a_process_list)))
+            logger.info(
+                "{} indice products to process:".format(
+                    len(indices_l2a_process_list)
+                )
+            )
             logger.info("{}".format(indices_l2a_process_list))
-            indices_res = idx_multiprocessing(indices_l2a_process_list, nb_proc=nb_proc)
+            indices_res = idx_multiprocessing(
+                indices_l2a_process_list, nb_proc=nb_proc
+            )
         else:
             logger.info("All indices already computed")
-
-    def clean_lib(self,
-                  remove_indice_tif: bool = False,
-                  remove: bool = False):
+
+    def clean_lib(self, remove_indice_tif: bool = False, remove: bool = False):
         """
-        Function to search and clean corrupted files during processing errors from Tile
-
+        Function to search and clean corrupted files left over from processing
+        errors on a Tile:
+
        - unmoved error l2a products from l1c folder
        - moved error l2a products from l2a folder
        - cloud masks error (0kb)
-        - indices error (0kb)
-
-        :param clean_list: list of tiles to be cleaned. If not provided, will process all l1c Tiles.
-        :param remove_indice_tif: bool. If True will remove present TIFF files present in indice folders. Default value False.
-        :param remove: bool. If True will effectively remove corrupted files, if False will just list identified problems. Default value False.
-
+        - indices error (0kb)
+
+        :param remove_indice_tif: bool. If True, removes TIFF files present in
+            indice folders. Default value False.
+        :param remove: bool. If True, effectively removes corrupted files; if
+            False, just lists identified problems. Default value False.
+ """ - #~ logger.info("Cleaning {} library".format(self.name)) - + # logger.info("Cleaning {} library".format(self.name)) + # identify corrupted jp2 in l1c folder nb_id = 0 nb_rm = 0 @@ -780,7 +998,10 @@ class Tile: except: logger.error("Can't remove {} from L1C folder".format(e)) # identify residual l2a from l1c folder - for f in chain(self._paths["l1c"].glob("*L2A*.SAFE"), self._paths["l1c"].glob("*L2A*.tmp")): + for f in chain( + self._paths["l1c"].glob("*L2A*.SAFE"), + self._paths["l1c"].glob("*L2A*.tmp"), + ): txt = "Identified {} in L1C folder".format(f.name) prob_id.append(txt) logger.info(txt) @@ -791,22 +1012,32 @@ class Tile: logger.info("Removing {} from L1C folder".format(f.name)) nb_rm += 1 except: - logger.error("Can't remove {} from L1C folder".format(f.name)) + logger.error( + "Can't remove {} from L1C folder".format(f.name) + ) # identify missing jp2 in L2A folder for f in self._paths["l2a"].glob("*L2A*.SAFE"): # Nb jp2 < 7 if len(list(f.glob("GRANULE/*/IMG_DATA/R10m/*.jp2"))) < 7: - txt = "Corrupted L2A {} in L2A folder (less than 7 jp2 files in R10m folder)".format(f.name) + txt = "Corrupted L2A {} in L2A folder (less than 7 jp2 files in R10m folder)".format( + f.name + ) prob_id.append(txt) logger.info(txt) nb_id += 1 if remove: try: shutil.rmtree(str(f)) - logger.info("Removing corrupted L2A {} from L2A folder".format(f.name)) + logger.info( + "Removing corrupted L2A {} from L2A folder".format( + f.name + ) + ) nb_rm += 1 except: - logger.error("Can't remove {} from L2A folder".format(f.name)) + logger.error( + "Can't remove {} from L2A folder".format(f.name) + ) # identify 0B cloud masks for f in self._paths["cloudmasks"].glob("*/*CM*.jp2"): if f.stat().st_size == 0: @@ -815,7 +1046,11 @@ class Tile: logger.info(txt) nb_id += 1 if remove: - logger.info("Removing corrupted cloud mask {} from L2A folder".format(f.name)) + logger.info( + "Removing corrupted cloud mask {} from L2A folder".format( + f.name + ) + ) f.unlink() nb_rm += 1 # identify wrong size l2a_QL @@ -826,49 +1061,59 @@ class Tile: logger.info(txt) nb_id += 1 if remove: - logger.info("Removing corrupted QL {} from L2A folder".format(f.name)) + logger.info( + "Removing corrupted QL {} from L2A folder".format( + f.name + ) + ) f.unlink() nb_rm += 1 # identify 0B or absent indice QL for f in self._paths["indices"]: - #~ logger.info(f, self._paths["indices"][f]) + # logger.info(f, self._paths["indices"][f]) for p in self._paths["indices"][f].glob("*_MSIL2A_*/"): - #~ logger.info(p) + # logger.info(p) if p.is_file(): txt = "Identified old indice format {}".format(p.name) prob_id.append(txt) logger.info(txt) nb_id += 1 if remove: - logger.info("Removing old indice format {}".format(p.name)) + logger.info( + "Removing old indice format {}".format(p.name) + ) p.unlink() nb_rm += 1 else: - #~ for q in p.glob("*_QUICKLOOK.tif"): - #~ if not ((q.stat().st_size == 3617212) or - #~ (q.stat().st_size == 4196652) or - #~ (q.stat().st_size == 3617478)): - #~ logger.info("Corrupted indice QL {} (bad size)".format(q.name)) - #~ if remove: - #~ logger.info("Removing indice QL {}".format(q.name)) - #~ q.unlink() + # for q in p.glob("*_QUICKLOOK.tif"): + # if not ((q.stat().st_size == 3617212) or + # (q.stat().st_size == 4196652) or + # (q.stat().st_size == 3617478)): + # logger.info("Corrupted indice QL {} (bad size)".format(q.name)) + # if remove: + # logger.info("Removing indice QL {}".format(q.name)) + # q.unlink() for q in list(p.glob("*.jp2")) + list(p.glob("*.tif")): - #~ logger.info(q) + # logger.info(q) try: 
Image.MAX_IMAGE_PIXELS = 120560400 - img = Image.open(str(q)) # open the image file - img.verify() # verify that it is, in fact an image + img = Image.open(str(q)) # open the image file + img.verify() # verify that it is, in fact an image except (IOError, SyntaxError) as e: - txt = 'Bad file (PIL): {}'.format(str(q.name)) # print out the names of corrupt files + txt = "Bad file (PIL): {}".format( + str(q.name) + ) # print out the names of corrupt files prob_id.append(txt) logger.info(txt) nb_id += 1 if remove: - logger.info("Removing indice QL {}".format(q.name)) + logger.info( + "Removing indice QL {}".format(q.name) + ) q.unlink() nb_rm += 1 for q in list(p.glob("*.jp2")): - if not (Path(str(q) + ".aux.xml")).exists(): + if not (Path(str(q) + ".aux.xml")).exists(): txt = "Missing metadata: {}".format(q.name) prob_id.append(txt) logger.info(txt) @@ -884,44 +1129,65 @@ class Tile: logger.info(txt) nb_id += 1 if remove: - logger.info("Removing indice QL {}".format(q.name)) + logger.info( + "Removing indice QL {}".format(q.name) + ) q.unlink() nb_rm += 1 if remove_indice_tif: for q in p.glob("*" + f.upper() + ".tif"): - txt = "Identified indice in tif format {}".format(q.name) + txt = "Identified indice in tif format {}".format( + q.name + ) prob_id.append(txt) logger.info(txt) nb_id += 1 if remove: - logger.info("Removing indice QL {}".format(q.name)) + logger.info( + "Removing indice QL {}".format(q.name) + ) q.unlink() nb_rm += 1 - return {"identified_problems": nb_id, "removed_problems": nb_rm, "problems": prob_id} - - def archive_l1c(self, - size_only: bool = False, - force: bool = False, - ): + return { + "identified_problems": nb_id, + "removed_problems": nb_rm, + "problems": prob_id, + } + + def archive_l1c( + self, + size_only: bool = False, + force: bool = False, + ): """ Check and move l1c products to l1c archive folder - + """ - + l1c_archive_path = Path(Config().get("l1c_archive_path")) - + if force: prod_list = self.l1c else: prod_list = ProductsList() - archive_l1c_set = {a for a in {identifier.replace("L2A_", "L1C_").replace("_USER_", "__OPER__") - for identifier in self.l2a.products} if a in set(self.l1c.products)} - + archive_l1c_set = { + a + for a in { + identifier.replace("L2A_", "L1C_").replace( + "_USER_", "__OPER__" + ) + for identifier in self.l2a.products + } + if a in set(self.l1c.products) + } + for prod in archive_l1c_set: - prod_list[prod] = {"date": self._products["l1c"][prod].date, - "cloud_cover": self._products["l1c"][prod].cloud_cover} + prod_list[prod] = { + "date": self._products["l1c"][prod].date, + "cloud_cover": self._products["l1c"][prod].cloud_cover, + } count = 0 - total_size = 0 + total_size = 0 if prod_list: for prod in prod_list: l1c = L1cProduct(prod.identifier) @@ -932,93 +1198,127 @@ class Tile: move_path = l1c_archive_path / l1c.tile / l1c.path.name logger.info("archiving {}".format(l1c.identifier)) move_path.parent.mkdir(exist_ok=True) - #~ shutil.move(str(l1c.path), str(move_path.parent)) - distutils.dir_util.copy_tree(str(l1c.path), str(move_path)) + # shutil.move(str(l1c.path), str(move_path.parent)) + distutils.dir_util.copy_tree( + str(l1c.path), str(move_path) + ) distutils.dir_util.remove_tree(str(l1c.path)) - l1c.path.symlink_to(move_path, target_is_directory = True) + l1c.path.symlink_to( + move_path, target_is_directory=True + ) if size_only: - logger.info("{} l1c product(s) to archive ({})".format(count, human_size(total_size))) + logger.info( + "{} l1c product(s) to archive ({})".format( + count, human_size(total_size) + ) + ) 
         else:
-            logger.info("{} l1c product(s) archived ({})".format(count, human_size(total_size)))
+            logger.info(
+                "{} l1c product(s) archived ({})".format(
+                    count, human_size(total_size)
+                )
+            )
             return total_size
         if not count:
             logger.info("No L1C products to archive")
             return 0
-
-    def archive_l2a(self,
-                    size_only: bool = False,):
+
+    def archive_l2a(
+        self,
+        size_only: bool = False,
+    ):
         """
         Check errors and move l2a products to l2a archive folder
-
+
         """
-
-        if (self.clean_lib()['identified_problems']-self.clean_lib()['removed_problems']) == 0:
-
+
+        if (
+            self.clean_lib()["identified_problems"]
+            - self.clean_lib()["removed_problems"]
+        ) == 0:
+
             l2a_archive_path = Path(Config().get("l2a_archive_path"))
-
+
             prod_list = self.l2a
-
+
             if prod_list:
                 count = 0
-                total_size = 0
+                total_size = 0
                 for prod in prod_list:
                     l2a = L2aProduct(prod.identifier)
                     if not l2a.path.is_symlink():
                         count += 1
                         total_size += getFolderSize(str(l2a.path))
                         if not size_only:
-                            move_path = l2a_archive_path / l2a.tile / l2a.path.name
+                            move_path = (
+                                l2a_archive_path / l2a.tile / l2a.path.name
+                            )
                             logger.info("archiving {}".format(l2a.identifier))
                             move_path.parent.mkdir(exist_ok=True)
                             shutil.move(str(l2a.path), str(move_path.parent))
-                            l2a.path.symlink_to(move_path, target_is_directory = True)
+                            l2a.path.symlink_to(
+                                move_path, target_is_directory=True
+                            )
                 if size_only:
-                    logger.info("{} l2a product(s) to archive ({})".format(count, human_size(total_size)))
+                    logger.info(
+                        "{} l2a product(s) to archive ({})".format(
+                            count, human_size(total_size)
+                        )
+                    )
                 else:
-                    logger.info("{} l2a product(s) archived ({})".format(count, human_size(total_size)))
+                    logger.info(
+                        "{} l2a product(s) archived ({})".format(
+                            count, human_size(total_size)
+                        )
+                    )
                 return total_size
             else:
                 logger.info("No L2A products, nothing to archive")
                 return 0
         else:
-            logger.info("Error(s) in l2a product(s) please correct them running clean_lib(remove=True) before archiving")
+            logger.info(
+                "Error(s) in l2a product(s), please correct them by running clean_lib(remove=True) before archiving"
+            )
             return 0
-
-    def archive_all(self,
-                    force: bool = False,
-                    size_only: bool = False,
-                    ):
+
+    def archive_all(
+        self,
+        force: bool = False,
+        size_only: bool = False,
+    ):
         """
         Chain archive_l1c and archive_l2a functions
-        """
-        l1c_size = self.archive_l1c(size_only = size_only, force = force)
-        l2a_size = self.archive_l2a(size_only = size_only)
-        return l1c_size + l2a_size
-
-    def compute_ql(self,
-                   product_list: list = [],
-                   resolution: int = 750,
-                   jpg: bool = True,
-                   ):
+        """
+        l1c_size = self.archive_l1c(size_only=size_only, force=force)
+        l2a_size = self.archive_l2a(size_only=size_only)
+        return l1c_size + l2a_size
+
+    def compute_ql(
+        self,
+        product_list: list = [],
+        resolution: int = 750,
+        jpg: bool = True,
+    ):
         """
         Produce or update the latest l2a quicklook for the tile
-        And remove previous ones """
+        and remove previous ones."""
         for product in product_list:
             # l1c
             if "l1c" in [item.lower() for item in product_list]:
                 for p in self.l1c:
                     l1c = L1cProduct(p.identifier)
-                    l1c.process_ql(out_resolution = (resolution, resolution), jpg = jpg)
+                    l1c.process_ql(
+                        out_resolution=(resolution, resolution), jpg=jpg
+                    )
             # l2a
             if "l2a" in [item.lower() for item in product_list]:
                 for p in self.l2a:
                     l2a = L2aProduct(p.identifier)
-                    l2a.process_ql(out_resolution = (resolution, resolution), jpg = jpg)
+                    l2a.process_ql(
+                        out_resolution=(resolution, resolution), jpg=jpg
+                    )
             # indices
-
-
-
-
+
     def update_latest_ql(self):
         """
         Produce or update the latest l2a quicklook
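# A hedged sketch of the maintenance helpers above -- clean_lib() followed by
# the archive methods; the tile name is an illustrative assumption. Note that
# archive_l2a() refuses to run while clean_lib() still reports problems.
from sen2chain import Tile

tile = Tile("40KCB")  # illustrative tile name

# Dry run: list corrupted or leftover files without deleting anything.
report = tile.clean_lib()
print(report["identified_problems"], report["problems"])

# Effective cleaning, then archive L1C and L2A products to the archive paths.
tile.clean_lib(remove=True)
print(tile.archive_all(size_only=True))  # bytes that would be moved
tile.archive_all()  # actually move products and leave symlinks behind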
for the tile @@ -1028,48 +1328,63 @@ class Tile: if p: l2a = L2aProduct(p) - outfullpath = l2a.path.parent / "QL" / (l2a.tile + '_' + p[0:4] + Tile._get_date(p).strftime("%Y%m%d") + '_QL_latest.jpg') + outfullpath = ( + l2a.path.parent + / "QL" + / ( + l2a.tile + + "_" + + p[0:4] + + Tile._get_date(p).strftime("%Y%m%d") + + "_QL_latest.jpg" + ) + ) outfullpath.parent.mkdir(parents=True, exist_ok=True) - old_ql = list((l2a.path.parent / "QL").glob('*_QL_latest.jpg*')) - liste=[a for a in old_ql if str(outfullpath) not in str(a)] + old_ql = list((l2a.path.parent / "QL").glob("*_QL_latest.jpg*")) + liste = [a for a in old_ql if str(outfullpath) not in str(a)] for f in liste: f.unlink() if outfullpath in old_ql: logger.info("{} - Latest QL already done".format(self.name)) return else: - l2a.process_ql(out_path = outfullpath, out_resolution=(750,750), jpg = True) + l2a.process_ql( + out_path=outfullpath, out_resolution=(750, 750), jpg=True + ) else: logger.info("{} - No L2A product available".format(self.name)) - + def move_old_quicklooks(self): """ - Move all old quicklooks to QL subfolder + Move all old quicklooks to QL subfolder. """ - logger.info("{}: Moving all quicklooks to QL/ subfolder".format(self.name)) + logger.info( + "{}: Moving all quicklooks to QL/ subfolder".format(self.name) + ) (self._paths["l2a"] / "QL").mkdir(exist_ok=True, parents=True) for f in self._paths["l2a"].glob("*_QL*"): if f.is_file(): f.replace(f.parent / "QL" / f.name) - + def update_old_cloudmasks(self): """ Move and rename old cloudmasks to new cloudmask folder - cloudmask xmls are removed + cloudmask xmls are removed. """ - #Move and rename old masks / B11 + # Move and rename old masks / B11 logger.info("Moving and renaming old masks") for f in self._paths["l2a"].glob("*L2A*_CLOUD_MASK*.jp2"): p = OldCloudMaskProduct(f.name) - f_renamed = f.name.replace("CLOUD_MASK_B11", "CM002-B11")\ - .replace("CLOUD_MASK", "CM001") + f_renamed = f.name.replace("CLOUD_MASK_B11", "CM002-B11").replace( + "CLOUD_MASK", "CM001" + ) logger.info(f_renamed) - p_new = NewCloudMaskProduct(identifier = f_renamed) + p_new = NewCloudMaskProduct(identifier=f_renamed) p_new.path.parent.mkdir(exist_ok=True, parents=True) p.path.replace(p_new.path) p_new.init_md() - - #Remove xml + + # Remove xml logger.info("Removing xmls") for f in self._paths["l2a"].glob("*L2A*_CLOUD_MASK*.jp2.aux.xml"): f.unlink() @@ -1079,7 +1394,9 @@ class Tile: Remove very old cloudmasks, matching pattern : *_CLOUD_MASK.tif """ files = list(self._paths["l2a"].glob("*_CLOUD_MASK.tif")) - logger.info("{}: Removing {} very old cloudmasks".format(self.name, len(files))) + logger.info( + "{}: Removing {} very old cloudmasks".format(self.name, len(files)) + ) for f in files: if f.is_file(): f.unlink() @@ -1088,35 +1405,50 @@ class Tile: """ Rename old indices to match new cloudmask nomenclature """ - #Rename old indices to default cm_version cm001 + # Rename old indices to default cm_version cm001 logger.info("Moving and renaming old indices") - + for indice, path in self._paths["indices"].items(): - logger.info("{} - Processing: {}".format(self.name, indice.upper())) - for f in list(Path(path).glob("*/*MASKED*")) + list(Path(path).glob("*/*QUICKLOOK*")): - f_renamed = f.name.replace("MASKED", "CM001").replace("QUICKLOOK", "CM001_QL") + logger.info( + "{} - Processing: {}".format(self.name, indice.upper()) + ) + for f in list(Path(path).glob("*/*MASKED*")) + list( + Path(path).glob("*/*QUICKLOOK*") + ): + f_renamed = f.name.replace("MASKED", "CM001").replace( + 
"QUICKLOOK", "CM001_QL" + ) f.rename(str(Path(f.parent / f_renamed))) logger.info(f_renamed) - IndiceProduct(identifier = f_renamed) - + IndiceProduct(identifier=f_renamed) + def init_md(self): """ - Initiate sen2chain metadata for all tile products (l2a, cloudmasks, indices (raw, masked, ql)) + Initiate sen2chain metadata for all tile products (l2a, cloudmasks, + indices (raw, masked, ql)). """ logger.info("{} - Initiating products metadata".format(self.name)) - + for l2a in [product.identifier for product in self.l2a]: L2aProduct(l2a) - + for cloudmask in [product.identifier for product in self.cloudmasks]: NewCloudMaskProduct(cloudmask) - - for indice in [val for sublist in [getattr(self, i) for i in [p for p in self.paths["indices"]]] for val in sublist]: - IndiceProduct(identifier = indice.identifier) - - def remove_l1c(self, - product_list: list = [], - ): + + for indice in [ + val + for sublist in [ + getattr(self, i) for i in [p for p in self.paths["indices"]] + ] + for val in sublist + ]: + IndiceProduct(identifier=indice.identifier) + + # FIXME: docs + def remove_l1c( + self, + product_list: list = [], + ): """ Remove l1c files """ @@ -1127,10 +1459,11 @@ class Tile: l1c.remove() logger.info("Removed: {} products".format(len(product_list))) - - def remove_l2a(self, - product_list: list = [], - ): + # FIXME: docs + def remove_l2a( + self, + product_list: list = [], + ): """ Remove l2a files """ @@ -1140,4 +1473,3 @@ class Tile: l2a = L2aProduct(identifier) l2a.remove() logger.info("Removed: {} products".format(len(product_list))) - diff --git a/sen2chain/tileset.py b/sen2chain/tileset.py index 8c5d4f8a4b9ac1917f0e6c3c058a5e01beae405e..3128e87c6e4f4a8907e099a3fd669c9f315210f3 100644 --- a/sen2chain/tileset.py +++ b/sen2chain/tileset.py @@ -1,7 +1,6 @@ -#!/usr/bin/python3 -# -*- coding: utf-8 -*- - +# coding: utf-8 +# FIXME: docs import os import shutil @@ -9,6 +8,7 @@ import logging import pathlib from pathlib import Path + # type annotations from typing import List, Dict from itertools import chain @@ -17,7 +17,11 @@ from .config import Config, SHARED_DATA from .products import L1cProduct from .tiles import Tile from .utils import human_size, human_size_decimal -from .multi_processing import l2a_multiprocessing, cld_version_probability_iterations_reprocessing_multiprocessing, idx_multiprocessing +from .multi_processing import ( + l2a_multiprocessing, + cld_version_probability_iterations_reprocessing_multiprocessing, + idx_multiprocessing, +) s2_tiles_index = SHARED_DATA.get("tiles_index") @@ -29,68 +33,88 @@ class TileSet: """ Module to process l2a, cloud masks and indices from a tile list, using multiprocessing. 
""" - - def __init__(self, - tile_list: list = [], - ): + + def __init__( + self, + tile_list: list = [], + ): self.tile_list = tile_list if not self.tile_list: - logger.info("TileSet list cannot be empty, please provide a tile list while initializing class") - + logger.info( + "TileSet list cannot be empty, please provide a tile list while initializing class" + ) + def __repr__(self) -> str: return "TileSet: {}".format(self.tile_list) - - def compute_l2a(self, - date_min: str = None, - date_max: str = None, - nb_proc: int = 8): + def compute_l2a( + self, date_min: str = None, date_max: str = None, nb_proc: int = 8 + ): """ Compute all missing l2a for l1c products """ - l1c_process_list = [] + l1c_process_list = [] for tile in self.tile_list: t = Tile(tile) - l1c_process_list.append(list(p.identifier for p in t.l2a_missings.filter_dates(date_min = date_min, date_max = date_max))) + l1c_process_list.append( + list( + p.identifier + for p in t.l2a_missings.filter_dates( + date_min=date_min, date_max=date_max + ) + ) + ) l1c_process_list = list(chain.from_iterable(l1c_process_list)) if l1c_process_list: - logger.info("{} L1C products to process:".format(len(l1c_process_list))) + logger.info( + "{} L1C products to process:".format(len(l1c_process_list)) + ) logger.info("{}".format(l1c_process_list)) l2a_multiprocessing(l1c_process_list, nb_proc=nb_proc) else: logger.info("All L2A already computed") - - def compute_cloudmasks(self, - date_min: str = None, - date_max: str = None, - nb_proc: int = 8): + + def compute_cloudmasks( + self, date_min: str = None, date_max: str = None, nb_proc: int = 8 + ): """ Compute all missing cloud masks for l2a products """ - cld_l2a_process_list = [] + cld_l2a_process_list = [] for tile in self.tile_list: t = Tile(tile) - cld_l2a_process_list.append(list(p.identifier for p in t.cloudmasks_missings.filter_dates(date_min = date_min, date_max = date_max))) + cld_l2a_process_list.append( + list( + p.identifier + for p in t.cloudmasks_missings.filter_dates( + date_min=date_min, date_max=date_max + ) + ) + ) cld_l2a_process_list = list(chain.from_iterable(cld_l2a_process_list)) if cld_l2a_process_list: - logger.info("{} L2A products to process:".format(len(cld_l2a_process_list))) + logger.info( + "{} L2A products to process:".format(len(cld_l2a_process_list)) + ) logger.info("{}".format(cld_l2a_process_list)) - #~ cld_multiprocessing(cld_l2a_process_list, nb_proc=nb_proc) + # ~ cld_multiprocessing(cld_l2a_process_list, nb_proc=nb_proc) else: logger.info("All cloud masks already computed") - - def compute_indices(self, - indices: list = [], - date_min: str = None, - date_max: str = None, - nb_proc: int = 8): + + def compute_indices( + self, + indices: list = [], + date_min: str = None, + date_max: str = None, + nb_proc: int = 8, + ): """ Compute all missing indices for l2a products - indices are given as a list - if indices not provided, will compute missing dates of already existing indices for each tile (no new indice computed) - ! 
indices won't be masked if no cloud masks are present, you have to compute cloudmasks first """ - indices_l2a_process_list = [] + indices_l2a_process_list = [] for tile in self.tile_list: t = Tile(tile) if not indices: @@ -99,19 +123,27 @@ class TileSet: indices = [indice.upper() for indice in indices] for i in indices: - l2a_list = [p.identifier for p in t.missing_indices(i).filter_dates(date_min = date_min, date_max = date_max)] + l2a_list = [ + p.identifier + for p in t.missing_indices(i).filter_dates( + date_min=date_min, date_max=date_max + ) + ] for j in l2a_list: indices_l2a_process_list.append([j, i]) if indices_l2a_process_list: - logger.info("{} l2a products to process:".format(len(indices_l2a_process_list))) + logger.info( + "{} l2a products to process:".format( + len(indices_l2a_process_list) + ) + ) logger.info("{}".format(indices_l2a_process_list)) idx_multiprocessing(indices_l2a_process_list, nb_proc=nb_proc) else: logger.info("All indices already computed") - + @property def info(self): for t in self.tile_list: logger.info(t) Tile(t).info - diff --git a/sen2chain/time_series.py b/sen2chain/time_series.py index 389b276ce348aebb0ce92add45f8a6d49fc48ad9..edaac0ecb3938f87d9475f5923c486b78330f1fd 100644 --- a/sen2chain/time_series.py +++ b/sen2chain/time_series.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 """ Module for computing time series. @@ -20,6 +20,7 @@ from shapely.geometry import mapping from rasterstats import zonal_stats import pandas as pd import geopandas as gpd + # type annotations from typing import Sequence, List, Dict, Union import matplotlib.pyplot as plt @@ -40,9 +41,14 @@ from .products import L2aProduct, IndiceProduct logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) -#~ logging.basicConfig(format='%(process)s %(asctime)s %(levelname)s:%(module)s:%(message)s', level=logging.INFO, datefmt='%Y-%m-%d %H:%M:%S') -logging.getLogger().handlers[0].setFormatter(logging.Formatter('%(process)s:%(asctime)s:%(levelname)s:%(name)s:%(message)s', - '%Y-%m-%d %H:%M:%S')) +# logging.basicConfig(format='%(process)s %(asctime)s %(levelname)s:%(module)s:%(message)s', level=logging.INFO, datefmt='%Y-%m-%d %H:%M:%S') +logging.getLogger().handlers[0].setFormatter( + logging.Formatter( + "%(process)s:%(asctime)s:%(levelname)s:%(name)s:%(message)s", + "%Y-%m-%d %H:%M:%S", + ) +) + class TimeSeries: """Class for time series extraction. @@ -55,17 +61,25 @@ class TimeSeries: :param cover_max: minimum cloud cover value. Default: 100. 
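# A short sketch of the TileSet class above, which fans the same pipeline out
# over several tiles at once; the tile names are illustrative assumptions.
from sen2chain import TileSet

ts = TileSet(["40KCB", "38LPM"])  # illustrative tile names

ts.compute_l2a(date_min="2019-01-01", date_max="2019-12-31", nb_proc=8)
ts.compute_cloudmasks(nb_proc=8)
ts.compute_indices(indices=["NDVI"], nb_proc=8)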
     Usage:
-        >>> ts = TimeSeries("polygons.geojson", indices=["NDVI"], date_min="2017-01-01", date_max="2018-01-01")
+        >>> ts = TimeSeries(
+                "polygons.geojson",
+                indices=["NDVI"],
+                date_min="2017-01-01",
+                date_max="2018-01-01"
+            )
     """
+
     def __init__(
-            self,
-            vectors_file: Union[str, pathlib.PosixPath],
-            indices: Sequence[str] = None,
-            date_min: str = None, date_max: str = None,
-            cover_min: int = 0, cover_max: int = 100,
-            field_names: str = None,
-            multiproc: int = 0,
-            getstat: bool = True
+        self,
+        vectors_file: Union[str, pathlib.PosixPath],
+        indices: Sequence[str] = None,
+        date_min: str = None,
+        date_max: str = None,
+        cover_min: int = 0,
+        cover_max: int = 100,
+        field_names: str = None,
+        multiproc: int = 0,
+        getstat: bool = True,
     ) -> None:

         self._vectors_file = Path(vectors_file)
@@ -76,7 +90,7 @@ class TimeSeries:
         self._field_names = field_names
         self._multiproc = multiproc
         self._out_path = Config().get("time_series_path")
-
+
         if indices is None:
             self._indices_list = IndicesCollection.list
         else:
@@ -84,12 +98,12 @@ class TimeSeries:
             if indice not in IndicesCollection:
                 raise ValueError("Invald indice name: {}".format(indice))
             self._indices_list = indices
-
+
         if self._field_names:
             self._key = self._field_names
         else:
-            self._key = 'fid'
-
+            self._key = "fid"
+
         self._tiles_geoms = self._get_tiles_geom_dict()
         self._df_dicts = dict()
         if getstat is True:
@@ -97,7 +111,6 @@ class TimeSeries:
             self._get_stats_multiproc()
         else:
             self._get_stats()
-

     @property
     def data(self) -> Dict[str, pd.DataFrame]:
@@ -116,8 +129,11 @@ class TimeSeries:
         return False

     def _get_tiles_geom_dict(self) -> Dict[str, List[str]]:
-        """{Feature id: list of intersected tiles} to {tile: list of intersected features ids}."""
-        intersected_tiles_dict = get_tiles_from_file(self._vectors_file, land_only=False)
+        """{Feature id: list of intersected tiles} to {tile: list of intersected
+        features ids}."""
+        intersected_tiles_dict = get_tiles_from_file(
+            self._vectors_file, land_only=False
+        )
         tiles_geom = dict()
         for feat_id, tiles_list in intersected_tiles_dict.items():
             for tile in tiles_list:
@@ -128,10 +144,12 @@ class TimeSeries:

     @staticmethod
     def _filter_products(
-            tile: Tile,
-            indice: str,
-            date_min: str, date_max: str,
-            cover_min: int, cover_max: int
+        tile: Tile,
+        indice: str,
+        date_min: str,
+        date_max: str,
+        cover_min: int,
+        cover_max: int,
     ) -> ProductsList:
         """Filters tile's indices products.
@@ -142,78 +160,92 @@
         :param cover_min: minimum cloud coverage.
         :param cover_max: maximum cloud coverage.
         """
-        products = vars(tile)[indice.lower()].masks.cm001 ##########"" Ajouter ici un truc pour choisir le CM !!!!!
-        filt = products.filter_dates(date_min, date_max).filter_clouds(cover_min, cover_max)
+        products = vars(tile)[
+            indice.lower()
+        ].masks.cm001  # TODO: add a way to choose the cloud mask version here
+        filt = products.filter_dates(date_min, date_max).filter_clouds(
+            cover_min, cover_max
+        )
         return filt

     @staticmethod
     def _get_raster_stats_in_geom(
-            feature: Dict,
-            raster_path: Union[str, pathlib.PosixPath],
-            cloud_path: str = None,
-            cloud_proba_path: str = None
+        feature: Dict,
+        raster_path: Union[str, pathlib.PosixPath],
+        cloud_path: str = None,
+        cloud_proba_path: str = None,
     ) -> Dict:
         """Extracts statistics from a raster in a geometry.

         :param feature: GeoJSON like object.
         :param raster_path: path to the raster.
""" - geom = shape(transform.transform_geom("EPSG:4326", - rasterio.open(str(raster_path)).crs["init"], - feature["geometry"])) - - #~ with rasterio.open(str(raster_path)) as raster_src: - #~ raster_profile = raster_src.profile - #~ logger.info(raster_profile) - - stats = zonal_stats(geom, str(raster_path), stats=["count", - "nodata", - "min", - "max", - "median", - "mean", - "std", - "percentile_25", - "percentile_75"])[0] - #~ logger.info(stats) - + geom = shape( + transform.transform_geom( + "EPSG:4326", + rasterio.open(str(raster_path)).crs["init"], + feature["geometry"], + ) + ) + + # with rasterio.open(str(raster_path)) as raster_src: + # raster_profile = raster_src.profile + # logger.info(raster_profile) + + stats = zonal_stats( + geom, + str(raster_path), + stats=[ + "count", + "nodata", + "min", + "max", + "median", + "mean", + "std", + "percentile_25", + "percentile_75", + ], + )[0] + # logger.info(stats) + if cloud_path: - #~ def mycount(x): - #~ return np.count_nonzero(x == 1) - #~ with rasterio.open(str(cloud_path)) as cld_src: - #~ cld_profile = cld_src.profile - #~ cld_array = cld_src.read(1, out_shape = (10980, 10980), resampling=Resampling.nearest) - #~ cld_array = cld_src.read(1) - #~ logger.info("shape: {}".format(cld_array.shape)) - - #~ stats_cld = zonal_stats(geom, cld_array, affine=raster_profile["transform"] , stats=["count", "nodata"], add_stats={'cldcount':mycount}) - #~ stats_cld = zonal_stats(geom, cld_array, affine=raster_profile["transform"], categorical=True)[0] - #~ stats_cld = zonal_stats(geom, str(cloud_path), nodata = 16383.0, categorical=True)[0] - #~ stats_cld = zonal_stats(geom, str(cloud_path), stats=["count", "nodata"], add_stats={'cldcount':mycount}) - #~ stats_cld = zonal_stats(geom, str(cloud_path), categorical=True, nodata = 16383.0, category_map= {-999: 'tttttttttttt', 1.0: 'clds', 0.0:'noclds', 16383.0: 'nnnnnnn'})[0] - stats_cld = zonal_stats(geom, str(cloud_path), stats=["count", "nodata"])[0] - - #~ logger.info(stats_cld) + # def mycount(x): + # return np.count_nonzero(x == 1) + # with rasterio.open(str(cloud_path)) as cld_src: + # cld_profile = cld_src.profile + # cld_array = cld_src.read(1, out_shape = (10980, 10980), resampling=Resampling.nearest) + # cld_array = cld_src.read(1) + # logger.info("shape: {}".format(cld_array.shape)) + + # stats_cld = zonal_stats(geom, cld_array, affine=raster_profile["transform"] , stats=["count", "nodata"], add_stats={'cldcount':mycount}) + # stats_cld = zonal_stats(geom, cld_array, affine=raster_profile["transform"], categorical=True)[0] + # stats_cld = zonal_stats(geom, str(cloud_path), nodata = 16383.0, categorical=True)[0] + # stats_cld = zonal_stats(geom, str(cloud_path), stats=["count", "nodata"], add_stats={'cldcount':mycount}) + # stats_cld = zonal_stats(geom, str(cloud_path), categorical=True, nodata = 16383.0, category_map= {-999: 'tttttttttttt', 1.0: 'clds', 0.0:'noclds', 16383.0: 'nnnnnnn'})[0] + stats_cld = zonal_stats( + geom, str(cloud_path), stats=["count", "nodata"] + )[0] + + # logger.info(stats_cld) try: nbcld = stats["nodata"] - stats_cld["nodata"] except: nbcld = 0 - #~ logger.info(stats_cld) - #~ logger.info(cld_pct) - stats['nbcld'] = nbcld - + # logger.info(stats_cld) + # logger.info(cld_pct) + stats["nbcld"] = nbcld + if cloud_proba_path: - stats_cld_prb = zonal_stats(geom, str(cloud_proba_path), stats=["mean"])[0] - stats['cldprb'] = stats_cld_prb["mean"] + stats_cld_prb = zonal_stats( + geom, str(cloud_proba_path), stats=["mean"] + )[0] + stats["cldprb"] = stats_cld_prb["mean"] 
logger.info(stats) - - - + # mettre ici le cloud mask ! - - - + return stats def _get_stats(self) -> None: @@ -222,63 +254,94 @@ class TimeSeries: with fiona.open(str(self._vectors_file), "r") as vectors: features = {feat["id"]: feat for feat in vectors} for index2, indice in enumerate(self._indices_list): - logger.info("computing {}/{}: {}".format(index2 + 1, len(self._indices_list), indice)) + logger.info( + "computing {}/{}: {}".format( + index2 + 1, len(self._indices_list), indice + ) + ) rows_list = [] - #~ for tile, fid_list in self._tiles_geoms.items(): + # for tile, fid_list in self._tiles_geoms.items(): for index3, tile in enumerate(self._tiles_geoms): fid_list = self._tiles_geoms[tile] tile_obj = Tile(tile) - logger.info("Tile {}/{}: {}".format(index3 + 1, len(self._tiles_geoms), tile)) + logger.info( + "Tile {}/{}: {}".format( + index3 + 1, len(self._tiles_geoms), tile + ) + ) if TimeSeries._is_indice_in_tile(indice, tile_obj): - tile_indice_path = tile_obj.paths["indices"][indice.lower()] - products = TimeSeries._filter_products(tile_obj, indice, - self._date_min, self._date_max, - self._cover_min, self._cover_max) + tile_indice_path = tile_obj.paths["indices"][ + indice.lower() + ] + products = TimeSeries._filter_products( + tile_obj, + indice, + self._date_min, + self._date_max, + self._cover_min, + self._cover_max, + ) for index1, prod in enumerate(products): indice_product = IndiceProduct(prod.identifier) prod_path = str(indice_product.path) - prod_path_unmasked = prod_path.replace("_CM001", "") + prod_path_unmasked = prod_path.replace( + "_CM001", "" + ) l2a = L2aProduct(indice_product.l2a) if l2a.path.exists(): prod_path_cloud_proba = l2a.msk_cldprb_20m else: prod_path_cloud_proba = None - #~ prod_path = tile_indice_path / prod.identifier[:(-12 - len(indice))] / prod.identifier - #~ cloud_path = tile_obj.paths["l2a"] / (prod.identifier[:(-12 - len(indice))] + "_CLOUD_MASK.jp2") - #~ prod_path_unmasked = tile_indice_path / prod.identifier[:(-12 - len(indice))] / (prod.identifier[:-11] + '.jp2') - #~ prod_path_cloud_proba = L2aProduct(prod.identifier[:(-12 - len(indice))]).msk_cldprb_20m - #~ logger.info(prod_path_cloud_proba) - - - logger.info("Product {}/{}: {}".format(index1 + 1, len(products), prod.identifier)) + # prod_path = tile_indice_path / prod.identifier[:(-12 - len(indice))] / prod.identifier + # cloud_path = tile_obj.paths["l2a"] / (prod.identifier[:(-12 - len(indice))] + "_CLOUD_MASK.jp2") + # prod_path_unmasked = tile_indice_path / prod.identifier[:(-12 - len(indice))] / (prod.identifier[:-11] + '.jp2') + # prod_path_cloud_proba = L2aProduct(prod.identifier[:(-12 - len(indice))]).msk_cldprb_20m + # logger.info(prod_path_cloud_proba) + + logger.info( + "Product {}/{}: {}".format( + index1 + 1, len(products), prod.identifier + ) + ) logger.info("{} features".format(len(fid_list))) - + for index, fid in enumerate(fid_list): df_dict = OrderedDict() df_dict["fid"] = fid - + # feat_properties = features[fid]["properties"] # if feat_properties: - # df_dict.update(feat_properties) - df_dict.update(TimeSeries._get_raster_stats_in_geom(features[fid], prod_path, prod_path_unmasked, prod_path_cloud_proba)) + # df_dict.update(feat_properties) + df_dict.update( + TimeSeries._get_raster_stats_in_geom( + features[fid], + prod_path, + prod_path_unmasked, + prod_path_cloud_proba, + ) + ) # df_properties = features[fid]["properties"] df_dict["date"] = prod.date df_dict["tile"] = tile df_dict["filename"] = prod.identifier - + for prop in features[fid]["properties"]: - 
df_dict[prop] = features[fid]["properties"][prop] + df_dict[prop] = features[fid][ + "properties" + ][prop] rows_list.append(df_dict) if rows_list: self._df_dicts[indice] = TimeSeries._list_to_df(rows_list) end = time.time() - logger.info("Execution time: {}".format(timedelta(seconds = end - start))) + logger.info( + "Execution time: {}".format(timedelta(seconds=end - start)) + ) def _raster_stats_multi(self, features, shared_list, proc_item): indice_product = IndiceProduct(proc_item[0].identifier) @@ -289,99 +352,136 @@ class TimeSeries: prod_path_cloud_proba = l2a.msk_cldprb_20m else: prod_path_cloud_proba = None - #~ prod_path = proc_item[2] / proc_item[0].identifier[:(-12 - len(proc_item[3]))] / proc_item[0].identifier - #~ prod_path_unmasked = proc_item[2] / proc_item[0].identifier[:(-12 - len(proc_item[3]))] / (proc_item[0].identifier[:-11] + '.jp2') - #~ prod_path_cloud_proba = L2aProduct(proc_item[0].identifier[:(-12 - len(proc_item[3]))]).msk_cldprb_20m - #~ logger.info(prod_path_cloud_proba) - + # prod_path = proc_item[2] / proc_item[0].identifier[:(-12 - len(proc_item[3]))] / proc_item[0].identifier + # prod_path_unmasked = proc_item[2] / proc_item[0].identifier[:(-12 - len(proc_item[3]))] / (proc_item[0].identifier[:-11] + '.jp2') + # prod_path_cloud_proba = L2aProduct(proc_item[0].identifier[:(-12 - len(proc_item[3]))]).msk_cldprb_20m + # logger.info(prod_path_cloud_proba) + fid = proc_item[1] result_dict = OrderedDict() result_dict["fid"] = fid - result_dict.update(TimeSeries._get_raster_stats_in_geom(features[fid], prod_path, prod_path_unmasked, prod_path_cloud_proba)) + result_dict.update( + TimeSeries._get_raster_stats_in_geom( + features[fid], + prod_path, + prod_path_unmasked, + prod_path_cloud_proba, + ) + ) result_dict["date"] = proc_item[0].date result_dict["tile"] = proc_item[4] result_dict["filename"] = proc_item[0].identifier for prop in features[fid]["properties"]: - #~ if type(features[fid]["properties"][prop]) == float: - #~ result_dict[prop] = "{:.6f}".format(features[fid]["properties"][prop]) - #~ logger.info("toto {}".format(result_dict[prop])) - #~ else: - #~ result_dict[prop] = features[fid]["properties"][prop] - #~ logger.info("tata {}".format(result_dict[prop])) + # if type(features[fid]["properties"][prop]) == float: + # result_dict[prop] = "{:.6f}".format(features[fid]["properties"][prop]) + # logger.info("toto {}".format(result_dict[prop])) + # else: + # result_dict[prop] = features[fid]["properties"][prop] + # logger.info("tata {}".format(result_dict[prop])) result_dict[prop] = features[fid]["properties"][prop] shared_list.append(result_dict) - + def _get_stats_multiproc(self) -> None: start = time.time() """Compute stats in polygons.""" with fiona.open(str(self._vectors_file), "r") as vectors: features = {feat["id"]: feat for feat in vectors} for index2, indice in enumerate(self._indices_list): - logger.info("computing {}/{}: {}".format(index2 + 1, len(self._indices_list), indice)) - #~ rows_list = [] + logger.info( + "computing {}/{}: {}".format( + index2 + 1, len(self._indices_list), indice + ) + ) + # rows_list = [] manager = multiprocessing.Manager() shared_list = manager.list() - - #~ for tile, fid_list in self._tiles_geoms.items(): + # for tile, fid_list in self._tiles_geoms.items(): for index3, tile in enumerate(self._tiles_geoms): fid_list = self._tiles_geoms[tile] tile_obj = Tile(tile) - logger.info("Tile {}/{}: {}".format(index3 + 1, len(self._tiles_geoms), tile)) + logger.info( + "Tile {}/{}: {}".format( + index3 + 1, 
len(self._tiles_geoms), tile + ) + ) if TimeSeries._is_indice_in_tile(indice, tile_obj): - tile_indice_path = tile_obj.paths["indices"][indice.lower()] - products = TimeSeries._filter_products(tile_obj, indice, - self._date_min, self._date_max, - self._cover_min, self._cover_max) - - proc_list = list(itertools.product(products, fid_list, [tile_indice_path], [indice], [tile])) + tile_indice_path = tile_obj.paths["indices"][ + indice.lower() + ] + products = TimeSeries._filter_products( + tile_obj, + indice, + self._date_min, + self._date_max, + self._cover_min, + self._cover_max, + ) + + proc_list = list( + itertools.product( + products, + fid_list, + [tile_indice_path], + [indice], + [tile], + ) + ) logger.info("{} extractions".format(len(proc_list))) - + pool = multiprocessing.Pool(self._multiproc) - results = [pool.map(partial(self._raster_stats_multi, features, shared_list), proc_list)] + results = [ + pool.map( + partial( + self._raster_stats_multi, + features, + shared_list, + ), + proc_list, + ) + ] pool.close() pool.join() - - #~ logger.info("{}".format(shared_list)) - #~ for index1, prod in enumerate(products): + + # logger.info("{}".format(shared_list)) + # for index1, prod in enumerate(products): rows_list = list(shared_list) - #~ logger.info("rows_list {}".format(rows_list)) + # logger.info("rows_list {}".format(rows_list)) if rows_list: - self._df_dicts[indice] = TimeSeries._list_to_df(rows_list) - - - - #~ prod_path = tile_indice_path / prod.identifier[:(-12 - len(indice))] / prod.identifier - #~ logger.info("Product {}/{}: {}".format(index1 + 1, len(products), prod.identifier)) - #~ logger.info("{} features".format(len(fid_list))) - - #~ for index, fid in enumerate(fid_list): - #~ df_dict = OrderedDict() - #~ df_dict["fid"] = fid - - #~ # feat_properties = features[fid]["properties"] - #~ # if feat_properties: - #~ # df_dict.update(feat_properties) - #~ df_dict.update(TimeSeries._get_raster_stats_in_geom(features[fid], prod_path)) - - #~ # df_properties = features[fid]["properties"] - #~ df_dict["date"] = prod.date - #~ df_dict["tile"] = tile - #~ df_dict["filename"] = prod.identifier - - #~ for prop in features[fid]["properties"]: - #~ df_dict[prop] = features[fid]["properties"][prop] - - #~ rows_list.append(df_dict) - #~ if rows_list: - #~ self._df_dicts[indice] = TimeSeries._list_to_df(rows_list) + self._df_dicts[indice] = TimeSeries._list_to_df( + rows_list + ) + + # prod_path = tile_indice_path / prod.identifier[:(-12 - len(indice))] / prod.identifier + # logger.info("Product {}/{}: {}".format(index1 + 1, len(products), prod.identifier)) + # logger.info("{} features".format(len(fid_list))) + + # for index, fid in enumerate(fid_list): + # df_dict = OrderedDict() + # df_dict["fid"] = fid + + # # feat_properties = features[fid]["properties"] + # # if feat_properties: + # # df_dict.update(feat_properties) + # df_dict.update(TimeSeries._get_raster_stats_in_geom(features[fid], prod_path)) + + # # df_properties = features[fid]["properties"] + # df_dict["date"] = prod.date + # df_dict["tile"] = tile + # df_dict["filename"] = prod.identifier + + # for prop in features[fid]["properties"]: + # df_dict[prop] = features[fid]["properties"][prop] + + # rows_list.append(df_dict) + # if rows_list: + # self._df_dicts[indice] = TimeSeries._list_to_df(rows_list) end = time.time() - logger.info("Execution time: {}".format(timedelta(seconds = end - start))) - - - + logger.info( + "Execution time: {}".format(timedelta(seconds=end - start)) + ) @staticmethod def _list_to_df(rows_list: 
List[Dict]) -> pd.DataFrame: @@ -392,8 +492,8 @@ class TimeSeries: df = pd.DataFrame.from_dict(rows_list) df = df.sort_values(by=["date"]) df.set_index("date", inplace=True, drop=False) - del df['date'] - df = df.dropna(subset = ['mean']) + del df["date"] + df = df.dropna(subset=["mean"]) return df def to_csv(self, out_path: Union[str, pathlib.PosixPath] = None) -> None: @@ -405,29 +505,63 @@ class TimeSeries: out_path = self._out_path out_path_folder = Path(out_path) / self._vectors_file.stem out_path_folder.mkdir(parents=True, exist_ok=True) - - list_order = ['fid', 'tile', 'filename', 'count', 'nodata', 'nbcld', 'cldprb', 'min', 'max', 'mean', 'std','median', 'percentile_25', 'percentile_75'] - #~ b=[a for a in df.columns if a not in liste] - + + list_order = [ + "fid", + "tile", + "filename", + "count", + "nodata", + "nbcld", + "cldprb", + "min", + "max", + "mean", + "std", + "median", + "percentile_25", + "percentile_75", + ] + # b=[a for a in df.columns if a not in liste] + for df_name, df in self._df_dicts.items(): csv_path = out_path_folder / "{0}_{1}_{2}_{3}.csv".format( self._vectors_file.stem, df_name, self._date_min, - self._date_max) - #~ df.sort_values(by=['fid', 'date', 'count', 'tile']).reindex(columns=(list_order + [a for a in df.columns if a not in list_order]), copy=False).to_csv(str(csv_path)) - df['fid'] = pd.to_numeric(df['fid']) - dg = df.sort_values(by=['fid', 'date', 'count', 'std'], ascending=[True, True, True, False]).\ - reindex(columns=(list_order + [a for a in df.columns if a not in list_order]), copy=False).\ - reset_index().\ - drop_duplicates(['date','fid'], keep='last') - + self._date_max, + ) + # df.sort_values(by=['fid', 'date', 'count', 'tile']).reindex(columns=(list_order + [a for a in df.columns if a not in list_order]), copy=False).to_csv(str(csv_path)) + df["fid"] = pd.to_numeric(df["fid"]) + dg = ( + df.sort_values( + by=["fid", "date", "count", "std"], + ascending=[True, True, True, False], + ) + .reindex( + columns=( + list_order + + [a for a in df.columns if a not in list_order] + ), + copy=False, + ) + .reset_index() + .drop_duplicates(["date", "fid"], keep="last") + ) + dg.set_index("date", inplace=True, drop=True) - dg.replace(to_replace=[r"\\t|\\n|\\r", "\t|\n|\r"], value=["",""], regex=True, inplace=True) + dg.replace( + to_replace=[r"\\t|\\n|\\r", "\t|\n|\r"], + value=["", ""], + regex=True, + inplace=True, + ) dg.to_csv(str(csv_path)) logger.info("exported to csv: {}".format(csv_path)) - - def plot_global(self, out_path: Union[str, pathlib.PosixPath] = None) -> None: + + def plot_global( + self, out_path: Union[str, pathlib.PosixPath] = None + ) -> None: """Exports the time series to CSV format. :param out_path: output folder. Default is DATA/TIME_SERIES. 
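# An end-to-end sketch of the export helpers reformatted above; the vector
# file name is an illustrative assumption. to_csv() writes one de-duplicated
# CSV per indice, and plot_global() renders the per-feature mean as a PNG.
from sen2chain import TimeSeries

ts = TimeSeries(
    "polygons.geojson",  # illustrative vector file
    indices=["NDVI"],
    date_min="2019-01-01",
    date_max="2019-12-31",
)
ts.to_csv()       # CSVs under DATA/TIME_SERIES/polygons/
ts.plot_global()  # one PNG per indice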
@@ -436,26 +570,34 @@ class TimeSeries: out_path = self._out_path out_path_folder = Path(out_path) / self._vectors_file.stem out_path_folder.mkdir(parents=True, exist_ok=True) - + for df_name, df in self._df_dicts.items(): - png_path = out_path_folder / "{0}_plot-global_{1}_{2}_{3}.png".format( - self._vectors_file.stem, - df_name, - self._date_min, - self._date_max) + png_path = ( + out_path_folder + / "{0}_plot-global_{1}_{2}_{3}.png".format( + self._vectors_file.stem, + df_name, + self._date_min, + self._date_max, + ) + ) df = self.data[df_name] - plt.figure(figsize=(19.2,10.8)) - if df_name in ['NDVI', 'NDWIGAO', 'NDWIMCF', 'MNDWI']: - plt.ylim((-10000,10000)) + plt.figure(figsize=(19.2, 10.8)) + if df_name in ["NDVI", "NDWIGAO", "NDWIMCF", "MNDWI"]: + plt.ylim((-10000, 10000)) else: - plt.ylim((0,10000)) - df.dropna(subset = ['mean']).groupby(self._key)['mean'].plot(legend=True) + plt.ylim((0, 10000)) + df.dropna(subset=["mean"]).groupby(self._key)["mean"].plot( + legend=True + ) plt.title(df_name) plt.savefig(str(png_path)) plt.close() logger.info("Plot saved to png: {}".format(png_path)) - def plot_details(self, out_path: Union[str, pathlib.PosixPath] = None) -> None: + def plot_details( + self, out_path: Union[str, pathlib.PosixPath] = None + ) -> None: """Exports the time series to CSV format. :param out_path: output folder. Default is DATA/TIME_SERIES. @@ -465,55 +607,99 @@ class TimeSeries: out_path_folder = Path(out_path) / self._vectors_file.stem out_path_folder.mkdir(parents=True, exist_ok=True) for df_name, df in self._df_dicts.items(): - png_path = out_path_folder / "{0}_plot-details_{1}_{2}_{3}.png".format( - self._vectors_file.stem, - df_name, - self._date_min, - self._date_max) - png_path_nonan = out_path_folder / "{0}_plot-details_{1}-nonan_{2}_{3}.png".format( - self._vectors_file.stem, - df_name, - self._date_min, - self._date_max) - - if df_name in ['NDVI', 'NDWIGAO', 'NDWIMCF', 'MNDWI']: + png_path = ( + out_path_folder + / "{0}_plot-details_{1}_{2}_{3}.png".format( + self._vectors_file.stem, + df_name, + self._date_min, + self._date_max, + ) + ) + png_path_nonan = ( + out_path_folder + / "{0}_plot-details_{1}-nonan_{2}_{3}.png".format( + self._vectors_file.stem, + df_name, + self._date_min, + self._date_max, + ) + ) + + if df_name in ["NDVI", "NDWIGAO", "NDWIMCF", "MNDWI"]: ylim = [-10000, 10000] else: ylim = [0, 10000] - + df = self.data[df_name] grouped = df.groupby(self._key) - ncols = int(math.ceil(len(grouped)**0.5)) - nrows = int(math.ceil(len(grouped)/ncols)) - fig, axs = plt.subplots(nrows, ncols, figsize=(19.2,10.8)) + ncols = int(math.ceil(len(grouped) ** 0.5)) + nrows = int(math.ceil(len(grouped) / ncols)) + fig, axs = plt.subplots(nrows, ncols, figsize=(19.2, 10.8)) for (name, dfe), ax in zip(grouped, axs.flat): ax.set_ylim(ylim) ax.set_title(name) - dfe.dropna(subset = ['mean']).plot(y=['mean'], ax=ax, yerr='std', color='black', elinewidth=0.2, legend=False) - dfe.dropna(subset = ['mean']).plot(y=['min', 'max'], ax=ax, linewidth=0.25, color='black', legend=False) + dfe.dropna(subset=["mean"]).plot( + y=["mean"], + ax=ax, + yerr="std", + color="black", + elinewidth=0.2, + legend=False, + ) + dfe.dropna(subset=["mean"]).plot( + y=["min", "max"], + ax=ax, + linewidth=0.25, + color="black", + legend=False, + ) ax2 = ax.twinx() ax2.set_ylim([0, 1]) - dfe['na_ratio'] = dfe['nodata']/(dfe['count'] + dfe['nodata']) - dfe.dropna(subset = ['mean']).plot(y=['na_ratio'], marker='o', ax=ax2, color='red', linewidth=0.25, linestyle='', legend=False) - #~ 
dfe.dropna().plot.bar(y=['na_ratio'], ax=ax2, color='red',legend=False) - ax.plot(np.nan, '-r', label = 'NaN ratio') - #~ ax.legend(loc=0, prop={'size': 6}) + dfe["na_ratio"] = dfe["nodata"] / ( + dfe["count"] + dfe["nodata"] + ) + dfe.dropna(subset=["mean"]).plot( + y=["na_ratio"], + marker="o", + ax=ax2, + color="red", + linewidth=0.25, + linestyle="", + legend=False, + ) + # dfe.dropna().plot.bar(y=['na_ratio'], ax=ax2, color='red',legend=False) + ax.plot(np.nan, "-r", label="NaN ratio") + # ax.legend(loc=0, prop={'size': 6}) ax.legend(loc=0, labelspacing=0.05) fig.tight_layout() fig.suptitle(df_name, fontsize=16) plt.savefig(str(png_path)) plt.close() - logger.info("Plot saved to png: {}".format(png_path)) - + logger.info("Plot saved to png: {}".format(png_path)) + try: - fig, axs = plt.subplots(nrows, ncols, figsize=(19.2,10.8)) + fig, axs = plt.subplots(nrows, ncols, figsize=(19.2, 10.8)) for (name, dfe), ax in zip(grouped, axs.flat): ax.set_ylim(ylim) ax.set_title(name) - dfe = dfe.dropna(subset = ['mean']) - dfe = dfe[(dfe[['nodata']] == 0).all(axis=1)] - dfe.plot(y=['mean'], ax=ax, yerr='std', color='black', elinewidth=0.2, legend=False) - dfe.plot(y=['min', 'max'], ax=ax, linewidth=0.25, color='black', legend=False) + dfe = dfe.dropna(subset=["mean"]) + dfe = dfe[(dfe[["nodata"]] == 0).all(axis=1)] + dfe.plot( + y=["mean"], + ax=ax, + yerr="std", + color="black", + elinewidth=0.2, + legend=False, + ) + dfe.plot( + y=["min", "max"], + ax=ax, + linewidth=0.25, + color="black", + legend=False, + ) ax.legend(loc=0, labelspacing=0.05) fig.tight_layout() fig.suptitle(df_name, fontsize=16) @@ -524,10 +710,10 @@ class TimeSeries: logger.info("No data to plot (no date with 0 nodata)") pass - - def extract_ql(self, out_path: Union[str, pathlib.PosixPath] = None) -> None: - """Extract ql images around vectors. 
- """ + def extract_ql( + self, out_path: Union[str, pathlib.PosixPath] = None + ) -> None: + """Extract ql images around vectors.""" if out_path is None: out_path = Config().get("time_series_path") out_path_folder = Path(out_path) / self._vectors_file.stem @@ -536,129 +722,210 @@ class TimeSeries: for df_name, df in self.data.items(): fid_list = dict(zip(df.fid, df[self._key])) if not self._field_names: - fid_list = fid_list.fromkeys(fid_list, '') - - cmap_dict = {'NDVI' : 'RdYlGn', - 'NDWIGAO' : 'RdYlBu', - 'NDWIMCF' : 'RdYlBu', - 'BIGR' : 'pink', - 'BIRNIR' : 'afmhot', - 'BIBG' : 'bone', - 'MNDWI' : 'BrBG'} - if df_name in ['NDVI', 'NDWIGAO', 'NDWIMCF', 'MNDWI']: + fid_list = fid_list.fromkeys(fid_list, "") + + cmap_dict = { + "NDVI": "RdYlGn", + "NDWIGAO": "RdYlBu", + "NDWIMCF": "RdYlBu", + "BIGR": "pink", + "BIRNIR": "afmhot", + "BIBG": "bone", + "MNDWI": "BrBG", + } + if df_name in ["NDVI", "NDWIGAO", "NDWIMCF", "MNDWI"]: vmin = -10000 vmax = 10000 else: vmin = 0 vmax = 10000 - + for fid, name in fid_list.items(): if name: fidname = name else: - fidname = 'FID'+fid + fidname = "FID" + fid out_path_fid_folder = out_path_folder / str("QL_" + fidname) out_path_fid_folder.mkdir(parents=True, exist_ok=True) - indice_png_path = out_path_fid_folder / "{0}_MOZ-QL_{1}_{2}_{3}_{4}.jpg".format( - self._vectors_file.stem, - fidname, - df_name, - self._date_min, - self._date_max) - l2a_png_path = out_path_fid_folder / "{0}_MOZ-QL_{1}_{2}_{3}_{4}.jpg".format( - self._vectors_file.stem, - fidname, - 'L2A', - self._date_min, - self._date_max) + indice_png_path = ( + out_path_fid_folder + / "{0}_MOZ-QL_{1}_{2}_{3}_{4}.jpg".format( + self._vectors_file.stem, + fidname, + df_name, + self._date_min, + self._date_max, + ) + ) + l2a_png_path = ( + out_path_fid_folder + / "{0}_MOZ-QL_{1}_{2}_{3}_{4}.jpg".format( + self._vectors_file.stem, + fidname, + "L2A", + self._date_min, + self._date_max, + ) + ) logger.info("fid/name:{}".format(fidname)) - nb_prod = len(df.loc[df['fid'] == fid]) - ncols = int(math.ceil(nb_prod**0.5)) - nrows = int(math.ceil(nb_prod/ncols)) - figa, axs = plt.subplots(nrows, ncols, figsize=(5*ncols,5*nrows), sharey=True, sharex=True) - figb, bxs = plt.subplots(nrows, ncols, figsize=(5*ncols,5*nrows), sharey=True, sharex=True) - for (index, row), ax, bx in zip((df.loc[df['fid'] == fid]).sort_values(by=['date']).iterrows(), - np.array(axs).flatten(), - np.array(bxs).flatten()): - #~ logger.info("row[filename]: {}".format(row['filename'])) - prod_id = IndiceProduct(row['filename']).l2a - #~ prod_id = row['filename'][:(-12 - len(df_name))] - indice_png_tile_path = out_path_fid_folder / "{0}_QL_{1}_{2}_{3}_{4}_{5}.jpg".format( - self._vectors_file.stem, - fidname, - df_name, - prod_id[11:19], - prod_id[39:44], - prod_id[0:2]) - l2a_png_tile_path = out_path_fid_folder / "{0}_QL_{1}_{2}_{3}_{4}_{5}.jpg".format( - self._vectors_file.stem, - fidname, - 'L2A', - prod_id[11:19], - prod_id[39:44], - prod_id[0:2]) - tile_obj = Tile(row['tile']) - tile_indice_path = tile_obj.paths["indices"][df_name.lower()] + nb_prod = len(df.loc[df["fid"] == fid]) + ncols = int(math.ceil(nb_prod ** 0.5)) + nrows = int(math.ceil(nb_prod / ncols)) + figa, axs = plt.subplots( + nrows, + ncols, + figsize=(5 * ncols, 5 * nrows), + sharey=True, + sharex=True, + ) + figb, bxs = plt.subplots( + nrows, + ncols, + figsize=(5 * ncols, 5 * nrows), + sharey=True, + sharex=True, + ) + for (index, row), ax, bx in zip( + (df.loc[df["fid"] == fid]) + .sort_values(by=["date"]) + .iterrows(), + np.array(axs).flatten(), + 
+            if df_name in ["NDVI", "NDWIGAO", "NDWIMCF", "MNDWI"]:
                 vmin = -10000
                 vmax = 10000
             else:
                 vmin = 0
                 vmax = 10000
-
+
             for fid, name in fid_list.items():
                 if name:
                     fidname = name
                 else:
-                    fidname = 'FID'+fid
+                    fidname = "FID" + fid
                 out_path_fid_folder = out_path_folder / str("QL_" + fidname)
                 out_path_fid_folder.mkdir(parents=True, exist_ok=True)
-                indice_png_path = out_path_fid_folder / "{0}_MOZ-QL_{1}_{2}_{3}_{4}.jpg".format(
-                    self._vectors_file.stem,
-                    fidname,
-                    df_name,
-                    self._date_min,
-                    self._date_max)
-                l2a_png_path = out_path_fid_folder / "{0}_MOZ-QL_{1}_{2}_{3}_{4}.jpg".format(
-                    self._vectors_file.stem,
-                    fidname,
-                    'L2A',
-                    self._date_min,
-                    self._date_max)
+                indice_png_path = (
+                    out_path_fid_folder
+                    / "{0}_MOZ-QL_{1}_{2}_{3}_{4}.jpg".format(
+                        self._vectors_file.stem,
+                        fidname,
+                        df_name,
+                        self._date_min,
+                        self._date_max,
+                    )
+                )
+                l2a_png_path = (
+                    out_path_fid_folder
+                    / "{0}_MOZ-QL_{1}_{2}_{3}_{4}.jpg".format(
+                        self._vectors_file.stem,
+                        fidname,
+                        "L2A",
+                        self._date_min,
+                        self._date_max,
+                    )
+                )
                 logger.info("fid/name:{}".format(fidname))
-                nb_prod = len(df.loc[df['fid'] == fid])
-                ncols = int(math.ceil(nb_prod**0.5))
-                nrows = int(math.ceil(nb_prod/ncols))
-                figa, axs = plt.subplots(nrows, ncols, figsize=(5*ncols,5*nrows), sharey=True, sharex=True)
-                figb, bxs = plt.subplots(nrows, ncols, figsize=(5*ncols,5*nrows), sharey=True, sharex=True)
-                for (index, row), ax, bx in zip((df.loc[df['fid'] == fid]).sort_values(by=['date']).iterrows(),
-                                                np.array(axs).flatten(),
-                                                np.array(bxs).flatten()):
-                    #~ logger.info("row[filename]: {}".format(row['filename']))
-                    prod_id = IndiceProduct(row['filename']).l2a
-                    #~ prod_id = row['filename'][:(-12 - len(df_name))]
-                    indice_png_tile_path = out_path_fid_folder / "{0}_QL_{1}_{2}_{3}_{4}_{5}.jpg".format(
-                        self._vectors_file.stem,
-                        fidname,
-                        df_name,
-                        prod_id[11:19],
-                        prod_id[39:44],
-                        prod_id[0:2])
-                    l2a_png_tile_path = out_path_fid_folder / "{0}_QL_{1}_{2}_{3}_{4}_{5}.jpg".format(
-                        self._vectors_file.stem,
-                        fidname,
-                        'L2A',
-                        prod_id[11:19],
-                        prod_id[39:44],
-                        prod_id[0:2])
-                    tile_obj = Tile(row['tile'])
-                    tile_indice_path = tile_obj.paths["indices"][df_name.lower()]
+                nb_prod = len(df.loc[df["fid"] == fid])
+                ncols = int(math.ceil(nb_prod ** 0.5))
+                nrows = int(math.ceil(nb_prod / ncols))
+                figa, axs = plt.subplots(
+                    nrows,
+                    ncols,
+                    figsize=(5 * ncols, 5 * nrows),
+                    sharey=True,
+                    sharex=True,
+                )
+                figb, bxs = plt.subplots(
+                    nrows,
+                    ncols,
+                    figsize=(5 * ncols, 5 * nrows),
+                    sharey=True,
+                    sharex=True,
+                )
+                for (index, row), ax, bx in zip(
+                    (df.loc[df["fid"] == fid])
+                    .sort_values(by=["date"])
+                    .iterrows(),
+                    np.array(axs).flatten(),
+                    np.array(bxs).flatten(),
+                ):
+                    # logger.info("row[filename]: {}".format(row['filename']))
+                    prod_id = IndiceProduct(row["filename"]).l2a
+                    # prod_id = row['filename'][:(-12 - len(df_name))]
+                    indice_png_tile_path = (
+                        out_path_fid_folder
+                        / "{0}_QL_{1}_{2}_{3}_{4}_{5}.jpg".format(
+                            self._vectors_file.stem,
+                            fidname,
+                            df_name,
+                            prod_id[11:19],
+                            prod_id[39:44],
+                            prod_id[0:2],
+                        )
+                    )
+                    l2a_png_tile_path = (
+                        out_path_fid_folder
+                        / "{0}_QL_{1}_{2}_{3}_{4}_{5}.jpg".format(
+                            self._vectors_file.stem,
+                            fidname,
+                            "L2A",
+                            prod_id[11:19],
+                            prod_id[39:44],
+                            prod_id[0:2],
+                        )
+                    )
+                    tile_obj = Tile(row["tile"])
+                    tile_indice_path = tile_obj.paths["indices"][
+                        df_name.lower()
+                    ]
                     tile_l2a_path = tile_obj.paths["l2a"]
-                    prod_path = tile_indice_path / prod_id / row['filename']
+                    prod_path = tile_indice_path / prod_id / row["filename"]
                     tci_path = L2aProduct(prod_id).tci_10m
-                    #~ tci_path = list(Path(str(tile_l2a_path / row['filename'][:(-12 - len(df_name))])+ '.SAFE/')\
-                    #~     .glob('GRANULE/*/IMG_DATA/R10m/*_TCI_10m.jp2'))
-
+                    # tci_path = list(Path(str(tile_l2a_path / row['filename'][:(-12 - len(df_name))])+ '.SAFE/')\
+                    #     .glob('GRANULE/*/IMG_DATA/R10m/*_TCI_10m.jp2'))
+
                     crop_extent = gpd.read_file(str(self._vectors_file))
                     logger.info(tci_path)
                     raster_tci = rasterio.open(tci_path)
                     crop_extent_new_proj = crop_extent.to_crs(raster_tci.crs)
-                    extent_geojson = mapping(crop_extent_new_proj['geometry'][int(fid)].buffer(1000))
+                    extent_geojson = mapping(
+                        crop_extent_new_proj["geometry"][int(fid)].buffer(1000)
+                    )
                     with rasterio.open(tci_path) as tci_data:
-                        tci_data_crop, tci_data_crop_affine = mask(tci_data,
-                                                                   [extent_geojson],
-                                                                   crop=True)
-                    tci_crop = np.dstack((tci_data_crop[0], tci_data_crop[1], tci_data_crop[2]))
-                    tci_data_extent = plotting_extent(tci_data_crop[0], tci_data_crop_affine)
-                    ax.imshow(tci_crop, extent = tci_data_extent)
-                    crop_extent_new_proj.loc[[int(fid)], 'geometry'].plot(ax=ax, facecolor='none', edgecolor='black', linewidth = 3)
-                    ax.set_title("{} - {}".format(prod_id[11:19], prod_id[39:44]), fontsize=10)
-
-                    fig2, ax2 = plt.subplots(1, 1, figsize=(4,4))
-                    plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)
-                    ax2.imshow(tci_crop, extent = tci_data_extent)
-                    crop_extent_new_proj.loc[[int(fid)], 'geometry'].plot(ax=ax2, facecolor='none', edgecolor='black', linewidth = 3)
+                        tci_data_crop, tci_data_crop_affine = mask(
+                            tci_data, [extent_geojson], crop=True
+                        )
+                    tci_crop = np.dstack(
+                        (tci_data_crop[0], tci_data_crop[1], tci_data_crop[2])
+                    )
+                    tci_data_extent = plotting_extent(
+                        tci_data_crop[0], tci_data_crop_affine
+                    )
+                    ax.imshow(tci_crop, extent=tci_data_extent)
+                    crop_extent_new_proj.loc[[int(fid)], "geometry"].plot(
+                        ax=ax, facecolor="none", edgecolor="black", linewidth=3
+                    )
+                    ax.set_title(
+                        "{} - {}".format(prod_id[11:19], prod_id[39:44]),
+                        fontsize=10,
+                    )
+
+                    fig2, ax2 = plt.subplots(1, 1, figsize=(4, 4))
+                    plt.subplots_adjust(
+                        left=0.1, right=0.9, top=0.9, bottom=0.1
+                    )
+                    ax2.imshow(tci_crop, extent=tci_data_extent)
+                    crop_extent_new_proj.loc[[int(fid)], "geometry"].plot(
+                        ax=ax2,
+                        facecolor="none",
+                        edgecolor="black",
+                        linewidth=3,
+                    )
                     ax2.set_axis_off()
                     fig2.suptitle(name)
-                    ax2.set_title(prod_id[39:44] + ' ' + prod_id[11:19])
+                    ax2.set_title(prod_id[39:44] + " " + prod_id[11:19])
                     fig2.tight_layout()
-                    fig2.savefig(str(l2a_png_tile_path), bbox_inches='tight')
+                    fig2.savefig(str(l2a_png_tile_path), bbox_inches="tight")
                     plt.close(fig=fig2)
-
-                    raster = rasterio.open(prod_path)
-                    nrg = raster.read(1)
+
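+                    # Crop the indice raster to the same buffered extent
+                    # for its panel in the mosaic quicklook.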
                     with rasterio.open(prod_path) as img_data:
-                        img_data_crop, img_data_crop_affine = mask(img_data,
-                                                                   [extent_geojson],
-                                                                   crop=True)
-                    img_data_extent = plotting_extent(img_data_crop[0], img_data_crop_affine)
-                    bx.imshow(img_data_crop[0], extent=img_data_extent, cmap=cmap_dict[df_name], vmin=vmin, vmax=vmax)
-                    crop_extent_new_proj.loc[[int(fid)], 'geometry'].plot(ax=bx, facecolor='none', edgecolor='black', linewidth = 3)
-                    bx.set_title("{} - {}".format(prod_id[11:19], prod_id[39:44]), fontsize=10)
-
-                    fig_t_ind, ax_t_ind = plt.subplots(1, 1, figsize=(4,4))
-                    plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)
-                    ax_t_ind.imshow(img_data_crop[0], extent=img_data_extent, cmap=cmap_dict[df_name], vmin=vmin, vmax=vmax)
-                    crop_extent_new_proj.loc[[int(fid)], 'geometry'].plot(ax=ax_t_ind, facecolor='none', edgecolor='black', linewidth = 3)
+                        img_data_crop, img_data_crop_affine = mask(
+                            img_data, [extent_geojson], crop=True
+                        )
+                    img_data_extent = plotting_extent(
+                        img_data_crop[0], img_data_crop_affine
+                    )
+                    bx.imshow(
+                        img_data_crop[0],
+                        extent=img_data_extent,
+                        cmap=cmap_dict[df_name],
+                        vmin=vmin,
+                        vmax=vmax,
+                    )
+                    crop_extent_new_proj.loc[[int(fid)], "geometry"].plot(
+                        ax=bx, facecolor="none", edgecolor="black", linewidth=3
+                    )
+                    bx.set_title(
+                        "{} - {}".format(prod_id[11:19], prod_id[39:44]),
+                        fontsize=10,
+                    )
+
+                    fig_t_ind, ax_t_ind = plt.subplots(1, 1, figsize=(4, 4))
+                    plt.subplots_adjust(
+                        left=0.1, right=0.9, top=0.9, bottom=0.1
+                    )
+                    ax_t_ind.imshow(
+                        img_data_crop[0],
+                        extent=img_data_extent,
+                        cmap=cmap_dict[df_name],
+                        vmin=vmin,
+                        vmax=vmax,
+                    )
+                    crop_extent_new_proj.loc[[int(fid)], "geometry"].plot(
+                        ax=ax_t_ind,
+                        facecolor="none",
+                        edgecolor="black",
+                        linewidth=3,
+                    )
                     ax_t_ind.set_axis_off()
                     fig_t_ind.suptitle(name)
-                    ax_t_ind.set_title(prod_id[39:44] + ' ' + prod_id[11:19])
+                    ax_t_ind.set_title(prod_id[39:44] + " " + prod_id[11:19])
                     fig_t_ind.tight_layout()
-                    fig_t_ind.savefig(str(indice_png_tile_path), bbox_inches='tight')
+                    fig_t_ind.savefig(
+                        str(indice_png_tile_path), bbox_inches="tight"
+                    )
                     plt.close(fig=fig_t_ind)
-
-                figa.suptitle(fidname + ' - L2A')
-                figa.savefig(str(l2a_png_path), bbox_inches='tight')
+
+                figa.suptitle(fidname + " - L2A")
+                figa.savefig(str(l2a_png_path), bbox_inches="tight")
                 plt.close(fig=figa)
-
-                figb.suptitle(fidname + ' - ' + df_name)
-                figb.savefig(str(indice_png_path), bbox_inches='tight')
-                plt.close(fig=figb)
+                figb.suptitle(fidname + " - " + df_name)
+                figb.savefig(str(indice_png_path), bbox_inches="tight")
+                plt.close(fig=figb)
diff --git a/sen2chain/utils.py b/sen2chain/utils.py
index f06a4e368c37a3c080a56bba7414ae624ea39d69..f54d141f0878968241779dd395e8a02448aa4e1a 100644
--- a/sen2chain/utils.py
+++ b/sen2chain/utils.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 
 """
-This module contains usefull functions commons to every modules.
+This module contains useful functions common to all modules.
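+
+Illustrative doctest::
+
+    >>> human_size(2048)
+    '2KB'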
@@ -19,9 +19,8 @@ logging.basicConfig(level=logging.INFO)
 
-# useful dates formats encountered when working with Sentinel-2 data
+# useful date formats encountered when working with Sentinel-2 data
 DATES_FORMATS = dict(
-    ymd="%Y-%m-%d",
-    filename="%Y%m%dT%H%M%S",
-    metadata="%Y-%m-%dT%H:%M:%S.%fZ")
+    ymd="%Y-%m-%d", filename="%Y%m%dT%H%M%S", metadata="%Y-%m-%dT%H:%M:%S.%fZ"
+)
 
 
 def format_word(word: str) -> str:
@@ -75,54 +74,73 @@ def datetime_to_str(date: datetime, date_format: str) -> str:
     return datetime.strftime(date, date_format)
 
 
-def human_size(bytes, units=['B','KB','MB','GB','TB', 'PB', 'EB']):
-    """ Returns a human readable string reprentation of bytes"""
-    return str(bytes) + units[0] if bytes < 1024.0 else human_size(bytes>>10, units[1:])
-
+def human_size(bytes, units=["B", "KB", "MB", "GB", "TB", "PB", "EB"]):
+    """Returns a human-readable string representation of bytes."""
+    return (
+        str(bytes) + units[0]
+        if bytes < 1024.0
+        else human_size(bytes >> 10, units[1:])
+    )
+
+
 def human_size_decimal(size, decimal_places=2):
-    for unit in ['B','KB','MB','GB','TB', 'PB', 'EB']:
+    for unit in ["B", "KB", "MB", "GB", "TB", "PB", "EB"]:
         if size < 1024.0:
             break
         size /= 1024.0
-    return "{}{}".format(format(size, '.' + str(decimal_places) + 'f'), unit)
-    #~ return f"{size:.{decimal_places}f}{unit}"
+    return "{}{}".format(format(size, "." + str(decimal_places) + "f"), unit)
+    # return f"{size:.{decimal_places}f}{unit}"
+
 
-def getFolderSize(folder, follow_symlinks = False):
+def getFolderSize(folder, follow_symlinks=False):
     total_size = os.path.getsize(folder)
     try:
         for item in os.listdir(folder):
             itempath = os.path.join(folder, item)
-            if os.path.isfile(itempath) and (follow_symlinks or not os.path.islink(itempath)):
+            if os.path.isfile(itempath) and (
+                follow_symlinks or not os.path.islink(itempath)
+            ):
                 total_size += os.path.getsize(itempath)
-            elif os.path.isdir(itempath) and (follow_symlinks or not os.path.islink(itempath)):
+            elif os.path.isdir(itempath) and (
+                follow_symlinks or not os.path.islink(itempath)
+            ):
                 total_size += getFolderSize(itempath)
-    except:
+    except OSError:
         pass
     return total_size
-
+
+
 def setPermissions(path):
-    os.chmod(str(path), os.stat(str(path)).st_mode | stat.S_IWGRP | stat.S_IWUSR)
+    os.chmod(
+        str(path), os.stat(str(path)).st_mode | stat.S_IWGRP | stat.S_IWUSR
+    )
     os.chmod(str(path), os.stat(str(path)).st_mode & ~stat.S_IWOTH)
     for dir_path, dir_names, files in os.walk(str(path)):
         for d in dir_names:
-            name = dir_path + '/' + d
-            #~ os.chmod(name, os.stat(name).st_mode | 0o075)
-            #~ os.chmod(name, os.stat(name).st_mode & ~0o002)
+            name = dir_path + "/" + d
+            # os.chmod(name, os.stat(name).st_mode | 0o075)
+            # os.chmod(name, os.stat(name).st_mode & ~0o002)
             os.chmod(name, os.stat(name).st_mode | stat.S_IWGRP | stat.S_IWUSR)
             os.chmod(name, os.stat(name).st_mode & ~stat.S_IWOTH)
-
+
         for f in files:
-            name = dir_path + '/' + f
-            #~ os.chmod(name, os.stat(name).st_mode | 0o064)
-            #~ os.chmod(name, os.stat(name).st_mode & ~0o003)
+            name = dir_path + "/" + f
+            # os.chmod(name, os.stat(name).st_mode | 0o064)
+            # os.chmod(name, os.stat(name).st_mode & ~0o003)
             os.chmod(name, os.stat(name).st_mode | stat.S_IWGRP | stat.S_IWUSR)
-            os.chmod(name, os.stat(name).st_mode & ~stat.S_IXOTH & ~stat.S_IWOTH)
-
-def get_current_Sen2Cor_version():
-    """ Returns your current Sen2Cor version """
-    sen2cor_bashrc_path = Config().get("sen2cor_bashrc_path")
-    return next(iter(re.findall('Sen2Cor-(\d{2}\.\d{2}\.\d{2})', str(sen2cor_bashrc_path))), None)
-
+            os.chmod(
+                name, os.stat(name).st_mode & ~stat.S_IXOTH & ~stat.S_IWOTH
+            )
 
-
+def get_current_Sen2Cor_version():
+    """Returns the current Sen2Cor version."""
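+    # e.g. "02.05.05" when sen2cor_bashrc_path points inside a
+    # Sen2Cor-02.05.05-Linux64 install; None if no version string is found.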
+ """Returns your current Sen2Cor version""" + sen2cor_bashrc_path = Config().get("sen2cor_bashrc_path") + return next( + iter( + re.findall( + "Sen2Cor-(\d{2}\.\d{2}\.\d{2})", str(sen2cor_bashrc_path) + ) + ), + None, + ) diff --git a/sen2chain/xmlparser.py b/sen2chain/xmlparser.py index ed0cac46f3914c74f906a7f4697ba0e2893f363a..effe4036c192f20eb04886c119e5466400bf099b 100644 --- a/sen2chain/xmlparser.py +++ b/sen2chain/xmlparser.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 """ Module for parsing L1C and L2A products metadatada. @@ -14,20 +14,23 @@ from .utils import get_current_Sen2Cor_version from .config import SHARED_DATA, Config - logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) + class MetadataParser: """Class for getting metadata values from a L1C or L2A product's XML metadata file. :param metadata_path: XML metadata file path. :param tile: product's tile name. """ + def __init__(self, metadata_path: pathlib.PosixPath, tile: str) -> None: if metadata_path is None or not metadata_path.exists(): - raise ValueError("metadata file not found: {}".format(metadata_path)) + raise ValueError( + "metadata file not found: {}".format(metadata_path) + ) self._metadata_path = metadata_path self.tile = tile @@ -53,24 +56,39 @@ class MetadataParser: def _get_psd(self) -> None: """Returns the version of the metadata file.""" - schema_location = self._root.attrib["{http://www.w3.org/2001/XMLSchema-instance}schemaLocation"] - psd_version = re.findall("https://psd-([0-9]{2}).sentinel2.eo.esa.int", schema_location)[0] + schema_location = self._root.attrib[ + "{http://www.w3.org/2001/XMLSchema-instance}schemaLocation" + ] + psd_version = re.findall( + "https://psd-([0-9]{2}).sentinel2.eo.esa.int", schema_location + )[0] self._psd = int(psd_version) def _get_granule(self) -> None: """Load granule identifier.""" self._granule_string = "Granule" if self._psd > 13 else "Granules" try: - self._granule = [g.attrib['granuleIdentifier'] - for g in self._root.findall('.//{}'.format(self._granule_string)) - if self.tile in g.attrib['granuleIdentifier']][0] + self._granule = [ + g.attrib["granuleIdentifier"] + for g in self._root.findall( + ".//{}".format(self._granule_string) + ) + if self.tile in g.attrib["granuleIdentifier"] + ][0] except IndexError: - logger.error("{}: could not find granule for tile {}".format(self._metadata_path.name, self.tile)) + logger.error( + "{}: could not find granule for tile {}".format( + self._metadata_path.name, self.tile + ) + ) raise def _get_bands_image_string(self) -> None: """Returns XML key string for granule's bands paths.""" - image_string = {t.tag for t in self._root.findall('.//{}/*'.format(self._granule_string))} + image_string = { + t.tag + for t in self._root.findall(".//{}/*".format(self._granule_string)) + } self._image_string = list(image_string)[0] def get_metadata_value(self, key: str = None) -> str: @@ -80,7 +98,9 @@ class MetadataParser: :param key: metadata tag name. """ try: - return [v.text for v in self._root.findall(".//{0}".format(key))][0] + return [v.text for v in self._root.findall(".//{0}".format(key))][ + 0 + ] except IndexError: logger.error("Metadata value not found: {}".format(key)) @@ -92,55 +112,75 @@ class MetadataParser: :param res: band's resolution. None for L1C. """ # folder's name for each resolution. 
+            path = self._safe_path.glob(
+                "**/QI_DATA/*{}*{}.jp2".format(key.upper(), res.lower())
+            )
             return next(path, None)
-
+
         try:
             if self._product_level == "L2A":
-                full_path = self._safe_path if self._psd > 13 \
-                    else self._safe_path / "GRANULE" / self._granule / "IMG_DATA" / res_strings_dict[res]
+                full_path = (
+                    self._safe_path
+                    if self._psd > 13
+                    else self._safe_path
+                    / "GRANULE"
+                    / self._granule
+                    / "IMG_DATA"
+                    / res_strings_dict[res]
+                )
             else:
-                full_path = self._safe_path if self._psd > 13 \
-                    else self._safe_path / "GRANULE" / self._granule / "IMG_DATA"
-            return [str(full_path/f.text) + ".jp2"
-                    for f in self._root.findall(".//{}[@granuleIdentifier='{}']/{}".format(self._granule_string,
-                                                                                           self._granule,
-                                                                                           self._image_string))
-                    if pattern in f.text][0]
+                full_path = (
+                    self._safe_path
+                    if self._psd > 13
+                    else self._safe_path
+                    / "GRANULE"
+                    / self._granule
+                    / "IMG_DATA"
+                )
+            return [
+                str(full_path / f.text) + ".jp2"
+                for f in self._root.findall(
+                    ".//{}[@granuleIdentifier='{}']/{}".format(
+                        self._granule_string, self._granule, self._image_string
+                    )
+                )
+                if pattern in f.text
+            ][0]
         except IndexError:
             logger.error("Band not found: {}".format(pattern))
             raise
 
+
 class Sen2ChainMetadataParser:
-    """
-
-    """
-    def __init__(self,
-                 xml_path,
-                 ):
+    """Reads and updates a product's sen2chain metadata XML (the
+    sen2chain, processing and Sen2Cor versions used to produce it)."""
+
+    def __init__(
+        self,
+        xml_path,
+    ):
         SEN2CHAIN_META = SHARED_DATA.get("sen2chain_meta")
         self.xml_path = xml_path
-
+
         if xml_path.exists():
             self._tree = et.parse(str(xml_path))
@@ -148,13 +188,19 @@ class Sen2ChainMetadataParser:
             self._tree = et.parse(str(SEN2CHAIN_META))
 
         self._root = self._tree.getroot()
-
+
     def get_default_values(self):
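+        """Stores the values of the three version tags in
+        ``self.default_values``."""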
-        keys = ["SEN2CHAIN_VERSION", "SEN2CHAIN_PROCESSING_VERSION", "SEN2COR_VERSION"]
+        keys = [
+            "SEN2CHAIN_VERSION",
+            "SEN2CHAIN_PROCESSING_VERSION",
+            "SEN2COR_VERSION",
+        ]
         self.default_values = []
         for key in keys:
-            self.default_values.append([v.text for v in self._root.findall(".//{0}".format(key))][0])
-
+            self.default_values.append(
+                [v.text for v in self._root.findall(".//{0}".format(key))][0]
+            )
+
     def get_metadata_value(self, key: str = None) -> str:
         """
         Returns metadata.
@@ -162,23 +208,39 @@ class Sen2ChainMetadataParser:
         :param key: metadata tag name.
         """
         try:
-            return [v.text for v in self._root.findall(".//{0}".format(key))][0]
+            return [v.text for v in self._root.findall(".//{0}".format(key))][
+                0
+            ]
         except IndexError:
             logger.error("Metadata value not found: {}".format(key))
-
+
     def init_metadata(self):
         self._root.find("SEN2CHAIN_VERSION").text = sen2chain.__version__
-        self._root.find("SEN2CHAIN_PROCESSING_VERSION").text = Config().get("sen2chain_processing_version")
+        self._root.find("SEN2CHAIN_PROCESSING_VERSION").text = Config().get(
+            "sen2chain_processing_version"
+        )
         self._root.find("SEN2COR_VERSION").text = get_current_Sen2Cor_version()
-        self._tree.write(str(self.xml_path), encoding="UTF-8", xml_declaration=True)
-
-    def set_metadata(self,
-                     sen2chain_version: str = None,
-                     sen2chain_processing_version: str = None,
-                     sen2cor_version: str = None,
-                     ):
-        self._root.find("SEN2CHAIN_VERSION").text = sen2chain_version or sen2chain.__version__
-        self._root.find("SEN2CHAIN_PROCESSING_VERSION").text = sen2chain_processing_version or Config().get("sen2chain_processing_version")
-        self._root.find("SEN2COR_VERSION").text = sen2cor_version or get_current_Sen2Cor_version()
-        self._tree.write(str(self.xml_path), encoding="UTF-8", xml_declaration=True)
-
+        self._tree.write(
+            str(self.xml_path), encoding="UTF-8", xml_declaration=True
+        )
+
+    def set_metadata(
+        self,
+        sen2chain_version: str = None,
+        sen2chain_processing_version: str = None,
+        sen2cor_version: str = None,
+    ):
+        self._root.find("SEN2CHAIN_VERSION").text = (
+            sen2chain_version or sen2chain.__version__
+        )
+        self._root.find(
+            "SEN2CHAIN_PROCESSING_VERSION"
+        ).text = sen2chain_processing_version or Config().get(
+            "sen2chain_processing_version"
+        )
+        self._root.find("SEN2COR_VERSION").text = (
+            sen2cor_version or get_current_Sen2Cor_version()
+        )
+        self._tree.write(
+            str(self.xml_path), encoding="UTF-8", xml_declaration=True
+        )
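+
+
+# Illustrative use, with a hypothetical sidecar path:
+#   parser = Sen2ChainMetadataParser(Path("/path/to/product_MTD.xml"))
+#   parser.init_metadata()  # stamps the current sen2chain / Sen2Cor versions
+#   parser.set_metadata(sen2cor_version="02.05.05")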