diff --git a/README.rst b/README.rst index e54dbdb8819df4dc96dc1a1d04a5ca7152f23e29..7327dd9c948f9b98228b6786f6b1d68a5fedd098 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ Sen2Chain ========= -Sen2Chain is a simple utility to download and process Sentinel-2 images. +Sen2Chain is a simple utility written in Python 3 to download and process Sentinel-2 images. It uses the `sentinelsat <https://github.com/sentinelsat/sentinelsat>`_ and `peps_download <https://github.com/olivierhagolle/peps_download>`_ packages to find and download data, and ESA's `Sen2Cor <http://step.esa.int/main/third-party-plugins-2/sen2cor/>`_ processor to perform atmospheric, terrain and cirrus correction. @@ -43,7 +43,7 @@ This project is open to anyone willing to help. You can contribute by adding new Funding ------- -.. image:: docs/source/funding.png +.. image:: docs/source/logo_framagit_funding.JPG :align: center :alt: logos diff --git a/docs/source/logo_framagit_funding.JPG b/docs/source/logo_framagit_funding.JPG new file mode 100644 index 0000000000000000000000000000000000000000..d2e086f2b2e29e5f8b31c5ba37e8c6cb47304b17 Binary files /dev/null and b/docs/source/logo_framagit_funding.JPG differ diff --git a/requirements.txt b/requirements.txt index 482dcf8f99e441438e3190a43f12ffc5344795c6..d85bd7e003a4c3e414968fdda0672fc124369633 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,4 @@ pandas>=0.23.4 urllib3>=1.22 geopandas>=0.5.0 scipy - +pillow diff --git a/sen2chain/indices.py b/sen2chain/indices.py index 58bfc8dbf7608f4992716eb860ed48e0ffa9c260..9d88a74184128f11fd5a5d205c04115755afc9cf 100644 --- a/sen2chain/indices.py +++ b/sen2chain/indices.py @@ -106,6 +106,7 @@ class Ndvi(Indice): out_path=(out_path / self.indice_raw)) index_tiff_2_jp2(img_path=(out_path / self.indice_raw), out_path=(out_path / self.indice_filename)) + if nodata_clouds: if self.l2a_product.user_cloud_mask is None: raise ValueError("Cloud mask does not exist") @@ -125,7 +126,12 @@ class Ndvi(Indice): index_tiff_2_jp2(img_path=(out_path / masked_indice_raw), out_path=(out_path / masked_indice_filename)) os.remove(str(out_path / masked_indice_raw)) - + + try: + os.remove(str(out_path / self.indice_raw)) + except: + pass + if quicklook: cmap = matplotlib_colormap_to_rgb(self.colormap, revers=False) diff --git a/sen2chain/library.py b/sen2chain/library.py index 22ec776868ce5616de4adc97df8cfde1c5788f79..3ced9825e00c71e7530f9970cf6efbde5c81bf01 100644 --- a/sen2chain/library.py +++ b/sen2chain/library.py @@ -57,16 +57,31 @@ class Library: return self._indices def clean(self, - clean_list : list = [], + clean_list: list = [], + remove_indice_tif: bool = False, remove: bool = False): if not clean_list: clean_list = self.l1c for t in clean_list: try: til = Tile(t) - til.clean_lib(remove=remove) + til.clean_lib(remove=remove, remove_indice_tif=remove_indice_tif) except: pass + + def archive_l1c(self, + archive_list: list = [], + ): + if archive_list: + for t in archive_list: + try: + til = Tile(t) + til.archive_l1c() + except: + pass + else: + logger.info("Please specify a tile list to archive") + class TempContainer: @@ -99,6 +114,7 @@ class TempContainer: if zip_file.exists(): logger.info("{} : Unzipping L1C archive".format(self.l1c.identifier)) shutil.unpack_archive(str(zip_file), str(self.temp_path)) + sorted(zip_file.parent.glob("*.SAFE"))[0].rename(zip_file.parent / (zip_file.stem + ".SAFE")) os.remove(str(zip_file)) return self diff --git a/sen2chain/tiles.py b/sen2chain/tiles.py index 35f7225a05235e8a185e1890fef986d9898c7905..9de76b9632c4448113237e5d28801d5d7f63659a 100644 --- a/sen2chain/tiles.py +++ b/sen2chain/tiles.py @@ -324,8 +324,8 @@ class Tile: """Returns tile's L2A products that don't have indices as a ProductsList.""" prods_list = ProductsList() try: - missings_indice_set = set(self.l2a.products) - {identifier.replace("_" + indice.upper() + ".jp2", ".SAFE") \ - for identifier in getattr(getattr(self, indice.lower()), 'raws').products} + missings_indice_set = set(self.l2a.products) - {identifier.replace("_" + indice.upper() + "_MASKED.jp2", ".SAFE") \ + for identifier in getattr(getattr(self, indice.lower()), 'masks').products} except: missings_indice_set = set(self.l2a.products) for prod in missings_indice_set: @@ -334,15 +334,31 @@ class Tile: return prods_list def clean_lib(self, + remove_indice_tif: bool = False, remove: bool = False): """ - Clean processing errors + Search and clean processing error products - unmoved error l2a products from l1c folder - - moved error l2a products from l2a folder + - moved error l2a products from l2a folder - cloud masks error (0kb) - indices error (0kb) """ #~ logger.info("Cleaning {} library".format(self.name)) + + # identify corrupted jp2 in l1c folder + erase_set = set() + for f in self._paths["l1c"].glob("*/GRANULE/*/IMG_DATA/*.jp2"): + + if f.stat().st_size == 0: + logger.info("Identified 0b corrupted {} in L1C folder".format(f.name)) + if remove: + erase_set.update({f.parent.parent.parent.parent}) + for e in erase_set: + try: + logger.info("Removing {} from L1C folder".format(e)) + shutil.rmtree(str(e)) + except: + logger.error("Can't remove {} from L1C folder".format(e)) # identify residual l2a from l1c folder for f in self._paths["l1c"].glob("*L2A*.SAFE"): logger.info("Identified {} in L1C folder".format(f.name)) @@ -378,8 +394,9 @@ class Tile: f.unlink() # identify 0B or absent indices QL for f in self._paths["indices"]: - #~ print(f, self._paths["indices"][f]) + #~ logger.info(f, self._paths["indices"][f]) for p in self._paths["indices"][f].glob("*_MSIL2A_*/"): + #~ logger.info(p) if p.is_file(): logger.info("Identified old indice format {}".format(p.name)) if remove: @@ -394,9 +411,21 @@ class Tile: if remove: logger.info("Removing indice QL {}".format(q.name)) q.unlink() + for q in p.glob("*.*"): + if q.stat().st_size == 0: + logger.info("Corrupted file {} (0B size)".format(q.name)) + if remove: + logger.info("Removing indice QL {}".format(q.name)) + q.unlink() + if remove_indice_tif: + for q in p.glob("*" + f.upper() + ".tif"): + logger.info("Identified indice in tif format {}".format(q.name)) + if remove: + logger.info("Removing indice QL {}".format(q.name)) + q.unlink() + - def archive_l1c(self, - move: bool = False): + def archive_l1c(self): """ Check and move l1c products to archive folder @@ -406,8 +435,6 @@ class Tile: logger.info("Scanning L1C ready to move") prod_list = ProductsList() - #~ archive_l1c_set = set(self.l1c.products) - {identifier.replace("L2A_", "L1C_").replace("_USER_", "__OPER__") - #~ for identifier in self.l2a.products} archive_l1c_set = {a for a in {identifier.replace("L2A_", "L1C_").replace("_USER_", "__OPER__") for identifier in self.l2a.products} if a in set(self.l1c.products)} @@ -430,10 +457,3 @@ class Tile: logger.info("No L1C products to archive") - - - - - - - diff --git a/sen2chain/time_series.py b/sen2chain/time_series.py index 56bdc577b14703596ebb20cbc6f589fdca8d8f21..1799eed4d4726596ab1517deb5eceadb9ba59eef 100644 --- a/sen2chain/time_series.py +++ b/sen2chain/time_series.py @@ -13,6 +13,7 @@ import rasterio from rasterio.plot import show from rasterio.plot import plotting_extent from rasterio.mask import mask +from rasterio.enums import Resampling from fiona import transform from shapely.geometry import shape from shapely.geometry import mapping @@ -146,7 +147,9 @@ class TimeSeries: @staticmethod def _get_raster_stats_in_geom( feature: Dict, - raster_path: Union[str, pathlib.PosixPath] + raster_path: Union[str, pathlib.PosixPath], + cloud_path: str = None, + cloud_proba_path: str = None ) -> Dict: """Extracts statistics from a raster in a geometry. @@ -157,13 +160,60 @@ class TimeSeries: rasterio.open(str(raster_path)).crs["init"], feature["geometry"])) + #~ with rasterio.open(str(raster_path)) as raster_src: + #~ raster_profile = raster_src.profile + #~ logger.info(raster_profile) + stats = zonal_stats(geom, str(raster_path), stats=["count", "nodata", "min", "max", "median", "mean", - "std"])[0] + "std", + "percentile_25", + "percentile_75"])[0] + #~ logger.info(stats) + + if cloud_path: + #~ def mycount(x): + #~ return np.count_nonzero(x == 1) + #~ with rasterio.open(str(cloud_path)) as cld_src: + #~ cld_profile = cld_src.profile + #~ cld_array = cld_src.read(1, out_shape = (10980, 10980), resampling=Resampling.nearest) + #~ cld_array = cld_src.read(1) + #~ logger.info("shape: {}".format(cld_array.shape)) + + #~ stats_cld = zonal_stats(geom, cld_array, affine=raster_profile["transform"] , stats=["count", "nodata"], add_stats={'cldcount':mycount}) + #~ stats_cld = zonal_stats(geom, cld_array, affine=raster_profile["transform"], categorical=True)[0] + #~ stats_cld = zonal_stats(geom, str(cloud_path), nodata = 16383.0, categorical=True)[0] + #~ stats_cld = zonal_stats(geom, str(cloud_path), stats=["count", "nodata"], add_stats={'cldcount':mycount}) + #~ stats_cld = zonal_stats(geom, str(cloud_path), categorical=True, nodata = 16383.0, category_map= {-999: 'tttttttttttt', 1.0: 'clds', 0.0:'noclds', 16383.0: 'nnnnnnn'})[0] + stats_cld = zonal_stats(geom, str(cloud_path), stats=["count", "nodata"])[0] + + #~ logger.info(stats_cld) + try: + nbcld = stats["nodata"] - stats_cld["nodata"] + except: + nbcld = 0 + + #~ logger.info(stats_cld) + #~ logger.info(cld_pct) + stats['nbcld'] = nbcld + + if cloud_proba_path: + stats_cld_prb = zonal_stats(geom, str(cloud_proba_path), stats=["mean"])[0] + stats['cldprb'] = stats_cld_prb["mean"] + + logger.info(stats) + + + + + # mettre ici le cloud mask ! + + + return stats def _get_stats(self) -> None: @@ -190,6 +240,12 @@ class TimeSeries: for index1, prod in enumerate(products): prod_path = tile_indice_path / prod.identifier[:(-12 - len(indice))] / prod.identifier + #~ cloud_path = tile_obj.paths["l2a"] / (prod.identifier[:(-12 - len(indice))] + "_CLOUD_MASK.jp2") + prod_path_unmasked = tile_indice_path / prod.identifier[:(-12 - len(indice))] / (prod.identifier[:-11] + '.jp2') + prod_path_cloud_proba = L2aProduct(prod.identifier[:(-12 - len(indice))]).msk_cldprb_20m + #~ logger.info(prod_path_cloud_proba) + + logger.info("Product {}/{}: {}".format(index1 + 1, len(products), prod.identifier)) logger.info("{} features".format(len(fid_list))) @@ -200,7 +256,7 @@ class TimeSeries: # feat_properties = features[fid]["properties"] # if feat_properties: # df_dict.update(feat_properties) - df_dict.update(TimeSeries._get_raster_stats_in_geom(features[fid], prod_path)) + df_dict.update(TimeSeries._get_raster_stats_in_geom(features[fid], prod_path, prod_path_unmasked, prod_path_cloud_proba)) # df_properties = features[fid]["properties"] df_dict["date"] = prod.date @@ -218,10 +274,14 @@ class TimeSeries: def _raster_stats_multi(self, features, shared_list, proc_item): prod_path = proc_item[2] / proc_item[0].identifier[:(-12 - len(proc_item[3]))] / proc_item[0].identifier + prod_path_unmasked = proc_item[2] / proc_item[0].identifier[:(-12 - len(proc_item[3]))] / (proc_item[0].identifier[:-11] + '.jp2') + prod_path_cloud_proba = L2aProduct(proc_item[0].identifier[:(-12 - len(proc_item[3]))]).msk_cldprb_20m + #~ logger.info(prod_path_cloud_proba) + fid = proc_item[1] result_dict = OrderedDict() result_dict["fid"] = fid - result_dict.update(TimeSeries._get_raster_stats_in_geom(features[fid], prod_path)) + result_dict.update(TimeSeries._get_raster_stats_in_geom(features[fid], prod_path, prod_path_unmasked, prod_path_cloud_proba)) result_dict["date"] = proc_item[0].date result_dict["tile"] = proc_item[4] result_dict["filename"] = proc_item[0].identifier @@ -323,7 +383,7 @@ class TimeSeries: out_path_folder = Path(out_path) / self._vectors_file.stem out_path_folder.mkdir(parents=True, exist_ok=True) - list_order = ['fid', 'count', 'nodata', 'min', 'max', 'mean', 'median', 'std', 'tile', 'filename'] + list_order = ['fid', 'tile', 'filename', 'count', 'nodata', 'nbcld', 'cldprb', 'min', 'max', 'mean', 'std','median', 'percentile_25', 'percentile_75'] #~ b=[a for a in df.columns if a not in liste] for df_name, df in self._df_dicts.items(): @@ -332,7 +392,14 @@ class TimeSeries: df_name, self._date_min, self._date_max) - df.sort_values(by=['fid']).reindex(columns=(list_order + [a for a in df.columns if a not in list_order]), copy=False).to_csv(str(csv_path)) + #~ df.sort_values(by=['fid', 'date', 'count', 'tile']).reindex(columns=(list_order + [a for a in df.columns if a not in list_order]), copy=False).to_csv(str(csv_path)) + dg = df.sort_values(by=['fid', 'date', 'count', 'std'], ascending=[True, True, True, False]).\ + reindex(columns=(list_order + [a for a in df.columns if a not in list_order]), copy=False).\ + reset_index().\ + drop_duplicates(['date','fid'], keep='last') + + dg.set_index("date", inplace=True, drop=True) + dg.to_csv(str(csv_path)) logger.info("exported to csv: {}".format(csv_path)) def plot_global(self, out_path: Union[str, pathlib.PosixPath] = None) -> None: