From 9a4d7dd6271b6816a6ce546aac37d81ebd783184 Mon Sep 17 00:00:00 2001
From: Impact <pascal.mouquet@ird.fr>
Date: Tue, 17 Sep 2019 14:46:00 +0400
Subject: [PATCH] library cleaning and time series improvements

---
 sen2chain/library.py     | 16 +++++++++-
 sen2chain/tiles.py       | 23 ++++++++++----
 sen2chain/time_series.py | 38 ++++++++++++++++++-----
 3 files changed, 66 insertions(+), 11 deletions(-)

diff --git a/sen2chain/library.py b/sen2chain/library.py
index 22ec776..84f59f1 100644
--- a/sen2chain/library.py
+++ b/sen2chain/library.py
@@ -57,7 +57,7 @@ class Library:
         return self._indices

     def clean(self,
-              clean_list : list = [],
+              clean_list: list = [],
               remove: bool = False):
         if not clean_list:
             clean_list = self.l1c
@@ -67,6 +67,20 @@ class Library:
                 til.clean_lib(remove=remove)
             except:
                 pass
+
+    def archive_l1c(self,
+                    archive_list: list = [],
+                    ):
+        if archive_list:
+            for t in archive_list:
+                try:
+                    til = Tile(t)
+                    til.archive_l1c()
+                except Exception:
+                    pass
+        else:
+            logger.info("Please specify a tile list to archive")
+

 class TempContainer:

diff --git a/sen2chain/tiles.py b/sen2chain/tiles.py
index 04cae7a..509f7b6 100644
--- a/sen2chain/tiles.py
+++ b/sen2chain/tiles.py
@@ -343,6 +343,21 @@ class Tile:
         - indices error (0kb)
         """
         #~ logger.info("Cleaning {} library".format(self.name))
+
+        # identify corrupted (0-byte) jp2 files in the L1C folder
+        erase_set = set()
+        for f in self._paths["l1c"].glob("*/GRANULE/*/IMG_DATA/*.jp2"):
+            if f.stat().st_size == 0:
+                logger.info("Identified 0-byte corrupted {} in L1C folder".format(f.name))
+                if remove:
+                    # mark the whole .SAFE product for removal
+                    erase_set.add(f.parents[3])
+        for e in erase_set:
+            try:
+                logger.info("Removing {} from L1C folder".format(e))
+                shutil.rmtree(str(e))
+            except OSError:
+                logger.error("Can't remove {} from L1C folder".format(e))
         # identify residual l2a from l1c folder
         for f in self._paths["l1c"].glob("*L2A*.SAFE"):
             logger.info("Identified {} in L1C folder".format(f.name))
@@ -378,8 +393,9 @@
                     f.unlink()
         # identify 0B or absent indices QL
         for f in self._paths["indices"]:
-            #~ print(f, self._paths["indices"][f])
+            #~ logger.info(f, self._paths["indices"][f])
             for p in self._paths["indices"][f].glob("*_MSIL2A_*/"):
+                #~ logger.info(p)
                 if p.is_file():
                     logger.info("Identified old indice format {}".format(p.name))
                     if remove:
@@ -395,8 +411,7 @@
                             logger.info("Removing indice QL {}".format(q.name))
                             q.unlink()

-    def archive_l1c(self,
-                    move: bool = False):
+    def archive_l1c(self):
         """
         Check and move l1c products to archive folder

@@ -406,8 +421,6 @@
         """
         logger.info("Scanning L1C ready to move")
         prod_list = ProductsList()
-        #~ archive_l1c_set = set(self.l1c.products) - {identifier.replace("L2A_", "L1C_").replace("_USER_", "__OPER__")
-        #~ for identifier in self.l2a.products}
         archive_l1c_set = {a for a in {identifier.replace("L2A_", "L1C_").replace("_USER_", "__OPER__")
                                        for identifier in self.l2a.products} if a in set(self.l1c.products)}
diff --git a/sen2chain/time_series.py b/sen2chain/time_series.py
index 56bdc57..5f5e14f 100644
--- a/sen2chain/time_series.py
+++ b/sen2chain/time_series.py
@@ -146,7 +146,8 @@ class TimeSeries:
     @staticmethod
     def _get_raster_stats_in_geom(
             feature: Dict,
-            raster_path: Union[str, pathlib.PosixPath]
+            raster_path: Union[str, pathlib.PosixPath],
+            cloud_path: Union[str, pathlib.PosixPath] = None
     ) -> Dict:
         """Extracts statistics from a raster in a geometry.

@@ -164,6 +165,20 @@
                                                            "median",
                                                            "mean",
                                                            "std"])[0]
+        #~ logger.info(stats)
+
+        if cloud_path:
+            # callers pass the unmasked indice raster here: pixels that are
+            # nodata in the cloud-masked raster but valid in the unmasked
+            # one were masked out as clouds
+            stats_cld = zonal_stats(geom, str(cloud_path), stats=["count", "nodata"])[0]
+            try:
+                nbcld = stats["nodata"] - stats_cld["nodata"]
+            except (KeyError, TypeError):
+                nbcld = 0
+            stats['nbcld'] = nbcld
+
+        logger.info(stats)
         return stats

     def _get_stats(self) -> None:
@@ -190,6 +205,9 @@

         for index1, prod in enumerate(products):
             prod_path = tile_indice_path / prod.identifier[:(-12 - len(indice))] / prod.identifier
+            #~ cloud_path = tile_obj.paths["l2a"] / (prod.identifier[:(-12 - len(indice))] + "_CLOUD_MASK.jp2")
+            prod_path_unmasked = tile_indice_path / prod.identifier[:(-12 - len(indice))] / (prod.identifier[:-11] + '.jp2')
+
             logger.info("Product {}/{}: {}".format(index1 + 1, len(products), prod.identifier))
             logger.info("{} features".format(len(fid_list)))

@@ -200,7 +218,7 @@
             # feat_properties = features[fid]["properties"]
             # if feat_properties:
             #     df_dict.update(feat_properties)
-            df_dict.update(TimeSeries._get_raster_stats_in_geom(features[fid], prod_path))
+            df_dict.update(TimeSeries._get_raster_stats_in_geom(features[fid], prod_path, prod_path_unmasked))

             # df_properties = features[fid]["properties"]
             df_dict["date"] = prod.date
@@ -218,10 +236,12 @@

     def _raster_stats_multi(self, features, shared_list, proc_item):
         prod_path = proc_item[2] / proc_item[0].identifier[:(-12 - len(proc_item[3]))] / proc_item[0].identifier
+        prod_path_unmasked = proc_item[2] / proc_item[0].identifier[:(-12 - len(proc_item[3]))] / (proc_item[0].identifier[:-11] + '.jp2')
+
         fid = proc_item[1]
         result_dict = OrderedDict()
         result_dict["fid"] = fid
-        result_dict.update(TimeSeries._get_raster_stats_in_geom(features[fid], prod_path))
+        result_dict.update(TimeSeries._get_raster_stats_in_geom(features[fid], prod_path, prod_path_unmasked))
         result_dict["date"] = proc_item[0].date
         result_dict["tile"] = proc_item[4]
         result_dict["filename"] = proc_item[0].identifier
@@ -323,7 +343,7 @@
         out_path_folder = Path(out_path) / self._vectors_file.stem
         out_path_folder.mkdir(parents=True, exist_ok=True)

-        list_order = ['fid', 'count', 'nodata', 'min', 'max', 'mean', 'median', 'std', 'tile', 'filename']
+        list_order = ['fid', 'count', 'nodata', 'nbcld', 'min', 'max', 'mean', 'median', 'std', 'tile', 'filename']
         #~ b=[a for a in df.columns if a not in liste]

         for df_name, df in self._df_dicts.items():
@@ -332,7 +352,15 @@
                 df_name,
                 self._date_min,
                 self._date_max)
-            df.sort_values(by=['fid']).reindex(columns=(list_order + [a for a in df.columns if a not in list_order]), copy=False).to_csv(str(csv_path))
+            # keep one row per (date, fid) pair: prefer the acquisition with
+            # the highest valid pixel count, breaking ties on the lowest std
+            dg = (df.sort_values(by=['fid', 'date', 'count', 'std'],
+                                 ascending=[True, True, True, False])
+                  .reindex(columns=(list_order + [a for a in df.columns if a not in list_order]), copy=False)
+                  .reset_index()
+                  .drop_duplicates(['date', 'fid'], keep='last'))
+            dg.set_index("date", inplace=True, drop=True)
+            dg.to_csv(str(csv_path))
             logger.info("exported to csv: {}".format(csv_path))

     def plot_global(self, out_path: Union[str, pathlib.PosixPath] = None) -> None:
-- 
GitLab
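
A minimal usage sketch of the entry points this patch adds or touches, assuming `Library` and `Tile` are importable from the sen2chain package root (the tile names are illustrative, not taken from the patch):

    from sen2chain import Library, Tile

    lib = Library()
    # library-wide clean: 0-byte jp2 files and residual L2A products in the
    # L1C folders are logged, and removed when remove=True
    lib.clean(remove=True)
    # move L1C products that already have an L2A counterpart to the archive
    lib.archive_l1c(archive_list=["40KCB", "38LQK"])

    # per-tile equivalents of the two calls above
    tile = Tile("40KCB")
    tile.clean_lib(remove=True)
    tile.archive_l1c()

With the time series changes, CSVs exported by TimeSeries carry an nbcld column (the number of cloud-masked pixels per feature, derived from the nodata difference between the masked and unmasked indice rasters) and keep a single row per (date, fid) pair.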