Add mean cloud probability (`cldprb`) to time series extractions

c4812451 · pascal.mouquet_ird.fr · 9abdbbc3 · c4812451
Commit c4812451 authored Oct 15, 2019 by pascal.mouquet_ird.fr
--- a/sen2chain/time_series.py
+++ b/sen2chain/time_series.py
@@ -148,7 +148,8 @@ class TimeSeries:
    def _get_raster_stats_in_geom(
            feature: Dict,
            raster_path: Union[str, pathlib.PosixPath],
-            cloud_path: str = None
+            cloud_path: str = None, 
+            cloud_proba_path: str = None
    ) -> Dict:
        """Extracts statistics from a raster in a geometry.
@@ -200,6 +201,10 @@ class TimeSeries:
            #~ logger.info(cld_pct)
            stats['nbcld'] = nbcld
+        if cloud_proba_path:
+            stats_cld_prb = zonal_stats(geom, str(cloud_proba_path), stats=["mean"])[0]
+            stats['cldprb'] = stats_cld_prb["mean"]
        logger.info(stats)
@@ -237,6 +242,9 @@ class TimeSeries:
                            prod_path = tile_indice_path / prod.identifier[:(-12 - len(indice))] / prod.identifier
                            #~ cloud_path = tile_obj.paths["l2a"] / (prod.identifier[:(-12 - len(indice))] + "_CLOUD_MASK.jp2")
                            prod_path_unmasked = tile_indice_path / prod.identifier[:(-12 - len(indice))] / (prod.identifier[:-11] + '.jp2')
+                            prod_path_cloud_proba = L2aProduct(prod.identifier[:(-12 - len(indice))]).msk_cldprb_20m
+                            #~ logger.info(prod_path_cloud_proba)
                            logger.info("Product {}/{}: {}".format(index1 + 1, len(products), prod.identifier))
                            logger.info("{} features".format(len(fid_list)))
@@ -248,7 +256,7 @@ class TimeSeries:
                                # feat_properties = features[fid]["properties"]
                                # if feat_properties:
                                    # df_dict.update(feat_properties)
-                                df_dict.update(TimeSeries._get_raster_stats_in_geom(features[fid], prod_path, prod_path_unmasked))
+                                df_dict.update(TimeSeries._get_raster_stats_in_geom(features[fid], prod_path, prod_path_unmasked, prod_path_cloud_proba))
                                # df_properties = features[fid]["properties"]
                                df_dict["date"] = prod.date
@@ -267,11 +275,13 @@ class TimeSeries:
    def _raster_stats_multi(self, features, shared_list, proc_item):
        prod_path = proc_item[2] / proc_item[0].identifier[:(-12 - len(proc_item[3]))] / proc_item[0].identifier
        prod_path_unmasked = proc_item[2] / proc_item[0].identifier[:(-12 - len(proc_item[3]))] / (proc_item[0].identifier[:-11] + '.jp2')
+        prod_path_cloud_proba = L2aProduct(proc_item[0].identifier[:(-12 - len(proc_item[3]))]).msk_cldprb_20m
+        #~ logger.info(prod_path_cloud_proba)
        fid = proc_item[1]
        result_dict = OrderedDict()
        result_dict["fid"] = fid
-        result_dict.update(TimeSeries._get_raster_stats_in_geom(features[fid], prod_path, prod_path_unmasked))
+        result_dict.update(TimeSeries._get_raster_stats_in_geom(features[fid], prod_path, prod_path_unmasked, prod_path_cloud_proba))
        result_dict["date"] = proc_item[0].date
        result_dict["tile"] = proc_item[4]
        result_dict["filename"] = proc_item[0].identifier
@@ -373,7 +383,7 @@ class TimeSeries:
        out_path_folder = Path(out_path) / self._vectors_file.stem
        out_path_folder.mkdir(parents=True, exist_ok=True)
-        list_order = ['fid', 'tile', 'filename', 'count', 'nodata', 'nbcld', 'min', 'max', 'mean', 'std','median', 'percentile_25', 'percentile_75']
+        list_order = ['fid', 'tile', 'filename', 'count', 'nodata', 'nbcld', 'cldprb', 'min', 'max', 'mean', 'std','median', 'percentile_25', 'percentile_75']
        #~ b=[a for a in df.columns if a not in liste]
        for df_name, df in self._df_dicts.items():