From f6a055a2b2e1be3d44053f0c9ba2326f703477f7 Mon Sep 17 00:00:00 2001 From: Impact <pascal.mouquet@ird.fr> Date: Mon, 28 Oct 2019 16:50:58 +0400 Subject: [PATCH] add counts for tile and library cleaning --- sen2chain/library.py | 7 ++++++- sen2chain/tiles.py | 25 ++++++++++++++++++++++--- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/sen2chain/library.py b/sen2chain/library.py index 3ced982..afaf66d 100644 --- a/sen2chain/library.py +++ b/sen2chain/library.py @@ -60,14 +60,19 @@ class Library: clean_list: list = [], remove_indice_tif: bool = False, remove: bool = False): + nb_id = 0 + nb_rm = 0 if not clean_list: clean_list = self.l1c for t in clean_list: try: til = Tile(t) - til.clean_lib(remove=remove, remove_indice_tif=remove_indice_tif) + counts = til.clean_lib(remove=remove, remove_indice_tif=remove_indice_tif) + nb_id += counts["identified_problems"] + nb_rm += counts["removed_problems"] except: pass + return {"identified_problems": nb_id, "removed_problems": nb_rm} def archive_l1c(self, archive_list: list = [], diff --git a/sen2chain/tiles.py b/sen2chain/tiles.py index 3e6fb5c..016fa4b 100644 --- a/sen2chain/tiles.py +++ b/sen2chain/tiles.py @@ -347,52 +347,63 @@ class Tile: #~ logger.info("Cleaning {} library".format(self.name)) # identify corrupted jp2 in l1c folder + nb_id = 0 + nb_rm = 0 erase_set = set() for f in self._paths["l1c"].glob("*/GRANULE/*/IMG_DATA/*.jp2"): - if f.stat().st_size == 0: logger.info("Identified 0b corrupted {} in L1C folder".format(f.name)) + nb_id += 1 if remove: erase_set.update({f.parent.parent.parent.parent}) for e in erase_set: try: logger.info("Removing {} from L1C folder".format(e)) shutil.rmtree(str(e)) + nb_rm += 1 except: logger.error("Can't remove {} from L1C folder".format(e)) # identify residual l2a from l1c folder for f in self._paths["l1c"].glob("*L2A*.SAFE"): logger.info("Identified {} in L1C folder".format(f.name)) + nb_id += 1 if remove: try: shutil.rmtree(str(f)) logger.info("Removing {} from L1C folder".format(f.name)) + nb_rm += 1 except: logger.error("Can't remove {} from L1C folder".format(f.name)) # identify residual l2a from l1c folder for f in self._paths["l2a"].glob("*L2A*.SAFE"): if len(list(f.glob("GRANULE/*/IMG_DATA/R10m/*.jp2"))) == 0: logger.info("Corrupted L2A {} in L2A folder (no jp2 in R10m folder)".format(f.name)) + nb_id += 1 if remove: try: shutil.rmtree(str(f)) logger.info("Removing corrupted L2A {} from L2A folder".format(f.name)) + nb_rm += 1 except: logger.error("Can't remove {} from L2A folder".format(f.name)) # identify 0B cloud masks for f in self._paths["l2a"].glob("*L2A*_CLOUD_MASK.jp2"): if f.stat().st_size == 0: logger.info("Corrupted cloud mask {} in L2A folder".format(f.name)) + nb_id += 1 if remove: logger.info("Removing corrupted cloud mask {} from L2A folder".format(f.name)) f.unlink() + nb_rm += 1 # identify wrong size l2a_QL for f in self._paths["l2a"].glob("*L2A*_QL.tif"): if f.stat().st_size != 3617212: logger.info("Corrupted L2A QL {} in L2A folder".format(f.name)) + nb_id += 1 if remove: logger.info("Removing corrupted QL {} from L2A folder".format(f.name)) f.unlink() + nb_rm += 1 # identify 0B or absent indices QL for f in self._paths["indices"]: #~ logger.info(f, self._paths["indices"][f]) @@ -400,9 +411,11 @@ class Tile: #~ logger.info(p) if p.is_file(): logger.info("Identified old indice format {}".format(p.name)) + nb_id += 1 if remove: logger.info("Removing old indice format {}".format(p.name)) p.unlink() + nb_rm += 1 else: #~ for q in p.glob("*_QUICKLOOK.tif"): #~ if not ((q.stat().st_size == 3617212) or @@ -420,23 +433,29 @@ class Tile: img.verify() # verify that it is, in fact an image except (IOError, SyntaxError) as e: logger.info('Bad file (PIL): {}'.format(str(q.name))) # print out the names of corrupt files + nb_id += 1 if remove: logger.info("Removing indice QL {}".format(q.name)) q.unlink() + nb_rm += 1 for q in p.glob("*.*"): if q.stat().st_size == 0: logger.info("Corrupted file {} (0B size)".format(q.name)) + nb_id += 1 if remove: logger.info("Removing indice QL {}".format(q.name)) q.unlink() + nb_rm += 1 if remove_indice_tif: for q in p.glob("*" + f.upper() + ".tif"): logger.info("Identified indice in tif format {}".format(q.name)) + nb_id += 1 if remove: logger.info("Removing indice QL {}".format(q.name)) q.unlink() - - + nb_rm += 1 + return {"identified_problems": nb_id, "removed_problems": nb_rm} + def archive_l1c(self): """ Check and move l1c products to archive folder -- GitLab