From 5109ce5fbc218d14177953bee44377f670140965 Mon Sep 17 00:00:00 2001 From: Impact <pascal.mouquet@ird.fr> Date: Fri, 27 Aug 2021 15:53:20 +0400 Subject: [PATCH] added identified problem details when cleaning --- sen2chain/library.py | 4 +++- sen2chain/tiles.py | 43 ++++++++++++++++++++++++++++++++----------- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/sen2chain/library.py b/sen2chain/library.py index 4aaaf15..c7fca10 100644 --- a/sen2chain/library.py +++ b/sen2chain/library.py @@ -72,6 +72,7 @@ class Library: """ nb_id = 0 nb_rm = 0 + prob_id = [] if not clean_list: clean_list = self.l1c for t in clean_list: @@ -80,10 +81,11 @@ class Library: counts = til.clean_lib(remove=remove, remove_indice_tif=remove_indice_tif) nb_id += counts["identified_problems"] nb_rm += counts["removed_problems"] + prob_id.append(counts["problems"]) logger.info("{} - {}".format(til, counts)) except: pass - return {"identified_problems": nb_id, "removed_problems": nb_rm} + return {"identified_problems": nb_id, "removed_problems": nb_rm, "problems": prob_id} def archive_l1c(self, archive_list: list = [], diff --git a/sen2chain/tiles.py b/sen2chain/tiles.py index 18501bc..e72cbe1 100644 --- a/sen2chain/tiles.py +++ b/sen2chain/tiles.py @@ -688,10 +688,13 @@ class Tile: # identify corrupted jp2 in l1c folder nb_id = 0 nb_rm = 0 + prob_id = [] erase_set = set() for f in self._paths["l1c"].glob("*/GRANULE/*/IMG_DATA/*.jp2"): if f.stat().st_size == 0: - logger.info("Identified 0b corrupted {} in L1C folder".format(f.name)) + txt = "Identified 0b corrupted {} in L1C folder".format(f.name) + prob_id.append(txt) + logger.info(txt) nb_id += 1 if remove: erase_set.update({f.parent.parent.parent.parent}) @@ -704,7 +707,9 @@ class Tile: logger.error("Can't remove {} from L1C folder".format(e)) # identify residual l2a from l1c folder for f in chain(self._paths["l1c"].glob("*L2A*.SAFE"), self._paths["l1c"].glob("*L2A*.tmp")): - logger.info("Identified {} in L1C folder".format(f.name)) + txt = "Identified {} in L1C folder".format(f.name) + prob_id.append(txt) + logger.info(txt) nb_id += 1 if remove: try: @@ -717,7 +722,9 @@ class Tile: for f in self._paths["l2a"].glob("*L2A*.SAFE"): # Nb jp2 < 7 if len(list(f.glob("GRANULE/*/IMG_DATA/R10m/*.jp2"))) < 7: - logger.info("Corrupted L2A {} in L2A folder (nb jp2 <7 in R10m folder)".format(f.name)) + txt = "Corrupted L2A {} in L2A folder (nb jp2 <7 in R10m folder)".format(f.name) + prob_id.append(txt) + logger.info(txt) nb_id += 1 if remove: try: @@ -729,7 +736,9 @@ class Tile: # identify 0B cloud masks for f in self._paths["cloudmasks"].glob("*/*CM*.jp2"): if f.stat().st_size == 0: - logger.info("Corrupted cloud mask {} in L2A folder".format(f.name)) + txt = "Corrupted cloud mask {} in L2A folder".format(f.name) + prob_id.append(txt) + logger.info(txt) nb_id += 1 if remove: logger.info("Removing corrupted cloud mask {} from L2A folder".format(f.name)) @@ -738,7 +747,9 @@ class Tile: # identify wrong size l2a_QL for f in self._paths["l2a"].glob("QL/*_QL.tif"): if f.stat().st_size != 3617212: - logger.info("Corrupted L2A QL {} in L2A QL folder".format(f.name)) + txt = "Corrupted L2A QL {} in L2A QL folder".format(f.name) + prob_id.append(txt) + logger.info(txt) nb_id += 1 if remove: logger.info("Removing corrupted QL {} from L2A folder".format(f.name)) @@ -750,7 +761,9 @@ class Tile: for p in self._paths["indices"][f].glob("*_MSIL2A_*/"): #~ logger.info(p) if p.is_file(): - logger.info("Identified old indice format {}".format(p.name)) + txt = "Identified old indice format {}".format(p.name) + prob_id.append(txt) + logger.info(txt) nb_id += 1 if remove: logger.info("Removing old indice format {}".format(p.name)) @@ -772,7 +785,9 @@ class Tile: img = Image.open(str(q)) # open the image file img.verify() # verify that it is, in fact an image except (IOError, SyntaxError) as e: - logger.info('Bad file (PIL): {}'.format(str(q.name))) # print out the names of corrupt files + txt = 'Bad file (PIL): {}'.format(str(q.name)) # print out the names of corrupt files + prob_id.append(txt) + logger.info(txt) nb_id += 1 if remove: logger.info("Removing indice QL {}".format(q.name)) @@ -780,7 +795,9 @@ class Tile: nb_rm += 1 for q in list(p.glob("*.jp2")): if not (Path(str(q) + ".aux.xml")).exists(): - logger.info("Missing metadata: {}".format(q.name)) + txt = "Missing metadata: {}".format(q.name) + prob_id.append(txt) + logger.info(txt) nb_id += 1 if remove: logger.info("Removing jp2 {}".format(q.name)) @@ -788,7 +805,9 @@ class Tile: nb_rm += 1 for q in p.glob("*.*"): if q.stat().st_size == 0: - logger.info("Corrupted file {} (0B size)".format(q.name)) + txt = "Corrupted file {} (0B size)".format(q.name) + prob_id.append(txt) + logger.info(txt) nb_id += 1 if remove: logger.info("Removing indice QL {}".format(q.name)) @@ -796,13 +815,15 @@ class Tile: nb_rm += 1 if remove_indice_tif: for q in p.glob("*" + f.upper() + ".tif"): - logger.info("Identified indice in tif format {}".format(q.name)) + txt = "Identified indice in tif format {}".format(q.name) + prob_id.append(txt) + logger.info(txt) nb_id += 1 if remove: logger.info("Removing indice QL {}".format(q.name)) q.unlink() nb_rm += 1 - return {"identified_problems": nb_id, "removed_problems": nb_rm} + return {"identified_problems": nb_id, "removed_problems": nb_rm, "problems": prob_id} def archive_l1c(self, size_only: bool = False, -- GitLab