From f6a055a2b2e1be3d44053f0c9ba2326f703477f7 Mon Sep 17 00:00:00 2001
From: Impact <pascal.mouquet@ird.fr>
Date: Mon, 28 Oct 2019 16:50:58 +0400
Subject: [PATCH] Return identified/removed problem counts from tile and library cleaning

---
 sen2chain/library.py |  7 ++++++-
 sen2chain/tiles.py   | 25 ++++++++++++++++++++++---
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/sen2chain/library.py b/sen2chain/library.py
index 3ced982..afaf66d 100644
--- a/sen2chain/library.py
+++ b/sen2chain/library.py
@@ -60,14 +60,19 @@ class Library:
               clean_list: list = [],
               remove_indice_tif: bool = False, 
               remove: bool = False):
+        nb_id = 0
+        nb_rm = 0
         if not clean_list:
             clean_list = self.l1c
         for t in clean_list:
             try:
                 til = Tile(t)
-                til.clean_lib(remove=remove, remove_indice_tif=remove_indice_tif)
+                counts = til.clean_lib(remove=remove, remove_indice_tif=remove_indice_tif)
+                nb_id += counts["identified_problems"]
+                nb_rm += counts["removed_problems"]
             except:
                 pass
+        return {"identified_problems": nb_id, "removed_problems": nb_rm}
             
     def archive_l1c(self,
                     archive_list: list = [],
diff --git a/sen2chain/tiles.py b/sen2chain/tiles.py
index 3e6fb5c..016fa4b 100644
--- a/sen2chain/tiles.py
+++ b/sen2chain/tiles.py
@@ -347,52 +347,63 @@ class Tile:
         #~ logger.info("Cleaning {} library".format(self.name))
         
         # identify corrupted jp2 in l1c folder
+        nb_id = 0
+        nb_rm = 0
         erase_set = set()
         for f in self._paths["l1c"].glob("*/GRANULE/*/IMG_DATA/*.jp2"):
-            
             if f.stat().st_size == 0:
                 logger.info("Identified 0b corrupted {} in L1C folder".format(f.name))
+                nb_id += 1
                 if remove:
                     erase_set.update({f.parent.parent.parent.parent})
         for e in erase_set:
             try:
                 logger.info("Removing {} from L1C folder".format(e))
                 shutil.rmtree(str(e))
+                nb_rm += 1
             except:
                 logger.error("Can't remove {} from L1C folder".format(e))
         # identify residual l2a from l1c folder
         for f in self._paths["l1c"].glob("*L2A*.SAFE"):
             logger.info("Identified {} in L1C folder".format(f.name))
+            nb_id += 1
             if remove:
                 try:
                     shutil.rmtree(str(f))
                     logger.info("Removing {} from L1C folder".format(f.name))
+                    nb_rm += 1
                 except:
                     logger.error("Can't remove {} from L1C folder".format(f.name))
         # identify residual l2a from l1c folder
         for f in self._paths["l2a"].glob("*L2A*.SAFE"):
             if len(list(f.glob("GRANULE/*/IMG_DATA/R10m/*.jp2"))) == 0:
                 logger.info("Corrupted L2A {} in L2A folder (no jp2 in R10m folder)".format(f.name))
+                nb_id += 1
                 if remove:
                     try:
                         shutil.rmtree(str(f))
                         logger.info("Removing corrupted L2A {} from L2A folder".format(f.name))
+                        nb_rm += 1
                     except:
                         logger.error("Can't remove {} from L2A folder".format(f.name))
         # identify 0B cloud masks
         for f in self._paths["l2a"].glob("*L2A*_CLOUD_MASK.jp2"):
             if f.stat().st_size == 0:
                 logger.info("Corrupted cloud mask {} in L2A folder".format(f.name))
+                nb_id += 1
                 if remove:
                     logger.info("Removing corrupted cloud mask {} from L2A folder".format(f.name))
                     f.unlink()
+                    nb_rm += 1
         # identify wrong size l2a_QL
         for f in self._paths["l2a"].glob("*L2A*_QL.tif"):
             if f.stat().st_size != 3617212:
                 logger.info("Corrupted L2A QL {} in L2A folder".format(f.name))
+                nb_id += 1
                 if remove:
                     logger.info("Removing corrupted QL {} from L2A folder".format(f.name))
                     f.unlink()
+                    nb_rm += 1
         # identify 0B or absent indices QL
         for f in self._paths["indices"]:
             #~ logger.info(f, self._paths["indices"][f])
@@ -400,9 +411,11 @@ class Tile:
                 #~ logger.info(p)
                 if p.is_file():
                     logger.info("Identified old indice format {}".format(p.name))
+                    nb_id += 1
                     if remove:
                         logger.info("Removing old indice format {}".format(p.name))
                         p.unlink()
+                        nb_rm += 1
                 else:
                     #~ for q in p.glob("*_QUICKLOOK.tif"):
                         #~ if not ((q.stat().st_size == 3617212) or 
@@ -420,23 +433,29 @@ class Tile:
                             img.verify() # verify that it is, in fact an image
                         except (IOError, SyntaxError) as e:
                             logger.info('Bad file (PIL): {}'.format(str(q.name))) # print out the names of corrupt files
+                            nb_id += 1
                             if remove:
                                 logger.info("Removing indice QL {}".format(q.name))
                                 q.unlink()
+                                nb_rm += 1
                     for q in p.glob("*.*"):
                         if q.stat().st_size == 0:
                             logger.info("Corrupted file {} (0B size)".format(q.name))
+                            nb_id += 1
                             if remove:
                                 logger.info("Removing indice QL {}".format(q.name))
                                 q.unlink()
+                                nb_rm += 1
                     if remove_indice_tif:
                         for q in p.glob("*" + f.upper() + ".tif"):
                             logger.info("Identified indice in tif format {}".format(q.name))
+                            nb_id += 1
                             if remove:
                                 logger.info("Removing indice QL {}".format(q.name))
                                 q.unlink()
-                        
-    
+                                nb_rm += 1
+        return {"identified_problems": nb_id, "removed_problems": nb_rm}
+            
     def archive_l1c(self):
         """
         Check and move l1c products to archive folder
-- 
GitLab