Skip to content
Snippets Groups Projects
Commit 5109ce5f authored by pascal.mouquet_ird.fr
Browse files

Added details of the identified problems to the results returned when cleaning

parent 863868a8
No related branches found
No related tags found
No related merge requests found
Pipeline #48 failed
...@@ -72,6 +72,7 @@ class Library: ...@@ -72,6 +72,7 @@ class Library:
""" """
nb_id = 0 nb_id = 0
nb_rm = 0 nb_rm = 0
prob_id = []
if not clean_list: if not clean_list:
clean_list = self.l1c clean_list = self.l1c
for t in clean_list: for t in clean_list:
...@@ -80,10 +81,11 @@ class Library: ...@@ -80,10 +81,11 @@ class Library:
counts = til.clean_lib(remove=remove, remove_indice_tif=remove_indice_tif) counts = til.clean_lib(remove=remove, remove_indice_tif=remove_indice_tif)
nb_id += counts["identified_problems"] nb_id += counts["identified_problems"]
nb_rm += counts["removed_problems"] nb_rm += counts["removed_problems"]
prob_id.append(counts["problems"])
logger.info("{} - {}".format(til, counts)) logger.info("{} - {}".format(til, counts))
except: except:
pass pass
return {"identified_problems": nb_id, "removed_problems": nb_rm} return {"identified_problems": nb_id, "removed_problems": nb_rm, "problems": prob_id}
def archive_l1c(self, def archive_l1c(self,
archive_list: list = [], archive_list: list = [],
......
...@@ -688,10 +688,13 @@ class Tile: ...@@ -688,10 +688,13 @@ class Tile:
# identify corrupted jp2 in l1c folder # identify corrupted jp2 in l1c folder
nb_id = 0 nb_id = 0
nb_rm = 0 nb_rm = 0
prob_id = []
erase_set = set() erase_set = set()
for f in self._paths["l1c"].glob("*/GRANULE/*/IMG_DATA/*.jp2"): for f in self._paths["l1c"].glob("*/GRANULE/*/IMG_DATA/*.jp2"):
if f.stat().st_size == 0: if f.stat().st_size == 0:
logger.info("Identified 0b corrupted {} in L1C folder".format(f.name)) txt = "Identified 0b corrupted {} in L1C folder".format(f.name)
prob_id.append(txt)
logger.info(txt)
nb_id += 1 nb_id += 1
if remove: if remove:
erase_set.update({f.parent.parent.parent.parent}) erase_set.update({f.parent.parent.parent.parent})
...@@ -704,7 +707,9 @@ class Tile: ...@@ -704,7 +707,9 @@ class Tile:
logger.error("Can't remove {} from L1C folder".format(e)) logger.error("Can't remove {} from L1C folder".format(e))
# identify residual l2a from l1c folder # identify residual l2a from l1c folder
for f in chain(self._paths["l1c"].glob("*L2A*.SAFE"), self._paths["l1c"].glob("*L2A*.tmp")): for f in chain(self._paths["l1c"].glob("*L2A*.SAFE"), self._paths["l1c"].glob("*L2A*.tmp")):
logger.info("Identified {} in L1C folder".format(f.name)) txt = "Identified {} in L1C folder".format(f.name)
prob_id.append(txt)
logger.info(txt)
nb_id += 1 nb_id += 1
if remove: if remove:
try: try:
...@@ -717,7 +722,9 @@ class Tile: ...@@ -717,7 +722,9 @@ class Tile:
for f in self._paths["l2a"].glob("*L2A*.SAFE"): for f in self._paths["l2a"].glob("*L2A*.SAFE"):
# Nb jp2 < 7 # Nb jp2 < 7
if len(list(f.glob("GRANULE/*/IMG_DATA/R10m/*.jp2"))) < 7: if len(list(f.glob("GRANULE/*/IMG_DATA/R10m/*.jp2"))) < 7:
logger.info("Corrupted L2A {} in L2A folder (nb jp2 <7 in R10m folder)".format(f.name)) txt = "Corrupted L2A {} in L2A folder (nb jp2 <7 in R10m folder)".format(f.name)
prob_id.append(txt)
logger.info(txt)
nb_id += 1 nb_id += 1
if remove: if remove:
try: try:
...@@ -729,7 +736,9 @@ class Tile: ...@@ -729,7 +736,9 @@ class Tile:
# identify 0B cloud masks # identify 0B cloud masks
for f in self._paths["cloudmasks"].glob("*/*CM*.jp2"): for f in self._paths["cloudmasks"].glob("*/*CM*.jp2"):
if f.stat().st_size == 0: if f.stat().st_size == 0:
logger.info("Corrupted cloud mask {} in L2A folder".format(f.name)) txt = "Corrupted cloud mask {} in L2A folder".format(f.name)
prob_id.append(txt)
logger.info(txt)
nb_id += 1 nb_id += 1
if remove: if remove:
logger.info("Removing corrupted cloud mask {} from L2A folder".format(f.name)) logger.info("Removing corrupted cloud mask {} from L2A folder".format(f.name))
...@@ -738,7 +747,9 @@ class Tile: ...@@ -738,7 +747,9 @@ class Tile:
# identify wrong size l2a_QL # identify wrong size l2a_QL
for f in self._paths["l2a"].glob("QL/*_QL.tif"): for f in self._paths["l2a"].glob("QL/*_QL.tif"):
if f.stat().st_size != 3617212: if f.stat().st_size != 3617212:
logger.info("Corrupted L2A QL {} in L2A QL folder".format(f.name)) txt = "Corrupted L2A QL {} in L2A QL folder".format(f.name)
prob_id.append(txt)
logger.info(txt)
nb_id += 1 nb_id += 1
if remove: if remove:
logger.info("Removing corrupted QL {} from L2A folder".format(f.name)) logger.info("Removing corrupted QL {} from L2A folder".format(f.name))
...@@ -750,7 +761,9 @@ class Tile: ...@@ -750,7 +761,9 @@ class Tile:
for p in self._paths["indices"][f].glob("*_MSIL2A_*/"): for p in self._paths["indices"][f].glob("*_MSIL2A_*/"):
#~ logger.info(p) #~ logger.info(p)
if p.is_file(): if p.is_file():
logger.info("Identified old indice format {}".format(p.name)) txt = "Identified old indice format {}".format(p.name)
prob_id.append(txt)
logger.info(txt)
nb_id += 1 nb_id += 1
if remove: if remove:
logger.info("Removing old indice format {}".format(p.name)) logger.info("Removing old indice format {}".format(p.name))
...@@ -772,7 +785,9 @@ class Tile: ...@@ -772,7 +785,9 @@ class Tile:
img = Image.open(str(q)) # open the image file img = Image.open(str(q)) # open the image file
img.verify() # verify that it is, in fact an image img.verify() # verify that it is, in fact an image
except (IOError, SyntaxError) as e: except (IOError, SyntaxError) as e:
logger.info('Bad file (PIL): {}'.format(str(q.name))) # print out the names of corrupt files txt = 'Bad file (PIL): {}'.format(str(q.name)) # print out the names of corrupt files
prob_id.append(txt)
logger.info(txt)
nb_id += 1 nb_id += 1
if remove: if remove:
logger.info("Removing indice QL {}".format(q.name)) logger.info("Removing indice QL {}".format(q.name))
...@@ -780,7 +795,9 @@ class Tile: ...@@ -780,7 +795,9 @@ class Tile:
nb_rm += 1 nb_rm += 1
for q in list(p.glob("*.jp2")): for q in list(p.glob("*.jp2")):
if not (Path(str(q) + ".aux.xml")).exists(): if not (Path(str(q) + ".aux.xml")).exists():
logger.info("Missing metadata: {}".format(q.name)) txt = "Missing metadata: {}".format(q.name)
prob_id.append(txt)
logger.info(txt)
nb_id += 1 nb_id += 1
if remove: if remove:
logger.info("Removing jp2 {}".format(q.name)) logger.info("Removing jp2 {}".format(q.name))
...@@ -788,7 +805,9 @@ class Tile: ...@@ -788,7 +805,9 @@ class Tile:
nb_rm += 1 nb_rm += 1
for q in p.glob("*.*"): for q in p.glob("*.*"):
if q.stat().st_size == 0: if q.stat().st_size == 0:
logger.info("Corrupted file {} (0B size)".format(q.name)) txt = "Corrupted file {} (0B size)".format(q.name)
prob_id.append(txt)
logger.info(txt)
nb_id += 1 nb_id += 1
if remove: if remove:
logger.info("Removing indice QL {}".format(q.name)) logger.info("Removing indice QL {}".format(q.name))
...@@ -796,13 +815,15 @@ class Tile: ...@@ -796,13 +815,15 @@ class Tile:
nb_rm += 1 nb_rm += 1
if remove_indice_tif: if remove_indice_tif:
for q in p.glob("*" + f.upper() + ".tif"): for q in p.glob("*" + f.upper() + ".tif"):
logger.info("Identified indice in tif format {}".format(q.name)) txt = "Identified indice in tif format {}".format(q.name)
prob_id.append(txt)
logger.info(txt)
nb_id += 1 nb_id += 1
if remove: if remove:
logger.info("Removing indice QL {}".format(q.name)) logger.info("Removing indice QL {}".format(q.name))
q.unlink() q.unlink()
nb_rm += 1 nb_rm += 1
return {"identified_problems": nb_id, "removed_problems": nb_rm} return {"identified_problems": nb_id, "removed_problems": nb_rm, "problems": prob_id}
def archive_l1c(self, def archive_l1c(self,
size_only: bool = False, size_only: bool = False,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment