diff --git a/sen2chain/download_eodag.py b/sen2chain/download_eodag.py index 8df64992f2ac27eb4ba6305431b6014ea0446be7..0a05c95eea1146aabf2d644897983f6bf49ae8d9 100644 --- a/sen2chain/download_eodag.py +++ b/sen2chain/download_eodag.py @@ -12,8 +12,13 @@ import os from pathlib import Path from eodag import EODataAccessGateway from eodag import setup_logging -from .config import SHARED_DATA, Config +from queue import Queue +from eodag.utils.logging import get_logging_verbose +from threading import Thread +import multiprocessing +from .config import SHARED_DATA, Config +from .utils import get_tile logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -66,6 +71,7 @@ class S2cEodag: productType: str = "L1C", start: str = "2015-01-01", end: str = "9999-12-31", + ref: str = "l1c", ): ######### ici faudrait virer le self.productType, qui ne doit pas être global pour le download... @@ -90,19 +96,28 @@ class S2cEodag: ) logging.disable(logging.NOTSET) - fitered = self.products + fitered = self.products[:] for p in fitered: if self.name not in p.properties["title"]: self.products.remove(p) - logger.info("removing wrong tiles from search items: {}".format(p.properties["title"])) - - for p in self.products: - if (outputs_prefix / (p.properties["title"] + ".SAFE")).exists(): - p.location = "file://" + str(outputs_prefix / (p.properties["title"] + ".SAFE")) - logger.info("{} - remote {} - local PRESENT".format(p.properties["title"], p.properties["storageStatus"])) - else: - logger.info("{} - remote {} - local ABSENT".format(p.properties["title"], p.properties["storageStatus"])) - logger.info("Search returned {} products".format(len(self.products))) ####### rajouter ici "- dont xx ABSENT - dont xx ONLINE / xx STAGING" + logger.info("{} - wrong Tile - filtering".format(p.properties["title"])) + + local = self.products[:] + for p in local: + if ref == "l1c": + if (outputs_prefix / (p.properties["title"] + ".SAFE")).exists(): + # p.location = "file://" + str(outputs_prefix / (p.properties["title"] + ".SAFE")) + logger.info("{} - remote {} - local l1c PRESENT - filtering".format(p.properties["title"], p.properties["storageStatus"])) + self.products.remove(p) + else: + logger.info("{} - remote {} - local l1c ABSENT".format(p.properties["title"], p.properties["storageStatus"])) + elif ref == "l2a": + if (Path(Config().get("l2a_path")) / self.name / (p.properties["title"].replace("L1C_", "L2A_").replace("__OPER__", "_USER_") + ".SAFE")).exists(): + logger.info("{} - remote {} - local l2a PRESENT - filtering".format(p.properties["title"], p.properties["storageStatus"])) + self.products.remove(p) + else: + logger.info("{} - remote {} - local l2a ABSENT".format(p.properties["title"], p.properties["storageStatus"])) + logger.info("Search returned {} new product(s) to download".format(len(self.products))) ####### rajouter ici "- dont xx ABSENT - dont xx ONLINE / xx STAGING" def download( self, @@ -154,8 +169,135 @@ class S2cEodag: timeout=0, ) except: - logger.info("OFFLINE product, ordered, retry later") + logger.info("remote OFFLINE, ordered") elif product_id.properties["storageStatus"] == "STAGING": - logger.info("STAGING product, retry later") + logger.info("remote STAGING, retry later") + + def order_offline( + self, + product_id, + ): + setup_logging(verbose = 0) + if product_id.properties["storageStatus"] == "OFFLINE": + try: + downloaded_path = self.dag.download( + product_id, + wait=1, + timeout=0, + ) + except: + logger.info("{} - remote OFFLINE, ordered".format(product_id.properties["title"])) + setup_logging(verbose = 2) + +def S2cEodag_download_uniq( + product, + outputs_prefix: str = None, + extract: bool = True, + delete_archive: bool = True, +): + tile_name = get_tile(product.properties['title']) + if not outputs_prefix: + if "L1C" in product.properties['title']: + root_path = "l1c_path" + elif "L2A" in product.properties['title']: + root_path = "l2a_path" + outputs_prefix = str(Path(Config().get(root_path)) / tile_name) + + setup_logging(verbose = 2) + + logging.disable(level=logging.WARNING) + dag = EODataAccessGateway() + logging.disable(logging.NOTSET) + + if product.properties["storageStatus"] == "ONLINE": + try: + # logging.disable(level=logging.WARNING) + downloaded_path = dag.download( + product, + outputs_prefix = outputs_prefix, + extract = extract, + delete_archive = delete_archive, + wait=1, + timeout=0, + ) + # logging.disable(logging.NOTSET) + + if Path(downloaded_path).stem == Path(downloaded_path).parent.stem: + upper_location = Path(downloaded_path).parent.parent / Path(downloaded_path).name + Path(downloaded_path).replace(upper_location) + product.location = "file://" + str(upper_location) + Path(downloaded_path).parent.rmdir() + logger.info("Moving up SAVE file") + except: + logger.info("Error: ONLINE product but cannot be downloaded, retry later") + + elif product.properties["storageStatus"] == "OFFLINE": + try: + setup_logging(verbose = 0) + downloaded_path = dag.download( + product, + outputs_prefix = outputs_prefix, + extract = extract, + delete_archive = delete_archive, + wait=1, + timeout=0, + ) + except: + logger.info("OFFLINE product, ordered, retry later") + + elif product.properties["storageStatus"] == "STAGING": + logger.info("STAGING product, retry later") + + +def S2cEodag_download( + download_list: list = None, + dl_mode: str = "multit", + outputs_prefix: str = None, + extract: bool = True, + delete_archive: bool = True, +): + + ## sequential + if dl_mode == "seq": + for product in download_list: + S2cEodag_download_uniq( + product, + outputs_prefix = outputs_prefix, + extract = extract, + delete_archive = delete_archive, + ) + + ## multithreading + elif dl_mode == "multit": + NUM_THREADS = 8 + q = Queue() + def do_stuff(q): + while True: + S2cEodag_download_uniq(q.get()) + q.task_done() + for product in download_list: + q.put(product) + for t in range(NUM_THREADS): + worker = Thread(target = do_stuff, args = (q,)) + worker.daemon = True + worker.start() + q.join() + + ## multiprocessing + elif dl_mode == "multip": + nb_proc = 8 + nb_proc = max(min(len(os.sched_getaffinity(0)) - 1, nb_proc), 1) + pool = multiprocessing.Pool(nb_proc) + results = [pool.map(S2cEodag_download_uniq, download_list)] + pool.close() + pool.join() + + + + + + + + diff --git a/sen2chain/jobs.py b/sen2chain/jobs.py index 020203e33eaadb997258033d55d0bfbf0d786404..662ffbe0079ebec89efde2456df8b12beff4dfdd 100644 --- a/sen2chain/jobs.py +++ b/sen2chain/jobs.py @@ -23,6 +23,11 @@ from .tiles import Tile from .utils import datetime_to_str from .multi_processing import ( l2a_multiprocessing, + idx_multiprocessing, +) +from .download_eodag import ( + S2cEodag_download, + S2cEodag_download_uniq, ) logging.basicConfig(level=logging.INFO) @@ -394,7 +399,7 @@ class Job: if not self.tasks.empty: - # Nettoyage + ## Cleaning before if clean_before: logger.info("Cleaning Tiles") clean_list = [] @@ -406,25 +411,49 @@ class Job: ) # lib.clean(clean_list, remove=False) - # Telechargement des L1C - logger.info("Downloading l1c") + ## L1C download - each tile sequential + # download_list= [] + # logger.info("Downloading l1c seq") + # for index, row in self.tasks.iterrows(): + # if bool(setuptools.distutils.util.strtobool(str(row.l1c))): + # t = Tile(row.tile) + # logger.info("Tile: {}".format(t.name)) + # downloaded = t.get_l1c( + # provider = "peps", + # download = True, + # dl_mode = "multit", + # start = row.date_min, + # end = row.date_max, + # new = False, + # ) + # logger.info("Downloaded list: {}".format(downloaded)) + # download_list.extend(downloaded) + # logger.info("download_list: {}".format(download_list)) + + ## L1C download - all tiles multi + download_list= [] + logger.info("Searching l1c products") for index, row in self.tasks.iterrows(): if bool(setuptools.distutils.util.strtobool(str(row.l1c))): t = Tile(row.tile) logger.info("Tile: {}".format(t.name)) - downloaded = t.get_l1c( + tile_download_list = t.get_l1c( provider = "peps", - download = True, - dl_mode = "multit", + download = False, start = row.date_min, end = row.date_max, new = False, ) - logger.info("Downloaded list: {}".format(downloaded)) - - # todo - # keep list of downloaded l1c products + download_list.extend(tile_download_list) + ### download + before_list = [p.location for p in download_list] + # S2cEodag_download(download_list, "seq") + S2cEodag_download(download_list, "multit") + # S2cEodag_download(download_list, "multip") + after_list = [p.location for p in download_list] + downloaded_products = [Path(after_list[i]).stem for i in range(len(before_list)) if before_list[i] != after_list[i]] + logger.info("Downloaded product(s): {}".format(downloaded_products)) # Traitement des L1C en L2A logger.info("Computing l2a") @@ -463,10 +492,13 @@ class Job: + "\n".join(l1c_process_list) + "\n" ) - - # Remove L1C - # todo - # remove list of downloaded l1c products + + # Remove downloaded L1C + for index, row in self.tasks.iterrows(): + if "l1c" in str(row.remove).lower(): + t = Tile(row.tile) + t.remove_l1c([p for p in downloaded_products if row.tile in p]) + # Traitement des L2A (clouds) logger.info("Computing cloudmasks") @@ -583,7 +615,8 @@ class Job: # todo - # Nettoyage + + # Cleaning after if clean_after: logger.info("Cleaning Tiles") clean_list = [] diff --git a/sen2chain/tiles.py b/sen2chain/tiles.py index 89db6b4236da8599050dd0460f8147defd17231e..6b1bcfad19c5e3eb0d85aa4f8d9007b066bfa3e0 100644 --- a/sen2chain/tiles.py +++ b/sen2chain/tiles.py @@ -17,10 +17,9 @@ from pathlib import Path from collections import namedtuple from datetime import datetime, timedelta from pprint import pformat -# import multiprocessing +import multiprocessing from queue import Queue from threading import Thread -# import threading # type annotations from typing import List, Dict, Iterable @@ -730,16 +729,38 @@ class Tile: start: str = "2015-01-01", end: str = "9999-12-31", new: bool = False, + ref: str = "l1c", + order_offline: bool = False, ): dag = S2cEodag(self.name, provider = provider) if new: - start = datetime_to_str(self.l1c.last.date + timedelta(days=1), date_format = 'ymd') - dag.search(start = start, end = end) + if self.l1c: + start = datetime_to_str(self.l1c.last.date + timedelta(days=1), date_format = 'ymd') + dag.search(start = start, end = end, ref = ref) - before_list = [p.location for p in dag.products] - - if download: + ############## virer ici de la liste des téléchargements les produits qui ont déjà des l2a ou indices (pour ne pas retélécharger des produits l1c à chaque fois) + # if ref == "l2a": + # new_list = [p for p in dag.products if p.properties["title"].replace("L1C_", "L2A_").replace("__OPER__", "_USER_") + ".SAFE" not in [l2a.identifier for l2a in self.l2a]] + # logger.info("new list: {}".format(new_list)) + # local = dag.products[:] + # l2a_identifiers = [Path(prod.identifier).stem for prod in self.l2a] + # logger.info("l2a_identifiers: {}".format(l2a_identifiers)) + + # for p in local: + # l2a_corresp = p.properties["title"].replace("L1C_", "L2A_").replace("__OPER__", "_USER_") + # logger.info("l2a_corresp: {}".format(l2a_corresp)) + # if l2a_corresp in l2a_identifiers: + # logger.info("found l2a {} removing l1c {} from download list".format(l2a_corresp, p.properties["title"])) + # dag.products.remove(p) + + # elif ref == "ndvi": + # toto = 13 + ############# + + if download: + before_list = [p.location for p in dag.products] + ## sequential if dl_mode == "seq": for p in dag.products: @@ -769,11 +790,28 @@ class Tile: results = [pool.map(dag.download, dag.products)] pool.close() pool.join() - - - after_list = [p.location for p in dag.products] - return [Path(after_list[i]).stem for i in range(len(before_list)) if before_list[i] != after_list[i]] + after_list = [p.location for p in dag.products] + + return [Path(after_list[i]).stem for i in range(len(before_list)) if before_list[i] != after_list[i]] + elif order_offline: + # for p in dag.products: + # if p.properties["storageStatus"] == "OFFLINE": + # dag.order_offline(p) + NUM_THREADS = 8 + q = Queue() + def do_stuff(q, dag): + while True: + dag.order_offline(q.get()) + q.task_done() + for p in dag.products: + q.put(p) + for t in range(NUM_THREADS): + worker = Thread(target=do_stuff, args = (q, dag)) + worker.daemon = True + worker.start() + q.join() + return dag.products def compute_l2a( self, @@ -1507,11 +1545,12 @@ class Tile: def remove_l1c( self, product_list: list = [], + entire: bool = False, ): """ Remove l1c files """ - if not product_list: + if entire: product_list = [product.identifier for product in self.l1c] for identifier in product_list: l1c = L1cProduct(identifier) @@ -1522,11 +1561,12 @@ class Tile: def remove_l2a( self, product_list: list = [], + entire: bool = False, ): """ Remove l2a files """ - if not product_list: + if entire: product_list = [product.identifier for product in self.l2a] for identifier in product_list: l2a = L2aProduct(identifier) diff --git a/sen2chain/utils.py b/sen2chain/utils.py index f54d141f0878968241779dd395e8a02448aa4e1a..12d0f6ca785b0f5421cf312927c4cbfccb2be2b6 100644 --- a/sen2chain/utils.py +++ b/sen2chain/utils.py @@ -144,3 +144,11 @@ def get_current_Sen2Cor_version(): ), None, ) + + +def get_tile(identifier) -> str: + """Returns tile name from a string. + + :param string: string from which to extract the tile name. + """ + return re.findall("_T([0-9]{2}[A-Z]{3})_", identifier)[0]