diff --git a/sen2chain/data_request.py b/sen2chain/data_request.py index c718d5d3a42d78c2c1280dcd046cc62cc5bb2d5f..65403749512bdcd45fcde6c4dacd342563fb557f 100644 --- a/sen2chain/data_request.py +++ b/sen2chain/data_request.py @@ -64,13 +64,6 @@ class DataRequest: land_only=True).from_tiles["40KCB", "40KEC"] """ - # Since "requester pays" was enabled on AWS for the Sentienl-2 L1C dataset - # (products are no longer free), downloading non-tiled products on AWS - # does'nt work anymore. - # Therefore, it's useless to make a complex request to separate non-tiled - # and tiled products. - # This class needs to be refactored. - # Proxy settings proxy_http_url = Config().get("proxy_http_url").strip() proxy_https_url = Config().get("proxy_https_url").strip() @@ -105,7 +98,7 @@ class DataRequest: self.land_only = land_only self.tiles_to_keep = None self.tiles_to_keep_geom = dict() - self.products_list = {"aws": {}, "hubs": {}} + self.products_list = {} self.cloudcoverpercentage = ( cloud_cover_percentage if cloud_cover_percentage else (0, 100) ) @@ -198,146 +191,18 @@ class DataRequest: return self.products_list def _make_request(self) -> None: - """Will call the right request method depending on products""" - + """Scihub API request using sentinelsat.""" logger.debug("_make_request") logger.info( - "Requesting images ranging from {} to {}".format( - self.start_date, self.end_date - ) + f"Requesting images ranging from {self.start_date} to {self.end_date}" ) if self.tiles_to_keep is None: raise ValueError("Query tiles not provided") - # reset products_list - # should the products_list be updated or erased for each new request ? - self.products_list = {"aws": {}, "hubs": {}} - - tileddate = str_to_datetime("2016-11-01", "ymd") - - if self.start_date > tileddate: - self._make_request_tiled_only() - else: - self._make_request_not_tiled() - - def _make_request_not_tiled(self) -> None: - """Scihub API request using sentinelsat. This method is called for - tiled products only.""" - - logger.debug("_make_request_not_tiled") - - print("Tile:", self.tiles_to_keep) - - products_from_hubs = dict() - products_from_aws = dict() - - # query by group of 3 tiles, otherwise getting error message - # "Request URI too long" from scihub - for tiles_to_keep_triplet, tiles_to_keep_triplet_geom in zip( - grouper(self.tiles_to_keep, 3), - grouper(self.tiles_to_keep_geom.values(), 3), - ): - - tiles_to_keep = [tile for tile in tiles_to_keep_triplet if tile] - tiles_to_keep_geom = [ - geom for geom in tiles_to_keep_triplet_geom if geom - ] - - print(tiles_to_keep) - - # build a multipolygon from tiles geom - query_geom = MultiPolygon(tiles_to_keep_geom) - logging.debug("query geometry:\n{}".format(query_geom)) - - # scihub request - products = OrderedDict() - products = self.api.query( - query_geom, - date=(self.start_date, self.end_date), - order_by="+endposition", - platformname="Sentinel-2", - producttype="S2MSI1C", - cloudcoverpercentage=self.cloudcoverpercentage, - ) - - # save products list as a pandas dataframe - products_df = self.api.to_dataframe(products) - - if products_df.empty: - return - # a products dictionnay for each server (AWS vs hubs) - # fill each dictionnary depending on the acquisition date - for index, row in products_df[ - ["title", "beginposition", "footprint"] - ].iterrows(): - - # start date of the tiled S2 collection on the scihub server - tileddate = str_to_datetime("2016-11-01", "ymd") - img_title = row[0] - img_date = row[1].to_pydatetime() - img_footprint = loads(row[2]) - - for tile_name, tile_geom in self.tiles_to_keep_geom.items(): - # in case of duplicates on the server - if ( - img_title not in self.products_list["hubs"].keys() - and img_title not in self.products_list["aws"].keys() - ): - - # tiled products are downloaded on hubs - if re.match(r".*_T[0-9]{2}[A-Z]{3}_.*", img_title): - if tile_name in img_title: - self.products_list["hubs"][img_title] = { - "date": img_date, - "tile": tile_name, - } - continue - else: - continue - - # non-tiled products will be downloaded on aws - else: - if tile_geom.intersects(img_footprint): - self.products_list["aws"][img_title] = { - "date": img_date, - "tile": tile_name, - } - - # pprint dicts in chronological order - print("\nFrom AWS") - pprint( - list( - OrderedDict( - sorted( - self.products_list["aws"].items(), - key=lambda t: t[1]["date"], - ) - ) - ) - ) - print("\nFrom hubs") - pprint( - list( - OrderedDict( - sorted( - self.products_list["hubs"].items(), - key=lambda t: t[1]["date"], - ) - ) - ) - ) - - # Tiled products request (lighter) - def _make_request_tiled_only(self) -> None: - """Scihub API request using sentinelsat. This method is called if - products are a mix of tiled and non-tiled products.""" - - logger.debug("_make_request_tiled_only") print("Sentinel2 tiles:\n", self.tiles_to_keep) - products_from_hubs = dict() - products_from_aws = dict() + products = dict() # remove water-only tiles when land-only parameter is enabled if self.land_only: @@ -361,7 +226,7 @@ class DataRequest: products = OrderedDict() for tile in self.tiles_to_keep: kw = query_kwargs.copy() - kw["filename"] = "*_T{}_*".format(tile) + kw["filename"] = f"*_T{tile}_*" pp = self.api.query(**kw) products.update(pp) @@ -370,23 +235,21 @@ class DataRequest: if products_df.empty: return - # a products dictionnay for each server (AWS vs hubs) # fill each dictionnary depending on the acquisition date for index, row in products_df[["title", "beginposition"]].iterrows(): img_title = row[0] img_date = row[1].to_pydatetime() - self.products_list["hubs"][img_title] = { + self.products_list[img_title] = { "date": img_date, "tile": re.findall("_T([0-9]{2}[A-Z]{3})_", img_title)[0], } # pprint dicts in chronological order - print("\nFrom hubs") pprint( list( OrderedDict( sorted( - self.products_list["hubs"].items(), + self.products_list.items(), key=lambda t: t[1]["date"], ) )