Skip to content
Snippets Groups Projects
Commit db6e6966 authored by Jérémy Commins's avatar Jérémy Commins
Browse files

Remove AWS support from data_request

parent 802edbfb
No related branches found
No related tags found
No related merge requests found
...@@ -64,13 +64,6 @@ class DataRequest: ...@@ -64,13 +64,6 @@ class DataRequest:
land_only=True).from_tiles["40KCB", "40KEC"] land_only=True).from_tiles["40KCB", "40KEC"]
""" """
# Since "requester pays" was enabled on AWS for the Sentinel-2 L1C dataset
# (products are no longer free), downloading non-tiled products on AWS
# doesn't work anymore.
# Therefore, it's useless to make a complex request to separate non-tiled
# and tiled products.
# This class needs to be refactored.
# Proxy settings # Proxy settings
proxy_http_url = Config().get("proxy_http_url").strip() proxy_http_url = Config().get("proxy_http_url").strip()
proxy_https_url = Config().get("proxy_https_url").strip() proxy_https_url = Config().get("proxy_https_url").strip()
...@@ -105,7 +98,7 @@ class DataRequest: ...@@ -105,7 +98,7 @@ class DataRequest:
self.land_only = land_only self.land_only = land_only
self.tiles_to_keep = None self.tiles_to_keep = None
self.tiles_to_keep_geom = dict() self.tiles_to_keep_geom = dict()
self.products_list = {"aws": {}, "hubs": {}} self.products_list = {}
self.cloudcoverpercentage = ( self.cloudcoverpercentage = (
cloud_cover_percentage if cloud_cover_percentage else (0, 100) cloud_cover_percentage if cloud_cover_percentage else (0, 100)
) )
...@@ -198,146 +191,18 @@ class DataRequest: ...@@ -198,146 +191,18 @@ class DataRequest:
return self.products_list return self.products_list
def _make_request(self) -> None: def _make_request(self) -> None:
"""Will call the right request method depending on products""" """Scihub API request using sentinelsat."""
logger.debug("_make_request") logger.debug("_make_request")
logger.info( logger.info(
"Requesting images ranging from {} to {}".format( f"Requesting images ranging from {self.start_date} to {self.end_date}"
self.start_date, self.end_date
)
) )
if self.tiles_to_keep is None: if self.tiles_to_keep is None:
raise ValueError("Query tiles not provided") raise ValueError("Query tiles not provided")
# reset products_list
# should the products_list be updated or erased for each new request ?
self.products_list = {"aws": {}, "hubs": {}}
tileddate = str_to_datetime("2016-11-01", "ymd")
if self.start_date > tileddate:
self._make_request_tiled_only()
else:
self._make_request_not_tiled()
def _make_request_not_tiled(self) -> None:
"""Scihub API request using sentinelsat. This method is called for
tiled products only."""
logger.debug("_make_request_not_tiled")
print("Tile:", self.tiles_to_keep)
products_from_hubs = dict()
products_from_aws = dict()
# query by group of 3 tiles, otherwise getting error message
# "Request URI too long" from scihub
for tiles_to_keep_triplet, tiles_to_keep_triplet_geom in zip(
grouper(self.tiles_to_keep, 3),
grouper(self.tiles_to_keep_geom.values(), 3),
):
tiles_to_keep = [tile for tile in tiles_to_keep_triplet if tile]
tiles_to_keep_geom = [
geom for geom in tiles_to_keep_triplet_geom if geom
]
print(tiles_to_keep)
# build a multipolygon from tiles geom
query_geom = MultiPolygon(tiles_to_keep_geom)
logging.debug("query geometry:\n{}".format(query_geom))
# scihub request
products = OrderedDict()
products = self.api.query(
query_geom,
date=(self.start_date, self.end_date),
order_by="+endposition",
platformname="Sentinel-2",
producttype="S2MSI1C",
cloudcoverpercentage=self.cloudcoverpercentage,
)
# save products list as a pandas dataframe
products_df = self.api.to_dataframe(products)
if products_df.empty:
return
# a products dictionnay for each server (AWS vs hubs)
# fill each dictionnary depending on the acquisition date
for index, row in products_df[
["title", "beginposition", "footprint"]
].iterrows():
# start date of the tiled S2 collection on the scihub server
tileddate = str_to_datetime("2016-11-01", "ymd")
img_title = row[0]
img_date = row[1].to_pydatetime()
img_footprint = loads(row[2])
for tile_name, tile_geom in self.tiles_to_keep_geom.items():
# in case of duplicates on the server
if (
img_title not in self.products_list["hubs"].keys()
and img_title not in self.products_list["aws"].keys()
):
# tiled products are downloaded on hubs
if re.match(r".*_T[0-9]{2}[A-Z]{3}_.*", img_title):
if tile_name in img_title:
self.products_list["hubs"][img_title] = {
"date": img_date,
"tile": tile_name,
}
continue
else:
continue
# non-tiled products will be downloaded on aws
else:
if tile_geom.intersects(img_footprint):
self.products_list["aws"][img_title] = {
"date": img_date,
"tile": tile_name,
}
# pprint dicts in chronological order
print("\nFrom AWS")
pprint(
list(
OrderedDict(
sorted(
self.products_list["aws"].items(),
key=lambda t: t[1]["date"],
)
)
)
)
print("\nFrom hubs")
pprint(
list(
OrderedDict(
sorted(
self.products_list["hubs"].items(),
key=lambda t: t[1]["date"],
)
)
)
)
# Tiled products request (lighter)
def _make_request_tiled_only(self) -> None:
"""Scihub API request using sentinelsat. This method is called if
products are a mix of tiled and non-tiled products."""
logger.debug("_make_request_tiled_only")
print("Sentinel2 tiles:\n", self.tiles_to_keep) print("Sentinel2 tiles:\n", self.tiles_to_keep)
products_from_hubs = dict() products = dict()
products_from_aws = dict()
# remove water-only tiles when land-only parameter is enabled # remove water-only tiles when land-only parameter is enabled
if self.land_only: if self.land_only:
...@@ -361,7 +226,7 @@ class DataRequest: ...@@ -361,7 +226,7 @@ class DataRequest:
products = OrderedDict() products = OrderedDict()
for tile in self.tiles_to_keep: for tile in self.tiles_to_keep:
kw = query_kwargs.copy() kw = query_kwargs.copy()
kw["filename"] = "*_T{}_*".format(tile) kw["filename"] = f"*_T{tile}_*"
pp = self.api.query(**kw) pp = self.api.query(**kw)
products.update(pp) products.update(pp)
...@@ -370,23 +235,21 @@ class DataRequest: ...@@ -370,23 +235,21 @@ class DataRequest:
if products_df.empty: if products_df.empty:
return return
# a products dictionary for each server (AWS vs hubs)
# fill each dictionary depending on the acquisition date # fill each dictionary depending on the acquisition date
for index, row in products_df[["title", "beginposition"]].iterrows(): for index, row in products_df[["title", "beginposition"]].iterrows():
img_title = row[0] img_title = row[0]
img_date = row[1].to_pydatetime() img_date = row[1].to_pydatetime()
self.products_list["hubs"][img_title] = { self.products_list[img_title] = {
"date": img_date, "date": img_date,
"tile": re.findall("_T([0-9]{2}[A-Z]{3})_", img_title)[0], "tile": re.findall("_T([0-9]{2}[A-Z]{3})_", img_title)[0],
} }
# pprint dicts in chronological order # pprint dicts in chronological order
print("\nFrom hubs")
pprint( pprint(
list( list(
OrderedDict( OrderedDict(
sorted( sorted(
self.products_list["hubs"].items(), self.products_list.items(),
key=lambda t: t[1]["date"], key=lambda t: t[1]["date"],
) )
) )
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment