Skip to content
Snippets Groups Projects
Commit db6e6966 authored by Jérémy Commins's avatar Jérémy Commins
Browse files

Remove AWS support from data_request

parent 802edbfb
No related branches found
No related tags found
No related merge requests found
...@@ -64,13 +64,6 @@ class DataRequest: ...@@ -64,13 +64,6 @@ class DataRequest:
land_only=True).from_tiles["40KCB", "40KEC"] land_only=True).from_tiles["40KCB", "40KEC"]
""" """
# Since "requester pays" was enabled on AWS for the Sentinel-2 L1C dataset
# (products are no longer free), downloading non-tiled products on AWS
# doesn't work anymore.
# Therefore, it's useless to make a complex request to separate non-tiled
# and tiled products.
# This class needs to be refactored.
# Proxy settings # Proxy settings
proxy_http_url = Config().get("proxy_http_url").strip() proxy_http_url = Config().get("proxy_http_url").strip()
proxy_https_url = Config().get("proxy_https_url").strip() proxy_https_url = Config().get("proxy_https_url").strip()
...@@ -105,7 +98,7 @@ class DataRequest: ...@@ -105,7 +98,7 @@ class DataRequest:
self.land_only = land_only self.land_only = land_only
self.tiles_to_keep = None self.tiles_to_keep = None
self.tiles_to_keep_geom = dict() self.tiles_to_keep_geom = dict()
self.products_list = {"aws": {}, "hubs": {}} self.products_list = {}
self.cloudcoverpercentage = ( self.cloudcoverpercentage = (
cloud_cover_percentage if cloud_cover_percentage else (0, 100) cloud_cover_percentage if cloud_cover_percentage else (0, 100)
) )
...@@ -198,146 +191,18 @@ class DataRequest: ...@@ -198,146 +191,18 @@ class DataRequest:
return self.products_list return self.products_list
def _make_request(self) -> None: def _make_request(self) -> None:
"""Will call the right request method depending on products""" """Scihub API request using sentinelsat."""
logger.debug("_make_request") logger.debug("_make_request")
logger.info( logger.info(
"Requesting images ranging from {} to {}".format( f"Requesting images ranging from {self.start_date} to {self.end_date}"
self.start_date, self.end_date
)
) )
if self.tiles_to_keep is None: if self.tiles_to_keep is None:
raise ValueError("Query tiles not provided") raise ValueError("Query tiles not provided")
# reset products_list
# should the products_list be updated or erased for each new request ?
self.products_list = {"aws": {}, "hubs": {}}
tileddate = str_to_datetime("2016-11-01", "ymd")
if self.start_date > tileddate:
self._make_request_tiled_only()
else:
self._make_request_not_tiled()
def _make_request_not_tiled(self) -> None:
"""Scihub API request using sentinelsat. This method is called for
tiled products only."""
logger.debug("_make_request_not_tiled")
print("Tile:", self.tiles_to_keep)
products_from_hubs = dict()
products_from_aws = dict()
# query by group of 3 tiles, otherwise getting error message
# "Request URI too long" from scihub
for tiles_to_keep_triplet, tiles_to_keep_triplet_geom in zip(
grouper(self.tiles_to_keep, 3),
grouper(self.tiles_to_keep_geom.values(), 3),
):
tiles_to_keep = [tile for tile in tiles_to_keep_triplet if tile]
tiles_to_keep_geom = [
geom for geom in tiles_to_keep_triplet_geom if geom
]
print(tiles_to_keep)
# build a multipolygon from tiles geom
query_geom = MultiPolygon(tiles_to_keep_geom)
logging.debug("query geometry:\n{}".format(query_geom))
# scihub request
products = OrderedDict()
products = self.api.query(
query_geom,
date=(self.start_date, self.end_date),
order_by="+endposition",
platformname="Sentinel-2",
producttype="S2MSI1C",
cloudcoverpercentage=self.cloudcoverpercentage,
)
# save products list as a pandas dataframe
products_df = self.api.to_dataframe(products)
if products_df.empty:
return
# a products dictionnay for each server (AWS vs hubs)
# fill each dictionnary depending on the acquisition date
for index, row in products_df[
["title", "beginposition", "footprint"]
].iterrows():
# start date of the tiled S2 collection on the scihub server
tileddate = str_to_datetime("2016-11-01", "ymd")
img_title = row[0]
img_date = row[1].to_pydatetime()
img_footprint = loads(row[2])
for tile_name, tile_geom in self.tiles_to_keep_geom.items():
# in case of duplicates on the server
if (
img_title not in self.products_list["hubs"].keys()
and img_title not in self.products_list["aws"].keys()
):
# tiled products are downloaded on hubs
if re.match(r".*_T[0-9]{2}[A-Z]{3}_.*", img_title):
if tile_name in img_title:
self.products_list["hubs"][img_title] = {
"date": img_date,
"tile": tile_name,
}
continue
else:
continue
# non-tiled products will be downloaded on aws
else:
if tile_geom.intersects(img_footprint):
self.products_list["aws"][img_title] = {
"date": img_date,
"tile": tile_name,
}
# pprint dicts in chronological order
print("\nFrom AWS")
pprint(
list(
OrderedDict(
sorted(
self.products_list["aws"].items(),
key=lambda t: t[1]["date"],
)
)
)
)
print("\nFrom hubs")
pprint(
list(
OrderedDict(
sorted(
self.products_list["hubs"].items(),
key=lambda t: t[1]["date"],
)
)
)
)
# Tiled products request (lighter)
def _make_request_tiled_only(self) -> None:
"""Scihub API request using sentinelsat. This method is called if
products are a mix of tiled and non-tiled products."""
logger.debug("_make_request_tiled_only")
print("Sentinel2 tiles:\n", self.tiles_to_keep) print("Sentinel2 tiles:\n", self.tiles_to_keep)
products_from_hubs = dict() products = dict()
products_from_aws = dict()
# remove water-only tiles when land-only parameter is enabled # remove water-only tiles when land-only parameter is enabled
if self.land_only: if self.land_only:
...@@ -361,7 +226,7 @@ class DataRequest: ...@@ -361,7 +226,7 @@ class DataRequest:
products = OrderedDict() products = OrderedDict()
for tile in self.tiles_to_keep: for tile in self.tiles_to_keep:
kw = query_kwargs.copy() kw = query_kwargs.copy()
kw["filename"] = "*_T{}_*".format(tile) kw["filename"] = f"*_T{tile}_*"
pp = self.api.query(**kw) pp = self.api.query(**kw)
products.update(pp) products.update(pp)
...@@ -370,23 +235,21 @@ class DataRequest: ...@@ -370,23 +235,21 @@ class DataRequest:
if products_df.empty: if products_df.empty:
return return
# a products dictionary for each server (AWS vs hubs)
# fill each dictionary depending on the acquisition date # fill each dictionary depending on the acquisition date
for index, row in products_df[["title", "beginposition"]].iterrows(): for index, row in products_df[["title", "beginposition"]].iterrows():
img_title = row[0] img_title = row[0]
img_date = row[1].to_pydatetime() img_date = row[1].to_pydatetime()
self.products_list["hubs"][img_title] = { self.products_list[img_title] = {
"date": img_date, "date": img_date,
"tile": re.findall("_T([0-9]{2}[A-Z]{3})_", img_title)[0], "tile": re.findall("_T([0-9]{2}[A-Z]{3})_", img_title)[0],
} }
# pprint dicts in chronological order # pprint dicts in chronological order
print("\nFrom hubs")
pprint( pprint(
list( list(
OrderedDict( OrderedDict(
sorted( sorted(
self.products_list["hubs"].items(), self.products_list.items(),
key=lambda t: t[1]["date"], key=lambda t: t[1]["date"],
) )
) )
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment