Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Sen2Chain
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ESPACE-DEV
Sen2Chain
Commits
db6e6966
Commit
db6e6966
authored
2 years ago
by
Jérémy Commins
Browse files
Options
Downloads
Patches
Plain Diff
Remove AWS support from data_request
parent
802edbfb
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
sen2chain/data_request.py
+7
-144
7 additions, 144 deletions
sen2chain/data_request.py
with
7 additions
and
144 deletions
sen2chain/data_request.py
+
7
−
144
View file @
db6e6966
...
@@ -64,13 +64,6 @@ class DataRequest:
...
@@ -64,13 +64,6 @@ class DataRequest:
land_only=True).from_tiles[
"
40
KCB
"
,
"
40
KEC
"
]
land_only=True).from_tiles[
"
40
KCB
"
,
"
40
KEC
"
]
"""
"""
# Since "requester pays" was enabled on AWS for the Sentinel-2 L1C dataset
# (products are no longer free), downloading non-tiled products on AWS
# doesn't work anymore.
# Therefore, it's useless to make a complex request to separate non-tiled
# and tiled products.
# This class needs to be refactored.
# Proxy settings
# Proxy settings
proxy_http_url = Config().get(
"
proxy_http_url
"
).strip()
proxy_http_url = Config().get(
"
proxy_http_url
"
).strip()
proxy_https_url = Config().get(
"
proxy_https_url
"
).strip()
proxy_https_url = Config().get(
"
proxy_https_url
"
).strip()
...
@@ -105,7 +98,7 @@ class DataRequest:
...
@@ -105,7 +98,7 @@ class DataRequest:
self.land_only = land_only
self.land_only = land_only
self.tiles_to_keep = None
self.tiles_to_keep = None
self.tiles_to_keep_geom = dict()
self.tiles_to_keep_geom = dict()
self.products_list = {
"
aws
"
: {},
"
hubs
"
: {}
}
self.products_list = {}
self.cloudcoverpercentage = (
self.cloudcoverpercentage = (
cloud_cover_percentage if cloud_cover_percentage else (0, 100)
cloud_cover_percentage if cloud_cover_percentage else (0, 100)
)
)
...
@@ -198,146 +191,18 @@ class DataRequest:
...
@@ -198,146 +191,18 @@ class DataRequest:
return self.products_list
return self.products_list
def _make_request(self) -> None:
def _make_request(self) -> None:
"""
Will call the right request method depending on products
"""
"""
Scihub API request using sentinelsat.
"""
logger.debug(
"
_make_request
"
)
logger.debug(
"
_make_request
"
)
logger.info(
logger.info(
"
Requesting images ranging from {} to {}
"
.format(
f
"
Requesting images ranging from {self.start_date} to {self.end_date}
"
self.start_date, self.end_date
)
)
)
if self.tiles_to_keep is None:
if self.tiles_to_keep is None:
raise ValueError(
"
Query tiles not provided
"
)
raise ValueError(
"
Query tiles not provided
"
)
# reset products_list
# should the products_list be updated or erased for each new request ?
self.products_list = {
"
aws
"
: {},
"
hubs
"
: {}}
tileddate = str_to_datetime(
"
2016-11-01
"
,
"
ymd
"
)
if self.start_date > tileddate:
self._make_request_tiled_only()
else:
self._make_request_not_tiled()
def _make_request_not_tiled(self) -> None:
"""
Scihub API request using sentinelsat. This method is called for
tiled products only.
"""
logger.debug(
"
_make_request_not_tiled
"
)
print(
"
Tile:
"
, self.tiles_to_keep)
products_from_hubs = dict()
products_from_aws = dict()
# query by group of 3 tiles, otherwise getting error message
#
"
Request URI too long
"
from scihub
for tiles_to_keep_triplet, tiles_to_keep_triplet_geom in zip(
grouper(self.tiles_to_keep, 3),
grouper(self.tiles_to_keep_geom.values(), 3),
):
tiles_to_keep = [tile for tile in tiles_to_keep_triplet if tile]
tiles_to_keep_geom = [
geom for geom in tiles_to_keep_triplet_geom if geom
]
print(tiles_to_keep)
# build a multipolygon from tiles geom
query_geom = MultiPolygon(tiles_to_keep_geom)
logging.debug(
"
query geometry:
\n
{}
"
.format(query_geom))
# scihub request
products = OrderedDict()
products = self.api.query(
query_geom,
date=(self.start_date, self.end_date),
order_by=
"
+endposition
"
,
platformname=
"
Sentinel-2
"
,
producttype=
"
S2MSI1C
"
,
cloudcoverpercentage=self.cloudcoverpercentage,
)
# save products list as a pandas dataframe
products_df = self.api.to_dataframe(products)
if products_df.empty:
return
# a products dictionnay for each server (AWS vs hubs)
# fill each dictionnary depending on the acquisition date
for index, row in products_df[
[
"
title
"
,
"
beginposition
"
,
"
footprint
"
]
].iterrows():
# start date of the tiled S2 collection on the scihub server
tileddate = str_to_datetime(
"
2016-11-01
"
,
"
ymd
"
)
img_title = row[0]
img_date = row[1].to_pydatetime()
img_footprint = loads(row[2])
for tile_name, tile_geom in self.tiles_to_keep_geom.items():
# in case of duplicates on the server
if (
img_title not in self.products_list[
"
hubs
"
].keys()
and img_title not in self.products_list[
"
aws
"
].keys()
):
# tiled products are downloaded on hubs
if re.match(r
"
.*_T[0-9]{2}[A-Z]{3}_.*
"
, img_title):
if tile_name in img_title:
self.products_list[
"
hubs
"
][img_title] = {
"
date
"
: img_date,
"
tile
"
: tile_name,
}
continue
else:
continue
# non-tiled products will be downloaded on aws
else:
if tile_geom.intersects(img_footprint):
self.products_list[
"
aws
"
][img_title] = {
"
date
"
: img_date,
"
tile
"
: tile_name,
}
# pprint dicts in chronological order
print(
"
\n
From AWS
"
)
pprint(
list(
OrderedDict(
sorted(
self.products_list[
"
aws
"
].items(),
key=lambda t: t[1][
"
date
"
],
)
)
)
)
print(
"
\n
From hubs
"
)
pprint(
list(
OrderedDict(
sorted(
self.products_list[
"
hubs
"
].items(),
key=lambda t: t[1][
"
date
"
],
)
)
)
)
# Tiled products request (lighter)
def _make_request_tiled_only(self) -> None:
"""
Scihub API request using sentinelsat. This method is called if
products are a mix of tiled and non-tiled products.
"""
logger.debug(
"
_make_request_tiled_only
"
)
print(
"
Sentinel2 tiles:
\n
"
, self.tiles_to_keep)
print(
"
Sentinel2 tiles:
\n
"
, self.tiles_to_keep)
products_from_hubs = dict()
products = dict()
products_from_aws = dict()
# remove water-only tiles when land-only parameter is enabled
# remove water-only tiles when land-only parameter is enabled
if self.land_only:
if self.land_only:
...
@@ -361,7 +226,7 @@ class DataRequest:
...
@@ -361,7 +226,7 @@ class DataRequest:
products = OrderedDict()
products = OrderedDict()
for tile in self.tiles_to_keep:
for tile in self.tiles_to_keep:
kw = query_kwargs.copy()
kw = query_kwargs.copy()
kw[
"
filename
"
] =
"
*_T{}_*
"
.format(tile)
kw[
"
filename
"
] =
f
"
*_T{
tile
}_*
"
pp = self.api.query(**kw)
pp = self.api.query(**kw)
products.update(pp)
products.update(pp)
...
@@ -370,23 +235,21 @@ class DataRequest:
...
@@ -370,23 +235,21 @@ class DataRequest:
if products_df.empty:
if products_df.empty:
return
return
# a products dictionary for each server (AWS vs hubs)
# fill each dictionary depending on the acquisition date
# fill each dictionary depending on the acquisition date
for index, row in products_df[[
"
title
"
,
"
beginposition
"
]].iterrows():
for index, row in products_df[[
"
title
"
,
"
beginposition
"
]].iterrows():
img_title = row[0]
img_title = row[0]
img_date = row[1].to_pydatetime()
img_date = row[1].to_pydatetime()
self.products_list[
"
hubs
"
][
img_title] = {
self.products_list[img_title] = {
"
date
"
: img_date,
"
date
"
: img_date,
"
tile
"
: re.findall(
"
_T([0-9]{2}[A-Z]{3})_
"
, img_title)[0],
"
tile
"
: re.findall(
"
_T([0-9]{2}[A-Z]{3})_
"
, img_title)[0],
}
}
# pprint dicts in chronological order
# pprint dicts in chronological order
print(
"
\n
From hubs
"
)
pprint(
pprint(
list(
list(
OrderedDict(
OrderedDict(
sorted(
sorted(
self.products_list
[
"
hubs
"
]
.items(),
self.products_list.items(),
key=lambda t: t[1][
"
date
"
],
key=lambda t: t[1][
"
date
"
],
)
)
)
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment