# -*- coding: UTF-8 -*-
# Python
"""
03-10-2022 modified 04-07-2023
@author: jeremy auclair
Download S2 data pre-modspa
"""
import os # for path exploration
import shutil # for file management
from eodag import setup_logging # module that downloads S2 data
from eodag import EODataAccessGateway # module that downloads S2 data
import geopandas as gpd # to read shapefile
from typing import List # to declare variables
import csv # for loading and saving path results in csv format
import zipfile as zp # to open zip archives
from tqdm import tqdm # to print progress bars during code execution
from fnmatch import fnmatch # for character string comparison
# (stray "[docs]" Sphinx source-link marker from the documentation export; not code)
def download_S2_data(start_date: str, end_date: str, preferred_provider: str, save_path: str, shapefile: str = None, cloud_cover_limit: int = 80) -> List[str]:
    """
    download_S2_data uses the eodag module to look for all products of a given provider
    (copernicus or theia) during a specific time window and covering the whole shapefile
    envelope (several Sentinel-2 tiles might be needed). Products below the cloud cover
    limit are then downloaded (archives are not extracted) into the download location
    configured for eodag. Paths to the downloaded data are returned and saved as a
    ``csv`` file, one path per row.

    Arguments
    =========

    1. start_date: ``str``
        beginning of the time window to download (format: ``YYYY-MM-DD``)
    2. end_date: ``str``
        end of the time window to download (format: ``YYYY-MM-DD``)
    3. preferred_provider: ``str``
        chosen source of the Sentinel-2 data (``copernicus`` or ``theia``).
        Any value other than ``theia`` selects the ``scihub`` eodag provider.
    4. save_path: ``str``
        path where a csv file containing the product paths will be saved
    5. shapefile: ``str``
        path to the shapefile (``.shp``) for which the data is downloaded.
        Required in practice: the ``None`` default is rejected.
    6. cloud_cover_limit: ``int`` ``default = 80``
        maximum percentage to pass the filter before download (between 0 and 100),
        products must be strictly below this value

    Returns
    =======

    1. product_paths: ``list[str]``
        a sorted list of the paths to the downloaded data, or ``None`` if the
        search failed or no product matched the search criteria

    Raises
    ======

    1. ValueError
        if no shapefile path is given
    """

    # The search geometry is derived from the shapefile. Without it the function
    # used to crash deep inside geopandas, so fail early with an explicit message
    # before any logging or gateway setup happens.
    if shapefile is None:
        raise ValueError('A shapefile path is required to define the area to download')

    setup_logging(2)  # 3 for even more information
    dag = EODataAccessGateway()

    # Open shapefile containing geometry
    geopandas_shape = gpd.read_file(shapefile)
    geopandas_shape = geopandas_shape.to_crs(epsg='4326')  # Force WGS84 projection
    bounds = geopandas_shape.geometry.total_bounds  # In WGS84 projection

    # Select product type based on preferred provider
    if preferred_provider == 'theia':
        product_type = 'S2_MSI_L2A_MAJA'
        dag.set_preferred_provider('theia')
    else:
        # NOTE(review): every non-'theia' value ends up here; confirm that 'scihub'
        # is still a valid provider name for the installed eodag version.
        product_type = 'S2_MSI_L2A'
        dag.set_preferred_provider('scihub')

    # Create a search criteria to feed into the eodag search_all method
    search_criteria = {
        'productType': product_type,
        'start': start_date,
        'end': end_date,
        'geom': list(bounds),
    }

    # Try to search all products corresponding to the search criteria. If a type error
    # occurs it means there is an error in the search criteria parameters
    try:
        all_products = dag.search_all(**search_criteria)
    except TypeError:
        print('Something went wrong during the product search, check your inputs')
        return None

    # An empty search result means no product matches the search criteria
    if not all_products:
        print('No products matching your search criteria were found')
        return None

    # Filter out products that have more clouds than desired (strictly lower than the limit)
    products_to_download = all_products.filter_property(cloudCover=cloud_cover_limit, operator='lt')
    product_paths = dag.download_all(products_to_download, extract=False)  # No archive extraction
    product_paths.sort()

    # Save list of paths as a csv file for later use (one path per row)
    with open(save_path, 'w', newline='') as f:
        csv.writer(f).writerows([product] for product in product_paths)

    return product_paths