# -*- coding: utf-8 -*-
"""
Module for automatized downloading and processing of new Sentinel-2 images.
"""
import logging
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime, timedelta
# type annotations
from typing import List, Optional
from .config import Config
from .data_request import DataRequest
from .download_and_process import DownloadAndProcess
from .time_series import TimeSeries
from .library import Library
from .tiles import Tile
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
class Automatization:
"""Automatization
"""
_csv_path = Config().tiles_to_watch
def __init__(self) -> None:
"""
"""
self._df = None
self._tiles_to_update = dict()
self._products_list = {"hubs": {}, "aws": {}}
if not self._csv_path.exists():
self._init()
else:
self._read_csv()
self._update_df()
    def _init(self):
        """Create the database from the L1C library and save it to CSV."""
self._create_df()
self._load_library()
self._save_csv()
    def _create_df(self):
        """Create an empty dataframe indexed on the tiles of the L1C library."""
self._df = pd.DataFrame(index=Library().l1c,
columns=["start_date",
"last_date",
"ignore",
"tags"])
self._df.index.name = "tile"
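    # The persisted CSV therefore has the following shape (tile names and
    # values below are hypothetical examples):
    #   tile,start_date,last_date,ignore,tags
    #   31TCJ,2020-01-01,2020-03-15,,france
    #   35MNQ,,,True,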
    def _read_csv(self):
        """Load the tiles-to-watch CSV into the dataframe."""
self._df = pd.read_csv(self._csv_path,
sep=",",
converters={"tile": str.strip,
"start_date": str.strip,
"last_date": str.strip,
"ignore": str.strip,
"tags": str.strip},
index_col="tile",
na_values="")
self._df["start_date"] = pd.to_datetime(self._df["start_date"], format="%Y-%m-%d")
self._df["last_date"] = pd.to_datetime(self._df["last_date"], format="%Y-%m-%d")
        # DataFrame.replace(..., inplace=True) was buggy on the whole frame,
        # so blank strings are replaced with NaN column by column instead.
        # Note: Series.replace must not be combined with inplace=True here,
        # since it would return None and wipe the column on assignment.
        for c in self._df.select_dtypes(include=["object"]).columns:
            self._df[c] = self._df[c].replace(r"^\s*$", np.nan, regex=True)
    def _save_csv(self):
        """Write the dataframe back to the tiles-to-watch CSV."""
logger.info("Saving database")
self._df.to_csv(str(self._csv_path))
    def _load_library(self):
        """Append to the dataframe the L1C tiles that are not watched yet."""
logger.info("Scanning L1C library")
for tile in Library().l1c:
if tile not in self._df.index:
self._df.loc[tile] = [None, None, None, None]
self._update_df()
    def _update_df(self):
        """Refresh the last product date of every non-ignored tile."""
logger.info("Updating database")
for tile in self._df.index:
if not pd.isnull(self._df.loc[tile, "ignore"]):
continue
self.get_tile_last_date(tile)
    def get_tile_last_date(self, tile):
        """Store the date of the latest L1C product available for the tile."""
self._df.loc[tile, "last_date"] = Tile(tile).l1c.last.date
    def get_tile_request_date(self, tile):
        """Return the date from which new products should be requested."""
        start_date = self._df.loc[tile, "start_date"]
        last_date = self._df.loc[tile, "last_date"]
        if pd.isnull(start_date) and pd.isnull(last_date):
            request_date = None
        elif pd.isnull(last_date):
            request_date = start_date
        elif pd.isnull(start_date):
            request_date = last_date
        else:
            request_date = max(start_date, last_date)
        # Products up to request_date are already on disk: ask for the next day.
        request_date = request_date + timedelta(days=1) if request_date else None
        return request_date
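    # For illustration, with hypothetical dates:
    #   start_date=2020-01-01, last_date=2020-03-15 -> request from 2020-03-16
    #   start_date=None,       last_date=2020-03-15 -> request from 2020-03-16
    #   start_date=None,       last_date=None       -> None (no lower bound)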
    def _get_tiles_to_update(self, tiles_list=None):
        """Compute the request date of each tile needing an update."""
        # None instead of a mutable default argument; an empty or None value
        # means "update every watched tile".
        tiles = tiles_list if tiles_list else self._df.index
        for tile in tiles:
            if not pd.isnull(self._df.loc[tile, "ignore"]):
                continue
            request_date = self.get_tile_request_date(tile)
            self._tiles_to_update[tile] = request_date
    @staticmethod
    def _five_days_since_date(date):
        """Tell whether enough time has passed since ``date`` to expect a new
        acquisition (the Sentinel-2 revisit time is five days)."""
        time_period = datetime.today() - date
        return time_period.days >= 4
    def _get_products_list(self) -> None:
        """Request the products available for each tile to update and merge
        the per-tile results into a single products list.
        """
        for tile, request_date in self._tiles_to_update.items():
            # Don't overload the server with useless requests:
            # Sentinel-2 revisit time is 5 days.
            if request_date and not Automatization._five_days_since_date(request_date):
                logger.info("Too early to check {}".format(tile))
                continue
            logger.info("Checking tile: {}".format(tile))
            request = DataRequest(start_date=request_date,
                                  end_date=None)
            request.from_tiles([tile])
            self._products_list["hubs"].update(request.products_list["hubs"])
            self._products_list["aws"].update(request.products_list["aws"])
    def get_tiles_from_tags(self, tags=()):
        """Return the set of tiles whose "tags" column contains any of ``tags``."""
        tiles_set = set()
        for tile in self._df.index:
            tile_tags = self._df.loc[tile, "tags"]
            if not pd.isnull(tile_tags):
                for tag in tags:
                    if tag in tile_tags.split():
                        tiles_set.add(tile)
        return tiles_set
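    # e.g. if a tile's "tags" cell contains "france coastal" (hypothetical
    # values), get_tiles_from_tags(("coastal",)) returns a set with that tile.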
    def _get_ignored_tiles(self):
        """Return the tiles flagged as ignored in the CSV."""
        # This needs the instance's dataframe, so it cannot be a staticmethod.
        return self._df[self._df["ignore"].notna()].index.values
    def run(self,
            tiles: Optional[List[str]] = None, process_products: bool = False,
            indices_list: Optional[List[str]] = None,
            nodata_clouds: bool = True, quicklook: bool = True) -> None:
        """Check the watched tiles for new products, then download and
        optionally process them.
        """
        logger.info("Running automatization")
        logger.info("Ignored tiles: {}".format(self._get_ignored_tiles()))
        self._get_tiles_to_update(tiles_list=tiles)
        self._get_products_list()
        if any(self._products_list.values()):
            prods = DownloadAndProcess(identifiers=self._products_list,
                                       hubs_limit={"peps": 3, "scihub": 2},
                                       aws_limit=2,
                                       process_products=process_products,
                                       max_processes=3,
                                       indices_list=indices_list or [],
                                       nodata_clouds=nodata_clouds,
                                       quicklook=quicklook)
            failed = prods.failed_products
            if failed:
                logger.warning("Failed products: {}".format(failed))
        self._update_df()
        self._save_csv()
@property
def data(self):
return self._df
@property
def products(self):
return self._products_list
class TimeSeriesAutomatization:
""" automatization traitement séries temporelles
"""
_time_series_path = Path(Config().get("time_series_path"))
def __init__(self) -> None:
"""
description
"""
self._vectors_files = list()
self._list_files()
    def _list_files(self) -> None:
        """Find the vector files (GeoJSON or ESRI shapefile) to process."""
        valid_types = ("*.geojson", "*.shp")
        for valid_type in valid_types:
            self._vectors_files.extend(self._time_series_path.glob(valid_type))
    def run(self, indices=None) -> None:
        """Compute a time series for each vector file and export it to CSV."""
        # None instead of a mutable default argument; keep NDVI as the
        # fallback index, and actually honour the ``indices`` parameter.
        if not indices:
            indices = ["NDVI"]
        for vectors_file in self._vectors_files:
            logger.info("Processing: {}".format(vectors_file.name))
            ts = TimeSeries(date_min=None,
                            date_max=None,
                            vectors_file=str(vectors_file),
                            indices=indices)
            ts.to_csv()
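# Minimal usage sketch, assuming the paths expected by Config() are set up;
# run it with ``python -m`` from the parent package. The options below are
# illustrative, not the canonical workflow.
if __name__ == "__main__":
    # Check the watched tiles, download new products and compute NDVI on them.
    Automatization().run(process_products=True, indices_list=["NDVI"])
    # Then refresh the time series of every vector file.
    TimeSeriesAutomatization().run(indices=["NDVI"])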