Skip to content
Snippets Groups Projects
Commit 69d29c2e authored by pascal.mouquet_ird.fr's avatar pascal.mouquet_ird.fr
Browse files

moved monthly summary to sen2change, updated library function to get size of...

moved monthly summary to sen2change, updated library function to get size of l1c products to archive with the size=True parameter
parent 5545685d
No related branches found
No related tags found
No related merge requests found
...@@ -28,10 +28,9 @@ from .indices import IndicesCollection ...@@ -28,10 +28,9 @@ from .indices import IndicesCollection
from .download_and_process import DownloadAndProcess from .download_and_process import DownloadAndProcess
from .time_series import TimeSeries from .time_series import TimeSeries
from .automatization import Automatization from .automatization import Automatization
from .utils import format_word, grouper, datetime_to_str, str_to_datetime from .utils import format_word, grouper, datetime_to_str, str_to_datetime, human_size_decimal, human_size
from .geo_utils import serialise_tiles_index, get_processed_indices_vect from .geo_utils import serialise_tiles_index, get_processed_indices_vect
from .multi_processing import l2a_multiprocessing, cldidx_multiprocessing, cld_multiprocessing, idx_multiprocessing from .multi_processing import l2a_multiprocessing, cldidx_multiprocessing, cld_multiprocessing, idx_multiprocessing
from .monthly_summary import MonthlySummary
__version__ = "0.1.0" __version__ = "0.1.0"
__author__ = "Jérémy Commins <jebins@openmailbox.org> & Impact <pascal.mouquet@ird.fr>" __author__ = "Jérémy Commins <jebins@openmailbox.org> & Impact <pascal.mouquet@ird.fr>"
...@@ -16,7 +16,7 @@ from typing import List, Dict ...@@ -16,7 +16,7 @@ from typing import List, Dict
from .config import Config, SHARED_DATA from .config import Config, SHARED_DATA
from .products import L1cProduct from .products import L1cProduct
from .tiles import Tile from .tiles import Tile
from .utils import human_size, human_size_decimal
s2_tiles_index = SHARED_DATA.get("tiles_index") s2_tiles_index = SHARED_DATA.get("tiles_index")
...@@ -77,16 +77,23 @@ class Library: ...@@ -77,16 +77,23 @@ class Library:
def archive_l1c(self, def archive_l1c(self,
archive_list: list = [], archive_list: list = [],
size: bool = False,
): ):
if archive_list: total_size = 0
for t in archive_list: if not archive_list:
try: archive_list = self.l1c
til = Tile(t) for t in archive_list:
til.archive_l1c() try:
except: logger.info(t)
pass til = Tile(t)
else: size_tile = til.archive_l1c(size = size)
logger.info("Please specify a tile list to archive") if size:
total_size += size_tile
logger.info("Total size: {}".format(human_size_decimal(total_size)))
except:
pass
#~ else:
#~ logger.info("Please specify a tile list to archive")
def update_latest_ql(self): def update_latest_ql(self):
""" """
......
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Module to compute monthly summaries for indices
"""
import logging
import datetime
import pandas as pd
from pathlib import Path
import rasterio
import numpy as np
from .tiles import Tile
from .config import Config
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class MonthlySummary:
    """Compute cloud-cover-weighted monthly mean rasters for an indice.

    For every calendar month between ``start_date`` and ``stop_date``, the
    products of ``tile``/``indice`` are averaged pixel-wise, each product
    weighted by ``1 - cloud_cover / 100`` (clearer scenes weigh more), and the
    result is written as a GeoTIFF below the configured
    ``temporal_summaries_path``.

    :param tile: tile number, e.g. "40KCB"
    :param indice: indice name, e.g. "NDVI"
    :param start_date: first date considered, "YYYY-MM-DD" (clamped to 2015-01-01)
    :param stop_date: last date considered, "YYYY-MM-DD" (clamped to today)

    Usage:
        >>> MonthlySummary("40KCB", "ndvi", "2019-01-01", "2019-10-31")
    """

    # Sentinel pixel values used by the indice rasters.
    _NODATA_IN = 16383   # input no-data value — TODO confirm against product spec
    _NODATA_OUT = 32767  # no-data value written to the monthly summaries

    def __init__(self,
                 tile: str = None,
                 indice: str = None,
                 start_date: str = "2015-01-01",
                 stop_date: str = "2100-01-31",
                 ):
        try:
            self.tile = Tile(tile)
            self.indice = indice.lower()
            # Sentinel-2 archive starts in 2015; never go earlier, nor past today.
            self.datetime_start = max(datetime.datetime.strptime("2015-01-01", '%Y-%m-%d'),
                                      datetime.datetime.strptime(start_date, '%Y-%m-%d'))
            self.datetime_stop = min(datetime.datetime.strptime(stop_date, '%Y-%m-%d'),
                                     datetime.datetime.now())
        except (TypeError, ValueError, AttributeError):
            # Bad tile / indice / date arguments.
            logger.info("Mauvaise initialisation")
            logger.info("Usage: MonthlySummary('40KCB', 'ndvi', '2019-01-01', '2019-05-20')")
            return

        _temporal_summary_path = Path(Config().get("temporal_summaries_path"))
        logger.info(_temporal_summary_path)

        # Month boundaries: 'MS' = month starts, 'M' = month ends; the requested
        # start/stop dates are merged in so partial first/last months are kept.
        self.start_list = sorted(set(
            [self.datetime_start.strftime("%Y-%m-%d")]
            + pd.date_range(self.datetime_start, self.datetime_stop,
                            freq='MS').strftime("%Y-%m-%d").to_list()))
        self.stop_list = sorted(set(
            pd.date_range(self.datetime_start, self.datetime_stop,
                          freq='M').strftime("%Y-%m-%d").to_list()
            + [self.datetime_stop.strftime("%Y-%m-%d")]))

        for start, stop in zip(self.start_list, self.stop_list):
            self._compile_month(start, stop, _temporal_summary_path)

    def _compile_month(self, start, stop, summary_path):
        """Compute and write the weighted mean raster for one [start, stop] month."""
        process_list = getattr(self.tile, self.indice).masks.filter_dates(start, stop)
        logger.info("Compiling {} ({} products)".format(start[:-3], len(process_list)))

        count = 0
        summary_count = None   # per-pixel sum of weighted values (int32)
        coef_count = None      # per-pixel sum of weights (float64)
        prod_profile = None
        transform = None
        for product, ccover in ((p.identifier, p.cloud_cover) for p in process_list):
            count += 1
            weight = 1 - ccover / 100
            logger.info("{} - {} - {}".format(product, ccover, weight))
            product_path = (self.tile._paths['indices'][self.indice]
                            / product[:(-12 - len(self.indice))] / product)
            with rasterio.open(product_path) as prod_src:
                # Capture profile/transform while the dataset is still open.
                prod_profile = prod_src.profile
                transform = prod_src.transform
                prod = prod_src.read(1).astype(np.int16)
            # No-data pixels contribute neither value nor weight.
            weighted = np.where(prod != self._NODATA_IN, prod * weight, 0).astype(np.int32)
            coefs = np.where(prod != self._NODATA_IN, weight, 0).astype(np.float64)
            if summary_count is None:
                summary_count = weighted
                coef_count = coefs
            else:
                summary_count += weighted
                coef_count += coefs

        if count:
            logger.info("Compiled {} images".format(count))
            valid = coef_count != 0
            # Weighted mean, guarded against division by zero; pixels that were
            # never observed get the output no-data value.
            mean = np.divide(summary_count, coef_count,
                             out=np.zeros_like(coef_count), where=valid)
            prod_summary = np.where(valid, mean.astype(np.int16), self._NODATA_OUT)
            prod_profile.update(driver="GTiff",
                                compress="NONE",
                                tiled=False,
                                dtype=np.int16,
                                nodata=self._NODATA_OUT,
                                transform=transform,
                                count=1)
            # 'tiled' must not be passed twice to rasterio.open.
            prod_profile.pop('tiled', None)
            outpath_tif = (summary_path
                           / self.indice.upper()
                           / self.tile.name
                           / (self.tile.name + "_" + self.indice.upper()
                              + "_MONTHLY_" + start[:-3] + '.tif'))
            outpath_tif.parent.mkdir(parents=True, exist_ok=True)
            with rasterio.Env(GDAL_CACHEMAX=512):
                with rasterio.open(str(outpath_tif), "w", **prod_profile) as dst:
                    dst.write(prod_summary, 1)
...@@ -494,7 +494,8 @@ class Tile: ...@@ -494,7 +494,8 @@ class Tile:
nb_rm += 1 nb_rm += 1
return {"identified_problems": nb_id, "removed_problems": nb_rm} return {"identified_problems": nb_id, "removed_problems": nb_rm}
def archive_l1c(self): def archive_l1c(self,
size: bool = False,):
""" """
Check and move l1c products to archive folder Check and move l1c products to archive folder
...@@ -512,18 +513,27 @@ class Tile: ...@@ -512,18 +513,27 @@ class Tile:
"cloud_cover": self._products["l1c"][prod].cloud_cover} "cloud_cover": self._products["l1c"][prod].cloud_cover}
if prod_list: if prod_list:
count = 0 count = 0
total_size = 0
for prod in prod_list: for prod in prod_list:
l1c = L1cProduct(prod.identifier) l1c = L1cProduct(prod.identifier)
if not l1c.path.is_symlink(): if not l1c.path.is_symlink():
count += 1 count += 1
move_path = l1c_archive_path / l1c.tile / l1c.path.name if size:
logger.info("archiving {}".format(l1c.identifier)) total_size += getFolderSize(str(l1c.path))
move_path.parent.mkdir(exist_ok=True) #~ logger.info("{}: {}".format(prod, human_size(getFolderSize(str(l1c.path)))))
#~ shutil.move(str(l1c.path), str(move_path.parent)) else:
distutils.dir_util.copy_tree(str(l1c.path), str(move_path)) move_path = l1c_archive_path / l1c.tile / l1c.path.name
distutils.dir_util.remove_tree(str(l1c.path)) logger.info("archiving {}".format(l1c.identifier))
l1c.path.symlink_to(move_path, target_is_directory = True) move_path.parent.mkdir(exist_ok=True)
#~ shutil.move(str(l1c.path), str(move_path.parent))
distutils.dir_util.copy_tree(str(l1c.path), str(move_path))
distutils.dir_util.remove_tree(str(l1c.path))
l1c.path.symlink_to(move_path, target_is_directory = True)
logger.info("{} products archived".format(count)) logger.info("{} products archived".format(count))
if size:
logger.info("Total size to move: {}".format(human_size(total_size)))
return total_size
if not count: if not count:
logger.info("No L1C products to archive") logger.info("No L1C products to archive")
......
...@@ -75,7 +75,15 @@ def datetime_to_str(date: datetime, date_format: str) -> str: ...@@ -75,7 +75,15 @@ def datetime_to_str(date: datetime, date_format: str) -> str:
def human_size(bytes, units=['B','KB','MB','GB','TB', 'PB', 'EB']): def human_size(bytes, units=['B','KB','MB','GB','TB', 'PB', 'EB']):
""" Returns a human readable string reprentation of bytes""" """ Returns a human readable string reprentation of bytes"""
return str(bytes) + units[0] if bytes < 1024 else human_size(bytes>>10, units[1:]) return str(bytes) + units[0] if bytes < 1024.0 else human_size(bytes>>10, units[1:])
def human_size_decimal(size, decimal_places=2):
    """Return *size* (in bytes) as a human readable string, e.g. 1536 -> '1.50KB'.

    Divides by 1024 until the value drops below one unit step; sizes beyond
    exabytes stay expressed in 'EB'.

    :param size: number of bytes
    :param decimal_places: number of decimals in the formatted value
    """
    for unit in ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB'):
        if size < 1024.0:
            break
        size /= 1024.0
    return "{:.{dp}f}{}".format(size, unit, dp=decimal_places)
def getFolderSize(folder, follow_symlinks = False): def getFolderSize(folder, follow_symlinks = False):
total_size = os.path.getsize(folder) total_size = os.path.getsize(folder)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment