diff --git a/docs/source/installation.rst b/docs/source/installation.rst
index 487761e1eb719f779618f92e128c42057eef0e52..26d559d91ca47117ac44730a46d29e7feec15e13 100644
--- a/docs/source/installation.rst
+++ b/docs/source/installation.rst
@@ -46,7 +46,8 @@ Install sen2chain from sources
 Configuration
 -------------
 
-The configuration file is located at: ``~/sen2chain_data/config/config.cfg``
+The configuration file is located at: ``~/sen2chain_data/config/config.cfg``.
+This file (and the corresponding path) is only created after the first sen2chain import in Python.
 
 Configure Sentinel-2 images library
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/examples/scripts/download_and_process.py b/examples/scripts/download_and_process.py
new file mode 100644
index 0000000000000000000000000000000000000000..e748bc11364a1d102afa984a8a93c9ccbf816dc5
--- /dev/null
+++ b/examples/scripts/download_and_process.py
@@ -0,0 +1,23 @@
+# -*- coding:utf-8 -*-
+
+#~ Download and process Sentinel-2 images
+#~ More examples and details on the sen2chain wiki: https://framagit.org/jebins/sen2chain/wikis/Home
+
+
+from sen2chain import DataRequest, DownloadAndProcess
+
+#~ List of tiles to be processed (Reunion Island example)
+
+tiles = ["40KCB"]
+
+#~ Time period (here the request covers August 2016)
+
+req = DataRequest(start_date="2016-08-01", end_date="2016-08-31").from_tiles(tiles)
+
+#~ The images can be downloaded in parallel from the Scihub and PEPS servers.
+#~ The number of concurrent downloads can't exceed 2 on Scihub and 8 on PEPS.
+#~ If process_products is True, several products can be computed:
+#~ a list of indices (available: NDVI/NDWIGAO/NDWIMCF/BIGR/BIRNIR/BIBG),
+#~ plus nodata_clouds (True/False) and quicklook (True/False)
+
+DownloadAndProcess(req, hubs_limit={"peps": 1, "scihub": 1}, process_products=True, indices_list=["NDVI"], nodata_clouds=False, quicklook=False)
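For reference, a minimal variant of the call above with the processing options from the comments enabled (same ``DataRequest``/``DownloadAndProcess`` API as in the script; tile, dates and indices are illustrative)::

    from sen2chain import DataRequest, DownloadAndProcess

    req = DataRequest(start_date="2016-08-01", end_date="2016-08-31").from_tiles(["40KCB"])
    # compute two indices, mask clouds/nodata and produce quicklooks
    DownloadAndProcess(req,
                       hubs_limit={"peps": 1, "scihub": 1},
                       process_products=True,
                       indices_list=["NDVI", "NDWIGAO"],
                       nodata_clouds=True,
                       quicklook=True)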
diff --git a/examples/scripts/download_tiles.csv b/examples/scripts/download_tiles.csv
new file mode 100644
index 0000000000000000000000000000000000000000..5e580ac763afc22808b090378bb06dca2fb5c7f3
--- /dev/null
+++ b/examples/scripts/download_tiles.csv
@@ -0,0 +1,4 @@
+tile;start_time;end_time;max_clouds;site;nom
+
+40KCB;2019-01-01;;;France;Reunion
+#37KFR;;;;TAAF_Eparses;Europa
diff --git a/examples/scripts/download_tiles.py b/examples/scripts/download_tiles.py
new file mode 100644
index 0000000000000000000000000000000000000000..647578699f37a1a06b9274392b8aacb7ffd3fc7b
--- /dev/null
+++ b/examples/scripts/download_tiles.py
@@ -0,0 +1,51 @@
+# -*- coding:utf-8 -*-
+
+"""
+    Download L1C products from a tile and date list provided by a csv file
+
+    The peps and scihub hub limits can be adjusted in the DownloadAndProcess call below:
+    - usually PEPS is faster for recent data (limit max 8)
+    - scihub is better for products more than a few months old (limit max 2)
+
+"""
+
+import logging
+import pandas as pd
+from sen2chain import DataRequest, DownloadAndProcess, Tile
+import datetime
+import os
+import time
+import math
+
+logger = logging.getLogger("Downloading L1C")
+logging.basicConfig(level=logging.INFO)
+
+fwd = os.path.dirname(os.path.realpath(__file__))
+
+# Path to csv process list
+tiles_file = fwd + "/download_tiles.csv"
+
+# Default number of days to check before today,
+# used if the date field is empty in the csv file
+delta_t = 15
+
+tiles_list = pd.read_csv(tiles_file, sep=';', na_values="", comment='#')
+
+for index, row in tiles_list.iterrows():
+    try:
+        if math.isnan(row.start_time):
+            row.start_time = (datetime.datetime.now() - datetime.timedelta(days=delta_t)).strftime('%Y-%m-%d')
+    except TypeError:
+        pass
+    try:
+        if math.isnan(row.end_time):
+            row.end_time = (datetime.datetime.now() + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
+    except TypeError:
+        pass
+
+    tuile = Tile(row.tile)
+    req = DataRequest(start_date=row.start_time, end_date=row.end_time).from_tiles([row.tile])
+    DownloadAndProcess(req, hubs_limit={"peps": 8, "scihub": 0}, process_products=False, indices_list=[], nodata_clouds=False, quicklook=False, max_processes=8)
+    time.sleep(1)
+    tuile = Tile(row.tile)
+
diff --git a/examples/scripts/download_tiles_by_identifier.csv b/examples/scripts/download_tiles_by_identifier.csv
new file mode 100644
index 0000000000000000000000000000000000000000..da40f5f2d2b7175a6b9a38c6cf32c5d90c0c0958
--- /dev/null
+++ b/examples/scripts/download_tiles_by_identifier.csv
@@ -0,0 +1,5 @@
+pre1_pre2_starttime_pre3_pre4_tile_pre5_endtime_maxclouds_site_nom
+# add below your identifier list
+S2A_MSIL1C_20161003T070232_N0204_R120_T39LUC_20161003T070230
+#~ S2A_MSIL1C_20161108T072212_N0204_R063_T38KLA_20161108T072535
+
diff --git a/examples/scripts/download_tiles_by_identifier.py b/examples/scripts/download_tiles_by_identifier.py
new file mode 100644
index 0000000000000000000000000000000000000000..58315a91af0545aab9f83d21c10e4c0e2836b5bf
--- /dev/null
+++ b/examples/scripts/download_tiles_by_identifier.py
@@ -0,0 +1,38 @@
+# -*- coding:utf-8 -*-
+
+"""
+    Download L1C products by identifier, as provided in a csv file
+
+    The peps and scihub hub limits can be adjusted in the DownloadAndProcess call below:
+    - usually PEPS is faster for recent data (limit max 8)
+    - scihub is better for products more than a few months old (limit max 2)
+
+"""
+
+import logging
+import pandas as pd
+from sen2chain import DataRequest, DownloadAndProcess, Tile
+import datetime
+import os
+import time
+
+logger = logging.getLogger("L1C Downloading")
+logging.basicConfig(level=logging.INFO)
+
+fwd = os.path.dirname(os.path.realpath(__file__))
+
+# Tile process list
+tiles_file = fwd + "/download_tiles_by_identifier.csv"
+tiles_list = pd.read_csv(tiles_file, sep='_', na_values="", comment='#')
+
+for index, row in tiles_list.iterrows():
+    row.starttime = datetime.datetime.strptime(row.starttime, '%Y%m%dT%H%M%S').strftime('%Y-%m-%d')
+    row.endtime = (datetime.datetime.strptime(row.starttime, '%Y-%m-%d') + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
+    row.tile = row.tile[1:]
+
+    tuile = Tile(row.tile)
+    req = DataRequest(start_date=row.starttime, end_date=row.endtime).from_tiles([row.tile])
+    DownloadAndProcess(req, hubs_limit={"peps": 0, "scihub": 2}, process_products=False, indices_list=[], nodata_clouds=False, quicklook=False, max_processes=8)
+    time.sleep(1)
+    tuile = Tile(row.tile)
+
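The identifier csv files above have no real delimiter: reading them with ``sep='_'`` splits each product identifier into the columns named in the header (``pre1``, ``starttime``, ``tile``, ...). A minimal standalone illustration of that trick (not part of the scripts)::

    import pandas as pd
    from io import StringIO

    header = "pre1_pre2_starttime_pre3_pre4_tile_pre5_endtime_maxclouds_site_nom"
    ident = "S2A_MSIL1C_20161003T070232_N0204_R120_T39LUC_20161003T070230"
    row = pd.read_csv(StringIO(header + "\n" + ident), sep="_", na_values="", comment="#").iloc[0]
    print(row.starttime)  # 20161003T070232
    print(row.tile)       # T39LUC -> row.tile[1:] gives the tile name "39LUC"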
diff --git a/examples/scripts/process_l2a_and_indices_by_identifier_multiprocessing.csv b/examples/scripts/process_l2a_and_indices_by_identifier_multiprocessing.csv
new file mode 100644
index 0000000000000000000000000000000000000000..99eb237102ac69f75f46f3ef2402af1d74abae4b
--- /dev/null
+++ b/examples/scripts/process_l2a_and_indices_by_identifier_multiprocessing.csv
@@ -0,0 +1,5 @@
+pre1_pre2_starttime_pre3_pre4_tile_pre5_endtime_maxclouds_site_nom_indices
+
+# L1C identifier list
+S2A_MSIL1C_20160830T072212_N0204_R063_T38KLE_20160830T072348
+
diff --git a/examples/scripts/process_l2a_and_indices_by_identifier_multiprocessing.py b/examples/scripts/process_l2a_and_indices_by_identifier_multiprocessing.py
new file mode 100644
index 0000000000000000000000000000000000000000..176857e5367164231efa2c790d631644566d3c11
--- /dev/null
+++ b/examples/scripts/process_l2a_and_indices_by_identifier_multiprocessing.py
@@ -0,0 +1,101 @@
+# -*- coding:utf-8 -*-
+
+""" Process L1C products by identifiers
+
+    Input  : - an identifier list in a csv file
+             - a Sen2chain L1C library
+    Output : L2A / indices
+
+    If any command-line argument is supplied, nothing is processed: the script only prints the lists of products it would process.
+
+    This script uses multiprocessing to process files.
+    Please adapt the nb_proc values of the three *_multiprocessing calls below to your hardware.
+
+    Please refer to the sen2chain wiki for more info: https://framagit.org/jebins/sen2chain/wikis/home
+"""
+
+import logging
+import datetime
+import pandas as pd
+from sen2chain import Tile, L1cProduct, L2aProduct, Library
+from sen2chain import l2a_multiprocessing, cldidx_multiprocessing, cld_multiprocessing, idx_multiprocessing
+import os, shutil, sys
+import glob
+import math
+from itertools import chain
+
+try:
+    arg = sys.argv[1]
+except IndexError:
+    arg = []
+print(arg)
+
+logger = logging.getLogger("L2A Processing")
+logging.basicConfig(level=logging.INFO)
+logger.setLevel(logging.INFO)
+
+
+# Default number of days to check before today,
+# used if the date field is empty in the csv file
+delta_t = 15
+
+# File path to the tile identifiers list to process
+fwd = os.path.dirname(os.path.realpath(__file__))
+tiles_file = fwd + "/process_l2a_and_indices_by_identifier_multiprocessing.csv"
+
+tiles_list = pd.read_csv(tiles_file, sep='_', na_values="", na_filter=False, comment='#')
+
+for index, row in tiles_list.iterrows():
+    date = datetime.datetime.strptime(row.starttime, '%Y%m%dT%H%M%S')
+    tiles_list.at[index, "starttime"] = date.strftime('%Y-%m-%d')
+    tiles_list.at[index, "endtime"] = (date + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
+    tiles_list.at[index, "tile"] = row.tile[1:]
+    if not row.indices:
+        tiles_list.at[index, "indices"] = 'NDVI'
+
+logger.info("\n{}".format(tiles_list))
+
+
+# Processing L1C to L2A
+l1c_process_list = []
+for index, row in tiles_list.iterrows():
+    t = Tile(row.tile)
+    l1c_process_list.append(list(p.identifier for p in t.l2a_missings.filter_dates(date_min=row.starttime, date_max=row.endtime)))
+l1c_process_list = list(chain.from_iterable(l1c_process_list))
+logger.info("l1c_process_list ({} files): {}\n".format(len(l1c_process_list), l1c_process_list))
+
+l2a_res = False
+if l1c_process_list:
+    if not arg:
+        l2a_res = l2a_multiprocessing(l1c_process_list, nb_proc=12)
+
+
+# Processing L2A cloud masks
+cld_l2a_process_list = []
+for index, row in tiles_list.iterrows():
+    t = Tile(row.tile)
+    cld_l2a_process_list.append(list(p.identifier for p in t.cloudmasks_missings.filter_dates(date_min=row.starttime, date_max=row.endtime)))
+cld_l2a_process_list = list(chain.from_iterable(cld_l2a_process_list))
+logger.info("cld_l2a_process_list ({} files): {}\n".format(len(cld_l2a_process_list), cld_l2a_process_list))
+
+cld_res = False
+if cld_l2a_process_list:
+    if not arg:
+        cld_res = cld_multiprocessing(cld_l2a_process_list, nb_proc=4)
+
+
+# Processing L2A indices
+indices_l2a_process_list = []
+for index, row in tiles_list.iterrows():
+    t = Tile(row.tile)
+    indices_list = row.indices.split("/")
+    for i in indices_list:
+        l2a_list = [p.identifier for p in t.missing_indices(i).filter_dates(date_min=row.starttime, date_max=row.endtime)]
+        for j in l2a_list:
+            indices_l2a_process_list.append([j, i])
+logger.info("indices_l2a_process_list ({} files): {}\n".format(len(indices_l2a_process_list), indices_l2a_process_list))
+
+indices_res = False
+if indices_l2a_process_list:
+    if not arg:
+        indices_res = idx_multiprocessing(indices_l2a_process_list, nb_proc=4)
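As the docstring notes, passing any command-line argument (e.g. ``python process_l2a_and_indices_by_identifier_multiprocessing.py check``) turns the run into a dry run that only prints the three process lists. The same helpers can also be driven from a Python session; a minimal sketch of the pattern used above (tile name illustrative)::

    from itertools import chain
    from sen2chain import Tile, l2a_multiprocessing

    tiles = [Tile("40KCB")]
    # identifiers of downloaded L1C products with no L2A counterpart yet
    to_process = list(chain.from_iterable(
        [p.identifier for p in t.l2a_missings] for t in tiles))
    if to_process:
        l2a_multiprocessing(to_process, nb_proc=4)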
{}\n".format(len(indices_l2a_process_list), indices_l2a_process_list)) + +indices_res = False +if indices_l2a_process_list: + if not arg: + indices_res = idx_multiprocessing(indices_l2a_process_list, nb_proc=4) diff --git a/examples/scripts/process_l2a_and_indices_multiprocessing.csv b/examples/scripts/process_l2a_and_indices_multiprocessing.csv new file mode 100644 index 0000000000000000000000000000000000000000..7cda20ded26131a449406b5957460ac4fd544787 --- /dev/null +++ b/examples/scripts/process_l2a_and_indices_multiprocessing.csv @@ -0,0 +1,3 @@ +tile;start_time;end_time;max_clouds;site;nom;indices +# max_clouds parameter not implemented yet +40KCB;2018-10-01;2018-11-01;;France;Reunion;NDVI/NDWIGAO/NDWIMCF/MNDWI/BIGR/BIRNIR/BIBG diff --git a/examples/scripts/process_l2a_and_indices_multiprocessing.py b/examples/scripts/process_l2a_and_indices_multiprocessing.py new file mode 100644 index 0000000000000000000000000000000000000000..020e1939f15f7c3b6e98f9cdeeaaf6a36e08d29e --- /dev/null +++ b/examples/scripts/process_l2a_and_indices_multiprocessing.py @@ -0,0 +1,99 @@ +# -*- coding:utf-8 -*- + +""" Process L1C products + + Input : - a tile list in a csv file (tile;start_time;end_time;max_clouds;site;nom;indices) + - a Sen2chain L1C library + Output : L2A / indices + + If any argument is supplied nothiing is done, this script outputs only a product list to process but without any processing + + This script uses multiprocessing to process files. + Adapt the number of process (lines 65, 79, 96) according to your hardware + + Please refer to sen2chain wiki for more info : https://framagit.org/jebins/sen2chain/wikis/home +""" + +import logging +import datetime +import pandas as pd +from sen2chain import Tile +from sen2chain import l2a_multiprocessing, cld_multiprocessing, idx_multiprocessing +import os, sys +from itertools import chain + +try: + arg = sys.argv[1] +except: + arg = [] +print(arg) + +logger = logging.getLogger("L2A Prcessing") +logging.basicConfig(level=logging.INFO) +logger.setLevel(logging.INFO) + + +# default number of days to check before today +# if this field is empty in the csv file +delta_t = 15 + +# file path to tile list to process +fwd = os.path.dirname(os.path.realpath(__file__)) +tiles_file = fwd + "/process_l2a_and_indices_multiprocessing.csv" + +tiles_list = pd.read_csv(tiles_file, sep = ';', na_values="", na_filter=False, comment='#') + +for index, row in tiles_list.iterrows(): + if not row.start_time: + tiles_list.at[index, "start_time"] = (datetime.datetime.now()-datetime.timedelta(days=delta_t)).strftime('%Y-%m-%d') + if not row.end_time: + tiles_list.at[index, "end_time"] = (datetime.datetime.now()+datetime.timedelta(days=1)).strftime('%Y-%m-%d') + if not row.indices: + tiles_list.at[index, "indices"] = 'NDVI' + +logger.info("\n{}".format(tiles_list)) + + +# Processing L1C to L2A +l1c_process_list = [] +for index, row in tiles_list.iterrows(): + t = Tile(row.tile) + l1c_process_list.append(list(p.identifier for p in t.l2a_missings.filter_dates(date_min = row.start_time, date_max = row.end_time))) +l1c_process_list = list(chain.from_iterable(l1c_process_list)) +logger.info("l1c_process_list ({} files): {}\n".format(len(l1c_process_list), l1c_process_list)) + +l2a_res = False +if l1c_process_list: + if not arg: + l2a_res = l2a_multiprocessing(l1c_process_list, nb_proc=4) + + +# Processing L2A cloud masks +cld_l2a_process_list = [] +for index, row in tiles_list.iterrows(): + t = Tile(row.tile) + cld_l2a_process_list.append(list(p.identifier for p in 
diff --git a/examples/scripts/process_l2a_and_products.py b/examples/scripts/process_l2a_and_products.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f1a755279e2f018e13e583b3680e697a9e3850d
--- /dev/null
+++ b/examples/scripts/process_l2a_and_products.py
@@ -0,0 +1,39 @@
+# -*- coding:utf-8 -*-
+
+#~ Start processing the L1C products that have been downloaded
+#~ but not yet processed
+#~ More examples and details on the sen2chain wiki: https://framagit.org/jebins/sen2chain/wikis/Home
+
+from sen2chain import Tile, L1cProduct, L2aProduct
+
+
+def process_missings(tile):
+    """
+    Processing function
+    """
+    t = Tile(tile)
+    for p in t.l2a_missings:
+        l1c = L1cProduct(p.identifier, t.name)
+        try:
+            l1c.process_l2a()
+        except Exception:
+            print("FAILED:", p.identifier)
+            continue
+
+    for p in t.l2a:
+        l2a = L2aProduct(p.identifier, t.name)
+        try:
+            l2a.process_cloud_mask_v2()
+            #~ List of indices to be calculated (available: NDVI/NDWIGAO/NDWIMCF/BIGR/BIRNIR/BIBG),
+            #~ and calculation of nodata_clouds (True/False) and quicklook (True/False)
+            l2a.process_indices(["NDVI", "NDWIGAO", "NDWIMCF", "MNDWI"], True, True)
+        except Exception:
+            print("FAILED:", p.identifier)
+            continue
+
+# List of tiles to be processed (Reunion Island example)
+tiles = ["40KCB"]
+
+for t in tiles:
+    print("Processing:", t)
+    process_missings(t)
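The same per-product calls can also be made individually, outside the loop; a minimal sketch for a single product (identifier and tile illustrative)::

    from sen2chain import L1cProduct

    l1c = L1cProduct("S2A_MSIL1C_20160830T072212_N0204_R063_T38KLE_20160830T072348", "38KLE")
    l1c.process_l2a()  # runs the sen2cor step to produce the matching L2A product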
+ """ + logger.info("Masking cloud-mask_v2 with B11") + + out_temp_path = Path(Config().get("temp_path")) + out_bin = str(out_temp_path / (out_path.stem + "_tmp_bin.tif")) + out_dilate = str(out_temp_path / (out_path.stem + "_tmp_dilate.tif")) + out_mask = str(out_temp_path / (out_path.stem + "_tmp_mask.tif")) + + # Seuillage du masque B1 + b11_seuil = 1500 + BandMath = otbApplication.Registry.CreateApplication("BandMath") + BandMath.SetParameterStringList("il", [str(b11_path)]) + BandMath.SetParameterString("out", out_bin) + BandMath.SetParameterOutputImagePixelType("out", 0) + BandMath.SetParameterString("exp", "im1b1 < " + str(b11_seuil)) + BandMath.ExecuteAndWriteOutput() + + #~ # Erosion + #~ BinaryMorphologicalOperation = otbApplication.Registry.CreateApplication("BinaryMorphologicalOperation") + #~ BinaryMorphologicalOperation.SetParameterString("in", out_bin) + #~ BinaryMorphologicalOperation.SetParameterString("out", out_erode) + #~ BinaryMorphologicalOperation.SetParameterOutputImagePixelType("out", 0) + #~ BinaryMorphologicalOperation.SetParameterInt("channel", 1) + #~ BinaryMorphologicalOperation.SetParameterInt("structype.ball.xradius", 1) + #~ BinaryMorphologicalOperation.SetParameterInt("structype.ball.yradius", 1) + #~ BinaryMorphologicalOperation.SetParameterFloat("filter.erode.foreval", 1) + #~ BinaryMorphologicalOperation.SetParameterString("filter", "erode") + #~ BinaryMorphologicalOperation.ExecuteAndWriteOutput() + + # Dilatation + BinaryMorphologicalOperation = otbApplication.Registry.CreateApplication("BinaryMorphologicalOperation") + BinaryMorphologicalOperation.SetParameterString("in", out_bin) + BinaryMorphologicalOperation.SetParameterString("out", out_dilate) + BinaryMorphologicalOperation.SetParameterOutputImagePixelType("out", 0) + BinaryMorphologicalOperation.SetParameterInt("channel", 1) + BinaryMorphologicalOperation.SetParameterInt("structype.ball.xradius", dilatation) + BinaryMorphologicalOperation.SetParameterInt("structype.ball.yradius", dilatation) + BinaryMorphologicalOperation.SetParameterFloat("filter.dilate.foreval", 1) + BinaryMorphologicalOperation.SetParameterString("filter", "dilate") + BinaryMorphologicalOperation.ExecuteAndWriteOutput() + + # Masquage du masque + BandMath = otbApplication.Registry.CreateApplication("BandMath") + BandMath.SetParameterStringList("il", [str(cloud_mask),str(out_dilate)]) + BandMath.SetParameterString("out", out_mask) + BandMath.SetParameterOutputImagePixelType("out", 0) + BandMath.SetParameterString("exp", "(im1b1 == 1) * (im2b1 == 0)") + BandMath.ExecuteAndWriteOutput() + + # Dilatation + BinaryMorphologicalOperation = otbApplication.Registry.CreateApplication("BinaryMorphologicalOperation") + BinaryMorphologicalOperation.SetParameterString("in", out_mask) + BinaryMorphologicalOperation.SetParameterString("out", out_dilate) + BinaryMorphologicalOperation.SetParameterOutputImagePixelType("out", 0) + BinaryMorphologicalOperation.SetParameterInt("channel", 1) + BinaryMorphologicalOperation.SetParameterInt("structype.ball.xradius", dilatation+1) + BinaryMorphologicalOperation.SetParameterInt("structype.ball.yradius", dilatation+1) + BinaryMorphologicalOperation.SetParameterFloat("filter.dilate.foreval", 1) + BinaryMorphologicalOperation.SetParameterString("filter", "dilate") + BinaryMorphologicalOperation.ExecuteAndWriteOutput() + + + # Save to JP2000 + src_ds = gdal.Open(out_dilate) + driver = gdal.GetDriverByName("JP2OpenJPEG") + dst_ds = driver.CreateCopy(str(out_path), src_ds, + options=["CODEC=JP2", 
"QUALITY=100", "REVERSIBLE=YES", "YCBCR420=NO"]) + dst_ds = None + src_ds = None + + os.remove(out_bin) + os.remove(out_dilate) + os.remove(out_mask) diff --git a/sen2chain/products.py b/sen2chain/products.py index 11d43202f97626e2bd9f57ac5f2d7dd5eae0046b..42700b774851214558a1a7c881e55593d5c2f047 100755 --- a/sen2chain/products.py +++ b/sen2chain/products.py @@ -17,7 +17,7 @@ from .utils import grouper from .config import Config, SHARED_DATA from .xmlparser import MetadataParser from .sen2cor import process_sen2cor -from .cloud_mask import create_cloud_mask, create_cloud_mask_v2 +from .cloud_mask import create_cloud_mask, create_cloud_mask_v2, create_cloud_mask_b11 from .indices import IndicesCollection from .colormap import create_l2a_ql @@ -349,6 +349,11 @@ class L2aProduct(Product): if not self.user_cloud_mask.exists(): self.user_cloud_mask = None + # user cloud mask b11 + self.user_cloud_mask_b11 = self.path.parent / (self.identifier + "_CLOUD_MASK_B11.jp2") + if not self.user_cloud_mask_b11.exists(): + self.user_cloud_mask_b11 = None + # user QL self.user_ql = self.path.parent / (self.identifier + "_QL.tif") if not self.user_ql.exists(): @@ -414,20 +419,29 @@ class L2aProduct(Product): def process_cloud_mask_v2(self, buffering: bool = True, reprocess: bool = False, - out_path=None + out_path_mask = None, + out_path_mask_b11 = None ) -> "L2aProduct": """ """ logger.info("{}: processing cloud_mask_v2".format(self.identifier)) cloud_mask_filename = self.identifier + "_CLOUD_MASK.jp2" + cloud_mask_b11_filename = str(Path(cloud_mask_filename).parent/Path(cloud_mask_filename).stem) + "_B11.jp2" - if out_path is None: + if out_path_mask is None: cloud_mask_folder = self.library_path.parent cloud_mask_folder.mkdir(parents=True, exist_ok=True) cloud_mask_path = cloud_mask_folder / cloud_mask_filename else: - cloud_mask_path = Path(out_path) + cloud_mask_path = Path(out_path_mask) + + if out_path_mask_b11 is None: + cloud_mask_folder = self.library_path.parent + cloud_mask_folder.mkdir(parents=True, exist_ok=True) + cloud_mask_b11_path = cloud_mask_folder / cloud_mask_b11_filename + else: + cloud_mask_b11_path = Path(out_path_mask_b11) if cloud_mask_path.exists() and not reprocess: logger.info("{} cloud mask v2 already exists".format(self.identifier)) @@ -437,6 +451,16 @@ class L2aProduct(Product): dilatation=5, out_path=cloud_mask_path) self.user_cloud_mask = cloud_mask_path + + if cloud_mask_b11_path.exists() and not reprocess: + logger.info("{} cloud mask v2 masked b11 already exists".format(self.identifier)) + else: + create_cloud_mask_b11(self.user_cloud_mask, + self.b11_20m, + dilatation=5, + out_path=cloud_mask_b11_path) + self.user_cloud_mask_b11 = cloud_mask_b11_path + return self def process_indices(self, diff --git a/sen2chain/time_series.py b/sen2chain/time_series.py index 7f5beb04b5eba658101c51d33610c2e86732d8ba..ee52cf044194eea1e21e81e9a1bbc4da82de9897 100644 --- a/sen2chain/time_series.py +++ b/sen2chain/time_series.py @@ -10,17 +10,26 @@ from pathlib import Path from collections import OrderedDict import fiona import rasterio +from rasterio.plot import show +from rasterio.plot import plotting_extent +from rasterio.mask import mask from fiona import transform from shapely.geometry import shape +from shapely.geometry import mapping from rasterstats import zonal_stats import pandas as pd +import geopandas as gpd # type annotations from typing import Sequence, List, Dict, Union +import matplotlib.pyplot as plt +import numpy as np +import math from .config import Config from .tiles 
diff --git a/sen2chain/time_series.py b/sen2chain/time_series.py
index 7f5beb04b5eba658101c51d33610c2e86732d8ba..ee52cf044194eea1e21e81e9a1bbc4da82de9897 100644
--- a/sen2chain/time_series.py
+++ b/sen2chain/time_series.py
@@ -10,17 +10,26 @@ from pathlib import Path
 from collections import OrderedDict
 import fiona
 import rasterio
+from rasterio.plot import show
+from rasterio.plot import plotting_extent
+from rasterio.mask import mask
 from fiona import transform
 from shapely.geometry import shape
+from shapely.geometry import mapping
 from rasterstats import zonal_stats
 import pandas as pd
+import geopandas as gpd
 # type annotations
 from typing import Sequence, List, Dict, Union
+import matplotlib.pyplot as plt
+import numpy as np
+import math
 
 from .config import Config
 from .tiles import Tile, ProductsList
 from .indices import IndicesCollection
 from .geo_utils import get_tiles_from_file
+from .products import L2aProduct
 
 logger = logging.getLogger(__name__)
 
@@ -45,7 +54,8 @@ class TimeSeries:
                  vectors_file: Union[str, pathlib.PosixPath],
                  indices: Sequence[str] = None,
                  date_min: str = None, date_max: str = None,
-                 cover_min: int = 0, cover_max: int = 100
+                 cover_min: int = 0, cover_max: int = 100,
+                 field_names: str = None
                  ) -> None:
 
         self._vectors_file = Path(vectors_file)
@@ -53,7 +63,9 @@ class TimeSeries:
         self._date_max = date_max
         self._cover_min = cover_min
         self._cover_max = cover_max
-
+        self._field_names = field_names
+        self._out_path = Config().get("time_series_path")
+
         if indices is None:
             self._indices_list = IndicesCollection.list
         else:
@@ -61,7 +73,12 @@ class TimeSeries:
             if indice not in IndicesCollection:
                 raise ValueError("Invald indice name: {}".format(indice))
             self._indices_list = indices
-
+
+        if self._field_names:
+            self._key = self._field_names
+        else:
+            self._key = 'fid'
+
         self._tiles_geoms = self._get_tiles_geom_dict()
         self._df_dicts = dict()
         self._get_stats()
@@ -196,12 +213,226 @@ class TimeSeries:
         :param out_path: output folder. Default is DATA/TIME_SERIES.
         """
         if out_path is None:
-            out_path = Config().get("time_series_path")
+            out_path = self._out_path
+        out_path_folder = Path(out_path) / self._vectors_file.stem
+        out_path_folder.mkdir(parents=True, exist_ok=True)
+
         for df_name, df in self._df_dicts.items():
-            csv_path = Path(out_path) / "{0}_{1}_{2}_{3}.csv".format(
+            csv_path = out_path_folder / "{0}_{1}_{2}_{3}.csv".format(
                 self._vectors_file.stem,
                 df_name,
                 self._date_min,
                 self._date_max)
             df.sort_values(by=['fid']).to_csv(str(csv_path))
             logger.info("exported to csv: {}".format(csv_path))
+
+    def plot_global(self, out_path: Union[str, pathlib.PosixPath] = None) -> None:
+        """Plots a global graph of the time series, all features together, one figure per indice.
+
+        :param out_path: output folder. Default is DATA/TIME_SERIES.
+        """
+        if out_path is None:
+            out_path = self._out_path
+        out_path_folder = Path(out_path) / self._vectors_file.stem
+        out_path_folder.mkdir(parents=True, exist_ok=True)
+
+        for df_name, df in self._df_dicts.items():
+            png_path = out_path_folder / "{0}_plot-global_{1}_{2}_{3}.png".format(
+                self._vectors_file.stem,
+                df_name,
+                self._date_min,
+                self._date_max)
+            df = self.data[df_name]
+            plt.figure(figsize=(19.2, 10.8))
+            if df_name in ['NDVI', 'NDWIGAO', 'NDWIMCF', 'MNDWI']:
+                plt.ylim((-10000, 10000))
+            else:
+                plt.ylim((0, 10000))
+            df.dropna(subset=['mean']).groupby(self._key)['mean'].plot(legend=True)
+            plt.title(df_name)
+            plt.savefig(str(png_path))
+            plt.close()
+            logger.info("Plot saved to png: {}".format(png_path))
+ """ + if out_path is None: + out_path = self._out_path + out_path_folder = Path(out_path) / self._vectors_file.stem + out_path_folder.mkdir(parents=True, exist_ok=True) + for df_name, df in self._df_dicts.items(): + png_path = out_path_folder / "{0}_plot-details_{1}_{2}_{3}.png".format( + self._vectors_file.stem, + df_name, + self._date_min, + self._date_max) + + if df_name in ['NDVI', 'NDWIGAO', 'NDWMCF', 'MNDWI']: + ylim = [-10000, 10000] + else: + ylim = [0, 10000] + + df = self.data[df_name] + grouped = df.groupby(self._key) + ncols = int(math.ceil(len(grouped)**0.5)) + nrows = int(math.ceil(len(grouped)/ncols)) + fig, axs = plt.subplots(nrows, ncols, figsize=(19.2,10.8)) + for (name, dfe), ax in zip(grouped, axs.flat): + ax.set_ylim(ylim) + ax.set_title(name) + dfe.dropna(subset = ['mean']).plot(y=['mean'], ax=ax, yerr='std', color='black', elinewidth=0.2, legend=False) + dfe.dropna(subset = ['mean']).plot(y=['min', 'max'], ax=ax, linewidth=0.25, color='black', legend=False) + ax2 = ax.twinx() + ax2.set_ylim([0, 1]) + dfe['na_ratio'] = dfe['nodata']/(dfe['count'] + dfe['nodata']) + dfe.dropna(subset = ['mean']).plot(y=['na_ratio'], marker='o', ax=ax2, color='red', linewidth=0.25, linestyle='', legend=False) + #~ dfe.dropna().plot.bar(y=['na_ratio'], ax=ax2, color='red',legend=False) + ax.plot(np.nan, '-r', label = 'NaN ratio') + #~ ax.legend(loc=0, prop={'size': 6}) + ax.legend(loc=0, labelspacing=0.05) + fig.tight_layout() + fig.suptitle(df_name, fontsize=16) + + plt.savefig(str(png_path)) + plt.close() + logger.info("Plot saved to png: {}".format(png_path)) + + + def extract_ql(self, out_path: Union[str, pathlib.PosixPath] = None) -> None: + """Extract ql images around vectors. + """ + if out_path is None: + out_path = Config().get("time_series_path") + out_path_folder = Path(out_path) / self._vectors_file.stem + out_path_folder.mkdir(parents=True, exist_ok=True) + + for df_name, df in self.data.items(): + if self._field_names: + fid_list = dict(zip(df.fid, df.name)) + else: + fid_list = dict(zip(df.fid, df.fid)) + fid_list = fid_list.fromkeys(fid_list, '') + + cmap_dict = {'NDVI' : 'RdYlGn', + 'NDWIGAO' : 'RdYlBu', + 'NDWIMCF' : 'RdYlBu', + 'BIGR' : 'pink', + 'BIRNIR' : 'afmhot', + 'BIBG' : 'bone', + 'MNDWI' : 'BrBG'} + if df_name in ['NDVI', 'NDWIGAO', 'NDWMCF', 'MNDWI']: + vmin = -10000 + vmax = 10000 + else: + vmin = 0 + vmax = 10000 + + for fid, name in fid_list.items(): + if name: + fidname = name + else: + fidname = 'FID'+fid + out_path_fid_folder = out_path_folder / str("QL_" + fidname) + out_path_fid_folder.mkdir(parents=True, exist_ok=True) + indice_png_path = out_path_fid_folder / "{0}_MOZ-QL_{1}_{2}_{3}_{4}.png".format( + self._vectors_file.stem, + fidname, + df_name, + self._date_min, + self._date_max) + l2a_png_path = out_path_fid_folder / "{0}_MOZ-QL_{1}_{2}_{3}_{4}.png".format( + self._vectors_file.stem, + fidname, + 'L2A', + self._date_min, + self._date_max) + logger.info("fid/name:{}".format(fidname)) + nb_prod = len(df.loc[df['fid'] == fid]) + ncols = int(math.ceil(nb_prod**0.5)) + nrows = int(math.ceil(nb_prod/ncols)) + figa, axs = plt.subplots(nrows, ncols, figsize=(5*ncols,5*nrows), sharey=True, sharex=True) + figb, bxs = plt.subplots(nrows, ncols, figsize=(5*ncols,5*nrows), sharey=True, sharex=True) + for (index, row), ax, bx in zip((df.loc[df['fid'] == fid]).sort_values(by=['date']).iterrows(), + np.array(axs).flatten(), + np.array(bxs).flatten()): + prod_id = row['filename'][:(-12 - len(df_name))] + indice_png_tile_path = out_path_fid_folder / 
"{0}_QL_{1}_{2}_{3}_{4}_{5}.png".format( + self._vectors_file.stem, + fidname, + df_name, + prod_id[11:19], + prod_id[39:44], + prod_id[0:2]) + l2a_png_tile_path = out_path_fid_folder / "{0}_QL_{1}_{2}_{3}_{4}_{5}.png".format( + self._vectors_file.stem, + fidname, + 'L2A', + prod_id[11:19], + prod_id[39:44], + prod_id[0:2]) + tile_obj = Tile(row['tile']) + tile_indice_path = tile_obj.paths["indices"][df_name.lower()] + tile_l2a_path = tile_obj.paths["l2a"] + prod_path = tile_indice_path / prod_id / row['filename'] + tci_path = list(Path(str(tile_l2a_path / row['filename'][:(-12 - len(df_name))])+ '.SAFE/')\ + .glob('GRANULE/*/IMG_DATA/R10m/*_TCI_10m.jp2')) + + crop_extent = gpd.read_file(str(self._vectors_file)) + raster_tci = rasterio.open(tci_path[0]) + crop_extent_new_proj = crop_extent.to_crs(raster_tci.crs) + extent_geojson = mapping(crop_extent_new_proj['geometry'][int(fid)].buffer(1000)) + + with rasterio.open(tci_path[0]) as tci_data: + tci_data_crop, tci_data_crop_affine = mask(tci_data, + [extent_geojson], + crop=True) + tci_crop = np.dstack((tci_data_crop[0], tci_data_crop[1], tci_data_crop[2])) + tci_data_extent = plotting_extent(tci_data_crop[0], tci_data_crop_affine) + ax.imshow(tci_crop, extent = tci_data_extent) + crop_extent_new_proj.loc[[int(fid)], 'geometry'].plot(ax=ax, facecolor='none', edgecolor='black', linewidth = 3) + ax.set_title("{} - {}".format(prod_id[11:19], prod_id[39:44]), fontsize=10) + + fig2, ax2 = plt.subplots(1, 1, figsize=(4,4)) + plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1) + ax2.imshow(tci_crop, extent = tci_data_extent) + crop_extent_new_proj.loc[[int(fid)], 'geometry'].plot(ax=ax2, facecolor='none', edgecolor='black', linewidth = 3) + ax2.set_axis_off() + fig2.suptitle(name) + ax2.set_title(prod_id[39:44] + ' ' + prod_id[11:19]) + fig2.tight_layout() + fig2.savefig(str(l2a_png_tile_path), bbox_inches='tight') + plt.close(fig=fig2) + + raster = rasterio.open(prod_path) + nrg = raster.read(1) + with rasterio.open(prod_path) as img_data: + img_data_crop, img_data_crop_affine = mask(img_data, + [extent_geojson], + crop=True) + img_data_extent = plotting_extent(img_data_crop[0], img_data_crop_affine) + bx.imshow(img_data_crop[0], extent=img_data_extent, cmap=cmap_dict[df_name], vmin=vmin, vmax=vmax) + crop_extent_new_proj.loc[[int(fid)], 'geometry'].plot(ax=bx, facecolor='none', edgecolor='black', linewidth = 3) + bx.set_title("{} - {}".format(prod_id[11:19], prod_id[39:44]), fontsize=10) + + fig_t_ind, ax_t_ind = plt.subplots(1, 1, figsize=(4,4)) + plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1) + ax_t_ind.imshow(img_data_crop[0], extent=img_data_extent, cmap=cmap_dict[df_name], vmin=vmin, vmax=vmax) + crop_extent_new_proj.loc[[int(fid)], 'geometry'].plot(ax=ax_t_ind, facecolor='none', edgecolor='black', linewidth = 3) + ax_t_ind.set_axis_off() + fig_t_ind.suptitle(name) + ax_t_ind.set_title(prod_id[39:44] + ' ' + prod_id[11:19]) + fig_t_ind.tight_layout() + fig_t_ind.savefig(str(indice_png_tile_path), bbox_inches='tight') + plt.close(fig=fig_t_ind) + + figa.suptitle(fidname + ' - L2A') + figa.savefig(str(l2a_png_path), bbox_inches='tight') + plt.close(fig=figa) + + figb.suptitle(fidname + ' - ' + df_name) + figb.savefig(str(indice_png_path), bbox_inches='tight') + plt.close(fig=figb) +