Skip to content
Snippets Groups Projects
Commit 8082ec0a authored by paul.tresson_ird.fr's avatar paul.tresson_ird.fr
Browse files

redo projection algorithms to have more feedbacka and stop projection if too...

redo projection algorithms to have more feedbacka and stop projection if too long. adds psutil as dependency

psutil is very light and handy, I believe it is no big issue to add it as a requirement
parent bc388f6c
No related branches found
No related tags found
No related merge requests found
......@@ -4,7 +4,9 @@ from pathlib import Path
from typing import Dict, Any
import joblib
import pandas as pd
from sklearn.preprocessing import StandardScaler
import psutil
import json
import rasterio
from rasterio import windows
......@@ -26,11 +28,27 @@ from qgis.core import (Qgis,
)
from sklearn.decomposition import PCA
#from umap.umap_ import UMAP
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA, IncrementalPCA
from sklearn.cluster import KMeans
#from umap.umap_ import UMAP
RANDOM_SEED = 42
def calculate_chunk_size(X, memory_buffer=0.1):
# Estimate available memory
available_memory = psutil.virtual_memory().available
# Estimate the memory footprint of one sample
sample_memory = X[0].nbytes
# Determine maximum chunk size within available memory (leaving some buffer)
max_chunk_size = int(available_memory * (1 - memory_buffer) / sample_memory)
return max_chunk_size
import json
......@@ -105,7 +123,7 @@ class ReductionAlgorithm(QgsProcessingAlgorithm):
)
)
# self.method_opt = ['PCA', 'UMAP', 'K-means', '--Empty--']
self.method_opt = ['PCA', 'K-means', '--Empty--']
self.method_opt = ['IPCA','PCA', 'K-means', '--Empty--']
self.addParameter (
QgsProcessingParameterEnum(
name = self.METHOD,
......@@ -183,19 +201,8 @@ class ReductionAlgorithm(QgsProcessingAlgorithm):
input_bands = [i_band -1 for i_band in self.selected_bands]
if self.method == 'PCA':
proj = PCA(int(self.ncomponents))
save_file = 'pca.pkl'
# if self.method == 'UMAP':
# proj = UMAP(int(self.ncomponents))
# save_file = 'umap.pkl'
if self.method == 'K-means':
proj = KMeans(int(self.ncomponents))
save_file = 'kmeans_transform.pkl'
out_path = os.path.join(self.output_dir, save_file)
with rasterio.open(self.rlayer_path) as ds:
transform = ds.transform
crs = ds.crs
win = windows.from_bounds(
......@@ -209,81 +216,84 @@ class ReductionAlgorithm(QgsProcessingAlgorithm):
transform = ds.window_transform(win)
raster = np.transpose(raster, (1,2,0))
raster = raster[:,:,input_bands]
fit_raster = raster.reshape(-1, raster.shape[-1])
feedback.pushInfo(f'{raster.shape}')
feedback.pushInfo(f'{raster.reshape(-1, raster.shape[0]).shape}')
# feedback.pushInfo(f'{raster.shape}')
# feedback.pushInfo(f'{raster.reshape(-1, raster.shape[0]).shape}')
if self.subset:
feedback.pushInfo(f'Using a random subset of {self.subset} pixels')
feedback.pushInfo(f'Using a random subset of {self.subset} pixels, random seed is {RANDOM_SEED}')
fit_raster = raster.reshape(-1, raster.shape[-1])
nsamples = fit_raster.shape[0]
# Generate random indices to select subset_size number of samples
np.random.seed(42)
np.random.seed(RANDOM_SEED)
random_indices = np.random.choice(nsamples, size=self.subset, replace=False)
fit_raster = fit_raster[random_indices,:]
feedback.pushInfo(f'Starting fit\n')
feedback.pushInfo(f"Mean raster : {np.mean(raster)}")
feedback.pushInfo(f"Standart dev : {np.std(raster)}")
fit_raster = (fit_raster-np.mean(raster))/np.std(raster)
feedback.pushInfo(f"Mean raster normalized : {np.mean(fit_raster)}")
feedback.pushInfo(f"Standart dev normalized : {np.std(fit_raster)}")
iter = None
if self.method == 'PCA':
proj = PCA(int(self.ncomponents))
save_file = 'pca.pkl'
if self.method == 'IPCA':
proj = IncrementalPCA(int(self.ncomponents))
save_file = 'ipca.pkl'
chunk_size = calculate_chunk_size(fit_raster)
iter = range(0, len(fit_raster), chunk_size)
if self.method == 'K-means':
proj = KMeans(int(self.ncomponents))
save_file = 'kmeans_transform.pkl'
iter = range(proj.max_iter)
# if self.method == 'UMAP':
# proj = UMAP(int(self.ncomponents))
# save_file = 'umap.pkl'
feedback.pushInfo(f'Starting fit. If it goes for too long, consider setting a subset.\n')
## if fitting can be divided, we provide the possibility to cancel and to have progression
if iter and hasattr(proj, 'partial_fit'):
for i in iter:
if feedback.isCanceled():
feedback.pushWarning(
self.tr("\n !!!Processing is canceled by user!!! \n"))
break
proj.partial_fit(fit_raster)
feedback.setProgress((i / len(iter)) * 100)
## else, all in one go
else:
proj.fit(fit_raster)
if self.method=='PCA' :
eig = proj.components_
absolute_eig = np.abs(eig)
contributions = pd.DataFrame(eig.T, columns=[f'PC{i+1}' for i in range(eig.shape[0])])
#feedback.pushInfo(f"Contributions des variables aux composantes principales : {contributions}")
# Filtrer les contributions supérieures à 0.8
#threshold = 0.01
significant_contributions = contributions[contributions > self.thresold_pca].dropna(how='all')
feedback.pushInfo(f"Contributions des variables aux composantes principales : {significant_contributions}")
if self.save_model:
joblib.dump(proj, out_path)
feedback.pushInfo(f'starting inference\n')
proj_img = proj.transform(raster.reshape(-1, raster.shape[-1]))
feedback.pushInfo(f'Fitting done, saving model\n')
if self.save_model:
out_path = os.path.join(self.output_dir, save_file)
joblib.dump(proj, out_path)
feedback.pushInfo(f'Inference over raster\n')
proj_img = proj.transform(raster.reshape(-1, raster.shape[-1]))
if self.method=='PCA' :
else:
scaler = StandardScaler()
#raster_normalized=scaler.fit_transform(raster)
feedback.pushInfo(f"Mean raster : {np.mean(raster)}")
feedback.pushInfo(f"Standart dev : {np.std(raster)}")
raster = (raster-np.mean(raster))/np.std(raster)
feedback.pushInfo(f"Mean raster normalized : {np.mean(raster)}")
feedback.pushInfo(f"Standart dev normalized : {np.std(raster)}")
#feedback.pushInfo(f"Mean raster normalized: {raster_normalized.mean}")
proj_img = proj.fit_transform(raster.reshape(-1, raster.shape[-1]))
feedback.pushInfo(f'Loging variables contributions\n')
eig = proj.components_.T
feedback.pushInfo(f"Thresold: {self.thresold_pca}")
if self.method=='PCA' :
eig = proj.components_.T
feedback.pushInfo(f"Thresold: {self.thresold_pca}")
cos2 = eig ** 2
cos2_filtered = cos2 > self.thresold_pca
feedback.pushInfo(f"Contributions des variables aux composantes principales : {cos2_filtered}")
num_values_above_threshold = np.sum(cos2_filtered)
feedback.pushInfo(f"Number of values in cos2 > threshold: {num_values_above_threshold}")
#contributions = pd.DataFrame(eig, columns=[f'PC{i+1}' for i in range(eig.T.shape[0])])
#feedback.pushInfo(f"Contributions des variables aux composantes principales : {contributions}")
# Filtrer les contributions supérieures à 0.8
#threshold = 0.01
#significant_contributions = contributions[contributions > self.thresold_pca].dropna(how='all')
#feedback.pushInfo(f"Contributions des variables aux composantes principales : {significant_contributions}")
"""
for i in range(self.ncomponents):
component_loadings = absolute_eig[:, i]
top_indices = np.argsort(component_loadings)[-top_n:][::-1]
#top_indices = np.argsort(component_loadings)
feedback.pushInfo(f'eigenvectors significatifs : {top_indices}')
"""
if self.save_model:
joblib.dump(proj, out_path)
cos2 = eig ** 2
cos2_filtered = cos2 > self.thresold_pca
feedback.pushInfo(f"Contributions of variables to components : {cos2_filtered}")
num_values_above_threshold = np.sum(cos2_filtered)
feedback.pushInfo(f"Number of values in cos2 > threshold: {num_values_above_threshold}")
params = proj.get_params()
proj_img = proj_img.reshape((raster.shape[0], raster.shape[1],-1))
......@@ -303,8 +313,8 @@ class ReductionAlgorithm(QgsProcessingAlgorithm):
i += 1
feedback.pushInfo(f'Export to geotif\n')
if os.path.exists(dst_path):
i = 1
while True:
......@@ -327,6 +337,7 @@ class ReductionAlgorithm(QgsProcessingAlgorithm):
return {'OUTPUT_RASTER':dst_path}
def process_options(self,parameters, context, feedback):
self.iPatch = 0
......
torchgeo == 0.5.2
geopandas == 0.14.4
scikit-learn == 1.5.1
psutil > 5.0.0
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment