Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
iamap
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
AMAP
iamap
Commits
49ef333b
Commit
49ef333b
authored
5 months ago
by
paul.tresson_ird.fr
Browse files
Options
Downloads
Patches
Plain Diff
redo normalisation, clustering works with kmeans
parent
556ce2ab
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
clustering.py
+3
-489
3 additions, 489 deletions
clustering.py
utils/algo.py
+72
-45
72 additions, 45 deletions
utils/algo.py
with
75 additions
and
534 deletions
clustering.py
+
3
−
489
View file @
49ef333b
import
os
import
numpy
as
np
from
pathlib
import
Path
from
typing
import
Dict
,
Any
import
joblib
import
tempfile
import
json
import
rasterio
from
rasterio
import
windows
from
qgis.PyQt.QtCore
import
QCoreApplication
from
qgis.core
import
(
Qgis
,
QgsGeometry
,
QgsProcessingParameterBoolean
,
QgsProcessingParameterEnum
,
QgsCoordinateTransform
,
QgsProcessingException
,
QgsProcessingAlgorithm
,
QgsProcessingParameterRasterLayer
,
QgsProcessingParameterFolderDestination
,
QgsProcessingParameterBand
,
QgsProcessingParameterNumber
,
QgsProcessingParameterExtent
,
QgsProcessingParameterCrs
,
QgsProcessingParameterDefinition
,
)
from
sklearn.cluster
import
KMeans
from
.utils.
misc
import
get_unique_filename
from
.utils.
algo
import
SKAlgorithm
RANDOM_SEED
=
42
class
ClusterAlgorithm
(
QgsProcessingAlgorithm
):
class
ClusterAlgorithm
(
SKAlgorithm
):
"""
"""
INPUT
=
'
INPUT
'
BANDS
=
'
BANDS
'
EXTENT
=
'
EXTENT
'
LOAD
=
'
LOAD
'
OUTPUT
=
'
OUTPUT
'
RESOLUTION
=
'
RESOLUTION
'
CRS
=
'
CRS
'
CLUSTERS
=
'
CLUSTERS
'
SUBSET
=
'
SUBSET
'
METHOD
=
'
METHOD
'
SAVE_MODEL
=
'
SAVE_MODEL
'
def
initAlgorithm
(
self
,
config
=
None
):
"""
Here we define the inputs and output of the algorithm, along
with some other properties.
"""
cwd
=
Path
(
__file__
).
parent
.
absolute
()
tmp_wd
=
os
.
path
.
join
(
tempfile
.
gettempdir
(),
"
iamap_clustering
"
)
self
.
addParameter
(
QgsProcessingParameterRasterLayer
(
name
=
self
.
INPUT
,
description
=
self
.
tr
(
'
Input raster layer or image file path
'
),
defaultValue
=
os
.
path
.
join
(
cwd
,
'
assets
'
,
'
test.tif
'
),
),
)
self
.
addParameter
(
QgsProcessingParameterBand
(
name
=
self
.
BANDS
,
description
=
self
.
tr
(
'
Selected Bands (defaults to all bands selected)
'
),
defaultValue
=
None
,
parentLayerParameterName
=
self
.
INPUT
,
optional
=
True
,
allowMultiple
=
True
,
)
)
crs_param
=
QgsProcessingParameterCrs
(
name
=
self
.
CRS
,
description
=
self
.
tr
(
'
Target CRS (default to original CRS)
'
),
optional
=
True
,
)
res_param
=
QgsProcessingParameterNumber
(
name
=
self
.
RESOLUTION
,
description
=
self
.
tr
(
'
Target resolution in meters (default to native resolution)
'
),
type
=
QgsProcessingParameterNumber
.
Double
,
optional
=
True
,
minValue
=
0
,
maxValue
=
100000
)
self
.
addParameter
(
QgsProcessingParameterExtent
(
name
=
self
.
EXTENT
,
description
=
self
.
tr
(
'
Processing extent (default to the entire image)
'
),
optional
=
True
)
)
self
.
method_opt
=
[
'
K-means
'
,
'
--Empty--
'
]
self
.
addParameter
(
QgsProcessingParameterEnum
(
name
=
self
.
METHOD
,
description
=
self
.
tr
(
'
Method for the dimension reduction
'
),
defaultValue
=
0
,
options
=
self
.
method_opt
,
)
)
self
.
addParameter
(
QgsProcessingParameterNumber
(
name
=
self
.
CLUSTERS
,
description
=
self
.
tr
(
'
Number of target clusters
'
),
type
=
QgsProcessingParameterNumber
.
Integer
,
defaultValue
=
5
,
minValue
=
1
,
maxValue
=
1024
)
)
subset_param
=
QgsProcessingParameterNumber
(
name
=
self
.
SUBSET
,
description
=
self
.
tr
(
'
Select a subset of random pixels of the image to fit transform
'
),
type
=
QgsProcessingParameterNumber
.
Integer
,
defaultValue
=
None
,
minValue
=
1
,
maxValue
=
10_000
,
optional
=
True
,
)
save_param
=
QgsProcessingParameterBoolean
(
self
.
SAVE_MODEL
,
self
.
tr
(
"
Save projection model after fit.
"
),
defaultValue
=
True
)
self
.
addParameter
(
QgsProcessingParameterFolderDestination
(
self
.
OUTPUT
,
self
.
tr
(
"
Output directory (choose the location that the image features will be saved)
"
),
# defaultValue=os.path.join(cwd,'models'),
defaultValue
=
tmp_wd
,
)
)
for
param
in
(
crs_param
,
res_param
,
subset_param
,
save_param
):
param
.
setFlags
(
param
.
flags
()
|
QgsProcessingParameterDefinition
.
FlagAdvanced
)
self
.
addParameter
(
param
)
def
processAlgorithm
(
self
,
parameters
,
context
,
feedback
):
"""
Here is where the processing itself takes place.
"""
self
.
process_options
(
parameters
,
context
,
feedback
)
input_bands
=
[
i_band
-
1
for
i_band
in
self
.
selected_bands
]
if
self
.
method
==
'
K-means
'
:
proj
=
KMeans
(
int
(
self
.
nclusters
),
random_state
=
RANDOM_SEED
)
save_file
=
'
kmeans_cluster.pkl
'
params
=
proj
.
get_params
()
iter
=
range
(
proj
.
max_iter
)
out_path
=
os
.
path
.
join
(
self
.
output_dir
,
save_file
)
with
rasterio
.
open
(
self
.
rlayer_path
)
as
ds
:
transform
=
ds
.
transform
crs
=
ds
.
crs
win
=
windows
.
from_bounds
(
self
.
extent
.
xMinimum
(),
self
.
extent
.
yMinimum
(),
self
.
extent
.
xMaximum
(),
self
.
extent
.
yMaximum
(),
transform
=
transform
)
raster
=
ds
.
read
(
window
=
win
)
transform
=
ds
.
window_transform
(
win
)
raster
=
np
.
transpose
(
raster
,
(
1
,
2
,
0
))
raster
=
raster
[:,:,
input_bands
]
fit_raster
=
raster
.
reshape
(
-
1
,
raster
.
shape
[
-
1
])
feedback
.
pushInfo
(
f
'
{
raster
.
shape
}
'
)
feedback
.
pushInfo
(
f
'
{
raster
.
reshape
(
-
1
,
raster
.
shape
[
0
]).
shape
}
'
)
if
self
.
subset
:
feedback
.
pushInfo
(
f
'
Using a random subset of
{
self
.
subset
}
pixels, random seed is
{
RANDOM_SEED
}
'
)
nsamples
=
fit_raster
.
shape
[
0
]
# Generate random indices to select subset_size number of samples
np
.
random
.
seed
(
RANDOM_SEED
)
random_indices
=
np
.
random
.
choice
(
nsamples
,
size
=
self
.
subset
,
replace
=
False
)
fit_raster
=
fit_raster
[
random_indices
,:]
# remove nans
fit_raster
=
fit_raster
[
~
np
.
isnan
(
fit_raster
).
any
(
axis
=
1
)]
feedback
.
pushInfo
(
f
"
Mean raster :
{
np
.
mean
(
raster
)
}
"
)
feedback
.
pushInfo
(
f
"
Standart dev :
{
np
.
std
(
raster
)
}
"
)
fit_raster
=
(
fit_raster
-
np
.
mean
(
raster
))
/
np
.
std
(
raster
)
feedback
.
pushInfo
(
f
"
Mean raster normalized :
{
np
.
mean
(
fit_raster
)
}
"
)
feedback
.
pushInfo
(
f
"
Standart dev normalized :
{
np
.
std
(
fit_raster
)
}
"
)
feedback
.
pushInfo
(
f
'
Starting fit. If it goes for too long, consider setting a subset.
\n
'
)
## if fitting can be divided, we provide the possibility to cancel and to have progression
if
iter
and
hasattr
(
proj
,
'
partial_fit
'
):
for
i
in
iter
:
if
feedback
.
isCanceled
():
feedback
.
pushWarning
(
self
.
tr
(
"
\n
!!!Processing is canceled by user!!!
\n
"
))
break
proj
.
partial_fit
(
fit_raster
)
feedback
.
setProgress
((
i
/
len
(
iter
))
*
100
)
## else, all in one go
else
:
proj
.
fit
(
fit_raster
)
feedback
.
pushInfo
(
f
'
Fitting done, saving model
\n
'
)
if
self
.
save_model
:
out_path
=
os
.
path
.
join
(
self
.
output_dir
,
save_file
)
joblib
.
dump
(
proj
,
out_path
)
feedback
.
pushInfo
(
f
'
Inference over raster
\n
'
)
proj_img
=
proj
.
predict
(
raster
.
reshape
(
-
1
,
raster
.
shape
[
-
1
]))
# if self.subset:
# feedback.pushInfo(f'Using a random subset of {self.subset} pixels')
# fit_raster = raster.reshape(-1, raster.shape[-1])
# nsamples = fit_raster.shape[0]
# # Generate random indices to select subset_size number of samples
# np.random.seed(42)
# random_indices = np.random.choice(nsamples, size=self.subset, replace=False)
# fit_raster = fit_raster[random_indices,:]
# feedback.pushInfo(f'Starting fit\n')
# proj.fit(fit_raster)
# if self.save_model:
# joblib.dump(proj, out_path)
# feedback.pushInfo(f'starting inference\n')
# proj_img = proj.predict(raster.reshape(-1, raster.shape[-1]))
# else:
# proj_img = proj.fit_predict(raster.reshape(-1, raster.shape[-1]))
# if self.save_model:
# joblib.dump(proj, out_path)
proj_img
=
proj_img
.
reshape
((
raster
.
shape
[
0
],
raster
.
shape
[
1
],
-
1
))
height
,
width
,
channels
=
proj_img
.
shape
dst_path
=
os
.
path
.
join
(
self
.
output_dir
,
'
cluster.tif
'
)
params_file
=
os
.
path
.
join
(
self
.
output_dir
,
'
cluster_parameters.json
'
)
# if os.path.exists(dst_path):
# i = 1
# while True:
# modified_output_file = os.path.join(self.output_dir, f"cluster_{i}.tif")
# if not os.path.exists(modified_output_file):
# dst_path = modified_output_file
# break
# i += 1
rlayer_basename
=
os
.
path
.
basename
(
self
.
rlayer_path
)
rlayer_name
,
ext
=
os
.
path
.
splitext
(
rlayer_basename
)
dst_path
,
layer_name
=
get_unique_filename
(
self
.
output_dir
,
'
cluster.tif
'
,
f
'
{
rlayer_name
}
clustering
'
)
if
os
.
path
.
exists
(
params_file
):
i
=
1
while
True
:
modified_output_file_params
=
os
.
path
.
join
(
self
.
output_dir
,
f
"
cluster_parameters_
{
i
}
.json
"
)
if
not
os
.
path
.
exists
(
modified_output_file_params
):
params_file
=
modified_output_file_params
break
i
+=
1
with
rasterio
.
open
(
dst_path
,
'
w
'
,
driver
=
'
GTiff
'
,
height
=
height
,
width
=
width
,
count
=
channels
,
dtype
=
'
int8
'
,
crs
=
crs
,
transform
=
transform
)
as
dst_ds
:
dst_ds
.
write
(
np
.
transpose
(
proj_img
,
(
2
,
0
,
1
)))
with
open
(
params_file
,
'
w
'
)
as
f
:
json
.
dump
(
params
,
f
,
indent
=
4
)
feedback
.
pushInfo
(
f
"
Parameters saved to
{
params_file
}
"
)
parameters
[
'
OUTPUT_RASTER
'
]
=
dst_path
return
{
'
OUTPUT_RASTER
'
:
dst_path
,
'
OUTPUT_LAYER_NAME
'
:
layer_name
}
def
process_options
(
self
,
parameters
,
context
,
feedback
):
self
.
iPatch
=
0
self
.
feature_dir
=
""
feedback
.
pushInfo
(
f
'
PARAMETERS :
\n
{
parameters
}
'
)
feedback
.
pushInfo
(
f
'
CONTEXT :
\n
{
context
}
'
)
feedback
.
pushInfo
(
f
'
FEEDBACK :
\n
{
feedback
}
'
)
rlayer
=
self
.
parameterAsRasterLayer
(
parameters
,
self
.
INPUT
,
context
)
if
rlayer
is
None
:
raise
QgsProcessingException
(
self
.
invalidRasterError
(
parameters
,
self
.
INPUT
))
self
.
selected_bands
=
self
.
parameterAsInts
(
parameters
,
self
.
BANDS
,
context
)
if
len
(
self
.
selected_bands
)
==
0
:
self
.
selected_bands
=
list
(
range
(
1
,
rlayer
.
bandCount
()
+
1
))
if
max
(
self
.
selected_bands
)
>
rlayer
.
bandCount
():
raise
QgsProcessingException
(
self
.
tr
(
"
The chosen bands exceed the largest band number!
"
)
)
self
.
nclusters
=
self
.
parameterAsInt
(
parameters
,
self
.
CLUSTERS
,
context
)
self
.
subset
=
self
.
parameterAsInt
(
parameters
,
self
.
SUBSET
,
context
)
method_idx
=
self
.
parameterAsEnum
(
parameters
,
self
.
METHOD
,
context
)
self
.
method
=
self
.
method_opt
[
method_idx
]
res
=
self
.
parameterAsDouble
(
parameters
,
self
.
RESOLUTION
,
context
)
crs
=
self
.
parameterAsCrs
(
parameters
,
self
.
CRS
,
context
)
extent
=
self
.
parameterAsExtent
(
parameters
,
self
.
EXTENT
,
context
)
output_dir
=
self
.
parameterAsString
(
parameters
,
self
.
OUTPUT
,
context
)
self
.
save_model
=
self
.
parameterAsBoolean
(
parameters
,
self
.
SAVE_MODEL
,
context
)
self
.
output_dir
=
Path
(
output_dir
)
self
.
output_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
rlayer_data_provider
=
rlayer
.
dataProvider
()
# handle crs
if
crs
is
None
or
not
crs
.
isValid
():
crs
=
rlayer
.
crs
()
"""
feedback.pushInfo(
f
'
Layer CRS unit is {crs.mapUnits()}
'
) # 0 for meters, 6 for degrees, 9 for unknown
feedback.pushInfo(
f
'
whether the CRS is a geographic CRS (using lat/lon coordinates) {crs.isGeographic()}
'
)
if crs.mapUnits() == Qgis.DistanceUnit.Degrees:
crs = self.estimate_utm_crs(rlayer.extent())
# target crs should use meters as units
if crs.mapUnits() != Qgis.DistanceUnit.Meters:
feedback.pushInfo(
f
'
Layer CRS unit is {crs.mapUnits()}
'
)
feedback.pushInfo(
f
'
whether the CRS is a geographic CRS (using lat/lon coordinates) {crs.isGeographic()}
'
)
raise QgsProcessingException(
self.tr(
"
Only support CRS with the units as meters
"
)
)
"""
# 0 for meters, 6 for degrees, 9 for unknown
UNIT_METERS
=
0
UNIT_DEGREES
=
6
if
rlayer
.
crs
().
mapUnits
()
==
UNIT_DEGREES
:
# Qgis.DistanceUnit.Degrees:
layer_units
=
'
degrees
'
else
:
layer_units
=
'
meters
'
# if res is not provided, get res info from rlayer
if
np
.
isnan
(
res
)
or
res
==
0
:
res
=
rlayer
.
rasterUnitsPerPixelX
()
# rasterUnitsPerPixelY() is negative
target_units
=
layer_units
else
:
# when given res in meters by users, convert crs to utm if the original crs unit is degree
if
crs
.
mapUnits
()
!=
UNIT_METERS
:
# Qgis.DistanceUnit.Meters:
if
rlayer
.
crs
().
mapUnits
()
==
UNIT_DEGREES
:
# Qgis.DistanceUnit.Degrees:
# estimate utm crs based on layer extent
crs
=
self
.
estimate_utm_crs
(
rlayer
.
extent
())
else
:
raise
QgsProcessingException
(
f
"
Resampling of image with the CRS of
{
crs
.
authid
()
}
in meters is not supported.
"
)
target_units
=
'
meters
'
# else:
# res = (rlayer_extent.xMaximum() -
# rlayer_extent.xMinimum()) / rlayer.width()
self
.
res
=
res
# handle extent
if
extent
.
isNull
():
extent
=
rlayer
.
extent
()
# QgsProcessingUtils.combineLayerExtents(layers, crs, context)
extent_crs
=
rlayer
.
crs
()
else
:
if
extent
.
isEmpty
():
raise
QgsProcessingException
(
self
.
tr
(
"
The extent for processing can not be empty!
"
))
extent_crs
=
self
.
parameterAsExtentCrs
(
parameters
,
self
.
EXTENT
,
context
)
# if extent crs != target crs, convert it to target crs
if
extent_crs
!=
crs
:
transform
=
QgsCoordinateTransform
(
extent_crs
,
crs
,
context
.
transformContext
())
# extent = transform.transformBoundingBox(extent)
# to ensure coverage of the transformed extent
# convert extent to polygon, transform polygon, then get boundingBox of the new polygon
extent_polygon
=
QgsGeometry
.
fromRect
(
extent
)
extent_polygon
.
transform
(
transform
)
extent
=
extent_polygon
.
boundingBox
()
extent_crs
=
crs
# check intersects between extent and rlayer_extent
if
rlayer
.
crs
()
!=
crs
:
transform
=
QgsCoordinateTransform
(
rlayer
.
crs
(),
crs
,
context
.
transformContext
())
rlayer_extent
=
transform
.
transformBoundingBox
(
rlayer
.
extent
())
else
:
rlayer_extent
=
rlayer
.
extent
()
if
not
rlayer_extent
.
intersects
(
extent
):
raise
QgsProcessingException
(
self
.
tr
(
"
The extent for processing is not intersected with the input image!
"
))
img_width_in_extent
=
round
(
(
extent
.
xMaximum
()
-
extent
.
xMinimum
())
/
self
.
res
)
img_height_in_extent
=
round
(
(
extent
.
yMaximum
()
-
extent
.
yMinimum
())
/
self
.
res
)
# Send some information to the user
feedback
.
pushInfo
(
f
'
Layer path:
{
rlayer_data_provider
.
dataSourceUri
()
}
'
)
# feedback.pushInfo(
# f'Layer band scale: {rlayer_data_provider.bandScale(self.selected_bands[0])}')
feedback
.
pushInfo
(
f
'
Layer name:
{
rlayer
.
name
()
}
'
)
if
rlayer
.
crs
().
authid
():
feedback
.
pushInfo
(
f
'
Layer CRS:
{
rlayer
.
crs
().
authid
()
}
'
)
else
:
feedback
.
pushInfo
(
f
'
Layer CRS in WKT format:
{
rlayer
.
crs
().
toWkt
()
}
'
)
feedback
.
pushInfo
(
f
'
Layer pixel size:
{
rlayer
.
rasterUnitsPerPixelX
()
}
,
{
rlayer
.
rasterUnitsPerPixelY
()
}
{
layer_units
}
'
)
feedback
.
pushInfo
(
f
'
Bands selected:
{
self
.
selected_bands
}
'
)
if
crs
.
authid
():
feedback
.
pushInfo
(
f
'
Target CRS:
{
crs
.
authid
()
}
'
)
else
:
feedback
.
pushInfo
(
f
'
Target CRS in WKT format:
{
crs
.
toWkt
()
}
'
)
feedback
.
pushInfo
(
f
'
Target resolution:
{
self
.
res
}
{
target_units
}
'
)
feedback
.
pushInfo
(
(
f
'
Processing extent: minx:
{
extent
.
xMinimum
()
:
.
6
f
}
, maxx:
{
extent
.
xMaximum
()
:
.
6
f
}
,
'
f
'
miny:
{
extent
.
yMinimum
()
:
.
6
f
}
, maxy:
{
extent
.
yMaximum
()
:
.
6
f
}
'
))
feedback
.
pushInfo
(
(
f
'
Processing image size: (width
{
img_width_in_extent
}
,
'
f
'
height
{
img_height_in_extent
}
)
'
))
self
.
rlayer_path
=
rlayer
.
dataProvider
().
dataSourceUri
()
feedback
.
pushInfo
(
f
'
Selected bands:
{
self
.
selected_bands
}
'
)
## passing parameters to self once everything has been processed
self
.
extent
=
extent
self
.
rlayer
=
rlayer
self
.
crs
=
crs
# used to handle any thread-sensitive cleanup which is required by the algorithm.
def
postProcessAlgorithm
(
self
,
context
,
feedback
)
->
Dict
[
str
,
Any
]:
return
{}
TYPE
=
'
cluster
'
def
tr
(
self
,
string
):
"""
...
...
@@ -545,5 +61,3 @@ class ClusterAlgorithm(QgsProcessingAlgorithm):
def
icon
(
self
):
return
'
E
'
This diff is collapsed.
Click to expand it.
utils/algo.py
+
72
−
45
View file @
49ef333b
...
...
@@ -26,8 +26,10 @@ from rasterio.enums import Resampling
import
sklearn.decomposition
as
decomposition
import
sklearn.cluster
as
cluster
from
sklearn.base
import
BaseEstimator
from
sklearn.preprocessing
import
StandardScaler
from
.misc
import
get_unique_filename
,
calculate_chunk_size
if
__name__
!=
"
__main__
"
:
from
.misc
import
get_unique_filename
,
calculate_chunk_size
def
get_sklearn_algorithms_with_methods
(
module
,
required_methods
):
...
...
@@ -60,12 +62,13 @@ def get_arguments(module, algorithm_name):
for
param_name
,
param
in
parameters
.
items
():
# Skip 'self'
if
param_name
!=
'
self
'
:
if
param
.
default
==
inspect
.
Parameter
.
empty
:
required_kwargs
[
param_name
]
=
None
# Placeholder for the required value
else
:
default_kwargs
[
param_name
]
=
param
.
default
#
if param.default ==
None
:
#
required_kwargs[param_name] = None # Placeholder for the required value
#
else:
default_kwargs
[
param_name
]
=
param
.
default
return
required_kwargs
,
default_kwargs
# return required_kwargs, default_kwargs
return
default_kwargs
def
get_iter
(
model
,
fit_raster
):
...
...
@@ -398,7 +401,7 @@ class SKAlgorithm(IAMAPAlgorithm):
SAVE_MODEL
=
'
SAVE_MODEL
'
COMPRESS
=
'
COMPRESS
'
RANDOM_SEED
=
'
RANDOM_SEED
'
TMP_DIR
=
'
iamap_
proj
'
TMP_DIR
=
'
iamap_
reduction
'
TYPE
=
'
proj
'
...
...
@@ -467,7 +470,7 @@ class SKAlgorithm(IAMAPAlgorithm):
)
proj_methods
=
[
'
fit
'
,
'
transform
'
]
clust_methods
=
[
'
fit
'
]
clust_methods
=
[
'
fit
'
,
'
fit_predict
'
]
if
self
.
TYPE
==
'
proj
'
:
method_opt1
=
get_sklearn_algorithms_with_methods
(
decomposition
,
proj_methods
)
method_opt2
=
get_sklearn_algorithms_with_methods
(
cluster
,
proj_methods
)
...
...
@@ -570,7 +573,7 @@ class SKAlgorithm(IAMAPAlgorithm):
self
.
process_geo_parameters
(
parameters
,
context
,
feedback
)
self
.
process_common_sklearn
(
parameters
,
context
,
feedback
)
fit_raster
=
self
.
get_fit_raster
(
feedback
)
fit_raster
,
scaler
=
self
.
get_fit_raster
(
feedback
)
rlayer_basename
=
os
.
path
.
basename
(
self
.
rlayer_path
)
rlayer_name
,
ext
=
os
.
path
.
splitext
(
rlayer_basename
)
...
...
@@ -588,20 +591,32 @@ class SKAlgorithm(IAMAPAlgorithm):
parameters
[
'
OUTPUT_RASTER
'
]
=
self
.
dst_path
try
:
required_args
,
default_args
=
get_arguments
(
decomposition
,
self
.
method_name
)
default_args
=
get_arguments
(
decomposition
,
self
.
method_name
)
except
AttributeError
:
required_args
,
default_args
=
get_arguments
(
cluster
,
self
.
method_name
)
default_args
=
get_arguments
(
cluster
,
self
.
method_name
)
kwargs
=
self
.
update_kwargs
(
required_args
,
default_args
)
kwargs
=
self
.
update_kwargs
(
default_args
)
## some clustering algorithms need the entire dataset.
do_fit_predict
=
False
try
:
model
=
instantiate_sklearn_algorithm
(
decomposition
,
self
.
method_name
,
**
kwargs
)
except
AttributeError
:
model
=
instantiate_sklearn_algorithm
(
cluster
,
self
.
method_name
,
**
kwargs
)
## if model does not have a 'predict()' method, then we do a fit_predict in one go
if
not
hasattr
(
model
,
'
predict
'
):
do_fit_predict
=
True
except
:
feedback
.
pushWarning
(
f
'
{
self
.
method_name
}
not properly initialized ! Try passing custom parameters
'
)
return
{
'
OUTPUT_RASTER
'
:
self
.
dst_path
,
'
OUTPUT_LAYER_NAME
'
:
self
.
layer_name
}
if
do_fit_predict
:
proj_img
,
model
=
self
.
fit_predict
(
model
,
feedback
)
iter
=
get_iter
(
model
,
fit_raster
)
model
=
self
.
fit_model
(
model
,
fit_raster
,
iter
,
feedback
)
else
:
iter
=
get_iter
(
model
,
fit_raster
)
model
=
self
.
fit_model
(
model
,
fit_raster
,
iter
,
feedback
)
feedback
.
pushInfo
(
f
'
Fitting done, saving model
\n
'
)
save_file
=
f
'
{
self
.
method_name
}
.pkl
'
.
lower
()
...
...
@@ -609,8 +624,9 @@ class SKAlgorithm(IAMAPAlgorithm):
out_path
=
os
.
path
.
join
(
self
.
output_dir
,
save_file
)
joblib
.
dump
(
model
,
out_path
)
feedback
.
pushInfo
(
f
'
Inference over raster
\n
'
)
self
.
infer_model
(
model
,
feedback
)
if
not
do_fit_predict
:
feedback
.
pushInfo
(
f
'
Inference over raster
\n
'
)
self
.
infer_model
(
model
,
feedback
,
scaler
)
return
{
'
OUTPUT_RASTER
'
:
self
.
dst_path
,
'
OUTPUT_LAYER_NAME
'
:
self
.
layer_name
}
...
...
@@ -658,6 +674,8 @@ class SKAlgorithm(IAMAPAlgorithm):
raster
=
np
.
transpose
(
raster
,
(
1
,
2
,
0
))
raster
=
raster
[:,:,
self
.
input_bands
]
fit_raster
=
raster
.
reshape
(
-
1
,
raster
.
shape
[
-
1
])
scaler
=
StandardScaler
()
scaler
.
fit
(
fit_raster
)
if
self
.
subset
:
...
...
@@ -674,13 +692,10 @@ class SKAlgorithm(IAMAPAlgorithm):
# remove nans
fit_raster
=
fit_raster
[
~
np
.
isnan
(
fit_raster
).
any
(
axis
=
1
)]
feedback
.
pushInfo
(
f
"
Mean raster :
{
np
.
mean
(
raster
)
}
"
)
feedback
.
pushInfo
(
f
"
Standart dev :
{
np
.
std
(
raster
)
}
"
)
fit_raster
=
(
fit_raster
-
np
.
mean
(
raster
))
/
np
.
std
(
raster
)
feedback
.
pushInfo
(
f
"
Mean raster normalized :
{
np
.
mean
(
fit_raster
)
}
"
)
feedback
.
pushInfo
(
f
"
Standart dev normalized :
{
np
.
std
(
fit_raster
)
}
"
)
fit_raster
=
scaler
.
transform
(
fit_raster
)
np
.
nan_to_num
(
fit_raster
)
# NaN to zero after normalisation
return
fit_raster
return
fit_raster
,
scaler
def
fit_model
(
self
,
model
,
fit_raster
,
iter
,
feedback
):
...
...
@@ -721,7 +736,10 @@ class SKAlgorithm(IAMAPAlgorithm):
raster
=
raster
[:,:,
self
.
input_bands
]
raster
=
(
raster
-
np
.
mean
(
raster
))
/
np
.
std
(
raster
)
# raster = (raster-np.mean(raster))/np.std(raster)
scaler
=
StandardScaler
()
raster
=
scaler
.
fit_transform
(
raster
)
np
.
nan_to_num
(
raster
)
# NaN to zero after normalisation
proj_img
=
model
.
fit_predict
(
raster
.
reshape
(
-
1
,
raster
.
shape
[
-
1
]))
...
...
@@ -741,10 +759,10 @@ class SKAlgorithm(IAMAPAlgorithm):
dst_ds
.
write
(
np
.
transpose
(
proj_img
,
(
2
,
0
,
1
)))
feedback
.
pushInfo
(
f
'
Export to geotif done
\n
'
)
return
proj_img
,
model
return
model
def
infer_model
(
self
,
model
,
feedback
):
def
infer_model
(
self
,
model
,
feedback
,
scaler
=
None
):
with
rasterio
.
open
(
self
.
rlayer_path
)
as
ds
:
...
...
@@ -763,10 +781,16 @@ class SKAlgorithm(IAMAPAlgorithm):
raster
=
raster
[:,:,
self
.
input_bands
]
raster
=
(
raster
-
np
.
mean
(
raster
))
/
np
.
std
(
raster
)
np
.
nan_to_num
(
raster
)
# NaN to zero after normalisation
inf_raster
=
raster
.
reshape
(
-
1
,
raster
.
shape
[
-
1
])
if
scaler
:
inf_raster
=
scaler
.
transform
(
inf_raster
)
np
.
nan_to_num
(
inf_raster
)
# NaN to zero after normalisation
proj_img
=
model
.
transform
(
raster
.
reshape
(
-
1
,
raster
.
shape
[
-
1
]))
if
self
.
TYPE
==
'
cluster
'
:
proj_img
=
model
.
predict
(
inf_raster
)
else
:
proj_img
=
model
.
transform
(
inf_raster
)
proj_img
=
proj_img
.
reshape
((
raster
.
shape
[
0
],
raster
.
shape
[
1
],
-
1
))
height
,
width
,
channels
=
proj_img
.
shape
...
...
@@ -782,18 +806,14 @@ class SKAlgorithm(IAMAPAlgorithm):
dst_ds
.
write
(
np
.
transpose
(
proj_img
,
(
2
,
0
,
1
)))
feedback
.
pushInfo
(
f
'
Export to geotif done
\n
'
)
def
update_kwargs
(
self
,
kwargs_dict
,
default_kwargs
):
def
update_kwargs
(
self
,
kwargs_dict
):
if
'
n_clusters
'
in
kwargs_dict
.
keys
():
kwargs_dict
[
'
n_clusters
'
]
=
self
.
main_param
if
'
n_components
'
in
kwargs_dict
.
keys
():
kwargs_dict
[
'
n_components
'
]
=
self
.
main_param
if
'
min_samples
'
in
kwargs_dict
.
keys
():
kwargs_dict
[
'
min_samples
'
]
=
self
.
main_param
if
'
bandwitdth
'
in
kwargs_dict
.
keys
():
kwargs_dict
[
'
bandwitdth
'
]
=
self
.
main_param
if
'
random_state
'
in
default_
kwargs
.
keys
():
if
'
random_state
'
in
kwargs
_dict
.
keys
():
kwargs_dict
[
'
random_state
'
]
=
self
.
seed
return
kwargs_dict
...
...
@@ -804,16 +824,23 @@ class SKAlgorithm(IAMAPAlgorithm):
return
{}
# if self.TYPE != 'proj' and (not hasattr(model,'predict')):
# feedback.pushWarning(f'{self.method_name} does not support separate fit and prediction, doing all in one go.')
# feedback.pushInfo(f'fit+predict')
# del fit_raster
# dst_path, layer_name = self.fit_predict(model, feedback)
# parameters['OUTPUT_RASTER']=dst_path
# return {'OUTPUT_RASTER':dst_path, 'OUTPUT_LAYER_NAME':layer_name}
if
__name__
==
"
__main__
"
:
# else:
# proj_img = model.predict(raster.reshape(-1, raster.shape[-1]))
# print(proj_img)
# print('predict')
methods
=
[
'
fit_predict
'
,
'
partial_fit
'
]
algos
=
get_sklearn_algorithms_with_methods
(
cluster
,
methods
)
print
(
algos
)
methods
=
[
'
predict
'
,
'
fit
'
]
algos
=
get_sklearn_algorithms_with_methods
(
cluster
,
methods
)
print
(
algos
)
for
algo
in
algos
:
args
=
get_arguments
(
cluster
,
algo
)
print
(
algo
,
args
)
methods
=
[
'
transform
'
,
'
fit
'
]
algos
=
get_sklearn_algorithms_with_methods
(
decomposition
,
methods
)
for
algo
in
algos
:
args
=
get_arguments
(
decomposition
,
algo
)
print
(
algo
,
args
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment