Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# Script to use Biomod2 with raster layers describing urban landscapes and presence data of larval presence in urban spaces
# Author : Claire Teillet and Héloïse Pottier
# based on https://biomodhub.github.io/biomod2/articles/examples_1_mainFunctions.html
#### Load package ####
library(sf)
library(biomod2)
library(dplyr)
library(terra)
library(ggplot2)
library(raster)
library(corrplot)
library(FactoMineR)
library(factoextra)
library(ggplot2)
library(GGally)
#### Prepare data and parameters ####
#### Load Presence Data ####
df_GU_potentiel <- read.csv("G:/0_PHD/RECHERCHE/AXE_2_BIS/data_montpellier/GU_potentiel_2154_clean_filtering_by_type.csv")
#### Load Environmental data (tif) ####
var_env <- rast("G:/0_PHD/RECHERCHE/AXE_2_BIS/data_montpellier/grid_MTP_complete_IS_IT.tif")
# Select only the layers if needed
# select_columns <- c("NDVI_mean", "...")
#
# var_env <- var_env[[select_columns]]
# Show selected layers
names(var_env)
#### Other method to select layers ####
# var_env_filtered <- subset(var_env, !grepl("FOTO", names(var_env)))
# names(var_env_filtered)
# test <- st_drop_geometry(var_env)
# png("G:/0_PHD/RECHERCHE/AXE_2_BIS/FIG/scatter_plots.png", width = 2000, height = 2000)
# pairs(test,
# main = "Matrice de scatter plots de toutes les variables",
# pch = 15,
# col = "blue",
# cex = 2,
# cex.labels = 2,
# cex.main = 1)
# dev.off()
#
# ggpairs <- ggpairs(as.data.frame(test))
# Assigned 1 or 0 for if needed
df_GU_potentiel <- df_GU_potentiel %>%
mutate(positif = case_when(
positif=="OUI"~"1",
positif=="NON"~"0",
TRUE ~ positif
))
df_GU_potentiel$positif <- as.integer(df_GU_potentiel$positif)
# Fix the seed for reproductibility
set.seed(123)
# Formating data
df_GU_potentiel_biomod2 <-
BIOMOD_FormatingData(
resp.var = df_GU_potentiel['positif'], # var to explain (presence data)
resp.name = "positif", # name of the var
resp.xy = df_GU_potentiel[, c('long', 'lat')], #XY coordinates
expl.var = var_env, # environmental data
)
plot(df_GU_potentiel_biomod2)
#### Modeling ####
##### Individual models #####
GU_models2 <- BIOMOD_Modeling(bm.format = df_GU_potentiel_biomod2, # data
models = c("ANN", "CTA", "FDA", "GAM", "GBM", "GLM", "MARS", "MAXNET", "RF",
"SRE", "XGBOOST"), #vector containing several algorithms : "ANN", "MAXENT", "CTA", "FDA", "GAM", "GBM", "GLM", "MARS", "MAXNET", "RF", "SRE", "XGBOOST"
CV.strategy = 'kfold', #cross validation strategy : random, kfold, block, strat, env or user.defined
CV.nb.rep = 10, # number of repetitions for the cross validation
CV.k = 10,# number of partitions of the dataset for the cross validation
OPT.strategy = "bigboss", # parameters of each algorithms "bigboss" or "default"
var.import = 3,# number of permutations for variables importance
nb.cpu = 16, # if cpu using all cores
metric.eval = c('TSS','ROC')) # evaluations metrics
# to save the model
save(list = ls(all.names = TRUE), file = "XXX.Rdata", envir = .GlobalEnv)
# to load the model
load("C:/Users/teill/Documents/0_PHD/MODELE/Git/biomod2/positif_0_70_0_75/positif.1723633541.models.out")
# get evaluation score
eval_models <- get_evaluations(positif.1723633541.models.out) #or positif.1723633541.models.out if loaded
# Represent evaluation scores (TSS,AUC)
bm_PlotEvalMean(
bm.out = GU_models2,
metric.eval = NULL,
dataset = "validation", # show for validation dataset
group.by = "algo",# show by algorithms
do.plot = TRUE
)
#Represent variables importances by algo
bm_PlotVarImpBoxplot(bm.out = GU_models2, group.by = c('expl.var', 'algo', 'algo') ) # par models
var_importance <- get_variables_importance(GU_models2)
# get_models <- get_built_models(GU_models, algo = c("ANN", "CTA", "FDA",))
### Response Curves for individual models, selection options algo and run
bm_PlotResponseCurves(bm.out = GU_models2,
models.chosen = get_built_models(GU_models2)[c(1:2, 13:14)],
fixed.var = 'median')
bm_PlotResponseCurves(bm.out = GU_models2,
models.chosen = get_built_models( GU_models2)[c(1:3, 12:14)],
fixed.var = 'min')
##### Ensemble modeling #####
# Get individual model
AllModels <- get_built_models(positif.1723633541.models.out)
# Select individual model
AllModels <- AllModels[ grepl(pattern = "_ANN", x = AllModels, fixed = TRUE)|
grepl(pattern = "_FDA", x = AllModels, fixed = TRUE) |
grepl(pattern = "_GLM", x = AllModels, fixed = TRUE)|
grepl(pattern = "_MARS", x = AllModels, fixed = TRUE) |
grepl(pattern = "_MAXNET", x = AllModels, fixed = TRUE)|
grepl(pattern = "_XGBOOST", x = AllModels, fixed = TRUE) ]
myBiomodEM_grep <- BIOMOD_EnsembleModeling(bm.mod = GU_models2,
em.by = 'all',
em.algo = c("EMmean", "EMcv", "EMwmean", "EMmedian"), #, 'EMcv', 'EMci', 'EMmedian', 'EMca', 'EMwmean'
models.chosen = AllModels,
metric.select.dataset = "validation",
metric.eval = c("TSS", "ROC"),
var.import = 3,
EMci.alpha = 0.05, # value corresponding to the significance level to estimate confidence interval
EMwmean.decay = 'proportional', # A value defining the relative importance of the weights (if 'EMwmean' was given to argument em.algo). A high value will strongly discriminate good models from the bad ones (see Details), while proportional will attribute weights proportionally to the models evaluation scores
nb.cpu = 16)
# load ensemble.model.out if needeed
load("positif_0_70_0_75/positif.1723633541.ensemble.models.out")
# If needeed of you can use directly a threshold to select individual model
# myBiomodEM <- BIOMOD_EnsembleModeling(bm.mod = GU_models2, # individual models
# em.by = 'all',
# em.algo = c('EMmean', "EMcv", "EMwmean"), #, 'EMcv', 'EMci', 'EMmedian', 'EMca', 'EMwmean'
# metric.select = 'ROC' ,
# metric.select.thresh = 0.70, # threshold so select model
# metric.select.dataset = "validation",
# metric.eval = c('TSS', 'ROC'),
# var.import = 3,
# EMci.alpha = 0.05, # value corresponding to the significance level to estimate confidence interval
# EMwmean.decay = 'proportional', # A value defining the relative importance of the weights (if 'EMwmean' was given to argument em.algo). A high value will strongly discriminate good models from the bad ones (see Details), while proportional will attribute weights proportionally to the models evaluation scores
# nb.cpu = 16)
# Load directly the results if needed
load("C:/Users/teill/Documents/0_PHD/MODELE/Git/biomod2/positif/positif.1723633541.ensemble.models.out")
# Model use for article 1723633541.models.ensemble.out"
eval_ensemble_models <- get_evaluations(myBiomodEM_grep)
eval_ensemble_models
bm_PlotVarImpBoxplot(bm.out = myBiomodEM_grep, group.by = c('expl.var', 'algo', 'algo') ) # par models
#Represent response curves for ensemble modeling
load("C:/Users/teill/Documents/0_PHD/MODELE/Git/biomod2/positif_0_70_0_75/positif.1723633541.models.out")
get_variables_importance(myBiomodEM_grep)
varimp.out <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_grep, group.by = c('expl.var', 'merged.by.run','algo'), do.plot = FALSE) # par models
varimp.out$tab %>%
filter(algo == "EMmean") %>%
ggplot()+
geom_boxplot(aes(x = expl.var, y = var.imp), fill = "#F08080")
varimp.out$tab %>%
filter(algo == "EMmean") %>%
arrange(desc(var.imp)) %>% # order by importance scores
mutate(expl.var = factor(expl.var, levels = unique(expl.var))) %>%
ggplot() +
geom_boxplot(aes(x = expl.var, y = var.imp), fill = "#F08080") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Projection Forecasting
myBiomodEM_proj <- BIOMOD_EnsembleForecasting(bm.em = myBiomodEM_grep,
proj.name = 'proj_positif_grep',
new.env = var_env,
models.chosen = 'all',
metric.binary = 'all',
metric.filter = 'all',
on_0_1000 = FALSE) # to get 0-1 mean probabilities
myBiomodEM_proj <- rast("G:/0_PHD/RECHERCHE/AXE_2_BIS/Biomod2/positif_0_70_0_75/proj_positif_grep/proj_positif_grep_positif_ensemble.tif")