Skip to content
Snippets Groups Projects
Commit 503f506f authored by claire.teillet_ird.fr's avatar claire.teillet_ird.fr
Browse files

Upload biomod2_breeding_sites_public_spaces.R

parent f5199fa1
No related branches found
No related tags found
No related merge requests found
# Script to use Biomod2 with raster layers describing urban landscapes and presence data of larval presence in urban spaces
# Author : Claire Teillet and Héloïse Pottier
# based on https://biomodhub.github.io/biomod2/articles/examples_1_mainFunctions.html
#### Load package ####
library(sf)
library(biomod2)
library(dplyr)
library(terra)
library(ggplot2)
library(raster)
library(corrplot)
library(FactoMineR)
library(factoextra)
library(ggplot2)
library(GGally)
#### Prepare data and parameters ####
#### Load Presence Data ####
df_GU_potentiel <- read.csv("G:/0_PHD/RECHERCHE/AXE_2_BIS/data_montpellier/GU_potentiel_2154_clean_filtering_by_type.csv")
#### Load Environmental data (tif) ####
var_env <- rast("G:/0_PHD/RECHERCHE/AXE_2_BIS/data_montpellier/grid_MTP_complete_IS_IT.tif")
# Select only the layers if needed
# select_columns <- c("NDVI_mean", "...")
#
# var_env <- var_env[[select_columns]]
# Show selected layers
names(var_env)
#### Other method to select layers ####
# var_env_filtered <- subset(var_env, !grepl("FOTO", names(var_env)))
# names(var_env_filtered)
# test <- st_drop_geometry(var_env)
# png("G:/0_PHD/RECHERCHE/AXE_2_BIS/FIG/scatter_plots.png", width = 2000, height = 2000)
# pairs(test,
# main = "Matrice de scatter plots de toutes les variables",
# pch = 15,
# col = "blue",
# cex = 2,
# cex.labels = 2,
# cex.main = 1)
# dev.off()
#
# ggpairs <- ggpairs(as.data.frame(test))
# Assigned 1 or 0 for if needed
df_GU_potentiel <- df_GU_potentiel %>%
mutate(positif = case_when(
positif=="OUI"~"1",
positif=="NON"~"0",
TRUE ~ positif
))
df_GU_potentiel$positif <- as.integer(df_GU_potentiel$positif)
# Fix the seed for reproductibility
set.seed(123)
# Formating data
df_GU_potentiel_biomod2 <-
BIOMOD_FormatingData(
resp.var = df_GU_potentiel['positif'], # var to explain (presence data)
resp.name = "positif", # name of the var
resp.xy = df_GU_potentiel[, c('long', 'lat')], #XY coordinates
expl.var = var_env, # environmental data
)
plot(df_GU_potentiel_biomod2)
#### Modeling ####
##### Individual models #####
GU_models2 <- BIOMOD_Modeling(bm.format = df_GU_potentiel_biomod2, # data
models = c("ANN", "CTA", "FDA", "GAM", "GBM", "GLM", "MARS", "MAXNET", "RF",
"SRE", "XGBOOST"), #vector containing several algorithms : "ANN", "MAXENT", "CTA", "FDA", "GAM", "GBM", "GLM", "MARS", "MAXNET", "RF", "SRE", "XGBOOST"
CV.strategy = 'kfold', #cross validation strategy : random, kfold, block, strat, env or user.defined
CV.nb.rep = 10, # number of repetitions for the cross validation
CV.k = 10,# number of partitions of the dataset for the cross validation
OPT.strategy = "bigboss", # parameters of each algorithms "bigboss" or "default"
var.import = 3,# number of permutations for variables importance
nb.cpu = 16, # if cpu using all cores
metric.eval = c('TSS','ROC')) # evaluations metrics
# to save the model
save(list = ls(all.names = TRUE), file = "XXX.Rdata", envir = .GlobalEnv)
# to load the model
load("C:/Users/teill/Documents/0_PHD/MODELE/Git/biomod2/positif_0_70_0_75/positif.1723633541.models.out")
# get evaluation score
eval_models <- get_evaluations(positif.1723633541.models.out) #or positif.1723633541.models.out if loaded
# Represent evaluation scores (TSS,AUC)
bm_PlotEvalMean(
bm.out = GU_models2,
metric.eval = NULL,
dataset = "validation", # show for validation dataset
group.by = "algo",# show by algorithms
do.plot = TRUE
)
#Represent variables importances by algo
bm_PlotVarImpBoxplot(bm.out = GU_models2, group.by = c('expl.var', 'algo', 'algo') ) # par models
var_importance <- get_variables_importance(GU_models2)
# get_models <- get_built_models(GU_models, algo = c("ANN", "CTA", "FDA",))
### Response Curves for individual models, selection options algo and run
bm_PlotResponseCurves(bm.out = GU_models2,
models.chosen = get_built_models(GU_models2)[c(1:2, 13:14)],
fixed.var = 'median')
bm_PlotResponseCurves(bm.out = GU_models2,
models.chosen = get_built_models( GU_models2)[c(1:3, 12:14)],
fixed.var = 'min')
##### Ensemble modeling #####
# Get individual model
AllModels <- get_built_models(positif.1723633541.models.out)
# Select individual model
AllModels <- AllModels[ grepl(pattern = "_ANN", x = AllModels, fixed = TRUE)|
grepl(pattern = "_FDA", x = AllModels, fixed = TRUE) |
grepl(pattern = "_GLM", x = AllModels, fixed = TRUE)|
grepl(pattern = "_MARS", x = AllModels, fixed = TRUE) |
grepl(pattern = "_MAXNET", x = AllModels, fixed = TRUE)|
grepl(pattern = "_XGBOOST", x = AllModels, fixed = TRUE) ]
myBiomodEM_grep <- BIOMOD_EnsembleModeling(bm.mod = GU_models2,
em.by = 'all',
em.algo = c("EMmean", "EMcv", "EMwmean", "EMmedian"), #, 'EMcv', 'EMci', 'EMmedian', 'EMca', 'EMwmean'
models.chosen = AllModels,
metric.select.dataset = "validation",
metric.eval = c("TSS", "ROC"),
var.import = 3,
EMci.alpha = 0.05, # value corresponding to the significance level to estimate confidence interval
EMwmean.decay = 'proportional', # A value defining the relative importance of the weights (if 'EMwmean' was given to argument em.algo). A high value will strongly discriminate good models from the bad ones (see Details), while proportional will attribute weights proportionally to the models evaluation scores
nb.cpu = 16)
# load ensemble.model.out if needeed
load("positif_0_70_0_75/positif.1723633541.ensemble.models.out")
# If needeed of you can use directly a threshold to select individual model
# myBiomodEM <- BIOMOD_EnsembleModeling(bm.mod = GU_models2, # individual models
# em.by = 'all',
# em.algo = c('EMmean', "EMcv", "EMwmean"), #, 'EMcv', 'EMci', 'EMmedian', 'EMca', 'EMwmean'
# metric.select = 'ROC' ,
# metric.select.thresh = 0.70, # threshold so select model
# metric.select.dataset = "validation",
# metric.eval = c('TSS', 'ROC'),
# var.import = 3,
# EMci.alpha = 0.05, # value corresponding to the significance level to estimate confidence interval
# EMwmean.decay = 'proportional', # A value defining the relative importance of the weights (if 'EMwmean' was given to argument em.algo). A high value will strongly discriminate good models from the bad ones (see Details), while proportional will attribute weights proportionally to the models evaluation scores
# nb.cpu = 16)
# Load directly the results if needed
load("C:/Users/teill/Documents/0_PHD/MODELE/Git/biomod2/positif/positif.1723633541.ensemble.models.out")
# Model use for article 1723633541.models.ensemble.out"
eval_ensemble_models <- get_evaluations(myBiomodEM_grep)
eval_ensemble_models
bm_PlotVarImpBoxplot(bm.out = myBiomodEM_grep, group.by = c('expl.var', 'algo', 'algo') ) # par models
#Represent response curves for ensemble modeling
load("C:/Users/teill/Documents/0_PHD/MODELE/Git/biomod2/positif_0_70_0_75/positif.1723633541.models.out")
get_variables_importance(myBiomodEM_grep)
varimp.out <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_grep, group.by = c('expl.var', 'merged.by.run','algo'), do.plot = FALSE) # par models
varimp.out$tab %>%
filter(algo == "EMmean") %>%
ggplot()+
geom_boxplot(aes(x = expl.var, y = var.imp), fill = "#F08080")
varimp.out$tab %>%
filter(algo == "EMmean") %>%
arrange(desc(var.imp)) %>% # order by importance scores
mutate(expl.var = factor(expl.var, levels = unique(expl.var))) %>%
ggplot() +
geom_boxplot(aes(x = expl.var, y = var.imp), fill = "#F08080") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Projection Forecasting
myBiomodEM_proj <- BIOMOD_EnsembleForecasting(bm.em = myBiomodEM_grep,
proj.name = 'proj_positif_grep',
new.env = var_env,
models.chosen = 'all',
metric.binary = 'all',
metric.filter = 'all',
on_0_1000 = FALSE) # to get 0-1 mean probabilities
myBiomodEM_proj <- rast("G:/0_PHD/RECHERCHE/AXE_2_BIS/Biomod2/positif_0_70_0_75/proj_positif_grep/proj_positif_grep_positif_ensemble.tif")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment