diff --git a/biomod2_breeding_sites_public_spaces.R b/biomod2_breeding_sites_public_spaces.R new file mode 100644 index 0000000000000000000000000000000000000000..2ff7221a0ffb6c525529e5167784743f096ce3ee --- /dev/null +++ b/biomod2_breeding_sites_public_spaces.R @@ -0,0 +1,209 @@ +# Script to use Biomod2 with raster layers describing urban landscapes and presence data of larval presence in urban spaces +# Author : Claire Teillet and Héloïse Pottier +# based on https://biomodhub.github.io/biomod2/articles/examples_1_mainFunctions.html + +#### Load package #### +library(sf) +library(biomod2) +library(dplyr) +library(terra) +library(ggplot2) +library(raster) +library(corrplot) +library(FactoMineR) +library(factoextra) +library(ggplot2) +library(GGally) + +#### Prepare data and parameters #### + +#### Load Presence Data #### +df_GU_potentiel <- read.csv("G:/0_PHD/RECHERCHE/AXE_2_BIS/data_montpellier/GU_potentiel_2154_clean_filtering_by_type.csv") + +#### Load Environmental data (tif) #### +var_env <- rast("G:/0_PHD/RECHERCHE/AXE_2_BIS/data_montpellier/grid_MTP_complete_IS_IT.tif") + +# Select only the layers if needed +# select_columns <- c("NDVI_mean", "...") +# +# var_env <- var_env[[select_columns]] + +# Show selected layers +names(var_env) + +#### Other method to select layers #### + +# var_env_filtered <- subset(var_env, !grepl("FOTO", names(var_env))) +# names(var_env_filtered) + +# test <- st_drop_geometry(var_env) +# png("G:/0_PHD/RECHERCHE/AXE_2_BIS/FIG/scatter_plots.png", width = 2000, height = 2000) +# pairs(test, +# main = "Matrice de scatter plots de toutes les variables", +# pch = 15, +# col = "blue", +# cex = 2, +# cex.labels = 2, +# cex.main = 1) +# dev.off() +# +# ggpairs <- ggpairs(as.data.frame(test)) + +# Assigned 1 or 0 for if needed +df_GU_potentiel <- df_GU_potentiel %>% + mutate(positif = case_when( + positif=="OUI"~"1", + positif=="NON"~"0", + TRUE ~ positif + )) + +df_GU_potentiel$positif <- as.integer(df_GU_potentiel$positif) + +# Fix the seed for reproductibility +set.seed(123) + +# Formating data +df_GU_potentiel_biomod2 <- + BIOMOD_FormatingData( + resp.var = df_GU_potentiel['positif'], # var to explain (presence data) + resp.name = "positif", # name of the var + resp.xy = df_GU_potentiel[, c('long', 'lat')], #XY coordinates + expl.var = var_env, # environmental data + + ) + +plot(df_GU_potentiel_biomod2) + +#### Modeling #### + +##### Individual models ##### + +GU_models2 <- BIOMOD_Modeling(bm.format = df_GU_potentiel_biomod2, # data + models = c("ANN", "CTA", "FDA", "GAM", "GBM", "GLM", "MARS", "MAXNET", "RF", + "SRE", "XGBOOST"), #vector containing several algorithms : "ANN", "MAXENT", "CTA", "FDA", "GAM", "GBM", "GLM", "MARS", "MAXNET", "RF", "SRE", "XGBOOST" + CV.strategy = 'kfold', #cross validation strategy : random, kfold, block, strat, env or user.defined + CV.nb.rep = 10, # number of repetitions for the cross validation + CV.k = 10,# number of partitions of the dataset for the cross validation + OPT.strategy = "bigboss", # parameters of each algorithms "bigboss" or "default" + var.import = 3,# number of permutations for variables importance + nb.cpu = 16, # if cpu using all cores + metric.eval = c('TSS','ROC')) # evaluations metrics + +# to save the model +save(list = ls(all.names = TRUE), file = "XXX.Rdata", envir = .GlobalEnv) +# to load the model +load("C:/Users/teill/Documents/0_PHD/MODELE/Git/biomod2/positif_0_70_0_75/positif.1723633541.models.out") + +# get evaluation score +eval_models <- get_evaluations(positif.1723633541.models.out) #or positif.1723633541.models.out if loaded + +# Represent evaluation scores (TSS,AUC) +bm_PlotEvalMean( + bm.out = GU_models2, + metric.eval = NULL, + dataset = "validation", # show for validation dataset + group.by = "algo",# show by algorithms + do.plot = TRUE +) + +#Represent variables importances by algo +bm_PlotVarImpBoxplot(bm.out = GU_models2, group.by = c('expl.var', 'algo', 'algo') ) # par models + +var_importance <- get_variables_importance(GU_models2) + +# get_models <- get_built_models(GU_models, algo = c("ANN", "CTA", "FDA",)) + +### Response Curves for individual models, selection options algo and run +bm_PlotResponseCurves(bm.out = GU_models2, + models.chosen = get_built_models(GU_models2)[c(1:2, 13:14)], + fixed.var = 'median') +bm_PlotResponseCurves(bm.out = GU_models2, + models.chosen = get_built_models( GU_models2)[c(1:3, 12:14)], + fixed.var = 'min') + + +##### Ensemble modeling ##### + +# Get individual model +AllModels <- get_built_models(positif.1723633541.models.out) + +# Select individual model +AllModels <- AllModels[ grepl(pattern = "_ANN", x = AllModels, fixed = TRUE)| + grepl(pattern = "_FDA", x = AllModels, fixed = TRUE) | + grepl(pattern = "_GLM", x = AllModels, fixed = TRUE)| + grepl(pattern = "_MARS", x = AllModels, fixed = TRUE) | + grepl(pattern = "_MAXNET", x = AllModels, fixed = TRUE)| + grepl(pattern = "_XGBOOST", x = AllModels, fixed = TRUE) ] + +myBiomodEM_grep <- BIOMOD_EnsembleModeling(bm.mod = GU_models2, + em.by = 'all', + em.algo = c("EMmean", "EMcv", "EMwmean", "EMmedian"), #, 'EMcv', 'EMci', 'EMmedian', 'EMca', 'EMwmean' + models.chosen = AllModels, + metric.select.dataset = "validation", + metric.eval = c("TSS", "ROC"), + var.import = 3, + EMci.alpha = 0.05, # value corresponding to the significance level to estimate confidence interval + EMwmean.decay = 'proportional', # A value defining the relative importance of the weights (if 'EMwmean' was given to argument em.algo). A high value will strongly discriminate good models from the bad ones (see Details), while proportional will attribute weights proportionally to the models evaluation scores + nb.cpu = 16) + +# load ensemble.model.out if needeed +load("positif_0_70_0_75/positif.1723633541.ensemble.models.out") + +# If needeed of you can use directly a threshold to select individual model +# myBiomodEM <- BIOMOD_EnsembleModeling(bm.mod = GU_models2, # individual models +# em.by = 'all', +# em.algo = c('EMmean', "EMcv", "EMwmean"), #, 'EMcv', 'EMci', 'EMmedian', 'EMca', 'EMwmean' +# metric.select = 'ROC' , +# metric.select.thresh = 0.70, # threshold so select model +# metric.select.dataset = "validation", +# metric.eval = c('TSS', 'ROC'), +# var.import = 3, +# EMci.alpha = 0.05, # value corresponding to the significance level to estimate confidence interval +# EMwmean.decay = 'proportional', # A value defining the relative importance of the weights (if 'EMwmean' was given to argument em.algo). A high value will strongly discriminate good models from the bad ones (see Details), while proportional will attribute weights proportionally to the models evaluation scores +# nb.cpu = 16) + +# Load directly the results if needed +load("C:/Users/teill/Documents/0_PHD/MODELE/Git/biomod2/positif/positif.1723633541.ensemble.models.out") + +# Model use for article 1723633541.models.ensemble.out" + +eval_ensemble_models <- get_evaluations(myBiomodEM_grep) +eval_ensemble_models + + +bm_PlotVarImpBoxplot(bm.out = myBiomodEM_grep, group.by = c('expl.var', 'algo', 'algo') ) # par models + + +#Represent response curves for ensemble modeling +load("C:/Users/teill/Documents/0_PHD/MODELE/Git/biomod2/positif_0_70_0_75/positif.1723633541.models.out") + +get_variables_importance(myBiomodEM_grep) +varimp.out <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_grep, group.by = c('expl.var', 'merged.by.run','algo'), do.plot = FALSE) # par models + +varimp.out$tab %>% + filter(algo == "EMmean") %>% + ggplot()+ + geom_boxplot(aes(x = expl.var, y = var.imp), fill = "#F08080") + +varimp.out$tab %>% + filter(algo == "EMmean") %>% + arrange(desc(var.imp)) %>% # order by importance scores + mutate(expl.var = factor(expl.var, levels = unique(expl.var))) %>% + ggplot() + + geom_boxplot(aes(x = expl.var, y = var.imp), fill = "#F08080") + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + +# Projection Forecasting +myBiomodEM_proj <- BIOMOD_EnsembleForecasting(bm.em = myBiomodEM_grep, + proj.name = 'proj_positif_grep', + new.env = var_env, + models.chosen = 'all', + metric.binary = 'all', + metric.filter = 'all', + on_0_1000 = FALSE) # to get 0-1 mean probabilities + + +myBiomodEM_proj <- rast("G:/0_PHD/RECHERCHE/AXE_2_BIS/Biomod2/positif_0_70_0_75/proj_positif_grep/proj_positif_grep_positif_ensemble.tif") + +