From 503f506f56e6a86db456dd87562f04e41966ba5b Mon Sep 17 00:00:00 2001
From: "claire.teillet_ird.fr" <claire.teillet@ird.fr>
Date: Wed, 19 Mar 2025 11:16:32 +0000
Subject: [PATCH] Upload biomod2_breeding_sites_public_spaces.R

---
 biomod2_breeding_sites_public_spaces.R | 209 +++++++++++++++++++++++++
 1 file changed, 209 insertions(+)
 create mode 100644 biomod2_breeding_sites_public_spaces.R

diff --git a/biomod2_breeding_sites_public_spaces.R b/biomod2_breeding_sites_public_spaces.R
new file mode 100644
index 0000000..2ff7221
--- /dev/null
+++ b/biomod2_breeding_sites_public_spaces.R
@@ -0,0 +1,209 @@
+# Script to use Biomod2 with raster layers describing urban landscapes and presence data of larval presence in urban spaces
+# Author : Claire Teillet and HÃ©loÃ¯se Pottier
+# based on https://biomodhub.github.io/biomod2/articles/examples_1_mainFunctions.html 
+
+#### Load package ####
+library(sf)
+library(biomod2)
+library(dplyr)
+library(terra)
+library(ggplot2)
+library(raster)
+library(corrplot)
+library(FactoMineR)
+library(factoextra)
+library(ggplot2)
+library(GGally) 
+
+#### Prepare data and parameters #### 
+
+#### Load Presence Data ####
+df_GU_potentiel <- read.csv("G:/0_PHD/RECHERCHE/AXE_2_BIS/data_montpellier/GU_potentiel_2154_clean_filtering_by_type.csv") 
+
+#### Load Environmental data (tif) ####
+var_env <- rast("G:/0_PHD/RECHERCHE/AXE_2_BIS/data_montpellier/grid_MTP_complete_IS_IT.tif") 
+
+# Select only the layers if needed
+# select_columns  <- c("NDVI_mean", "...")
+# 
+# var_env <- var_env[[select_columns]]
+
+# Show selected layers
+names(var_env)
+
+#### Other method to select layers ####
+
+# var_env_filtered <- subset(var_env, !grepl("FOTO", names(var_env)))
+# names(var_env_filtered)
+
+# test <- st_drop_geometry(var_env)
+# png("G:/0_PHD/RECHERCHE/AXE_2_BIS/FIG/scatter_plots.png", width = 2000, height = 2000)
+# pairs(test,
+#       main = "Matrice de scatter plots de toutes les variables",
+#       pch = 15,    
+#       col = "blue",
+#       cex = 2,        
+#       cex.labels = 2, 
+#       cex.main = 1)   
+# dev.off()
+# 
+# ggpairs <- ggpairs(as.data.frame(test))
+
+# Assigned 1 or 0 for if needed 
+df_GU_potentiel <- df_GU_potentiel %>%
+  mutate(positif = case_when(
+    positif=="OUI"~"1",
+    positif=="NON"~"0",
+    TRUE ~ positif
+  ))
+
+df_GU_potentiel$positif <- as.integer(df_GU_potentiel$positif)
+
+# Fix the seed for reproductibility
+set.seed(123) 
+
+# Formating data
+df_GU_potentiel_biomod2 <- 
+  BIOMOD_FormatingData(
+    resp.var = df_GU_potentiel['positif'], # var to explain (presence data)
+    resp.name = "positif", # name of the var
+    resp.xy = df_GU_potentiel[, c('long', 'lat')], #XY coordinates
+    expl.var = var_env, # environmental data
+    
+  )
+
+plot(df_GU_potentiel_biomod2) 
+
+#### Modeling ####
+
+##### Individual models #####
+
+GU_models2 <- BIOMOD_Modeling(bm.format = df_GU_potentiel_biomod2, # data
+                             models = c("ANN", "CTA", "FDA", "GAM", "GBM", "GLM", "MARS", "MAXNET", "RF",
+                                        "SRE", "XGBOOST"),  #vector containing several algorithms : "ANN", "MAXENT", "CTA", "FDA", "GAM", "GBM", "GLM", "MARS", "MAXNET", "RF", "SRE", "XGBOOST"
+                             CV.strategy = 'kfold', #cross validation strategy : random, kfold, block, strat, env or user.defined
+                             CV.nb.rep = 10, # number of repetitions for the cross validation 
+                             CV.k = 10,# number of partitions of the dataset for the cross validation
+                             OPT.strategy = "bigboss", # parameters of each algorithms "bigboss" or "default"
+                             var.import = 3,# number of permutations for variables importance
+                             nb.cpu = 16, # if cpu using all cores
+                             metric.eval = c('TSS','ROC')) # evaluations metrics
+
+# to save the model 
+save(list = ls(all.names = TRUE), file = "XXX.Rdata", envir = .GlobalEnv)
+# to load the model 
+load("C:/Users/teill/Documents/0_PHD/MODELE/Git/biomod2/positif_0_70_0_75/positif.1723633541.models.out")
+
+# get evaluation score 
+eval_models <- get_evaluations(positif.1723633541.models.out) #or positif.1723633541.models.out if loaded
+
+# Represent evaluation scores (TSS,AUC)
+bm_PlotEvalMean(
+  bm.out = GU_models2,
+  metric.eval = NULL,
+  dataset = "validation", # show for validation dataset
+  group.by = "algo",# show by algorithms
+  do.plot = TRUE   
+)
+
+#Represent variables importances by algo
+bm_PlotVarImpBoxplot(bm.out = GU_models2, group.by = c('expl.var', 'algo', 'algo') ) # par models
+
+var_importance <- get_variables_importance(GU_models2)
+ 
+# get_models <- get_built_models(GU_models,  algo =  c("ANN", "CTA", "FDA",))
+
+### Response Curves for individual models, selection options algo and run
+bm_PlotResponseCurves(bm.out = GU_models2, 
+                      models.chosen = get_built_models(GU_models2)[c(1:2, 13:14)],
+                      fixed.var = 'median')
+bm_PlotResponseCurves(bm.out =  GU_models2, 
+                      models.chosen = get_built_models( GU_models2)[c(1:3, 12:14)],
+                      fixed.var = 'min')
+
+
+##### Ensemble modeling #####
+
+# Get individual model
+AllModels <- get_built_models(positif.1723633541.models.out)
+
+# Select individual model 
+AllModels <- AllModels[ grepl(pattern = "_ANN", x = AllModels, fixed = TRUE)| 
+                          grepl(pattern = "_FDA", x = AllModels, fixed = TRUE) |
+                          grepl(pattern = "_GLM", x = AllModels, fixed = TRUE)| 
+                          grepl(pattern = "_MARS", x = AllModels, fixed = TRUE) |
+                          grepl(pattern = "_MAXNET", x = AllModels, fixed = TRUE)| 
+                          grepl(pattern = "_XGBOOST", x = AllModels, fixed = TRUE) ]
+
+myBiomodEM_grep <- BIOMOD_EnsembleModeling(bm.mod = GU_models2, 
+                                           em.by = 'all',
+                                           em.algo = c("EMmean", "EMcv", "EMwmean", "EMmedian"), #, 'EMcv', 'EMci', 'EMmedian', 'EMca', 'EMwmean'
+                                           models.chosen = AllModels,
+                                           metric.select.dataset = "validation",
+                                           metric.eval = c("TSS", "ROC"),
+                                           var.import = 3,
+                                           EMci.alpha = 0.05, # value corresponding to the significance level to estimate confidence interval
+                                           EMwmean.decay = 'proportional', # A value defining the relative importance of the weights (if 'EMwmean' was given to argument em.algo). A high value will strongly discriminate good models from the bad ones (see Details), while proportional will attribute weights proportionally to the models evaluation scores
+                                           nb.cpu = 16)
+
+# load ensemble.model.out if needeed
+load("positif_0_70_0_75/positif.1723633541.ensemble.models.out")
+
+# If needeed of you can use directly a threshold to select individual model
+# myBiomodEM <- BIOMOD_EnsembleModeling(bm.mod = GU_models2, # individual models 
+#                                       em.by = 'all',
+#                                       em.algo = c('EMmean', "EMcv", "EMwmean"), #, 'EMcv', 'EMci', 'EMmedian', 'EMca', 'EMwmean'
+#                                       metric.select =  'ROC' ,
+#                                       metric.select.thresh =  0.70, # threshold so select model
+#                                       metric.select.dataset = "validation",
+#                                       metric.eval = c('TSS', 'ROC'),
+#                                       var.import = 3,
+#                                       EMci.alpha = 0.05, # value corresponding to the significance level to estimate confidence interval
+#                                       EMwmean.decay = 'proportional', # A value defining the relative importance of the weights (if 'EMwmean' was given to argument em.algo). A high value will strongly discriminate good models from the bad ones (see Details), while proportional will attribute weights proportionally to the models evaluation scores
+#                                       nb.cpu = 16)
+
+# Load directly the results if needed
+load("C:/Users/teill/Documents/0_PHD/MODELE/Git/biomod2/positif/positif.1723633541.ensemble.models.out")
+
+# Model use for article 1723633541.models.ensemble.out"
+
+eval_ensemble_models <- get_evaluations(myBiomodEM_grep)
+eval_ensemble_models
+ 
+
+bm_PlotVarImpBoxplot(bm.out = myBiomodEM_grep, group.by = c('expl.var', 'algo', 'algo') ) # par models
+
+
+#Represent response curves for ensemble modeling
+load("C:/Users/teill/Documents/0_PHD/MODELE/Git/biomod2/positif_0_70_0_75/positif.1723633541.models.out")
+
+get_variables_importance(myBiomodEM_grep)
+varimp.out <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_grep, group.by = c('expl.var', 'merged.by.run','algo'), do.plot = FALSE) # par models
+
+varimp.out$tab %>% 
+  filter(algo == "EMmean") %>% 
+  ggplot()+
+  geom_boxplot(aes(x = expl.var, y = var.imp), fill = "#F08080")
+
+varimp.out$tab %>% 
+  filter(algo == "EMmean") %>% 
+  arrange(desc(var.imp)) %>%  # order by importance scores
+  mutate(expl.var = factor(expl.var, levels = unique(expl.var))) %>%   
+  ggplot() +
+  geom_boxplot(aes(x = expl.var, y = var.imp), fill = "#F08080") +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1))
+
+
+# Projection Forecasting
+myBiomodEM_proj <- BIOMOD_EnsembleForecasting(bm.em = myBiomodEM_grep,
+                                             proj.name = 'proj_positif_grep',  
+                                             new.env = var_env,
+                                             models.chosen = 'all',
+                                             metric.binary = 'all',
+                                             metric.filter = 'all',
+                                             on_0_1000 = FALSE) # to get 0-1 mean probabilities
+ 
+ 
+myBiomodEM_proj <- rast("G:/0_PHD/RECHERCHE/AXE_2_BIS/Biomod2/positif_0_70_0_75/proj_positif_grep/proj_positif_grep_positif_ensemble.tif")
+ 
+
-- 
GitLab