Upload biomod2_breeding_sites_public_spaces.R

503f506f · claire.teillet_ird.fr · f5199fa1 · 503f506f
Commit 503f506f authored 1 week ago by claire.teillet_ird.fr
--- a/biomod2_breeding_sites_public_spaces.R
+++ b/biomod2_breeding_sites_public_spaces.R
+# Script to use Biomod2 with raster layers describing urban landscapes and presence data of larval presence in urban spaces
+# Author : Claire Teillet and Héloïse Pottier
+# based on https://biomodhub.github.io/biomod2/articles/examples_1_mainFunctions.html 
+#### Load package ####
+library(sf)
+library(biomod2)
+library(dplyr)
+library(terra)
+library(ggplot2)
+library(raster)
+library(corrplot)
+library(FactoMineR)
+library(factoextra)
+library(ggplot2)
+library(GGally) 
+#### Prepare data and parameters #### 
+#### Load Presence Data ####
+df_GU_potentiel <- read.csv("G:/0_PHD/RECHERCHE/AXE_2_BIS/data_montpellier/GU_potentiel_2154_clean_filtering_by_type.csv") 
+#### Load Environmental data (tif) ####
+var_env <- rast("G:/0_PHD/RECHERCHE/AXE_2_BIS/data_montpellier/grid_MTP_complete_IS_IT.tif") 
+# Select only the layers if needed
+# select_columns  <- c("NDVI_mean", "...")
+# 
+# var_env <- var_env[[select_columns]]
+# Show selected layers
+names(var_env)
+#### Other method to select layers ####
+# var_env_filtered <- subset(var_env, !grepl("FOTO", names(var_env)))
+# names(var_env_filtered)
+# test <- st_drop_geometry(var_env)
+# png("G:/0_PHD/RECHERCHE/AXE_2_BIS/FIG/scatter_plots.png", width = 2000, height = 2000)
+# pairs(test,
+#       main = "Matrice de scatter plots de toutes les variables",
+#       pch = 15,    
+#       col = "blue",
+#       cex = 2,        
+#       cex.labels = 2, 
+#       cex.main = 1)   
+# dev.off()
+# 
+# ggpairs <- ggpairs(as.data.frame(test))
+# Assigned 1 or 0 for if needed 
+df_GU_potentiel <- df_GU_potentiel %>%
+  mutate(positif = case_when(
+    positif=="OUI"~"1",
+    positif=="NON"~"0",
+    TRUE ~ positif
+  ))
+df_GU_potentiel$positif <- as.integer(df_GU_potentiel$positif)
+# Fix the seed for reproductibility
+set.seed(123) 
+# Formating data
+df_GU_potentiel_biomod2 <- 
+  BIOMOD_FormatingData(
+    resp.var = df_GU_potentiel['positif'], # var to explain (presence data)
+    resp.name = "positif", # name of the var
+    resp.xy = df_GU_potentiel[, c('long', 'lat')], #XY coordinates
+    expl.var = var_env, # environmental data
+  )
+plot(df_GU_potentiel_biomod2) 
+#### Modeling ####
+##### Individual models #####
+GU_models2 <- BIOMOD_Modeling(bm.format = df_GU_potentiel_biomod2, # data
+                             models = c("ANN", "CTA", "FDA", "GAM", "GBM", "GLM", "MARS", "MAXNET", "RF",
+                                        "SRE", "XGBOOST"),  #vector containing several algorithms : "ANN", "MAXENT", "CTA", "FDA", "GAM", "GBM", "GLM", "MARS", "MAXNET", "RF", "SRE", "XGBOOST"
+                             CV.strategy = 'kfold', #cross validation strategy : random, kfold, block, strat, env or user.defined
+                             CV.nb.rep = 10, # number of repetitions for the cross validation 
+                             CV.k = 10,# number of partitions of the dataset for the cross validation
+                             OPT.strategy = "bigboss", # parameters of each algorithms "bigboss" or "default"
+                             var.import = 3,# number of permutations for variables importance
+                             nb.cpu = 16, # if cpu using all cores
+                             metric.eval = c('TSS','ROC')) # evaluations metrics
+# to save the model 
+save(list = ls(all.names = TRUE), file = "XXX.Rdata", envir = .GlobalEnv)
+# to load the model 
+load("C:/Users/teill/Documents/0_PHD/MODELE/Git/biomod2/positif_0_70_0_75/positif.1723633541.models.out")
+# get evaluation score 
+eval_models <- get_evaluations(positif.1723633541.models.out) #or positif.1723633541.models.out if loaded
+# Represent evaluation scores (TSS,AUC)
+bm_PlotEvalMean(
+  bm.out = GU_models2,
+  metric.eval = NULL,
+  dataset = "validation", # show for validation dataset
+  group.by = "algo",# show by algorithms
+  do.plot = TRUE   
+)
+#Represent variables importances by algo
+bm_PlotVarImpBoxplot(bm.out = GU_models2, group.by = c('expl.var', 'algo', 'algo') ) # par models
+var_importance <- get_variables_importance(GU_models2)
+# get_models <- get_built_models(GU_models,  algo =  c("ANN", "CTA", "FDA",))
+### Response Curves for individual models, selection options algo and run
+bm_PlotResponseCurves(bm.out = GU_models2, 
+                      models.chosen = get_built_models(GU_models2)[c(1:2, 13:14)],
+                      fixed.var = 'median')
+bm_PlotResponseCurves(bm.out =  GU_models2, 
+                      models.chosen = get_built_models( GU_models2)[c(1:3, 12:14)],
+                      fixed.var = 'min')
+##### Ensemble modeling #####
+# Get individual model
+AllModels <- get_built_models(positif.1723633541.models.out)
+# Select individual model 
+AllModels <- AllModels[ grepl(pattern = "_ANN", x = AllModels, fixed = TRUE)| 
+                          grepl(pattern = "_FDA", x = AllModels, fixed = TRUE) |
+                          grepl(pattern = "_GLM", x = AllModels, fixed = TRUE)| 
+                          grepl(pattern = "_MARS", x = AllModels, fixed = TRUE) |
+                          grepl(pattern = "_MAXNET", x = AllModels, fixed = TRUE)| 
+                          grepl(pattern = "_XGBOOST", x = AllModels, fixed = TRUE) ]
+myBiomodEM_grep <- BIOMOD_EnsembleModeling(bm.mod = GU_models2, 
+                                           em.by = 'all',
+                                           em.algo = c("EMmean", "EMcv", "EMwmean", "EMmedian"), #, 'EMcv', 'EMci', 'EMmedian', 'EMca', 'EMwmean'
+                                           models.chosen = AllModels,
+                                           metric.select.dataset = "validation",
+                                           metric.eval = c("TSS", "ROC"),
+                                           var.import = 3,
+                                           EMci.alpha = 0.05, # value corresponding to the significance level to estimate confidence interval
+                                           EMwmean.decay = 'proportional', # A value defining the relative importance of the weights (if 'EMwmean' was given to argument em.algo). A high value will strongly discriminate good models from the bad ones (see Details), while proportional will attribute weights proportionally to the models evaluation scores
+                                           nb.cpu = 16)
+# load ensemble.model.out if needeed
+load("positif_0_70_0_75/positif.1723633541.ensemble.models.out")
+# If needeed of you can use directly a threshold to select individual model
+# myBiomodEM <- BIOMOD_EnsembleModeling(bm.mod = GU_models2, # individual models 
+#                                       em.by = 'all',
+#                                       em.algo = c('EMmean', "EMcv", "EMwmean"), #, 'EMcv', 'EMci', 'EMmedian', 'EMca', 'EMwmean'
+#                                       metric.select =  'ROC' ,
+#                                       metric.select.thresh =  0.70, # threshold so select model
+#                                       metric.select.dataset = "validation",
+#                                       metric.eval = c('TSS', 'ROC'),
+#                                       var.import = 3,
+#                                       EMci.alpha = 0.05, # value corresponding to the significance level to estimate confidence interval
+#                                       EMwmean.decay = 'proportional', # A value defining the relative importance of the weights (if 'EMwmean' was given to argument em.algo). A high value will strongly discriminate good models from the bad ones (see Details), while proportional will attribute weights proportionally to the models evaluation scores
+#                                       nb.cpu = 16)
+# Load directly the results if needed
+load("C:/Users/teill/Documents/0_PHD/MODELE/Git/biomod2/positif/positif.1723633541.ensemble.models.out")
+# Model use for article 1723633541.models.ensemble.out"
+eval_ensemble_models <- get_evaluations(myBiomodEM_grep)
+eval_ensemble_models
+bm_PlotVarImpBoxplot(bm.out = myBiomodEM_grep, group.by = c('expl.var', 'algo', 'algo') ) # par models
+#Represent response curves for ensemble modeling
+load("C:/Users/teill/Documents/0_PHD/MODELE/Git/biomod2/positif_0_70_0_75/positif.1723633541.models.out")
+get_variables_importance(myBiomodEM_grep)
+varimp.out <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_grep, group.by = c('expl.var', 'merged.by.run','algo'), do.plot = FALSE) # par models
+varimp.out$tab %>% 
+  filter(algo == "EMmean") %>% 
+  ggplot()+
+  geom_boxplot(aes(x = expl.var, y = var.imp), fill = "#F08080")
+varimp.out$tab %>% 
+  filter(algo == "EMmean") %>% 
+  arrange(desc(var.imp)) %>%  # order by importance scores
+  mutate(expl.var = factor(expl.var, levels = unique(expl.var))) %>%   
+  ggplot() +
+  geom_boxplot(aes(x = expl.var, y = var.imp), fill = "#F08080") +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1))
+# Projection Forecasting
+myBiomodEM_proj <- BIOMOD_EnsembleForecasting(bm.em = myBiomodEM_grep,
+                                             proj.name = 'proj_positif_grep',  
+                                             new.env = var_env,
+                                             models.chosen = 'all',
+                                             metric.binary = 'all',
+                                             metric.filter = 'all',
+                                             on_0_1000 = FALSE) # to get 0-1 mean probabilities
+myBiomodEM_proj <- rast("G:/0_PHD/RECHERCHE/AXE_2_BIS/Biomod2/positif_0_70_0_75/proj_positif_grep/proj_positif_grep_positif_ensemble.tif")