diff --git a/07-basic_statistics.qmd b/07-basic_statistics.qmd
index 824c43a8307513576dbee0e80547f3ecb74f0215..e68a9cb41793eb0dce7bfb020779e1983677b752 100644
--- a/07-basic_statistics.qmd
+++ b/07-basic_statistics.qmd
@@ -49,6 +49,17 @@ In epidemiology, the true meaning of point is very questionable. If it usually g
 # Aggregate cases over districts
 district$cases <- lengths(st_intersects(district, cases))
 
+# Plot number of cases using proportional symbol 
+mf_map(x = district) 
+mf_map(
+  x = district, 
+  var = "cases",
+  val_max = 50,
+  type = "prop",
+  col = "#990000", 
+  leg_title = "Cases")
+mf_layout(title = "Number of cases of W Fever")
+
 ```
 
 The incidence ($\frac{cases}{population}$), expressed per 100,000 population, is commonly used to represent the distribution of cases related to population density, but other indicators exist. As an example, the standardized incidence ratio (SIR) represents the deviation between the observed and expected numbers of cases and is expressed as $SIR = \frac{Y_i}{E_i}$, with $Y_i$ the observed number of cases and $E_i$ the expected number of cases. In this study, we computed the expected number of cases in each district by assuming infections are homogeneously distributed across Cambodia, i.e., the incidence is the same in each district. The SIR therefore represents the deviation of incidence compared to the average incidence across Cambodia.
@@ -68,40 +79,33 @@ district$expected <- district$T_POP * rate
 district$SIR <- district$cases / district$expected
 ```
 
-```{r inc_visualization, eval = TRUE, echo = TRUE, nm = TRUE, fig.width=8, fig.height=4, class.output="code-out", warning=FALSE, message=FALSE}
-par(mfrow = c(1, 3))
-# Plot number of cases using proportional symbol 
-mf_map(x = district) 
-mf_map(
-  x = district, 
-  var = "cases",
-  val_max = 50,
-  type = "prop",
-  col = "#990000", 
-  leg_title = "Cases")
-mf_layout(title = "Number of cases of W Fever")
+```{r inc_visualization, eval = TRUE, echo = TRUE, nm = TRUE, fig.width=11, fig.height=7, class.output="code-out", warning=FALSE, message=FALSE}
+par(mfrow = c(1, 2))
 
 # Plot incidence 
+mf_map(x = district)
 mf_map(x = district,
-       var = "incidence",
-       type = "choro",
-       pal = "Reds 3",
+       var = c("T_POP", "incidence"),
+       type = "prop_choro",
+       pal = "Reds",
+       inches = .1,
        breaks = exp(mf_get_breaks(log(district$incidence+1), breaks = "pretty"))-1,
-       leg_title = "Incidence \n(per 100 000)")
+       leg_title = c("Population", "Incidence \n(per 100 000)"))
 mf_layout(title = "Incidence of W Fever")
 
 # Plot SIRs
 # create breaks and associated color palette
 break_SIR <- c(0,exp(mf_get_breaks(log(district$SIR), nbreaks = 8, breaks = "pretty")))
 col_pal <- c("#273871", "#3267AD", "#6496C8", "#9BBFDD", "#CDE3F0", "#FFCEBC", "#FF967E", "#F64D41", "#B90E36")
-
+mf_map(x = district)
 mf_map(x = district,
-       var = "SIR",
-       type = "choro",
-       breaks = break_SIR, 
+       var = c("T_POP", "SIR"),
+       type = "prop_choro",
+       breaks = break_SIR,
        pal = col_pal,
-       cex = 2,
-       leg_title = "SIR")
+       inches = .1,
+       #cex = 2,
+       leg_title = c("Population", "SIR"))
 mf_layout(title = "Standardized Incidence Ratio of W Fever")
 ```
 
@@ -155,8 +159,11 @@ m_cases <- mean(district$incidence)
 sd_cases <- sd(district$incidence)
 
 hist(district$incidence, probability = TRUE, ylim = c(0, 0.4), xlim = c(-5, 16), xlab = "Number of cases", ylab = "Probability", main = "Histogram of observed incidence compared\nto Normal and Poisson distributions")
+
 curve(dnorm(x, m_cases, sd_cases),col = "blue",  lwd = 1, add = TRUE)
-points(0:max(district$incidence), dpois(0:max(district$incidence), m_cases),type = 'b', pch = 20, col = "red", ylim = c(0, 0.6), lty = 2)
+
+points(0:max(district$incidence), dpois(0:max(district$incidence),m_cases),
+       type = 'b', pch = 20, col = "red", ylim = c(0, 0.6), lty = 2)
 
 legend("topright", legend = c("Normal distribution", "Poisson distribution", "Observed distribution"), col = c("blue", "red", "black"),pch = c(NA, 20, NA), lty = c(1, 2, 1))
 ```
diff --git a/data_cambodia/district.gpkg b/data_cambodia/district.gpkg
index ad283fd7e4ab079c5520ab9f73409fb8024deb37..97c275c6023a570b4dfaf14c4587e50ec6f90c8c 100644
Binary files a/data_cambodia/district.gpkg and b/data_cambodia/district.gpkg differ
diff --git a/public/07-basic_statistics.html b/public/07-basic_statistics.html
index afa1864294301959d9676c133c31c3447ffc3374..c9697e8c7f9ee722dac323225994184d543db92e 100644
--- a/public/07-basic_statistics.html
+++ b/public/07-basic_statistics.html
@@ -314,7 +314,21 @@ Projected CRS: WGS 84 / UTM zone 48N
 <p>In epidemiology, the true meaning of point is very questionable. If it usually gives the location of an observation, we cannot precisely tell if this observation represents an event of interest (e.g., illness, death, …) or a person at risk (e.g., a participant that may or may not experience the disease). If you can consider that the population at risk is uniformly distributed in small area (within a city for example), this is likely not the case at a country scale. Considering a ratio of event compared to a population at risk is often more informative than just considering cases. Administrative divisions of countries appear as great areal units for cases aggregation since they make available data on population count and structures. In this study, we will use the district as the areal unit of the study.</p>
 <div class="cell" data-nm="true">
 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Aggregate cases over districts</span></span>
-<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>cases <span class="ot">&lt;-</span> <span class="fu">lengths</span>(<span class="fu">st_intersects</span>(district, cases))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>cases <span class="ot">&lt;-</span> <span class="fu">lengths</span>(<span class="fu">st_intersects</span>(district, cases))</span>
+<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Plot number of cases using proportional symbol </span></span>
+<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(<span class="at">x =</span> district) </span>
+<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(</span>
+<span id="cb5-7"><a href="#cb5-7" aria-hidden="true" tabindex="-1"></a>  <span class="at">x =</span> district, </span>
+<span id="cb5-8"><a href="#cb5-8" aria-hidden="true" tabindex="-1"></a>  <span class="at">var =</span> <span class="st">"cases"</span>,</span>
+<span id="cb5-9"><a href="#cb5-9" aria-hidden="true" tabindex="-1"></a>  <span class="at">val_max =</span> <span class="dv">50</span>,</span>
+<span id="cb5-10"><a href="#cb5-10" aria-hidden="true" tabindex="-1"></a>  <span class="at">type =</span> <span class="st">"prop"</span>,</span>
+<span id="cb5-11"><a href="#cb5-11" aria-hidden="true" tabindex="-1"></a>  <span class="at">col =</span> <span class="st">"#990000"</span>, </span>
+<span id="cb5-12"><a href="#cb5-12" aria-hidden="true" tabindex="-1"></a>  <span class="at">leg_title =</span> <span class="st">"Cases"</span>)</span>
+<span id="cb5-13"><a href="#cb5-13" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_layout</span>(<span class="at">title =</span> <span class="st">"Number of cases of W Fever"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output-display">
+<p><img src="07-basic_statistics_files/figure-html/district_aggregate-1.png" class="img-fluid" width="768"></p>
+</div>
 </div>
 <p>The incidence (<span class="math inline">\(\frac{cases}{population}\)</span>) expressed per 100,000 population is commonly use to represent cases distribution related to population density but other indicators exists. As example, the standardized incidence ratios (SIRs) represent the deviation of observed and expected number of cases and is expressed as <span class="math inline">\(SIR = \frac{Y_i}{E_i}\)</span> with <span class="math inline">\(Y_i\)</span>, the observed number of cases and <span class="math inline">\(E_i\)</span>, the expected number of cases. In this study, we computed the expected number of cases in each district by assuming infections are homogeneously distributed across Cambodia, i.e., the incidence is the same in each district. The SIR therefore represents the deviation of incidence compared to the average incidence across Cambodia.</p>
 <div class="cell" data-nm="true">
@@ -331,42 +345,35 @@ Projected CRS: WGS 84 / UTM zone 48N
 <span id="cb6-11"><a href="#cb6-11" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>SIR <span class="ot">&lt;-</span> district<span class="sc">$</span>cases <span class="sc">/</span> district<span class="sc">$</span>expected</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 <div class="cell" data-nm="true">
-<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">3</span>))</span>
-<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a><span class="co"># Plot number of cases using proportional symbol </span></span>
-<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(<span class="at">x =</span> district) </span>
-<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(</span>
-<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a>  <span class="at">x =</span> district, </span>
-<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a>  <span class="at">var =</span> <span class="st">"cases"</span>,</span>
-<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a>  <span class="at">val_max =</span> <span class="dv">50</span>,</span>
-<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a>  <span class="at">type =</span> <span class="st">"prop"</span>,</span>
-<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a>  <span class="at">col =</span> <span class="st">"#990000"</span>, </span>
-<span id="cb7-10"><a href="#cb7-10" aria-hidden="true" tabindex="-1"></a>  <span class="at">leg_title =</span> <span class="st">"Cases"</span>)</span>
-<span id="cb7-11"><a href="#cb7-11" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_layout</span>(<span class="at">title =</span> <span class="st">"Number of cases of W Fever"</span>)</span>
-<span id="cb7-12"><a href="#cb7-12" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb7-13"><a href="#cb7-13" aria-hidden="true" tabindex="-1"></a><span class="co"># Plot incidence </span></span>
-<span id="cb7-14"><a href="#cb7-14" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(<span class="at">x =</span> district,</span>
-<span id="cb7-15"><a href="#cb7-15" aria-hidden="true" tabindex="-1"></a>       <span class="at">var =</span> <span class="st">"incidence"</span>,</span>
-<span id="cb7-16"><a href="#cb7-16" aria-hidden="true" tabindex="-1"></a>       <span class="at">type =</span> <span class="st">"choro"</span>,</span>
-<span id="cb7-17"><a href="#cb7-17" aria-hidden="true" tabindex="-1"></a>       <span class="at">pal =</span> <span class="st">"Reds 3"</span>,</span>
-<span id="cb7-18"><a href="#cb7-18" aria-hidden="true" tabindex="-1"></a>       <span class="at">breaks =</span> <span class="fu">exp</span>(<span class="fu">mf_get_breaks</span>(<span class="fu">log</span>(district<span class="sc">$</span>incidence<span class="sc">+</span><span class="dv">1</span>), <span class="at">breaks =</span> <span class="st">"pretty"</span>))<span class="sc">-</span><span class="dv">1</span>,</span>
-<span id="cb7-19"><a href="#cb7-19" aria-hidden="true" tabindex="-1"></a>       <span class="at">leg_title =</span> <span class="st">"Incidence </span><span class="sc">\n</span><span class="st">(per 100 000)"</span>)</span>
-<span id="cb7-20"><a href="#cb7-20" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_layout</span>(<span class="at">title =</span> <span class="st">"Incidence of W Fever"</span>)</span>
-<span id="cb7-21"><a href="#cb7-21" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb7-22"><a href="#cb7-22" aria-hidden="true" tabindex="-1"></a><span class="co"># Plot SIRs</span></span>
-<span id="cb7-23"><a href="#cb7-23" aria-hidden="true" tabindex="-1"></a><span class="co"># create breaks and associated color palette</span></span>
-<span id="cb7-24"><a href="#cb7-24" aria-hidden="true" tabindex="-1"></a>break_SIR <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="dv">0</span>,<span class="fu">exp</span>(<span class="fu">mf_get_breaks</span>(<span class="fu">log</span>(district<span class="sc">$</span>SIR), <span class="at">nbreaks =</span> <span class="dv">8</span>, <span class="at">breaks =</span> <span class="st">"pretty"</span>)))</span>
-<span id="cb7-25"><a href="#cb7-25" aria-hidden="true" tabindex="-1"></a>col_pal <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="st">"#273871"</span>, <span class="st">"#3267AD"</span>, <span class="st">"#6496C8"</span>, <span class="st">"#9BBFDD"</span>, <span class="st">"#CDE3F0"</span>, <span class="st">"#FFCEBC"</span>, <span class="st">"#FF967E"</span>, <span class="st">"#F64D41"</span>, <span class="st">"#B90E36"</span>)</span>
-<span id="cb7-26"><a href="#cb7-26" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb7-27"><a href="#cb7-27" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(<span class="at">x =</span> district,</span>
-<span id="cb7-28"><a href="#cb7-28" aria-hidden="true" tabindex="-1"></a>       <span class="at">var =</span> <span class="st">"SIR"</span>,</span>
-<span id="cb7-29"><a href="#cb7-29" aria-hidden="true" tabindex="-1"></a>       <span class="at">type =</span> <span class="st">"choro"</span>,</span>
-<span id="cb7-30"><a href="#cb7-30" aria-hidden="true" tabindex="-1"></a>       <span class="at">breaks =</span> break_SIR, </span>
-<span id="cb7-31"><a href="#cb7-31" aria-hidden="true" tabindex="-1"></a>       <span class="at">pal =</span> col_pal,</span>
-<span id="cb7-32"><a href="#cb7-32" aria-hidden="true" tabindex="-1"></a>       <span class="at">cex =</span> <span class="dv">2</span>,</span>
-<span id="cb7-33"><a href="#cb7-33" aria-hidden="true" tabindex="-1"></a>       <span class="at">leg_title =</span> <span class="st">"SIR"</span>)</span>
-<span id="cb7-34"><a href="#cb7-34" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_layout</span>(<span class="at">title =</span> <span class="st">"Standardized Incidence Ratio of W Fever"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">2</span>))</span>
+<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Plot incidence </span></span>
+<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(<span class="at">x =</span> district)</span>
+<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(<span class="at">x =</span> district,</span>
+<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a>       <span class="at">var =</span> <span class="fu">c</span>(<span class="st">"T_POP"</span>, <span class="st">"incidence"</span>),</span>
+<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a>       <span class="at">type =</span> <span class="st">"prop_choro"</span>,</span>
+<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a>       <span class="at">pal =</span> <span class="st">"Reds"</span>,</span>
+<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a>       <span class="at">inches =</span> .<span class="dv">1</span>,</span>
+<span id="cb7-10"><a href="#cb7-10" aria-hidden="true" tabindex="-1"></a>       <span class="at">breaks =</span> <span class="fu">exp</span>(<span class="fu">mf_get_breaks</span>(<span class="fu">log</span>(district<span class="sc">$</span>incidence<span class="sc">+</span><span class="dv">1</span>), <span class="at">breaks =</span> <span class="st">"pretty"</span>))<span class="sc">-</span><span class="dv">1</span>,</span>
+<span id="cb7-11"><a href="#cb7-11" aria-hidden="true" tabindex="-1"></a>       <span class="at">leg_title =</span> <span class="fu">c</span>(<span class="st">"Population"</span>, <span class="st">"Incidence </span><span class="sc">\n</span><span class="st">(per 100 000)"</span>))</span>
+<span id="cb7-12"><a href="#cb7-12" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_layout</span>(<span class="at">title =</span> <span class="st">"Incidence of W Fever"</span>)</span>
+<span id="cb7-13"><a href="#cb7-13" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb7-14"><a href="#cb7-14" aria-hidden="true" tabindex="-1"></a><span class="co"># Plot SIRs</span></span>
+<span id="cb7-15"><a href="#cb7-15" aria-hidden="true" tabindex="-1"></a><span class="co"># create breaks and associated color palette</span></span>
+<span id="cb7-16"><a href="#cb7-16" aria-hidden="true" tabindex="-1"></a>break_SIR <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="dv">0</span>,<span class="fu">exp</span>(<span class="fu">mf_get_breaks</span>(<span class="fu">log</span>(district<span class="sc">$</span>SIR), <span class="at">nbreaks =</span> <span class="dv">8</span>, <span class="at">breaks =</span> <span class="st">"pretty"</span>)))</span>
+<span id="cb7-17"><a href="#cb7-17" aria-hidden="true" tabindex="-1"></a>col_pal <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="st">"#273871"</span>, <span class="st">"#3267AD"</span>, <span class="st">"#6496C8"</span>, <span class="st">"#9BBFDD"</span>, <span class="st">"#CDE3F0"</span>, <span class="st">"#FFCEBC"</span>, <span class="st">"#FF967E"</span>, <span class="st">"#F64D41"</span>, <span class="st">"#B90E36"</span>)</span>
+<span id="cb7-18"><a href="#cb7-18" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(<span class="at">x =</span> district)</span>
+<span id="cb7-19"><a href="#cb7-19" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(<span class="at">x =</span> district,</span>
+<span id="cb7-20"><a href="#cb7-20" aria-hidden="true" tabindex="-1"></a>       <span class="at">var =</span> <span class="fu">c</span>(<span class="st">"T_POP"</span>, <span class="st">"SIR"</span>),</span>
+<span id="cb7-21"><a href="#cb7-21" aria-hidden="true" tabindex="-1"></a>       <span class="at">type =</span> <span class="st">"prop_choro"</span>,</span>
+<span id="cb7-22"><a href="#cb7-22" aria-hidden="true" tabindex="-1"></a>       <span class="at">breaks =</span> break_SIR,</span>
+<span id="cb7-23"><a href="#cb7-23" aria-hidden="true" tabindex="-1"></a>       <span class="at">pal =</span> col_pal,</span>
+<span id="cb7-24"><a href="#cb7-24" aria-hidden="true" tabindex="-1"></a>       <span class="at">inches =</span> .<span class="dv">1</span>,</span>
+<span id="cb7-25"><a href="#cb7-25" aria-hidden="true" tabindex="-1"></a>       <span class="co">#cex = 2,</span></span>
+<span id="cb7-26"><a href="#cb7-26" aria-hidden="true" tabindex="-1"></a>       <span class="at">leg_title =</span> <span class="fu">c</span>(<span class="st">"Population"</span>, <span class="st">"SIR"</span>))</span>
+<span id="cb7-27"><a href="#cb7-27" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_layout</span>(<span class="at">title =</span> <span class="st">"Standardized Incidence Ratio of W Fever"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output-display">
-<p><img src="07-basic_statistics_files/figure-html/inc_visualization-1.png" class="img-fluid" width="768"></p>
+<p><img src="07-basic_statistics_files/figure-html/inc_visualization-1.png" class="img-fluid" width="1056"></p>
 </div>
 </div>
 <p>These maps illustrate the spatial heterogeneity of the cases. The incidence shows how the disease vary from one district to another while the SIR highlight districts that have:</p>
@@ -430,10 +437,13 @@ Statistic tests and distributions
 <span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>sd_cases <span class="ot">&lt;-</span> <span class="fu">sd</span>(district<span class="sc">$</span>incidence)</span>
 <span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a><span class="fu">hist</span>(district<span class="sc">$</span>incidence, <span class="at">probability =</span> <span class="cn">TRUE</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="fl">0.4</span>), <span class="at">xlim =</span> <span class="fu">c</span>(<span class="sc">-</span><span class="dv">5</span>, <span class="dv">16</span>), <span class="at">xlab =</span> <span class="st">"Number of cases"</span>, <span class="at">ylab =</span> <span class="st">"Probability"</span>, <span class="at">main =</span> <span class="st">"Histogram of observed incidence compared</span><span class="sc">\n</span><span class="st">to Normal and Poisson distributions"</span>)</span>
-<span id="cb8-6"><a href="#cb8-6" aria-hidden="true" tabindex="-1"></a><span class="fu">curve</span>(<span class="fu">dnorm</span>(x, m_cases, sd_cases),<span class="at">col =</span> <span class="st">"blue"</span>,  <span class="at">lwd =</span> <span class="dv">1</span>, <span class="at">add =</span> <span class="cn">TRUE</span>)</span>
-<span id="cb8-7"><a href="#cb8-7" aria-hidden="true" tabindex="-1"></a><span class="fu">points</span>(<span class="dv">0</span><span class="sc">:</span><span class="fu">max</span>(district<span class="sc">$</span>incidence), <span class="fu">dpois</span>(<span class="dv">0</span><span class="sc">:</span><span class="fu">max</span>(district<span class="sc">$</span>incidence), m_cases),<span class="at">type =</span> <span class="st">'b'</span>, <span class="at">pch =</span> <span class="dv">20</span>, <span class="at">col =</span> <span class="st">"red"</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="fl">0.6</span>), <span class="at">lty =</span> <span class="dv">2</span>)</span>
+<span id="cb8-6"><a href="#cb8-6" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb8-7"><a href="#cb8-7" aria-hidden="true" tabindex="-1"></a><span class="fu">curve</span>(<span class="fu">dnorm</span>(x, m_cases, sd_cases),<span class="at">col =</span> <span class="st">"blue"</span>,  <span class="at">lwd =</span> <span class="dv">1</span>, <span class="at">add =</span> <span class="cn">TRUE</span>)</span>
 <span id="cb8-8"><a href="#cb8-8" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb8-9"><a href="#cb8-9" aria-hidden="true" tabindex="-1"></a><span class="fu">legend</span>(<span class="st">"topright"</span>, <span class="at">legend =</span> <span class="fu">c</span>(<span class="st">"Normal distribution"</span>, <span class="st">"Poisson distribution"</span>, <span class="st">"Observed distribution"</span>), <span class="at">col =</span> <span class="fu">c</span>(<span class="st">"blue"</span>, <span class="st">"red"</span>, <span class="st">"black"</span>),<span class="at">pch =</span> <span class="fu">c</span>(<span class="cn">NA</span>, <span class="dv">20</span>, <span class="cn">NA</span>), <span class="at">lty =</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">1</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<span id="cb8-9"><a href="#cb8-9" aria-hidden="true" tabindex="-1"></a><span class="fu">points</span>(<span class="dv">0</span><span class="sc">:</span><span class="fu">max</span>(district<span class="sc">$</span>incidence), <span class="fu">dpois</span>(<span class="dv">0</span><span class="sc">:</span><span class="fu">max</span>(district<span class="sc">$</span>incidence),m_cases),</span>
+<span id="cb8-10"><a href="#cb8-10" aria-hidden="true" tabindex="-1"></a>       <span class="at">type =</span> <span class="st">'b'</span>, <span class="at">pch =</span> <span class="dv">20</span>, <span class="at">col =</span> <span class="st">"red"</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="fl">0.6</span>), <span class="at">lty =</span> <span class="dv">2</span>)</span>
+<span id="cb8-11"><a href="#cb8-11" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb8-12"><a href="#cb8-12" aria-hidden="true" tabindex="-1"></a><span class="fu">legend</span>(<span class="st">"topright"</span>, <span class="at">legend =</span> <span class="fu">c</span>(<span class="st">"Normal distribution"</span>, <span class="st">"Poisson distribution"</span>, <span class="st">"Observed distribution"</span>), <span class="at">col =</span> <span class="fu">c</span>(<span class="st">"blue"</span>, <span class="st">"red"</span>, <span class="st">"black"</span>),<span class="at">pch =</span> <span class="fu">c</span>(<span class="cn">NA</span>, <span class="dv">20</span>, <span class="cn">NA</span>), <span class="at">lty =</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">1</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output-display">
 <p><img src="07-basic_statistics_files/figure-html/distribution-1.png" class="img-fluid" width="576"></p>
 </div>
@@ -498,14 +508,14 @@ Moranâ€™s I test
     Model used when sampling: Poisson 
     Number of simulations: 499 
     Statistic:  0.1566449 
-    p-value :  0.014 </code></pre>
+    p-value :  0.006 </code></pre>
 </div>
 <div class="sourceCode cell-code" id="cb11"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(m_test)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output-display">
 <p><img src="07-basic_statistics_files/figure-html/MoransI-1.png" class="img-fluid" width="768"></p>
 </div>
 </div>
-<p>The Moranâ€™s statistics is here <span class="math inline">\(I =\)</span> 0.16. When comparing its value to the H0 distribution (built under 499 simulations), the probability of observing such a I value under the null hypothesis, i.e.&nbsp;the distribution of cases is spatially independent, is <span class="math inline">\(p_{value} =\)</span> 0.014. We therefore reject H0 with error risk of <span class="math inline">\(\alpha = 5\%\)</span>. The distribution of cases is therefore autocorrelated across districts in Cambodia.</p>
+<p>The Moran’s statistics is here <span class="math inline">\(I =\)</span> 0.16. When comparing its value to the H0 distribution (built under 499 simulations), the probability of observing such a I value under the null hypothesis, i.e.&nbsp;the distribution of cases is spatially independent, is <span class="math inline">\(p_{value} =\)</span> 0.006. We therefore reject H0 with error risk of <span class="math inline">\(\alpha = 5\%\)</span>. The distribution of cases is therefore autocorrelated across districts in Cambodia.</p>
 </section>
 <section id="the-local-morans-i-lisa-test" class="level4" data-number="6.2.2.2">
 <h4 data-number="6.2.2.2" class="anchored" data-anchor-id="the-local-morans-i-lisa-test"><span class="header-section-number">6.2.2.2</span> The Local Moran’s I LISA test</h4>
@@ -720,7 +730,7 @@ Kulldorf test
 <span id="cb30-7"><a href="#cb30-7" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(df_secondary_clusters)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre class="code-out"><code>       SMR number.of.cases expected.cases p.value
-1 3.767698              16       4.246625   0.014</code></pre>
+1 3.767698              16       4.246625   0.012</code></pre>
 </div>
 </div>
 <p>We only have one secondary cluster composed of one district.</p>
@@ -925,4 +935,4 @@ window.document.addEventListener("DOMContentLoaded", function (event) {
 
 
 <script src="site_libs/quarto-html/zenscroll-min.js"></script>
-</body></html>
+</body></html>
\ No newline at end of file
diff --git a/public/07-basic_statistics_files/figure-html/LocalMoransI_plt-1.png b/public/07-basic_statistics_files/figure-html/LocalMoransI_plt-1.png
index 0b90946274b1d7730448b3c074974b13fbd7ea70..b968813091eedca65b463cb342437d53094770f1 100644
Binary files a/public/07-basic_statistics_files/figure-html/LocalMoransI_plt-1.png and b/public/07-basic_statistics_files/figure-html/LocalMoransI_plt-1.png differ
diff --git a/public/07-basic_statistics_files/figure-html/MoransI-1.png b/public/07-basic_statistics_files/figure-html/MoransI-1.png
index e890dc55ed2b8036e28122f6ed14dd78605d1107..56b623420bcf02cd03fe1544f1128aaf37352048 100644
Binary files a/public/07-basic_statistics_files/figure-html/MoransI-1.png and b/public/07-basic_statistics_files/figure-html/MoransI-1.png differ
diff --git a/public/07-basic_statistics_files/figure-html/district_aggregate-1.png b/public/07-basic_statistics_files/figure-html/district_aggregate-1.png
new file mode 100644
index 0000000000000000000000000000000000000000..eae279dfc2aa6d2d74ca4d163d436df934f06b61
Binary files /dev/null and b/public/07-basic_statistics_files/figure-html/district_aggregate-1.png differ
diff --git a/public/07-basic_statistics_files/figure-html/inc_visualization-1.png b/public/07-basic_statistics_files/figure-html/inc_visualization-1.png
index 631e529f9b65f28451f2428a723aa42572800024..32b95078e16f9d2c8690de9425cf0f03dd200120 100644
Binary files a/public/07-basic_statistics_files/figure-html/inc_visualization-1.png and b/public/07-basic_statistics_files/figure-html/inc_visualization-1.png differ
diff --git a/public/07-basic_statistics_files/figure-html/kd_test-1.png b/public/07-basic_statistics_files/figure-html/kd_test-1.png
index 6ea2fd3bce0cb4ae323e1ae4c2d05f588cec6bc3..3d7492cd4f4fac775b150e58dcbbd7c5ed578b4a 100644
Binary files a/public/07-basic_statistics_files/figure-html/kd_test-1.png and b/public/07-basic_statistics_files/figure-html/kd_test-1.png differ
diff --git a/public/search.json b/public/search.json
index d421310d196787766ede662c4850a5a30a04033c..e831fcb74285adbeec05c563df1b167fbb1f9479 100644
--- a/public/search.json
+++ b/public/search.json
@@ -11,14 +11,14 @@
     "href": "07-basic_statistics.html#import-and-visualize-epidemiological-data",
     "title": "6Â  Basic statistics for spatial analysis",
     "section": "6.1 Import and visualize epidemiological data",
-    "text": "6.1 Import and visualize epidemiological data\nIn this section, we load data that reference the cases of an imaginary disease, the W fever, throughout Cambodia. Each point corresponds to the geo-localization of a case.\n\nlibrary(dplyr)\nlibrary(sf)\n\n#Import Cambodia country border\ncountry <- st_read(\"data_cambodia/cambodia.gpkg\", layer = \"country\", quiet = TRUE)\n#Import provincial administrative border of Cambodia\neducation <- st_read(\"data_cambodia/cambodia.gpkg\", layer = \"education\", quiet = TRUE)\n#Import district administrative border of Cambodia\ndistrict <- st_read(\"data_cambodia/cambodia.gpkg\", layer = \"district\", quiet = TRUE)\n\n# Import locations of cases from an imaginary disease\ncases <- st_read(\"data_cambodia/cambodia.gpkg\", layer = \"cases\", quiet = TRUE)\ncases <- subset(cases, Disease == \"W fever\")\n\nThe first step of any statistical analysis always consists on visualizing the data to check they were correctly loaded and to observe general pattern of the cases.\n\n# View the cases object\nhead(cases)\n\nSimple feature collection with 6 features and 2 fields\nGeometry type: MULTIPOINT\nDimension:     XY\nBounding box:  xmin: 255891 ymin: 1179092 xmax: 506647.4 ymax: 1467441\nProjected CRS: WGS 84 / UTM zone 48N\n  id Disease                           geom\n1  0 W fever MULTIPOINT ((280036.2 12841...\n2  1 W fever MULTIPOINT ((451859.5 11790...\n3  2 W fever  MULTIPOINT ((255891 1467441))\n4  5 W fever MULTIPOINT ((506647.4 12322...\n5  6 W fever  MULTIPOINT ((440668 1197958))\n6  7 W fever MULTIPOINT ((481594.5 12714...\n\n# Map the cases\nlibrary(mapsf)\n\nmf_map(x = district, border = \"white\")\nmf_map(x = country,lwd = 2, col = NA, add = TRUE)\nmf_map(x = cases, lwd = .5, col = \"#990000\", pch = 20, add = TRUE)\nmf_layout(title = \"W Fever infections in Cambodia\")\n\n\n\n\nIn epidemiology, the true meaning of point is very questionable. 
If it usually gives the location of an observation, we cannot precisely tell if this observation represents an event of interest (e.g., illness, death, â€¦) or a person at risk (e.g., a participant that may or may not experience the disease). If you can consider that the population at risk is uniformly distributed in small area (within a city for example), this is likely not the case at a country scale. Considering a ratio of event compared to a population at risk is often more informative than just considering cases. Administrative divisions of countries appear as great areal units for cases aggregation since they make available data on population count and structures. In this study, we will use the district as the areal unit of the study.\n\n# Aggregate cases over districts\ndistrict$cases <- lengths(st_intersects(district, cases))\n\nThe incidence (\\(\\frac{cases}{population}\\)) expressed per 100,000 population is commonly use to represent cases distribution related to population density but other indicators exists. As example, the standardized incidence ratios (SIRs) represent the deviation of observed and expected number of cases and is expressed as \\(SIR = \\frac{Y_i}{E_i}\\) with \\(Y_i\\), the observed number of cases and \\(E_i\\), the expected number of cases. In this study, we computed the expected number of cases in each district by assuming infections are homogeneously distributed across Cambodia, i.e., the incidence is the same in each district. 
The SIR therefore represents the deviation of incidence compared to the average incidence across Cambodia.\n\n# Compute incidence in each district (per 100 000 population)\ndistrict$incidence <- district$cases/district$T_POP * 100000\n\n# Compute the global risk\nrate <- sum(district$cases)/sum(district$T_POP)\n\n# Compute expected number of cases \ndistrict$expected <- district$T_POP * rate\n\n# Compute SIR\ndistrict$SIR <- district$cases / district$expected\n\n\npar(mfrow = c(1, 3))\n# Plot number of cases using proportional symbol \nmf_map(x = district) \nmf_map(\n  x = district, \n  var = \"cases\",\n  val_max = 50,\n  type = \"prop\",\n  col = \"#990000\", \n  leg_title = \"Cases\")\nmf_layout(title = \"Number of cases of W Fever\")\n\n# Plot incidence \nmf_map(x = district,\n       var = \"incidence\",\n       type = \"choro\",\n       pal = \"Reds 3\",\n       breaks = exp(mf_get_breaks(log(district$incidence+1), breaks = \"pretty\"))-1,\n       leg_title = \"Incidence \\n(per 100 000)\")\nmf_layout(title = \"Incidence of W Fever\")\n\n# Plot SIRs\n# create breaks and associated color palette\nbreak_SIR <- c(0,exp(mf_get_breaks(log(district$SIR), nbreaks = 8, breaks = \"pretty\")))\ncol_pal <- c(\"#273871\", \"#3267AD\", \"#6496C8\", \"#9BBFDD\", \"#CDE3F0\", \"#FFCEBC\", \"#FF967E\", \"#F64D41\", \"#B90E36\")\n\nmf_map(x = district,\n       var = \"SIR\",\n       type = \"choro\",\n       breaks = break_SIR, \n       pal = col_pal,\n       cex = 2,\n       leg_title = \"SIR\")\nmf_layout(title = \"Standardized Incidence Ratio of W Fever\")\n\n\n\n\nThese maps illustrate the spatial heterogeneity of the cases. 
The incidence shows how the disease vary from one district to another while the SIR highlight districts that have:\n\nhigher risk than average (SIR > 1) when standardized for population\nlower risk than average (SIR < 1) when standardized for population\naverage risk (SIR ~ 1) when standardized for population\n\n\n\n\n\n\n\nTo go further â€¦\n\n\n\nIn this example, we standardized the cases distribution for population count. This simple standardization assumes that the risk of contracting the disease is similar for each person. However, assumption does not hold for all diseases and for all observed events since confounding effects can create nuisance into the interpretations (e.g., the number of childhood illness and death outcomes in a district are usually related to the age pyramid). A confounding factor is a variable that influences both the dependent variable and independent variable, causing a spurious association. You should keep in mind that other standardization can be performed based on these confounding factors, i.e.Â variables known to have an effect but that you donâ€™t want to analyze (e.g., sex ratio, occupations, age pyramid).\n\n\n\n\n\nIn addition, one can wonder what does an SIR ~ 1 means, i.e., what is the threshold to decide whether the SIR is greater, lower or equivalent to 1. The significant of the SIR can be tested globally (to determine whether or not the incidence is homogeneously distributed) and locally in each district (to determine Which district have an SIR different than 1). We wonâ€™t perform these analyses in this tutorial but you can look at the functions ?achisq.test() (from Dcluster package (GÃ³mez-Rubio et al. 2015)) and ?probmap() (from spdep package (R. Bivand et al. 2015)) to compute these statistics."
+    "text": "6.1 Import and visualize epidemiological data\nIn this section, we load data that reference the cases of an imaginary disease, the W fever, throughout Cambodia. Each point corresponds to the geo-localization of a case.\n\nlibrary(dplyr)\nlibrary(sf)\n\n#Import Cambodia country border\ncountry <- st_read(\"data_cambodia/cambodia.gpkg\", layer = \"country\", quiet = TRUE)\n#Import provincial administrative border of Cambodia\neducation <- st_read(\"data_cambodia/cambodia.gpkg\", layer = \"education\", quiet = TRUE)\n#Import district administrative border of Cambodia\ndistrict <- st_read(\"data_cambodia/cambodia.gpkg\", layer = \"district\", quiet = TRUE)\n\n# Import locations of cases from an imaginary disease\ncases <- st_read(\"data_cambodia/cambodia.gpkg\", layer = \"cases\", quiet = TRUE)\ncases <- subset(cases, Disease == \"W fever\")\n\nThe first step of any statistical analysis always consists on visualizing the data to check they were correctly loaded and to observe general pattern of the cases.\n\n# View the cases object\nhead(cases)\n\nSimple feature collection with 6 features and 2 fields\nGeometry type: MULTIPOINT\nDimension:     XY\nBounding box:  xmin: 255891 ymin: 1179092 xmax: 506647.4 ymax: 1467441\nProjected CRS: WGS 84 / UTM zone 48N\n  id Disease                           geom\n1  0 W fever MULTIPOINT ((280036.2 12841...\n2  1 W fever MULTIPOINT ((451859.5 11790...\n3  2 W fever  MULTIPOINT ((255891 1467441))\n4  5 W fever MULTIPOINT ((506647.4 12322...\n5  6 W fever  MULTIPOINT ((440668 1197958))\n6  7 W fever MULTIPOINT ((481594.5 12714...\n\n# Map the cases\nlibrary(mapsf)\n\nmf_map(x = district, border = \"white\")\nmf_map(x = country,lwd = 2, col = NA, add = TRUE)\nmf_map(x = cases, lwd = .5, col = \"#990000\", pch = 20, add = TRUE)\nmf_layout(title = \"W Fever infections in Cambodia\")\n\n\n\n\nIn epidemiology, the true meaning of point is very questionable. 
If it usually gives the location of an observation, we cannot precisely tell if this observation represents an event of interest (e.g., illness, death, â€¦) or a person at risk (e.g., a participant that may or may not experience the disease). If you can consider that the population at risk is uniformly distributed in small area (within a city for example), this is likely not the case at a country scale. Considering a ratio of event compared to a population at risk is often more informative than just considering cases. Administrative divisions of countries appear as great areal units for cases aggregation since they make available data on population count and structures. In this study, we will use the district as the areal unit of the study.\n\n# Aggregate cases over districts\ndistrict$cases <- lengths(st_intersects(district, cases))\n\n# Plot number of cases using proportional symbol \nmf_map(x = district) \nmf_map(\n  x = district, \n  var = \"cases\",\n  val_max = 50,\n  type = \"prop\",\n  col = \"#990000\", \n  leg_title = \"Cases\")\nmf_layout(title = \"Number of cases of W Fever\")\n\n\n\n\nThe incidence (\\(\\frac{cases}{population}\\)) expressed per 100,000 population is commonly use to represent cases distribution related to population density but other indicators exists. As example, the standardized incidence ratios (SIRs) represent the deviation of observed and expected number of cases and is expressed as \\(SIR = \\frac{Y_i}{E_i}\\) with \\(Y_i\\), the observed number of cases and \\(E_i\\), the expected number of cases. In this study, we computed the expected number of cases in each district by assuming infections are homogeneously distributed across Cambodia, i.e., the incidence is the same in each district. 
The SIR therefore represents the deviation of incidence compared to the average incidence across Cambodia.\n\n# Compute incidence in each district (per 100 000 population)\ndistrict$incidence <- district$cases/district$T_POP * 100000\n\n# Compute the global risk\nrate <- sum(district$cases)/sum(district$T_POP)\n\n# Compute expected number of cases \ndistrict$expected <- district$T_POP * rate\n\n# Compute SIR\ndistrict$SIR <- district$cases / district$expected\n\n\npar(mfrow = c(1, 2))\n\n# Plot incidence \nmf_map(x = district)\nmf_map(x = district,\n       var = c(\"T_POP\", \"incidence\"),\n       type = \"prop_choro\",\n       pal = \"Reds\",\n       inches = .1,\n       breaks = exp(mf_get_breaks(log(district$incidence+1), breaks = \"pretty\"))-1,\n       leg_title = c(\"Population\", \"Incidence \\n(per 100 000)\"))\nmf_layout(title = \"Incidence of W Fever\")\n\n# Plot SIRs\n# create breaks and associated color palette\nbreak_SIR <- c(0,exp(mf_get_breaks(log(district$SIR), nbreaks = 8, breaks = \"pretty\")))\ncol_pal <- c(\"#273871\", \"#3267AD\", \"#6496C8\", \"#9BBFDD\", \"#CDE3F0\", \"#FFCEBC\", \"#FF967E\", \"#F64D41\", \"#B90E36\")\nmf_map(x = district)\nmf_map(x = district,\n       var = c(\"T_POP\", \"SIR\"),\n       type = \"prop_choro\",\n       breaks = break_SIR,\n       pal = col_pal,\n       inches = .1,\n       #cex = 2,\n       leg_title = c(\"Population\", \"SIR\"))\nmf_layout(title = \"Standardized Incidence Ratio of W Fever\")\n\n\n\n\nThese maps illustrate the spatial heterogeneity of the cases. The incidence shows how the disease vary from one district to another while the SIR highlight districts that have:\n\nhigher risk than average (SIR > 1) when standardized for population\nlower risk than average (SIR < 1) when standardized for population\naverage risk (SIR ~ 1) when standardized for population\n\n\n\n\n\n\n\nTo go further â€¦\n\n\n\nIn this example, we standardized the cases distribution for population count. 
This simple standardization assumes that the risk of contracting the disease is similar for each person. However, assumption does not hold for all diseases and for all observed events since confounding effects can create nuisance into the interpretations (e.g., the number of childhood illness and death outcomes in a district are usually related to the age pyramid). A confounding factor is a variable that influences both the dependent variable and independent variable, causing a spurious association. You should keep in mind that other standardization can be performed based on these confounding factors, i.e.Â variables known to have an effect but that you donâ€™t want to analyze (e.g., sex ratio, occupations, age pyramid).\n\n\n\n\n\nIn addition, one can wonder what does an SIR ~ 1 means, i.e., what is the threshold to decide whether the SIR is greater, lower or equivalent to 1. The significant of the SIR can be tested globally (to determine whether or not the incidence is homogeneously distributed) and locally in each district (to determine Which district have an SIR different than 1). We wonâ€™t perform these analyses in this tutorial but you can look at the functions ?achisq.test() (from Dcluster package (GÃ³mez-Rubio et al. 2015)) and ?probmap() (from spdep package (R. Bivand et al. 2015)) to compute these statistics."
   },
   {
     "objectID": "07-basic_statistics.html#cluster-analysis",
     "href": "07-basic_statistics.html#cluster-analysis",
     "title": "6Â  Basic statistics for spatial analysis",
     "section": "6.2 Cluster analysis",
-    "text": "6.2 Cluster analysis\n\n6.2.1 General introduction\nWhy studying clusters in epidemiology? Cluster analysis help identifying unusual patterns that occurs during a given period of time. The underlying ultimate goal of such analysis is to explain the observation of such patterns. In epidemiology, we can distinguish two types of process that would explain heterogeneity in case distribution:\n\nThe 1st order effects are the spatial variations of cases distribution caused by underlying properties of environment or the population structure itself. In such process individual get infected independently from the rest of the population. Such process includes the infection through an environment at risk as, for example, air pollution, contaminated waters or soils and UV exposition. This effect assume that the observed pattern is caused by a difference in risk intensity.\nThe 2nd order effects describes process of spread, contagion and diffusion of diseases caused by interactions between individuals. This includes transmission of infectious disease by proximity, but also the transmission of non-infectious disease, for example, with the diffusion of social norms within networks. This effect assume that the observed pattern is caused by correlations or co-variations.\n\n\n\n\n\n\nNo statistical methods could distinguish between these competing processes since their outcome results in similar pattern of points. The cluster analysis help describing the magnitude and the location of pattern but in no way could answer the question of why such patterns occurs. It is therefore a step that help detecting cluster for description and surveillance purpose and rising hypothesis on the underlying process that will lead further investigations.\nKnowledge about the disease and its transmission process could orientate the choice of the methods of study. 
We presented in this brief tutorial two methods of cluster detection, the Moranâ€™s I test that test for spatial independence (likely related to 2nd order effects) and the scan statistics that test for homogeneous distribution (likely related 1st order effects). It relies on epidemiologist to select the tools that best serve the studied question.\n\n\n\n\n\n\nStatistic tests and distributions\n\n\n\nIn statistics, problems are usually expressed by defining two hypotheses: the null hypothesis (H0), i.e., an a priori hypothesis of the studied phenomenon (e.g., the situation is a random) and the alternative hypothesis (H1), e.g., the situation is not random. The main principle is to measure how likely the observed situation belong to the ensemble of situation that are possible under the H0 hypothesis.\nIn mathematics, a probability distribution is a mathematical expression that represents what we would expect due to random chance. The choice of the probability distribution relies on the type of data you use (continuous, count, binary). In general, three distribution a used while studying disease rates, the Binomial, the Poisson and the Poisson-gamma mixture (also known as negative binomial) distributions.\nMany the statistical tests assume by default that data are normally distributed. It implies that your variable is continuous and that all data could easily be represented by two parameters, the mean and the variance, i.e., each value have the same level of certainty. 
If many measure can be assessed under the normality assumption, this is usually not the case in epidemiology with strictly positives rates and count values that 1) does not fit the normal distribution and 2) does not provide with the same degree of certainty since variances likely differ between district due to different population size, i.e., some district have very sparse data (with high variance) while other have adequate data (with lower variance).\n\n# dataset statistics\nm_cases <- mean(district$incidence)\nsd_cases <- sd(district$incidence)\n\nhist(district$incidence, probability = TRUE, ylim = c(0, 0.4), xlim = c(-5, 16), xlab = \"Number of cases\", ylab = \"Probability\", main = \"Histogram of observed incidence compared\\nto Normal and Poisson distributions\")\ncurve(dnorm(x, m_cases, sd_cases),col = \"blue\",  lwd = 1, add = TRUE)\npoints(0:max(district$incidence), dpois(0:max(district$incidence), m_cases),type = 'b', pch = 20, col = \"red\", ylim = c(0, 0.6), lty = 2)\n\nlegend(\"topright\", legend = c(\"Normal distribution\", \"Poisson distribution\", \"Observed distribution\"), col = c(\"blue\", \"red\", \"black\"),pch = c(NA, 20, NA), lty = c(1, 2, 1))\n\n\n\n\nIn this tutorial, we used the Poisson distribution in our statistical tests.\n\n\n\n\n6.2.2 Test for spatial autocorrelation (Moranâ€™s I test)\n\n6.2.2.1 The global Moranâ€™s I test\nA popular test for spatial autocorrelation is the Moranâ€™s test. This test tells us whether nearby units tend to exhibit similar incidences. It ranges from -1 to +1. 
A value of -1 denote that units with low rates are located near other units with high rates, while a Moranâ€™s I value of +1 indicates a concentration of spatial units exhibiting similar rates.\n\n\n\n\n\n\nMoranâ€™s I test\n\n\n\nThe Moranâ€™s statistics is:\n\\[I = \\frac{N}{\\sum_{i=1}^N\\sum_{j=1}^Nw_{ij}}\\frac{\\sum_{i=1}^N\\sum_{j=1}^Nw_{ij}(Y_i-\\bar{Y})(Y_j - \\bar{Y})}{\\sum_{i=1}^N(Y_i-\\bar{Y})^2}\\] with:\n\n\\(N\\): the number of polygons,\n\\(w_{ij}\\): is a matrix of spatial weight with zeroes on the diagonal (i.e., \\(w_{ii}=0\\)). For example, if polygons are neighbors, the weight takes the value \\(1\\) otherwise it takes the value \\(0\\).\n\\(Y_i\\): the variable of interest,\n\\(\\bar{Y}\\): the mean value of \\(Y\\).\n\nUnder the Moranâ€™s test, the statistics hypotheses are:\n\nH0: the distribution of cases is spatially independent, i.e., \\(I=0\\).\nH1: the distribution of cases is spatially autocorrelated, i.e., \\(I\\ne0\\).\n\n\n\nWe will compute the Moranâ€™s statistics using spdep(R. Bivand et al. 2015) and Dcluster(GÃ³mez-Rubio et al. 2015) packages. spdep package provides a collection of functions to analyze spatial correlations of polygons and works with sp objects. In this example, we use poly2nb() and nb2listw(). These functions respectively detect the neighboring polygons and assign weight corresponding to \\(1/\\#\\ of\\ neighbors\\). 
Dcluster package provides a set of functions for the detection of spatial clusters of disease using count data.\n\n#install.packages(\"spdep\")\n#install.packages(\"DCluster\")\nlibrary(spdep) # Functions for creating spatial weight, spatial analysis\nlibrary(DCluster)  # Package with functions for spatial cluster analysis\n\nqueen_nb <- poly2nb(district) # Neighbors according to queen case\nq_listw <- nb2listw(queen_nb, style = 'W') # row-standardized weights\n\n# Moran's I test\nm_test <- moranI.test(cases ~ offset(log(expected)), \n                  data = district,\n                  model = 'poisson',\n                  R = 499,\n                  listw = q_listw,\n                  n = length(district$cases), # number of regions\n                  S0 = Szero(q_listw)) # Global sum of weights\nprint(m_test)\n\nMoran's I test of spatial autocorrelation \n\n    Type of boots.: parametric \n    Model used when sampling: Poisson \n    Number of simulations: 499 \n    Statistic:  0.1566449 \n    p-value :  0.014 \n\nplot(m_test)\n\n\n\n\nThe Moranâ€™s statistics is here \\(I =\\) 0.16. When comparing its value to the H0 distribution (built under 499 simulations), the probability of observing such a I value under the null hypothesis, i.e.Â the distribution of cases is spatially independent, is \\(p_{value} =\\) 0.014. We therefore reject H0 with error risk of \\(\\alpha = 5\\%\\). The distribution of cases is therefore autocorrelated across districts in Cambodia.\n\n\n6.2.2.2 The Local Moranâ€™s I LISA test\nThe global Moranâ€™s test provides us a global statistical value informing whether autocorrelation occurs over the territory but does not inform on where does these correlations occurs, i.e., what is the locations of the clusters. To identify such cluster, we can decompose the Moranâ€™s I statistic to extract local information of the level of correlation of each district and its neighbors. This is called the Local Moranâ€™s I LISA statistic. 
Because the Local Moranâ€™s I LISA statistic test each district for autocorrelation independently, concern is raised about multiple testing limitations that increase the Type I error (\\(\\alpha\\)) of the statistical tests. The use of local test should therefore be study in light of explore and describes clusters once the global test has detected autocorrelation.\n\n\n\n\n\n\nStatistical test\n\n\n\nFor each district \\(i\\), the Local Moranâ€™s I statistics is:\n\\[I_i = \\frac{(Y_i-\\bar{Y})}{\\sum_{i=1}^N(Y_i-\\bar{Y})^2}\\sum_{j=1}^Nw_{ij}(Y_j - \\bar{Y}) \\text{ with }  I = \\sum_{i=1}^NI_i/N\\]\n\n\nThe localmoran()function from the package spdep treats the variable of interest as if it was normally distributed. In some cases, this assumption could be reasonable for incidence rate, especially when the areal units of analysis have sufficiently large population count suggesting that the values have similar level of variances. Unfortunately, the local Moranâ€™s test has not been implemented for Poisson distribution (population not large enough in some districts) in spdep package. However, Bivand et al. (R. S. Bivand et al. 
2008) provided some code to manually perform the analysis using Poisson distribution and this code was further implemented in the course â€œSpatial Epidemiologyâ€.\n\n# Step 1 - Create the standardized deviation of observed from expected\nsd_lm <- (district$cases - district$expected) / sqrt(district$expected)\n\n# Step 2 - Create a spatially lagged version of standardized deviation of neighbors\nwsd_lm <- lag.listw(q_listw, sd_lm)\n\n# Step 3 - the local Moran's I is the product of step 1 and step 2\ndistrict$I_lm <- sd_lm * wsd_lm\n\n# Step 4 - setup parameters for simulation of the null distribution\n\n# Specify number of simulations to run\nnsim <- 499\n\n# Specify dimensions of result based on number of regions\nN <- length(district$expected)\n\n# Create a matrix of zeros to hold results, with a row for each county, and a column for each simulation\nsims <- matrix(0, ncol = nsim, nrow = N)\n\n# Step 5 - Start a for-loop to iterate over simulation columns\nfor(i in 1:nsim){\n  y <- rpois(N, lambda = district$expected) # generate a random event count, given expected\n  sd_lmi <- (y - district$expected) / sqrt(district$expected) # standardized local measure\n  wsd_lmi <- lag.listw(q_listw, sd_lmi) # standardized spatially lagged measure\n  sims[, i] <- sd_lmi * wsd_lmi # this is the I(i) statistic under this iteration of null\n}\n\n# Step 6 - For each county, test where the observed value ranks with respect to the null simulations\nxrank <- apply(cbind(district$I_lm, sims), 1, function(x) rank(x)[1])\n\n# Step 7 - Calculate the difference between observed rank and total possible (nsim)\ndiff <- nsim - xrank\ndiff <- ifelse(diff > 0, diff, 0)\n\n# Step 8 - Assuming a uniform distribution of ranks, calculate p-value for observed\n# given the null distribution generate from simulations\ndistrict$pval_lm <- punif((diff + 1) / (nsim + 1))\n\nBriefly, the process consist on 1) computing the I statistics for the observed data, 2) estimating the null distribution of the I 
statistics by performing random sampling into a poisson distribution and 3) comparing the observed I statistic with the null distribution to determine the probability to observe such value if the number of cases were spatially independent. For each district, we obtain a p-value based on the comparison of the observed value and the null distribution.\nA conventional way of plotting these results is to classify the districts into 5 classes based on local Moranâ€™s I output. The classification of cluster that are significantly autocorrelated to their neighbors is performed based on a comparison of the scaled incidence in the district compared to the scaled weighted averaged incidence of it neighboring districts (computed with lag.listw()):\n\nDistricts that have higher-than-average rates in both index regions and their neighbors and showing statistically significant positive values for the local \\(I_i\\) statistic are defined as High-High (hotspot of the disease)\nDistricts that have lower-than-average rates in both index regions and their neighbors and showing statistically significant positive values for the local \\(I_i\\) statistic are defined as Low-Low (cold spot of the disease).\nDistricts that have higher-than-average rates in the index regions and lower-than-average rates in their neighbors, and showing statistically significant negative values for the local \\(I_i\\) statistic are defined as High-Low(outlier with high incidence in an area with low incidence).\nDistricts that have lower-than-average rates in the index regions and higher-than-average rates in their neighbors, and showing statistically significant negative values for the local \\(I_i\\) statistic are defined as Low-High (outlier of low incidence in area with high incidence).\nDistricts with non-significant values for the \\(I_i\\) statistic are defined as Non-significant.\n\n\n# create lagged local raw_rate - in other words the average of the queen neighbors value\n# values are scaled 
(centered and reduced) to be compared to average\ndistrict$lag_std   <- scale(lag.listw(q_listw, var = district$incidence))\ndistrict$incidence_std <- scale(district$incidence)\n\n# extract pvalues\n# district$lm_pv <- lm_test[,5]\n\n# Classify local moran's outputs\ndistrict$lm_class <- NA\ndistrict$lm_class[district$incidence_std >=0 & district$lag_std >=0] <- 'High-High'\ndistrict$lm_class[district$incidence_std <=0 & district$lag_std <=0] <- 'Low-Low'\ndistrict$lm_class[district$incidence_std <=0 & district$lag_std >=0] <- 'Low-High'\ndistrict$lm_class[district$incidence_std >=0 & district$lag_std <=0] <- 'High-Low'\ndistrict$lm_class[district$pval_lm >= 0.05] <- 'Non-significant'\n\ndistrict$lm_class <- factor(district$lm_class, levels=c(\"High-High\", \"Low-Low\", \"High-Low\",  \"Low-High\", \"Non-significant\") )\n\n# create map\nmf_map(x = district,\n       var = \"lm_class\",\n       type = \"typo\",\n       cex = 2,\n       col_na = \"white\",\n       #val_order = c(\"High-High\", \"Low-Low\", \"High-Low\",  \"Low-High\", \"Non-significant\") ,\n       pal = c(\"#6D0026\" , \"blue\",  \"white\") , # \"#FF755F\",\"#7FABD3\" ,\n       leg_title = \"Clusters\")\n\nmf_layout(title = \"Cluster using Local Moran's I statistic\")\n\n\n\n\n\n\n\n6.2.3 Spatial scan statistics\nWhile Moranâ€™s indices focus on testing for autocorrelation between neighboring polygons (under the null assumption of spatial independence), the spatial scan statistic aims at identifying an abnormal higher risk in a given region compared to the risk outside of this region (under the null assumption of homogeneous distribution). 
The conception of a cluster is therefore different between the two methods.\nThe function kulldorff from the package SpatialEpi (Kim and Wakefield 2010) is a simple tool to implement spatial-only scan statistics.\n\n\n\n\n\n\nKulldorf test\n\n\n\nUnder the kulldorff test, the statistics hypotheses are:\n\nH0: the risk is constant over the area, i.e., there is a spatial homogeneity of the incidence.\nH1: a particular window have higher incidence than the rest of the area , i.e., there is a spatial heterogeneity of incidence.\n\n\n\nBriefly, the kulldorff scan statistics scan the area for clusters using several steps:\n\nIt create a circular window of observation by defining a single location and an associated radius of the windows varying from 0 to a large number that depends on population distribution (largest radius could include 50% of the population).\nIt aggregates the count of events and the population at risk (or an expected count of events) inside and outside the window of observation.\nFinally, it computes the likelihood ratio and test whether the risk is equal inside versus outside the windows (H0) or greater inside the observed window (H1). The H0 distribution is estimated by simulating the distribution of counts under the null hypothesis (homogeneous risk).\nThese 3 steps are repeated for each location and each possible windows-radii.\n\nWhile we test the significance of a large number of observation windows, one can raise concern about multiple testing and Type I error. This approach however suggest that we are not interest in a set of signifiant cluster but only in a most-likely cluster. This a priori restriction eliminate concern for multpile comparison since the test is simplified to a statistically significance of one single most-likely cluster.\nBecause we tested all-possible locations and window-radius, we can also choose to look at secondary clusters. 
In this case, you should keep in mind that increasing the number of secondary cluster you select, increases the risk for Type I error.\n\n#install.packages(\"SpatialEpi\")\nlibrary(\"SpatialEpi\")\n\nThe use of R spatial object is not implements in kulldorff() function. It uses instead matrix of xy coordinates that represents the centroids of the districts. A given district is included into the observed circular window if its centroids fall into the circle.\n\ndistrict_xy <- st_centroid(district) %>% \n  st_coordinates()\n\nhead(district_xy)\n\n         X       Y\n1 330823.3 1464560\n2 749758.3 1541787\n3 468384.0 1277007\n4 494548.2 1215261\n5 459644.2 1194615\n6 360528.3 1516339\n\n\nWe can then call kulldorff function (you are strongly encouraged to call ?kulldorff to properly call the function). The alpha.level threshold filter for the secondary clusters that will be retained. The most-likely cluster will be saved whatever its significance.\n\nkd_Wfever <- kulldorff(district_xy, \n                cases = district$cases,\n                population = district$T_POP,\n                expected.cases = district$expected,\n                pop.upper.bound = 0.5, # include maximum 50% of the population in a windows\n                n.simulations = 499,\n                alpha.level = 0.2)\n\n\n\n\nThe function plot the histogram of the distribution of log-likelihood ratio simulated under the null hypothesis that is estimated based on Monte Carlo simulations. The observed value of the most significant cluster identified from all possible scans is compared to the distribution to determine significance. All outputs are saved into an R object, here called kd_Wfever. 
Unfortunately, the package did not develop any summary and visualization of the results but we can explore the output object.\n\nnames(kd_Wfever)\n\n[1] \"most.likely.cluster\" \"secondary.clusters\"  \"type\"               \n[4] \"log.lkhd\"            \"simulated.log.lkhd\" \n\n\nFirst, we can focus on the most likely cluster and explore its characteristics.\n\n# We can see which districts (r number) belong to this cluster\nkd_Wfever$most.likely.cluster$location.IDs.included\n\n [1]  48  93  66 180 133  29 194 118  50 144  31 141   3 117  22  43 142\n\n# standardized incidence ratio\nkd_Wfever$most.likely.cluster$SMR\n\n[1] 2.303106\n\n# number of observed and expected cases in this cluster\nkd_Wfever$most.likely.cluster$number.of.cases\n\n[1] 122\n\nkd_Wfever$most.likely.cluster$expected.cases\n\n[1] 52.97195\n\n\n17 districts belong to the cluster and its number of cases is 2.3 times higher than the expected number of cases.\nSimilarly, we could study the secondary clusters. Results are saved in a list.\n\n# We can see which districts (r number) belong to this cluster\nlength(kd_Wfever$secondary.clusters)\n\n[1] 1\n\n# retrieve data for all secondary clusters into a table\ndf_secondary_clusters <- data.frame(SMR = sapply(kd_Wfever$secondary.clusters, '[[', 5),  \n                          number.of.cases = sapply(kd_Wfever$secondary.clusters, '[[', 3),\n                          expected.cases = sapply(kd_Wfever$secondary.clusters, '[[', 4),\n                          p.value = sapply(kd_Wfever$secondary.clusters, '[[', 8))\n\nprint(df_secondary_clusters)\n\n       SMR number.of.cases expected.cases p.value\n1 3.767698              16       4.246625   0.014\n\n\nWe only have one secondary cluster composed of one district.\n\n# create empty column to store cluster informations\ndistrict$k_cluster <- NA\n\n# save cluster information from kulldorff outputs\ndistrict$k_cluster[kd_Wfever$most.likely.cluster$location.IDs.included] <- 'Most likely cluster'\n\nfor(i in 
1:length(kd_Wfever$secondary.clusters)){\ndistrict$k_cluster[kd_Wfever$secondary.clusters[[i]]$location.IDs.included] <- paste(\n  'Secondary cluster', i, sep = '')\n}\n\n#district$k_cluster[is.na(district$k_cluster)] <- \"No cluster\"\n\n\n# create map\nmf_map(x = district,\n       var = \"k_cluster\",\n       type = \"typo\",\n       cex = 2,\n       col_na = \"white\",\n       pal = mf_get_pal(palette = \"Reds\", n = 3)[1:2],\n       leg_title = \"Clusters\")\n\nmf_layout(title = \"Cluster using kulldorf scan statistic\")\n\n\n\n\n\n\n\n\n\n\nTo go further â€¦\n\n\n\nIn this example, the expected number of cases was defined using the population count but note that standardization over other variables as age could also be implemented with the strata parameter in the kulldorff() function.\nIn addition, this cluster analysis was performed solely using the spatial scan but you should keep in mind that this method of cluster detection can be implemented for spatio-temporal data as well where the cluster definition is an abnormal number of cases in a delimited spatial area and during a given period of time. The windows of observation are therefore defined for a different center, radius and time-period. You should take a look at the function scan_ep_poisson() function in the package scanstatistic (AllÃ©vius 2018) for this analysis.\n\n\n\n\n\n\nAllÃ©vius, Benjamin. 2018. â€œScanstatistics: Space-Time Anomaly Detection Using Scan Statistics.â€ Journal of Open Source Software 3 (25): 515.\n\n\nBivand, Roger S, Edzer J Pebesma, Virgilio GÃ³mez-Rubio, and Edzer Jan Pebesma. 2008. Applied Spatial Data Analysis with r. Vol. 747248717. Springer.\n\n\nBivand, Roger, Micah Altman, Luc Anselin, Renato AssunÃ§Ã£o, Olaf Berke, Andrew Bernat, and Guillaume Blanchet. 2015. â€œPackage â€˜Spdepâ€™.â€ The Comprehensive R Archive Network.\n\n\nGÃ³mez-Rubio, Virgilio, Juan FerrÃ¡ndiz-Ferragud, Antonio LÃ³pez-QuÄ±Ìlez, et al. 2015. 
â€œPackage â€˜DClusterâ€™.â€\n\n\nKim, Albert Y, and Jon Wakefield. 2010. â€œR Data and Methods for Spatial Epidemiology: The SpatialEpi Package.â€ Dept of Statistics, University of Washington."
+    "text": "6.2 Cluster analysis\n\n6.2.1 General introduction\nWhy studying clusters in epidemiology? Cluster analysis help identifying unusual patterns that occurs during a given period of time. The underlying ultimate goal of such analysis is to explain the observation of such patterns. In epidemiology, we can distinguish two types of process that would explain heterogeneity in case distribution:\n\nThe 1st order effects are the spatial variations of cases distribution caused by underlying properties of environment or the population structure itself. In such process individual get infected independently from the rest of the population. Such process includes the infection through an environment at risk as, for example, air pollution, contaminated waters or soils and UV exposition. This effect assume that the observed pattern is caused by a difference in risk intensity.\nThe 2nd order effects describes process of spread, contagion and diffusion of diseases caused by interactions between individuals. This includes transmission of infectious disease by proximity, but also the transmission of non-infectious disease, for example, with the diffusion of social norms within networks. This effect assume that the observed pattern is caused by correlations or co-variations.\n\n\n\n\n\n\nNo statistical methods could distinguish between these competing processes since their outcome results in similar pattern of points. The cluster analysis help describing the magnitude and the location of pattern but in no way could answer the question of why such patterns occurs. It is therefore a step that help detecting cluster for description and surveillance purpose and rising hypothesis on the underlying process that will lead further investigations.\nKnowledge about the disease and its transmission process could orientate the choice of the methods of study. 
We presented in this brief tutorial two methods of cluster detection, the Moranâ€™s I test that test for spatial independence (likely related to 2nd order effects) and the scan statistics that test for homogeneous distribution (likely related 1st order effects). It relies on epidemiologist to select the tools that best serve the studied question.\n\n\n\n\n\n\nStatistic tests and distributions\n\n\n\nIn statistics, problems are usually expressed by defining two hypotheses: the null hypothesis (H0), i.e., an a priori hypothesis of the studied phenomenon (e.g., the situation is a random) and the alternative hypothesis (H1), e.g., the situation is not random. The main principle is to measure how likely the observed situation belong to the ensemble of situation that are possible under the H0 hypothesis.\nIn mathematics, a probability distribution is a mathematical expression that represents what we would expect due to random chance. The choice of the probability distribution relies on the type of data you use (continuous, count, binary). In general, three distribution a used while studying disease rates, the Binomial, the Poisson and the Poisson-gamma mixture (also known as negative binomial) distributions.\nMany the statistical tests assume by default that data are normally distributed. It implies that your variable is continuous and that all data could easily be represented by two parameters, the mean and the variance, i.e., each value have the same level of certainty. 
If many measure can be assessed under the normality assumption, this is usually not the case in epidemiology with strictly positives rates and count values that 1) does not fit the normal distribution and 2) does not provide with the same degree of certainty since variances likely differ between district due to different population size, i.e., some district have very sparse data (with high variance) while other have adequate data (with lower variance).\n\n# dataset statistics\nm_cases <- mean(district$incidence)\nsd_cases <- sd(district$incidence)\n\nhist(district$incidence, probability = TRUE, ylim = c(0, 0.4), xlim = c(-5, 16), xlab = \"Number of cases\", ylab = \"Probability\", main = \"Histogram of observed incidence compared\\nto Normal and Poisson distributions\")\n\ncurve(dnorm(x, m_cases, sd_cases),col = \"blue\",  lwd = 1, add = TRUE)\n\npoints(0:max(district$incidence), dpois(0:max(district$incidence),m_cases),\n       type = 'b', pch = 20, col = \"red\", ylim = c(0, 0.6), lty = 2)\n\nlegend(\"topright\", legend = c(\"Normal distribution\", \"Poisson distribution\", \"Observed distribution\"), col = c(\"blue\", \"red\", \"black\"),pch = c(NA, 20, NA), lty = c(1, 2, 1))\n\n\n\n\nIn this tutorial, we used the Poisson distribution in our statistical tests.\n\n\n\n\n6.2.2 Test for spatial autocorrelation (Moranâ€™s I test)\n\n6.2.2.1 The global Moranâ€™s I test\nA popular test for spatial autocorrelation is the Moranâ€™s test. This test tells us whether nearby units tend to exhibit similar incidences. It ranges from -1 to +1. 
A value of -1 denote that units with low rates are located near other units with high rates, while a Moranâ€™s I value of +1 indicates a concentration of spatial units exhibiting similar rates.\n\n\n\n\n\n\nMoranâ€™s I test\n\n\n\nThe Moranâ€™s statistics is:\n\\[I = \\frac{N}{\\sum_{i=1}^N\\sum_{j=1}^Nw_{ij}}\\frac{\\sum_{i=1}^N\\sum_{j=1}^Nw_{ij}(Y_i-\\bar{Y})(Y_j - \\bar{Y})}{\\sum_{i=1}^N(Y_i-\\bar{Y})^2}\\] with:\n\n\\(N\\): the number of polygons,\n\\(w_{ij}\\): is a matrix of spatial weight with zeroes on the diagonal (i.e., \\(w_{ii}=0\\)). For example, if polygons are neighbors, the weight takes the value \\(1\\) otherwise it takes the value \\(0\\).\n\\(Y_i\\): the variable of interest,\n\\(\\bar{Y}\\): the mean value of \\(Y\\).\n\nUnder the Moranâ€™s test, the statistics hypotheses are:\n\nH0: the distribution of cases is spatially independent, i.e., \\(I=0\\).\nH1: the distribution of cases is spatially autocorrelated, i.e., \\(I\\ne0\\).\n\n\n\nWe will compute the Moranâ€™s statistics using spdep(R. Bivand et al. 2015) and Dcluster(GÃ³mez-Rubio et al. 2015) packages. spdep package provides a collection of functions to analyze spatial correlations of polygons and works with sp objects. In this example, we use poly2nb() and nb2listw(). These functions respectively detect the neighboring polygons and assign weight corresponding to \\(1/\\#\\ of\\ neighbors\\). 
Dcluster package provides a set of functions for the detection of spatial clusters of disease using count data.\n\n#install.packages(\"spdep\")\n#install.packages(\"DCluster\")\nlibrary(spdep) # Functions for creating spatial weight, spatial analysis\nlibrary(DCluster)  # Package with functions for spatial cluster analysis\n\nqueen_nb <- poly2nb(district) # Neighbors according to queen case\nq_listw <- nb2listw(queen_nb, style = 'W') # row-standardized weights\n\n# Moran's I test\nm_test <- moranI.test(cases ~ offset(log(expected)), \n                  data = district,\n                  model = 'poisson',\n                  R = 499,\n                  listw = q_listw,\n                  n = length(district$cases), # number of regions\n                  S0 = Szero(q_listw)) # Global sum of weights\nprint(m_test)\n\nMoran's I test of spatial autocorrelation \n\n    Type of boots.: parametric \n    Model used when sampling: Poisson \n    Number of simulations: 499 \n    Statistic:  0.1566449 \n    p-value :  0.006 \n\nplot(m_test)\n\n\n\n\nThe Moranâ€™s statistics is here \\(I =\\) 0.16. When comparing its value to the H0 distribution (built under 499 simulations), the probability of observing such a I value under the null hypothesis, i.e.Â the distribution of cases is spatially independent, is \\(p_{value} =\\) 0.006. We therefore reject H0 with error risk of \\(\\alpha = 5\\%\\). The distribution of cases is therefore autocorrelated across districts in Cambodia.\n\n\n6.2.2.2 The Local Moranâ€™s I LISA test\nThe global Moranâ€™s test provides us a global statistical value informing whether autocorrelation occurs over the territory but does not inform on where does these correlations occurs, i.e., what is the locations of the clusters. To identify such cluster, we can decompose the Moranâ€™s I statistic to extract local information of the level of correlation of each district and its neighbors. This is called the Local Moranâ€™s I LISA statistic. 
Because the Local Moranâ€™s I LISA statistic test each district for autocorrelation independently, concern is raised about multiple testing limitations that increase the Type I error (\\(\\alpha\\)) of the statistical tests. The use of local test should therefore be study in light of explore and describes clusters once the global test has detected autocorrelation.\n\n\n\n\n\n\nStatistical test\n\n\n\nFor each district \\(i\\), the Local Moranâ€™s I statistics is:\n\\[I_i = \\frac{(Y_i-\\bar{Y})}{\\sum_{i=1}^N(Y_i-\\bar{Y})^2}\\sum_{j=1}^Nw_{ij}(Y_j - \\bar{Y}) \\text{ with }  I = \\sum_{i=1}^NI_i/N\\]\n\n\nThe localmoran()function from the package spdep treats the variable of interest as if it was normally distributed. In some cases, this assumption could be reasonable for incidence rate, especially when the areal units of analysis have sufficiently large population count suggesting that the values have similar level of variances. Unfortunately, the local Moranâ€™s test has not been implemented for Poisson distribution (population not large enough in some districts) in spdep package. However, Bivand et al. (R. S. Bivand et al. 
2008) provided some code to manually perform the analysis using Poisson distribution and this code was further implemented in the course â€œSpatial Epidemiologyâ€.\n\n# Step 1 - Create the standardized deviation of observed from expected\nsd_lm <- (district$cases - district$expected) / sqrt(district$expected)\n\n# Step 2 - Create a spatially lagged version of standardized deviation of neighbors\nwsd_lm <- lag.listw(q_listw, sd_lm)\n\n# Step 3 - the local Moran's I is the product of step 1 and step 2\ndistrict$I_lm <- sd_lm * wsd_lm\n\n# Step 4 - setup parameters for simulation of the null distribution\n\n# Specify number of simulations to run\nnsim <- 499\n\n# Specify dimensions of result based on number of regions\nN <- length(district$expected)\n\n# Create a matrix of zeros to hold results, with a row for each county, and a column for each simulation\nsims <- matrix(0, ncol = nsim, nrow = N)\n\n# Step 5 - Start a for-loop to iterate over simulation columns\nfor(i in 1:nsim){\n  y <- rpois(N, lambda = district$expected) # generate a random event count, given expected\n  sd_lmi <- (y - district$expected) / sqrt(district$expected) # standardized local measure\n  wsd_lmi <- lag.listw(q_listw, sd_lmi) # standardized spatially lagged measure\n  sims[, i] <- sd_lmi * wsd_lmi # this is the I(i) statistic under this iteration of null\n}\n\n# Step 6 - For each county, test where the observed value ranks with respect to the null simulations\nxrank <- apply(cbind(district$I_lm, sims), 1, function(x) rank(x)[1])\n\n# Step 7 - Calculate the difference between observed rank and total possible (nsim)\ndiff <- nsim - xrank\ndiff <- ifelse(diff > 0, diff, 0)\n\n# Step 8 - Assuming a uniform distribution of ranks, calculate p-value for observed\n# given the null distribution generate from simulations\ndistrict$pval_lm <- punif((diff + 1) / (nsim + 1))\n\nBriefly, the process consist on 1) computing the I statistics for the observed data, 2) estimating the null distribution of the I 
statistics by performing random sampling into a poisson distribution and 3) comparing the observed I statistic with the null distribution to determine the probability to observe such value if the number of cases were spatially independent. For each district, we obtain a p-value based on the comparison of the observed value and the null distribution.\nA conventional way of plotting these results is to classify the districts into 5 classes based on local Moranâ€™s I output. The classification of cluster that are significantly autocorrelated to their neighbors is performed based on a comparison of the scaled incidence in the district compared to the scaled weighted averaged incidence of it neighboring districts (computed with lag.listw()):\n\nDistricts that have higher-than-average rates in both index regions and their neighbors and showing statistically significant positive values for the local \\(I_i\\) statistic are defined as High-High (hotspot of the disease)\nDistricts that have lower-than-average rates in both index regions and their neighbors and showing statistically significant positive values for the local \\(I_i\\) statistic are defined as Low-Low (cold spot of the disease).\nDistricts that have higher-than-average rates in the index regions and lower-than-average rates in their neighbors, and showing statistically significant negative values for the local \\(I_i\\) statistic are defined as High-Low(outlier with high incidence in an area with low incidence).\nDistricts that have lower-than-average rates in the index regions and higher-than-average rates in their neighbors, and showing statistically significant negative values for the local \\(I_i\\) statistic are defined as Low-High (outlier of low incidence in area with high incidence).\nDistricts with non-significant values for the \\(I_i\\) statistic are defined as Non-significant.\n\n\n# create lagged local raw_rate - in other words the average of the queen neighbors value\n# values are scaled 
(centered and reduced) to be compared to average\ndistrict$lag_std   <- scale(lag.listw(q_listw, var = district$incidence))\ndistrict$incidence_std <- scale(district$incidence)\n\n# extract pvalues\n# district$lm_pv <- lm_test[,5]\n\n# Classify local moran's outputs\ndistrict$lm_class <- NA\ndistrict$lm_class[district$incidence_std >=0 & district$lag_std >=0] <- 'High-High'\ndistrict$lm_class[district$incidence_std <=0 & district$lag_std <=0] <- 'Low-Low'\ndistrict$lm_class[district$incidence_std <=0 & district$lag_std >=0] <- 'Low-High'\ndistrict$lm_class[district$incidence_std >=0 & district$lag_std <=0] <- 'High-Low'\ndistrict$lm_class[district$pval_lm >= 0.05] <- 'Non-significant'\n\ndistrict$lm_class <- factor(district$lm_class, levels=c(\"High-High\", \"Low-Low\", \"High-Low\",  \"Low-High\", \"Non-significant\") )\n\n# create map\nmf_map(x = district,\n       var = \"lm_class\",\n       type = \"typo\",\n       cex = 2,\n       col_na = \"white\",\n       #val_order = c(\"High-High\", \"Low-Low\", \"High-Low\",  \"Low-High\", \"Non-significant\") ,\n       pal = c(\"#6D0026\" , \"blue\",  \"white\") , # \"#FF755F\",\"#7FABD3\" ,\n       leg_title = \"Clusters\")\n\nmf_layout(title = \"Cluster using Local Moran's I statistic\")\n\n\n\n\n\n\n\n6.2.3 Spatial scan statistics\nWhile Moranâ€™s indices focus on testing for autocorrelation between neighboring polygons (under the null assumption of spatial independence), the spatial scan statistic aims at identifying an abnormal higher risk in a given region compared to the risk outside of this region (under the null assumption of homogeneous distribution). 
The conception of a cluster is therefore different between the two methods.\nThe function kulldorff from the package SpatialEpi (Kim and Wakefield 2010) is a simple tool to implement spatial-only scan statistics.\n\n\n\n\n\n\nKulldorf test\n\n\n\nUnder the kulldorff test, the statistics hypotheses are:\n\nH0: the risk is constant over the area, i.e., there is a spatial homogeneity of the incidence.\nH1: a particular window have higher incidence than the rest of the area , i.e., there is a spatial heterogeneity of incidence.\n\n\n\nBriefly, the kulldorff scan statistics scan the area for clusters using several steps:\n\nIt create a circular window of observation by defining a single location and an associated radius of the windows varying from 0 to a large number that depends on population distribution (largest radius could include 50% of the population).\nIt aggregates the count of events and the population at risk (or an expected count of events) inside and outside the window of observation.\nFinally, it computes the likelihood ratio and test whether the risk is equal inside versus outside the windows (H0) or greater inside the observed window (H1). The H0 distribution is estimated by simulating the distribution of counts under the null hypothesis (homogeneous risk).\nThese 3 steps are repeated for each location and each possible windows-radii.\n\nWhile we test the significance of a large number of observation windows, one can raise concern about multiple testing and Type I error. This approach however suggest that we are not interest in a set of signifiant cluster but only in a most-likely cluster. This a priori restriction eliminate concern for multpile comparison since the test is simplified to a statistically significance of one single most-likely cluster.\nBecause we tested all-possible locations and window-radius, we can also choose to look at secondary clusters. 
In this case, you should keep in mind that increasing the number of secondary cluster you select, increases the risk for Type I error.\n\n#install.packages(\"SpatialEpi\")\nlibrary(\"SpatialEpi\")\n\nThe use of R spatial object is not implements in kulldorff() function. It uses instead matrix of xy coordinates that represents the centroids of the districts. A given district is included into the observed circular window if its centroids fall into the circle.\n\ndistrict_xy <- st_centroid(district) %>% \n  st_coordinates()\n\nhead(district_xy)\n\n         X       Y\n1 330823.3 1464560\n2 749758.3 1541787\n3 468384.0 1277007\n4 494548.2 1215261\n5 459644.2 1194615\n6 360528.3 1516339\n\n\nWe can then call kulldorff function (you are strongly encouraged to call ?kulldorff to properly call the function). The alpha.level threshold filter for the secondary clusters that will be retained. The most-likely cluster will be saved whatever its significance.\n\nkd_Wfever <- kulldorff(district_xy, \n                cases = district$cases,\n                population = district$T_POP,\n                expected.cases = district$expected,\n                pop.upper.bound = 0.5, # include maximum 50% of the population in a windows\n                n.simulations = 499,\n                alpha.level = 0.2)\n\n\n\n\nThe function plot the histogram of the distribution of log-likelihood ratio simulated under the null hypothesis that is estimated based on Monte Carlo simulations. The observed value of the most significant cluster identified from all possible scans is compared to the distribution to determine significance. All outputs are saved into an R object, here called kd_Wfever. 
Unfortunately, the package did not develop any summary and visualization of the results but we can explore the output object.\n\nnames(kd_Wfever)\n\n[1] \"most.likely.cluster\" \"secondary.clusters\"  \"type\"               \n[4] \"log.lkhd\"            \"simulated.log.lkhd\" \n\n\nFirst, we can focus on the most likely cluster and explore its characteristics.\n\n# We can see which districts (r number) belong to this cluster\nkd_Wfever$most.likely.cluster$location.IDs.included\n\n [1]  48  93  66 180 133  29 194 118  50 144  31 141   3 117  22  43 142\n\n# standardized incidence ratio\nkd_Wfever$most.likely.cluster$SMR\n\n[1] 2.303106\n\n# number of observed and expected cases in this cluster\nkd_Wfever$most.likely.cluster$number.of.cases\n\n[1] 122\n\nkd_Wfever$most.likely.cluster$expected.cases\n\n[1] 52.97195\n\n\n17 districts belong to the cluster and its number of cases is 2.3 times higher than the expected number of cases.\nSimilarly, we could study the secondary clusters. Results are saved in a list.\n\n# We can see which districts (r number) belong to this cluster\nlength(kd_Wfever$secondary.clusters)\n\n[1] 1\n\n# retrieve data for all secondary clusters into a table\ndf_secondary_clusters <- data.frame(SMR = sapply(kd_Wfever$secondary.clusters, '[[', 5),  \n                          number.of.cases = sapply(kd_Wfever$secondary.clusters, '[[', 3),\n                          expected.cases = sapply(kd_Wfever$secondary.clusters, '[[', 4),\n                          p.value = sapply(kd_Wfever$secondary.clusters, '[[', 8))\n\nprint(df_secondary_clusters)\n\n       SMR number.of.cases expected.cases p.value\n1 3.767698              16       4.246625   0.012\n\n\nWe only have one secondary cluster composed of one district.\n\n# create empty column to store cluster informations\ndistrict$k_cluster <- NA\n\n# save cluster information from kulldorff outputs\ndistrict$k_cluster[kd_Wfever$most.likely.cluster$location.IDs.included] <- 'Most likely cluster'\n\nfor(i in 
1:length(kd_Wfever$secondary.clusters)){\ndistrict$k_cluster[kd_Wfever$secondary.clusters[[i]]$location.IDs.included] <- paste(\n  'Secondary cluster', i, sep = '')\n}\n\n#district$k_cluster[is.na(district$k_cluster)] <- \"No cluster\"\n\n\n# create map\nmf_map(x = district,\n       var = \"k_cluster\",\n       type = \"typo\",\n       cex = 2,\n       col_na = \"white\",\n       pal = mf_get_pal(palette = \"Reds\", n = 3)[1:2],\n       leg_title = \"Clusters\")\n\nmf_layout(title = \"Cluster using kulldorf scan statistic\")\n\n\n\n\n\n\n\n\n\n\nTo go further â€¦\n\n\n\nIn this example, the expected number of cases was defined using the population count but note that standardization over other variables as age could also be implemented with the strata parameter in the kulldorff() function.\nIn addition, this cluster analysis was performed solely using the spatial scan but you should keep in mind that this method of cluster detection can be implemented for spatio-temporal data as well where the cluster definition is an abnormal number of cases in a delimited spatial area and during a given period of time. The windows of observation are therefore defined for a different center, radius and time-period. You should take a look at the function scan_ep_poisson() function in the package scanstatistic (AllÃ©vius 2018) for this analysis.\n\n\n\n\n\n\nAllÃ©vius, Benjamin. 2018. â€œScanstatistics: Space-Time Anomaly Detection Using Scan Statistics.â€ Journal of Open Source Software 3 (25): 515.\n\n\nBivand, Roger S, Edzer J Pebesma, Virgilio GÃ³mez-Rubio, and Edzer Jan Pebesma. 2008. Applied Spatial Data Analysis with r. Vol. 747248717. Springer.\n\n\nBivand, Roger, Micah Altman, Luc Anselin, Renato AssunÃ§Ã£o, Olaf Berke, Andrew Bernat, and Guillaume Blanchet. 2015. â€œPackage â€˜Spdepâ€™.â€ The Comprehensive R Archive Network.\n\n\nGÃ³mez-Rubio, Virgilio, Juan FerrÃ¡ndiz-Ferragud, Antonio LÃ³pez-QuÄ±Ìlez, et al. 2015. 
â€œPackage â€˜DClusterâ€™.â€\n\n\nKim, Albert Y, and Jon Wakefield. 2010. â€œR Data and Methods for Spatial Epidemiology: The SpatialEpi Package.â€ Dept of Statistics, University of Washington."
   },
   {
     "objectID": "01-introduction.html",
@@ -244,4 +244,4 @@
     "section": "",
     "text": "Agafonkin, Vladimir. 2015. â€œLeaflet Javascript Libary.â€\n\n\nAllÃ©vius, Benjamin. 2018. â€œScanstatistics: Space-Time Anomaly\nDetection Using Scan Statistics.â€ Journal of Open Source\nSoftware 3 (25): 515.\n\n\nAppelhans, Tim, Florian Detsch, Christoph Reudenbach, and Stefan\nWoellauer. 2022. â€œMapview: Interactive Viewing of Spatial Data in\nr.â€ https://CRAN.R-project.org/package=mapview.\n\n\nAppelhans, Tim, Kenton Russell, and Lorenzo Busetto. 2020.\nâ€œMapedit: Interactive Editing of Spatial Data in r.â€ https://CRAN.R-project.org/package=mapedit.\n\n\nBivand, Roger S, Edzer J Pebesma, Virgilio GÃ³mez-Rubio, and Edzer Jan\nPebesma. 2008. Applied Spatial Data Analysis with r. Vol.\n747248717. Springer.\n\n\nBivand, Roger, Micah Altman, Luc Anselin, Renato AssunÃ§Ã£o, Olaf Berke,\nAndrew Bernat, and Guillaume Blanchet. 2015. â€œPackage\nâ€˜Spdepâ€™.â€ The Comprehensive R Archive\nNetwork.\n\n\nBivand, Roger, Tim Keitt, and Barry Rowlingson. 2022. â€œRgdal:\nBindings for the â€™Geospatialâ€™ Data Abstraction Library.â€ https://CRAN.R-project.org/package=rgdal.\n\n\nBivand, Roger, and Colin Rundel. 2021. â€œRgeos: Interface to\nGeometry Engine - Open Source (â€™GEOSâ€™).â€ https://CRAN.R-project.org/package=rgeos.\n\n\nBrunet, Roger, Robert Ferras, and HervÃ© ThÃ©ry. 1993. Les Mots de La\ngÃ©ographie: Dictionnaire Critique. 03) 911 BRU.\n\n\nCambon, Jesse, Diego HernangÃ³mez, Christopher Belanger, and Daniel\nPossenriede. 2021. â€œTidygeocoder: An r Package for\nGeocodingâ€ 6: 3544. https://doi.org/10.21105/joss.03544.\n\n\nCauvin, Colette, Francisco Escobar, and Aziz Serradj. 2013. Thematic\nCartography, Cartography and the Impact of the Quantitative\nRevolution. Vol. 2. John Wiley & Sons.\n\n\nCheng, Joe, Bhaskar Karambelkar, and Yihui Xie. 2022. â€œLeaflet:\nCreate Interactive Web Maps with the JavaScript â€™Leafletâ€™\nLibrary.â€ https://CRAN.R-project.org/package=leaflet.\n\n\nDicko, Ahmadou. 2021. 
R Client for the geoBoundaries API, Providing\nCountry Political Administrative Boundaries. https://dickoa.gitlab.io/rgeoboundaries/index.html.\n\n\nDorling, Daniel. 1996. Area Cartograms: Their Use and Creation,\nConcepts and Techniques in Modern Geography. Vol. 59. CATMOG:\nConcepts and Techniques in Modern Geography. Institute of British\nGeographers.\n\n\nDougenik, James A, Nicholas R Chrisman, and Duane R Niemeyer. 1985.\nâ€œAn Algorithm to Construct Continuous Area Cartograms.â€\nThe Professional Geographer 37 (1): 75â€“81.\n\n\nDunnington, Dewey. 2021. â€œGgspatial: Spatial Data Framework for\nGgplot2.â€ https://CRAN.R-project.org/package=ggspatial.\n\n\nGDAL/OGR contributors. n.d. GDAL/OGR Geospatial Data\nAbstraction Software Library. Open Source Geospatial Foundation. https://gdal.org.\n\n\nGilardi, Andrea, and Robin Lovelace. 2021. â€œOsmextract: Download\nand Import Open Street Map Data Extracts.â€ https://CRAN.R-project.org/package=osmextract.\n\n\nGiraud, TimothÃ©e. 2021a. â€œLinemap: Line Maps.â€ https://CRAN.R-project.org/package=linemap.\n\n\nâ€”â€”â€”. 2021b. â€œMaptiles: Download and Display Map Tiles.â€ https://CRAN.R-project.org/package=maptiles.\n\n\nâ€”â€”â€”. 2022a. â€œMapsf: Thematic Cartography.â€ https://CRAN.R-project.org/package=mapsf.\n\n\nâ€”â€”â€”. 2022b. â€œTanaka: Design Shaded Contour Lines (or Tanaka)\nMaps.â€ https://CRAN.R-project.org/package=tanaka.\n\n\nGiraud, TimothÃ©e, and Nicolas Lambert. 2016. â€œCartography: Create\nand Integrate Maps in Your r Workflowâ€ 1. https://doi.org/10.21105/joss.00054.\n\n\nGombin, Joel, and Paul-Antoine Chevalier. 2022. â€œbanR: R Client\nfor the BAN API.â€\n\n\nGÃ³mez-Rubio, Virgilio, Juan FerrÃ¡ndiz-Ferragud, Antonio LÃ³pez-QuÄ±Ìlez, et\nal. 2015. â€œPackage â€˜DClusterâ€™.â€\n\n\nGuevarra, Ernest. 2021. Gadmr: An r Interface to the GADM Map\nRepository. https://github.com/SpatialWorks/gadmr.\n\n\nHijmans, Robert J. 2022a. 
â€œRaster: Geographic Data Analysis and\nModeling.â€ https://CRAN.R-project.org/package=raster.\n\n\nâ€”â€”â€”. 2022b. â€œTerra: Spatial Data Analysis.â€ https://CRAN.R-project.org/package=terra.\n\n\nJeworutzki, Sebastian. 2020. â€œCartogram: Create Cartograms with\nr.â€ https://CRAN.R-project.org/package=cartogram.\n\n\nKim, Albert Y, and Jon Wakefield. 2010. â€œR Data and Methods for\nSpatial Epidemiology: The SpatialEpi Package.â€ Dept of\nStatistics, University of Washington.\n\n\nLambert, Nicolas. 2015. â€œLes Anamorphoses Cartographiques.â€\nBlog. Carnet NÃ©ocartographique. https://neocarto.hypotheses.org/366.\n\n\nLi, Xingong. 2009. â€œMap Algebra and Beyond : 1. Map Algebra for\nScalar Fields.â€ https://slideplayer.com/slide/5822638/.\n\n\nMadelin, Malika. 2021. â€œAnalyse dâ€™images Raster (Et\nTÃ©lÃ©dÃ©tection).â€ https://mmadelin.github.io/sigr2021/SIGR2021_raster_MM.html.\n\n\nMennis, Jeremy. 2015. â€œFundamentals of GIS : Raster\nOperations.â€ https://cupdf.com/document/gus-0262-fundamentals-of-gis-lecture-presentation-7-raster-operations-jeremy.html.\n\n\nNowosad, Jakub. 2021. â€œImage Processing and All Things\nRaster.â€ https://nowosad.github.io/SIGR2021/workshop2/workshop2.html.\n\n\nOlson, Judy M. 1976. â€œNoncontiguous Area Cartograms.â€\nThe Professional Geographer 28 (4): 371â€“80.\n\n\nPadgham, Mark, Bob Rudis, Robin Lovelace, and MaÃ«lle Salmon. 2017a.\nâ€œOsmdataâ€ 2. https://doi.org/10.21105/joss.00305.\n\n\nâ€”â€”â€”. 2017b. â€œOsmdata.â€ The Journal of Open Source\nSoftware 2 (14). https://doi.org/10.21105/joss.00305.\n\n\nPaull, John, and Benjamin Hennig. 2016. â€œAtlas of Organics: Four\nMaps of the World of Organic Agriculture.â€ Journal of\nOrganics 3 (1): 25â€“32.\n\n\nPebesma, Edzer. 2018b. â€œSimple Features for r:\nStandardized Support for Spatial Vector Dataâ€ 10. https://doi.org/10.32614/RJ-2018-009.\n\n\nâ€”â€”â€”. 2018a. â€œSimple Features for R: Standardized Support for\nSpatial Vector Data.â€ The R Journal 10 (1): 439. 
https://doi.org/10.32614/rj-2018-009.\n\n\nâ€”â€”â€”. 2021. â€œStars: Spatiotemporal Arrays, Raster and Vector Data\nCubes.â€ https://CRAN.R-project.org/package=stars.\n\n\nPebesma, Edzer J., and Roger S. Bivand. 2005. â€œClasses and Methods\nfor Spatial Data in râ€ 5. https://CRAN.R-project.org/doc/Rnews/.\n\n\nPROJ contributors. 2021. PROJ Coordinate Transformation\nSoftware Library. Open Source Geospatial Foundation. https://proj.org/.\n\n\nRacine, Etienne B. 2016. â€œThe Visual Raster Cheat Sheet.â€\nhttps://rpubs.com/etiennebr/visualraster.\n\n\nRowlingson, Barry. 2019. Geonames: Interface to the \"Geonames\"\nSpatial Query Web Service. https://CRAN.R-project.org/package=geonames.\n\n\nSouth, Andy. 2017. â€œRnaturalearth: World Map Data from Natural\nEarth.â€ https://CRAN.R-project.org/package=rnaturalearth.\n\n\nTanaka, Kitiro. 1950. â€œThe Relief Contour Method of Representing\nTopography on Maps.â€ Geographical Review 40 (3): 444. https://doi.org/10.2307/211219.\n\n\nTennekes, Martijn. 2018. â€œTmap: Thematic\nMaps in râ€ 84. https://doi.org/10.18637/jss.v084.i06.\n\n\nTomlin, C. Dana. 1990. Geographic Information Systems and\nCartographic Modeling. Prentice Hall.\n\n\nWeidmann, Nils B., Guy Schvitz, and Luc Girardin. 2021. Cshapes: The\nCShapes 2.0 Dataset and Utilities. https://CRAN.R-project.org/package=cshapes.\n\n\nWickham, Hadley. 2016. â€œGgplot2: Elegant Graphics for Data\nAnalysis.â€ https://ggplot2.tidyverse.org."
   }
-]
+]
\ No newline at end of file