Skip to content
Snippets Groups Projects
07-basic_statistics.html.orig 98.4 KiB
Newer Older
lea.douchet_ird.fr's avatar
lea.douchet_ird.fr committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>

<meta charset="utf-8">
<meta name="generator" content="quarto-1.1.251">

<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">


<title>Mapping and spatial analyses in R for One Health studies - 6&nbsp; Basic statistics for spatial analysis</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
  width: 0.8em;
  margin: 0 0.8em 0.2em -1.6em;
  vertical-align: middle;
}
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
  { counter-reset: source-line 0; }
pre.numberSource code > span
  { position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
  { content: counter(source-line);
    position: relative; left: -1em; text-align: right; vertical-align: baseline;
    border: none; display: inline-block;
    -webkit-touch-callout: none; -webkit-user-select: none;
    -khtml-user-select: none; -moz-user-select: none;
    -ms-user-select: none; user-select: none;
    padding: 0 4px; width: 4em;
    color: #aaaaaa;
  }
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
div.sourceCode
  {   }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { color: #008000; } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { color: #008000; font-weight: bold; } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
div.csl-bib-body { }
div.csl-entry {
  clear: both;
}
.hanging div.csl-entry {
  margin-left:2em;
  text-indent:-2em;
}
div.csl-left-margin {
  min-width:2em;
  float:left;
}
div.csl-right-inline {
  margin-left:2em;
  padding-left:1em;
}
div.csl-indent {
  margin-left: 2em;
}
</style>


<script src="site_libs/quarto-nav/quarto-nav.js"></script>
<script src="site_libs/quarto-nav/headroom.min.js"></script>
<script src="site_libs/clipboard/clipboard.min.js"></script>
<script src="site_libs/quarto-search/autocomplete.umd.js"></script>
<script src="site_libs/quarto-search/fuse.min.js"></script>
<script src="site_libs/quarto-search/quarto-search.js"></script>
<meta name="quarto:offset" content="./">
<link href="./08-exercices.html" rel="next">
<link href="./05-mapping_with_r.html" rel="prev">
<script src="site_libs/quarto-html/quarto.js"></script>
<script src="site_libs/quarto-html/popper.min.js"></script>
<script src="site_libs/quarto-html/tippy.umd.min.js"></script>
<script src="site_libs/quarto-html/anchor.min.js"></script>
<link href="site_libs/quarto-html/tippy.css" rel="stylesheet">
<link href="site_libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="site_libs/bootstrap/bootstrap.min.js"></script>
<link href="site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="site_libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
<script id="quarto-search-options" type="application/json">{
  "location": "sidebar",
  "copy-button": false,
  "collapse-after": 3,
  "panel-placement": "start",
  "type": "textbox",
  "limit": 20,
  "language": {
    "search-no-results-text": "No results",
    "search-matching-documents-text": "matching documents",
    "search-copy-link-title": "Copy link to search",
    "search-hide-matches-text": "Hide additional matches",
    "search-more-match-text": "more match in this document",
    "search-more-matches-text": "more matches in this document",
    "search-clear-button-title": "Clear",
    "search-detached-cancel-button-title": "Cancel",
    "search-submit-button-title": "Submit"
  }
}</script>
<style>html{ scroll-behavior: smooth; }</style>

  <script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.js" type="text/javascript"></script>

<link rel="stylesheet" href="styles.css">
</head>

<body class="nav-sidebar floating">

<div id="quarto-search-results"></div>
  <header id="quarto-header" class="headroom fixed-top">
  <nav class="quarto-secondary-nav" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
    <div class="container-fluid d-flex justify-content-between">
      <h1 class="quarto-secondary-nav-title"><span class="chapter-number">6</span>&nbsp; <span class="chapter-title">Basic statistics for spatial analysis</span></h1>
      <button type="button" class="quarto-btn-toggle btn" aria-label="Show secondary navigation">
        <i class="bi bi-chevron-right"></i>
      </button>
    </div>
  </nav>
</header>
<!-- content -->
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article">
<!-- sidebar -->
  <nav id="quarto-sidebar" class="sidebar collapse sidebar-navigation floating overflow-auto">
    <div class="pt-lg-2 mt-2 text-left sidebar-header">
    <div class="sidebar-title mb-0 py-0">
      <a href="./">Mapping and spatial analyses in R for One Health studies</a> 
        <div class="sidebar-tools-main">
    <a href="https://forge.ird.fr/espace-dev/personnels/longour/geohealth/documentation/rspatial-for-onehealth" title="Source Code" class="sidebar-tool px-1"><i class="bi bi-git"></i></a>
</div>
    </div>
      </div>
      <div class="mt-2 flex-shrink-0 align-items-center">
        <div class="sidebar-search">
        <div id="quarto-search" class="" title="Search"></div>
        </div>
      </div>
    <div class="sidebar-menu-container"> 
    <ul class="list-unstyled mt-1">
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./index.html" class="sidebar-item-text sidebar-link">Preface</a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./01-introduction.html" class="sidebar-item-text sidebar-link"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Introduction</span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./02-data_acquisition.html" class="sidebar-item-text sidebar-link"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Data Acquisition</span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./03-vector_data.html" class="sidebar-item-text sidebar-link"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Using vector data</span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./04-raster_data.html" class="sidebar-item-text sidebar-link"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Using raster data</span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./05-mapping_with_r.html" class="sidebar-item-text sidebar-link"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Mapping With R</span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./07-basic_statistics.html" class="sidebar-item-text sidebar-link active"><span class="chapter-number">6</span>&nbsp; <span class="chapter-title">Basic statistics for spatial analysis</span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./08-exercices.html" class="sidebar-item-text sidebar-link">Exercices</a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./references.html" class="sidebar-item-text sidebar-link">References</a>
  </div>
</li>
    </ul>
    </div>
</nav>
<!-- margin-sidebar -->
    <div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
        <nav id="TOC" role="doc-toc" class="toc-active">
    <h2 id="toc-title">Table of contents</h2>
   
  <ul>
  <li><a href="#import-and-visualize-epidemiological-data" id="toc-import-and-visualize-epidemiological-data" class="nav-link active" data-scroll-target="#import-and-visualize-epidemiological-data"><span class="toc-section-number">6.1</span>  Import and visualize epidemiological data</a></li>
  <li><a href="#cluster-analysis" id="toc-cluster-analysis" class="nav-link" data-scroll-target="#cluster-analysis"><span class="toc-section-number">6.2</span>  Cluster analysis</a>
  <ul>
  <li><a href="#general-introduction" id="toc-general-introduction" class="nav-link" data-scroll-target="#general-introduction"><span class="toc-section-number">6.2.1</span>  General introduction</a></li>
  <li><a href="#test-for-spatial-autocorrelation-morans-i-test" id="toc-test-for-spatial-autocorrelation-morans-i-test" class="nav-link" data-scroll-target="#test-for-spatial-autocorrelation-morans-i-test"><span class="toc-section-number">6.2.2</span>  Test for spatial autocorrelation (Moran’s I test)</a>
  <ul class="collapse">
  <li><a href="#the-global-morans-i-test" id="toc-the-global-morans-i-test" class="nav-link" data-scroll-target="#the-global-morans-i-test"><span class="toc-section-number">6.2.2.1</span>  The global Moran’s I test</a></li>
  <li><a href="#the-local-morans-i-lisa-test" id="toc-the-local-morans-i-lisa-test" class="nav-link" data-scroll-target="#the-local-morans-i-lisa-test"><span class="toc-section-number">6.2.2.2</span>  The Local Moran’s I LISA test</a></li>
  </ul></li>
  <li><a href="#spatial-scan-statistics" id="toc-spatial-scan-statistics" class="nav-link" data-scroll-target="#spatial-scan-statistics"><span class="toc-section-number">6.2.3</span>  Spatial scan statistics</a></li>
  </ul></li>
  <li><a href="#conclusion" id="toc-conclusion" class="nav-link" data-scroll-target="#conclusion"><span class="toc-section-number">6.3</span>  Conclusion</a></li>
  </ul>
</nav>
    </div>
<!-- main -->
<main class="content" id="quarto-document-content">

<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title d-none d-lg-block"><span class="chapter-number">6</span>&nbsp; <span class="chapter-title">Basic statistics for spatial analysis</span></h1>
</div>



<div class="quarto-title-meta">

    
    
  </div>
  

</header>

<p>This section aims at providing some basic statistical tools to study the spatial distribution of epidemiological data. If you wish to go further into spatial statistics applied to epidemiology and their limitations you can consult the tutorial “<a href="https://mkram01.github.io/EPI563-SpatialEPI/index.html">Spatial Epidemiology</a>” from M. Kramer from which the statistical analysis of this section was adapted.</p>
<section id="import-and-visualize-epidemiological-data" class="level2" data-number="6.1">
<h2 data-number="6.1" class="anchored" data-anchor-id="import-and-visualize-epidemiological-data"><span class="header-section-number">6.1</span> Import and visualize epidemiological data</h2>
<p>In this section, we load data that reference the cases of an imaginary disease, the W fever, throughout Cambodia. Each point corresponds to the geo-localization of a case.</p>
<div class="cell" data-nm="true">
<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(dplyr)</span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(sf)</span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a><span class="co">#Import Cambodia country border</span></span>
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a>country <span class="ot">&lt;-</span> <span class="fu">st_read</span>(<span class="st">"data_cambodia/cambodia.gpkg"</span>, <span class="at">layer =</span> <span class="st">"country"</span>, <span class="at">quiet =</span> <span class="cn">TRUE</span>)</span>
<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a><span class="co">#Import provincial administrative border of Cambodia</span></span>
<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a>education <span class="ot">&lt;-</span> <span class="fu">st_read</span>(<span class="st">"data_cambodia/cambodia.gpkg"</span>, <span class="at">layer =</span> <span class="st">"education"</span>, <span class="at">quiet =</span> <span class="cn">TRUE</span>)</span>
<span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a><span class="co">#Import district administrative border of Cambodia</span></span>
<span id="cb1-9"><a href="#cb1-9" aria-hidden="true" tabindex="-1"></a>district <span class="ot">&lt;-</span> <span class="fu">st_read</span>(<span class="st">"data_cambodia/cambodia.gpkg"</span>, <span class="at">layer =</span> <span class="st">"district"</span>, <span class="at">quiet =</span> <span class="cn">TRUE</span>)</span>
<span id="cb1-10"><a href="#cb1-10" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-11"><a href="#cb1-11" aria-hidden="true" tabindex="-1"></a><span class="co"># Import locations of cases from an imaginary disease</span></span>
<span id="cb1-12"><a href="#cb1-12" aria-hidden="true" tabindex="-1"></a>cases <span class="ot">&lt;-</span> <span class="fu">st_read</span>(<span class="st">"data_cambodia/cambodia.gpkg"</span>, <span class="at">layer =</span> <span class="st">"cases"</span>, <span class="at">quiet =</span> <span class="cn">TRUE</span>)</span>
<span id="cb1-13"><a href="#cb1-13" aria-hidden="true" tabindex="-1"></a>cases <span class="ot">&lt;-</span> <span class="fu">subset</span>(cases, Disease <span class="sc">==</span> <span class="st">"W fever"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>The first step of any statistical analysis always consists on visualizing the data to check they were correctly loaded and to observe general pattern of the cases.</p>
<div class="cell" data-nm="true">
<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="co"># View the cases object</span></span>
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(cases)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre class="code-out"><code>Simple feature collection with 6 features and 2 fields
Geometry type: MULTIPOINT
Dimension:     XY
Bounding box:  xmin: 255891 ymin: 1179092 xmax: 506647.4 ymax: 1467441
Projected CRS: WGS 84 / UTM zone 48N
  id Disease                           geom
1  0 W fever MULTIPOINT ((280036.2 12841...
2  1 W fever MULTIPOINT ((451859.5 11790...
3  2 W fever  MULTIPOINT ((255891 1467441))
4  5 W fever MULTIPOINT ((506647.4 12322...
5  6 W fever  MULTIPOINT ((440668 1197958))
6  7 W fever MULTIPOINT ((481594.5 12714...</code></pre>
</div>
<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Map the cases</span></span>
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(mapsf)</span>
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(<span class="at">x =</span> district, <span class="at">border =</span> <span class="st">"white"</span>)</span>
<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(<span class="at">x =</span> country,<span class="at">lwd =</span> <span class="dv">2</span>, <span class="at">col =</span> <span class="cn">NA</span>, <span class="at">add =</span> <span class="cn">TRUE</span>)</span>
<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(<span class="at">x =</span> cases, <span class="at">lwd =</span> .<span class="dv">5</span>, <span class="at">col =</span> <span class="st">"#990000"</span>, <span class="at">pch =</span> <span class="dv">20</span>, <span class="at">add =</span> <span class="cn">TRUE</span>)</span>
<span id="cb4-7"><a href="#cb4-7" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_layout</span>(<span class="at">title =</span> <span class="st">"W Fever infections in Cambodia"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="07-basic_statistics_files/figure-html/cases_visualization-1.png" class="img-fluid" width="768"></p>
</div>
</div>
<p>In epidemiology, the true meaning of point is very questionable. If it usually gives the location of an observation, we cannot precisely tell if this observation represents an event of interest (e.g., illness, death, …) or a person at risk (e.g., a participant that may or may not experience the disease). If you can consider that the population at risk is uniformly distributed in small area (within a city for example), this is likely not the case at a country scale. Considering a ratio of event compared to a population at risk is often more informative than just considering cases. Administrative divisions of countries appear as great areal units for cases aggregation since they make available data on population count and structures. In this study, we will use the district as the areal unit of the study.</p>
<div class="cell" data-nm="true">
<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Aggregate cases over districts</span></span>
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>cases <span class="ot">&lt;-</span> <span class="fu">lengths</span>(<span class="fu">st_intersects</span>(district, cases))</span>
<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Plot number of cases using proportional symbol </span></span>
<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(<span class="at">x =</span> district) </span>
<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(</span>
<span id="cb5-7"><a href="#cb5-7" aria-hidden="true" tabindex="-1"></a>  <span class="at">x =</span> district, </span>
<span id="cb5-8"><a href="#cb5-8" aria-hidden="true" tabindex="-1"></a>  <span class="at">var =</span> <span class="st">"cases"</span>,</span>
<span id="cb5-9"><a href="#cb5-9" aria-hidden="true" tabindex="-1"></a>  <span class="at">val_max =</span> <span class="dv">50</span>,</span>
<span id="cb5-10"><a href="#cb5-10" aria-hidden="true" tabindex="-1"></a>  <span class="at">type =</span> <span class="st">"prop"</span>,</span>
<span id="cb5-11"><a href="#cb5-11" aria-hidden="true" tabindex="-1"></a>  <span class="at">col =</span> <span class="st">"#990000"</span>, </span>
<span id="cb5-12"><a href="#cb5-12" aria-hidden="true" tabindex="-1"></a>  <span class="at">leg_title =</span> <span class="st">"Cases"</span>)</span>
<span id="cb5-13"><a href="#cb5-13" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_layout</span>(<span class="at">title =</span> <span class="st">"Number of cases of W Fever"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="07-basic_statistics_files/figure-html/district_aggregate-1.png" class="img-fluid" width="768"></p>
</div>
</div>
<p>The incidence (<span class="math inline">\(\frac{cases}{population}\)</span>) expressed per 100,000 population is commonly use to represent cases distribution related to population density but other indicators exists. As example, the standardized incidence ratios (SIRs) represent the deviation of observed and expected number of cases and is expressed as <span class="math inline">\(SIR = \frac{Y_i}{E_i}\)</span> with <span class="math inline">\(Y_i\)</span>, the observed number of cases and <span class="math inline">\(E_i\)</span>, the expected number of cases. In this study, we computed the expected number of cases in each district by assuming infections are homogeneously distributed across Cambodia, i.e., the incidence is the same in each district. The SIR therefore represents the deviation of incidence compared to the average incidence across Cambodia.</p>
<div class="cell" data-nm="true">
<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Compute incidence in each district (per 100 000 population)</span></span>
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>incidence <span class="ot">&lt;-</span> district<span class="sc">$</span>cases<span class="sc">/</span>district<span class="sc">$</span>T_POP <span class="sc">*</span> <span class="dv">100000</span></span>
<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Compute the global risk</span></span>
<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a>rate <span class="ot">&lt;-</span> <span class="fu">sum</span>(district<span class="sc">$</span>cases)<span class="sc">/</span><span class="fu">sum</span>(district<span class="sc">$</span>T_POP)</span>
<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a><span class="co"># Compute expected number of cases </span></span>
<span id="cb6-8"><a href="#cb6-8" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>expected <span class="ot">&lt;-</span> district<span class="sc">$</span>T_POP <span class="sc">*</span> rate</span>
<span id="cb6-9"><a href="#cb6-9" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-10"><a href="#cb6-10" aria-hidden="true" tabindex="-1"></a><span class="co"># Compute SIR</span></span>
<span id="cb6-11"><a href="#cb6-11" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>SIR <span class="ot">&lt;-</span> district<span class="sc">$</span>cases <span class="sc">/</span> district<span class="sc">$</span>expected</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell" data-nm="true">
<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">2</span>))</span>
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Plot incidence </span></span>
<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(<span class="at">x =</span> district)</span>
<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(<span class="at">x =</span> district,</span>
<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a>       <span class="at">var =</span> <span class="fu">c</span>(<span class="st">"T_POP"</span>, <span class="st">"incidence"</span>),</span>
<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a>       <span class="at">type =</span> <span class="st">"prop_choro"</span>,</span>
<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a>       <span class="at">pal =</span> <span class="st">"Reds"</span>,</span>
<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a>       <span class="at">inches =</span> .<span class="dv">1</span>,</span>
<span id="cb7-10"><a href="#cb7-10" aria-hidden="true" tabindex="-1"></a>       <span class="at">breaks =</span> <span class="fu">exp</span>(<span class="fu">mf_get_breaks</span>(<span class="fu">log</span>(district<span class="sc">$</span>incidence<span class="sc">+</span><span class="dv">1</span>), <span class="at">breaks =</span> <span class="st">"pretty"</span>))<span class="sc">-</span><span class="dv">1</span>,</span>
<span id="cb7-11"><a href="#cb7-11" aria-hidden="true" tabindex="-1"></a>       <span class="at">leg_title =</span> <span class="fu">c</span>(<span class="st">"Population"</span>, <span class="st">"Incidence </span><span class="sc">\n</span><span class="st">(per 100 000)"</span>))</span>
<span id="cb7-12"><a href="#cb7-12" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_layout</span>(<span class="at">title =</span> <span class="st">"Incidence of W Fever"</span>)</span>
<span id="cb7-13"><a href="#cb7-13" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb7-14"><a href="#cb7-14" aria-hidden="true" tabindex="-1"></a><span class="co"># Plot SIRs</span></span>
<span id="cb7-15"><a href="#cb7-15" aria-hidden="true" tabindex="-1"></a><span class="co"># create breaks and associated color palette</span></span>
<span id="cb7-16"><a href="#cb7-16" aria-hidden="true" tabindex="-1"></a>break_SIR <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="dv">0</span>,<span class="fu">exp</span>(<span class="fu">mf_get_breaks</span>(<span class="fu">log</span>(district<span class="sc">$</span>SIR), <span class="at">nbreaks =</span> <span class="dv">8</span>, <span class="at">breaks =</span> <span class="st">"pretty"</span>)))</span>
<span id="cb7-17"><a href="#cb7-17" aria-hidden="true" tabindex="-1"></a>col_pal <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="st">"#273871"</span>, <span class="st">"#3267AD"</span>, <span class="st">"#6496C8"</span>, <span class="st">"#9BBFDD"</span>, <span class="st">"#CDE3F0"</span>, <span class="st">"#FFCEBC"</span>, <span class="st">"#FF967E"</span>, <span class="st">"#F64D41"</span>, <span class="st">"#B90E36"</span>)</span>
<span id="cb7-18"><a href="#cb7-18" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(<span class="at">x =</span> district)</span>
<span id="cb7-19"><a href="#cb7-19" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(<span class="at">x =</span> district,</span>
<span id="cb7-20"><a href="#cb7-20" aria-hidden="true" tabindex="-1"></a>       <span class="at">var =</span> <span class="fu">c</span>(<span class="st">"T_POP"</span>, <span class="st">"SIR"</span>),</span>
<span id="cb7-21"><a href="#cb7-21" aria-hidden="true" tabindex="-1"></a>       <span class="at">type =</span> <span class="st">"prop_choro"</span>,</span>
<span id="cb7-22"><a href="#cb7-22" aria-hidden="true" tabindex="-1"></a>       <span class="at">breaks =</span> break_SIR,</span>
<span id="cb7-23"><a href="#cb7-23" aria-hidden="true" tabindex="-1"></a>       <span class="at">pal =</span> col_pal,</span>
<span id="cb7-24"><a href="#cb7-24" aria-hidden="true" tabindex="-1"></a>       <span class="at">inches =</span> .<span class="dv">1</span>,</span>
<span id="cb7-25"><a href="#cb7-25" aria-hidden="true" tabindex="-1"></a>       <span class="co">#cex = 2,</span></span>
<span id="cb7-26"><a href="#cb7-26" aria-hidden="true" tabindex="-1"></a>       <span class="at">leg_title =</span> <span class="fu">c</span>(<span class="st">"Population"</span>, <span class="st">"SIR"</span>))</span>
<span id="cb7-27"><a href="#cb7-27" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_layout</span>(<span class="at">title =</span> <span class="st">"Standardized Incidence Ratio of W Fever"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="07-basic_statistics_files/figure-html/inc_visualization-1.png" class="img-fluid" width="1056"></p>
</div>
</div>
<p>These maps illustrate the spatial heterogeneity of the cases. The incidence shows how the disease vary from one district to another while the SIR highlight districts that have:</p>
<ul>
<li><p>higher risk than average (SIR &gt; 1) when standardized for population</p></li>
<li><p>lower risk than average (SIR &lt; 1) when standardized for population</p></li>
<li><p>average risk (SIR ~ 1) when standardized for population</p></li>
</ul>
<div class="callout-tip callout callout-style-default callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
To go further …
</div>
</div>
<div class="callout-body-container callout-body">
<p>In this example, we standardized the cases distribution for population count. This simple standardization assumes that the risk of contracting the disease is similar for each person. However, assumption does not hold for all diseases and for all observed events since confounding effects can create nuisance into the interpretations (e.g., the number of childhood illness and death outcomes in a district are usually related to the age pyramid). A confounding factor is a variable that influences both the dependent variable and independent variable, causing a spurious association. You should keep in mind that other standardization can be performed based on these confounding factors, i.e.&nbsp;variables known to have an effect but that you don’t want to analyze (e.g., sex ratio, occupations, age pyramid).</p>
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
<p><img src="img/Stat_Confounders.jpg" class="img-fluid figure-img" width="300"></p>
</figure>
</div>
<p>In addition, one can wonder what does an SIR ~ 1 means, i.e., what is the threshold to decide whether the SIR is greater, lower or equivalent to 1. The significant of the SIR can be tested globally (to determine whether or not the incidence is homogeneously distributed) and locally in each district (to determine Which district have an SIR different than 1). We won’t perform these analyses in this tutorial but you can look at the functions <code>?achisq.test()</code> (from <code>Dcluster</code> package <span class="citation" data-cites="DCluster">(<a href="references.html#ref-DCluster" role="doc-biblioref">Gómez-Rubio et al. 2015</a>)</span>) and <code>?probmap()</code> (from <code>spdep</code> package <span class="citation" data-cites="spdep">(<a href="references.html#ref-spdep" role="doc-biblioref">R. Bivand et al. 2015</a>)</span>) to compute these statistics.</p>
</div>
</div>
</section>
<section id="cluster-analysis" class="level2" data-number="6.2">
<h2 data-number="6.2" class="anchored" data-anchor-id="cluster-analysis"><span class="header-section-number">6.2</span> Cluster analysis</h2>
<section id="general-introduction" class="level3" data-number="6.2.1">
<h3 data-number="6.2.1" class="anchored" data-anchor-id="general-introduction"><span class="header-section-number">6.2.1</span> General introduction</h3>
<p>Why studying clusters in epidemiology? Cluster analysis help identifying unusual patterns that occurs during a given period of time. The underlying ultimate goal of such analysis is to explain the observation of such patterns. In epidemiology, we can distinguish two types of process that would explain heterogeneity in case distribution:</p>
<ul>
<li><p>The <strong>1st order effects</strong> are the spatial variations of cases distribution caused by underlying properties of environment or the population structure itself. In such process individual get infected independently from the rest of the population. Such process includes the infection through an environment at risk as, for example, air pollution, contaminated waters or soils and UV exposition. This effect assume that the observed pattern is caused by a difference in risk intensity.</p></li>
<li><p>The <strong>2nd order effects</strong> describes process of spread, contagion and diffusion of diseases caused by interactions between individuals. This includes transmission of infectious disease by proximity, but also the transmission of non-infectious disease, for example, with the diffusion of social norms within networks. This effect assume that the observed pattern is caused by correlations or co-variations.</p></li>
</ul>
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
<p><img src="img/Stat_order_effects.png" class="img-fluid figure-img" width="500"></p>
</figure>
</div>
<p>No statistical methods could distinguish between these competing processes since their outcome results in similar pattern of points. The cluster analysis help describing the magnitude and the location of pattern but in no way could answer the question of why such patterns occurs. It is therefore a step that help detecting cluster for description and surveillance purpose and rising hypothesis on the underlying process that will lead further investigations.</p>
<p>Knowledge about the disease and its transmission process could orientate the choice of the methods of study. We presented in this brief tutorial two methods of cluster detection, the Moran’s I test that test for spatial independence (likely related to 2nd order effects) and the scan statistics that test for homogeneous distribution (likely related 1st order effects). It relies on epidemiologist to select the tools that best serve the studied question.</p>
<div class="callout-note callout callout-style-default callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Statistic tests and distributions
</div>
</div>
<div class="callout-body-container callout-body">
<p>In statistics, problems are usually expressed by defining two hypotheses: the null hypothesis (H0), i.e., an <em>a priori</em> hypothesis of the studied phenomenon (e.g., the situation is a random) and the alternative hypothesis (H1), e.g., the situation is not random. The main principle is to measure how likely the observed situation belong to the ensemble of situation that are possible under the H0 hypothesis.</p>
<p>In mathematics, a probability distribution is a mathematical expression that represents what we would expect due to random chance. The choice of the probability distribution relies on the type of data you use (continuous, count, binary). In general, three distribution a used while studying disease rates, the Binomial, the Poisson and the Poisson-gamma mixture (also known as negative binomial) distributions.</p>
<p>Many the statistical tests assume by default that data are normally distributed. It implies that your variable is continuous and that all data could easily be represented by two parameters, the mean and the variance, i.e., each value have the same level of certainty. If many measure can be assessed under the normality assumption, this is usually not the case in epidemiology with strictly positives rates and count values that 1) does not fit the normal distribution and 2) does not provide with the same degree of certainty since variances likely differ between district due to different population size, i.e., some district have very sparse data (with high variance) while other have adequate data (with lower variance).</p>
<div class="cell" data-nm="true">
<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="co"># dataset statistics</span></span>
<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a>m_cases <span class="ot">&lt;-</span> <span class="fu">mean</span>(district<span class="sc">$</span>incidence)</span>
<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>sd_cases <span class="ot">&lt;-</span> <span class="fu">sd</span>(district<span class="sc">$</span>incidence)</span>
<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a><span class="fu">hist</span>(district<span class="sc">$</span>incidence, <span class="at">probability =</span> <span class="cn">TRUE</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="fl">0.4</span>), <span class="at">xlim =</span> <span class="fu">c</span>(<span class="sc">-</span><span class="dv">5</span>, <span class="dv">16</span>), <span class="at">xlab =</span> <span class="st">"Number of cases"</span>, <span class="at">ylab =</span> <span class="st">"Probability"</span>, <span class="at">main =</span> <span class="st">"Histogram of observed incidence compared</span><span class="sc">\n</span><span class="st">to Normal and Poisson distributions"</span>)</span>
<span id="cb8-6"><a href="#cb8-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb8-7"><a href="#cb8-7" aria-hidden="true" tabindex="-1"></a><span class="fu">curve</span>(<span class="fu">dnorm</span>(x, m_cases, sd_cases),<span class="at">col =</span> <span class="st">"blue"</span>,  <span class="at">lwd =</span> <span class="dv">1</span>, <span class="at">add =</span> <span class="cn">TRUE</span>)</span>
<span id="cb8-8"><a href="#cb8-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb8-9"><a href="#cb8-9" aria-hidden="true" tabindex="-1"></a><span class="fu">points</span>(<span class="dv">0</span><span class="sc">:</span><span class="fu">max</span>(district<span class="sc">$</span>incidence), <span class="fu">dpois</span>(<span class="dv">0</span><span class="sc">:</span><span class="fu">max</span>(district<span class="sc">$</span>incidence),m_cases),</span>
<span id="cb8-10"><a href="#cb8-10" aria-hidden="true" tabindex="-1"></a>       <span class="at">type =</span> <span class="st">'b'</span>, <span class="at">pch =</span> <span class="dv">20</span>, <span class="at">col =</span> <span class="st">"red"</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="fl">0.6</span>), <span class="at">lty =</span> <span class="dv">2</span>)</span>
<span id="cb8-11"><a href="#cb8-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb8-12"><a href="#cb8-12" aria-hidden="true" tabindex="-1"></a><span class="fu">legend</span>(<span class="st">"topright"</span>, <span class="at">legend =</span> <span class="fu">c</span>(<span class="st">"Normal distribution"</span>, <span class="st">"Poisson distribution"</span>, <span class="st">"Observed distribution"</span>), <span class="at">col =</span> <span class="fu">c</span>(<span class="st">"blue"</span>, <span class="st">"red"</span>, <span class="st">"black"</span>),<span class="at">pch =</span> <span class="fu">c</span>(<span class="cn">NA</span>, <span class="dv">20</span>, <span class="cn">NA</span>), <span class="at">lty =</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">1</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="07-basic_statistics_files/figure-html/distribution-1.png" class="img-fluid" width="576"></p>
</div>
</div>
<p>In this tutorial, we used the Poisson distribution in our statistical tests.</p>
</div>
</div>
</section>
<section id="test-for-spatial-autocorrelation-morans-i-test" class="level3" data-number="6.2.2">
<h3 data-number="6.2.2" class="anchored" data-anchor-id="test-for-spatial-autocorrelation-morans-i-test"><span class="header-section-number">6.2.2</span> Test for spatial autocorrelation (Moran’s I test)</h3>
<section id="the-global-morans-i-test" class="level4" data-number="6.2.2.1">
<h4 data-number="6.2.2.1" class="anchored" data-anchor-id="the-global-morans-i-test"><span class="header-section-number">6.2.2.1</span> The global Moran’s I test</h4>
<p>A popular test for spatial autocorrelation is the Moran’s test. This test tells us whether nearby units tend to exhibit similar incidences. It ranges from -1 to +1. A value of -1 denote that units with low rates are located near other units with high rates, while a Moran’s I value of +1 indicates a concentration of spatial units exhibiting similar rates.</p>
<div class="callout-note callout callout-style-default callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Moran’s I test
</div>
</div>
<div class="callout-body-container callout-body">
<p>The Moran’s statistics is:</p>
<p><span class="math display">\[I = \frac{N}{\sum_{i=1}^N\sum_{j=1}^Nw_{ij}}\frac{\sum_{i=1}^N\sum_{j=1}^Nw_{ij}(Y_i-\bar{Y})(Y_j - \bar{Y})}{\sum_{i=1}^N(Y_i-\bar{Y})^2}\]</span> with:</p>
<ul>
<li><p><span class="math inline">\(N\)</span>: the number of polygons,</p></li>
<li><p><span class="math inline">\(w_{ij}\)</span>: is a matrix of spatial weight with zeroes on the diagonal (i.e., <span class="math inline">\(w_{ii}=0\)</span>). For example, if polygons are neighbors, the weight takes the value <span class="math inline">\(1\)</span> otherwise it takes the value <span class="math inline">\(0\)</span>.</p></li>
<li><p><span class="math inline">\(Y_i\)</span>: the variable of interest,</p></li>
<li><p><span class="math inline">\(\bar{Y}\)</span>: the mean value of <span class="math inline">\(Y\)</span>.</p></li>
</ul>
<p>Under the Moran’s test, the statistics hypotheses are:</p>
<ul>
<li><p><strong>H0</strong>: the distribution of cases is spatially independent, i.e., <span class="math inline">\(I=0\)</span>.</p></li>
<li><p><strong>H1</strong>: the distribution of cases is spatially autocorrelated, i.e., <span class="math inline">\(I\ne0\)</span>.</p></li>
</ul>
</div>
</div>
<p>We will compute the Moran’s statistics using <code>spdep</code><span class="citation" data-cites="spdep">(<a href="references.html#ref-spdep" role="doc-biblioref">R. Bivand et al. 2015</a>)</span> and <code>Dcluster</code><span class="citation" data-cites="DCluster">(<a href="references.html#ref-DCluster" role="doc-biblioref">Gómez-Rubio et al. 2015</a>)</span> packages. <code>spdep</code> package provides a collection of functions to analyze spatial correlations of polygons and works with sp objects. In this example, we use <code>poly2nb()</code> and <code>nb2listw()</code>. These functions respectively detect the neighboring polygons and assign weight corresponding to <span class="math inline">\(1/\#\ of\ neighbors\)</span>. <code>Dcluster</code> package provides a set of functions for the detection of spatial clusters of disease using count data.</p>
<div class="cell" data-nm="true">
<div class="sourceCode cell-code" id="cb9"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="co">#install.packages("spdep")</span></span>
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a><span class="co">#install.packages("DCluster")</span></span>
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(spdep) <span class="co"># Functions for creating spatial weight, spatial analysis</span></span>
<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(DCluster)  <span class="co"># Package with functions for spatial cluster analysis</span></span>
<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">345</span>) <span class="co"># remove random sampling for reproducibility</span></span>
<span id="cb9-7"><a href="#cb9-7" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb9-8"><a href="#cb9-8" aria-hidden="true" tabindex="-1"></a>queen_nb <span class="ot">&lt;-</span> <span class="fu">poly2nb</span>(district) <span class="co"># Neighbors according to queen case</span></span>
<span id="cb9-9"><a href="#cb9-9" aria-hidden="true" tabindex="-1"></a>q_listw <span class="ot">&lt;-</span> <span class="fu">nb2listw</span>(queen_nb, <span class="at">style =</span> <span class="st">'W'</span>) <span class="co"># row-standardized weights</span></span>
<span id="cb9-10"><a href="#cb9-10" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb9-11"><a href="#cb9-11" aria-hidden="true" tabindex="-1"></a><span class="co"># Moran's I test</span></span>
<span id="cb9-12"><a href="#cb9-12" aria-hidden="true" tabindex="-1"></a>m_test <span class="ot">&lt;-</span> <span class="fu">moranI.test</span>(cases <span class="sc">~</span> <span class="fu">offset</span>(<span class="fu">log</span>(expected)), </span>
<span id="cb9-13"><a href="#cb9-13" aria-hidden="true" tabindex="-1"></a>                  <span class="at">data =</span> district,</span>
<span id="cb9-14"><a href="#cb9-14" aria-hidden="true" tabindex="-1"></a>                  <span class="at">model =</span> <span class="st">'poisson'</span>,</span>
<span id="cb9-15"><a href="#cb9-15" aria-hidden="true" tabindex="-1"></a>                  <span class="at">R =</span> <span class="dv">499</span>,</span>
<span id="cb9-16"><a href="#cb9-16" aria-hidden="true" tabindex="-1"></a>                  <span class="at">listw =</span> q_listw,</span>
<span id="cb9-17"><a href="#cb9-17" aria-hidden="true" tabindex="-1"></a>                  <span class="at">n =</span> <span class="fu">length</span>(district<span class="sc">$</span>cases), <span class="co"># number of regions</span></span>
<span id="cb9-18"><a href="#cb9-18" aria-hidden="true" tabindex="-1"></a>                  <span class="at">S0 =</span> <span class="fu">Szero</span>(q_listw)) <span class="co"># Global sum of weights</span></span>
<span id="cb9-19"><a href="#cb9-19" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(m_test)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre class="code-out"><code>Moran's I test of spatial autocorrelation 

    Type of boots.: parametric 
    Model used when sampling: Poisson 
    Number of simulations: 499 
    Statistic:  0.1566449 
    p-value :  0.006 </code></pre>
</div>
<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(m_test)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="07-basic_statistics_files/figure-html/MoransI-1.png" class="img-fluid" width="768"></p>
</div>
</div>
<p>The Moran’s statistics is here <span class="math inline">\(I =\)</span> 0.16. When comparing its value to the H0 distribution (built under 499 simulations), the probability of observing such a I value under the null hypothesis, i.e.&nbsp;the distribution of cases is spatially independent, is <span class="math inline">\(p_{value} =\)</span> 0.006. We therefore reject H0 with error risk of <span class="math inline">\(\alpha = 5\%\)</span>. The distribution of cases is therefore autocorrelated across districts in Cambodia.</p>
</section>
<section id="the-local-morans-i-lisa-test" class="level4" data-number="6.2.2.2">
<h4 data-number="6.2.2.2" class="anchored" data-anchor-id="the-local-morans-i-lisa-test"><span class="header-section-number">6.2.2.2</span> The Local Moran’s I LISA test</h4>
<p>The global Moran’s test provides us a global statistical value informing whether autocorrelation occurs over the territory but does not inform on where does these correlations occurs, i.e., what is the locations of the clusters. To identify such cluster, we can decompose the Moran’s I statistic to extract local information of the level of correlation of each district and its neighbors. This is called the Local Moran’s I LISA statistic. Because the Local Moran’s I LISA statistic test each district for autocorrelation independently, concern is raised about multiple testing limitations that increase the Type I error (<span class="math inline">\(\alpha\)</span>) of the statistical tests. The use of local test should therefore be study in light of explore and describes clusters once the global test has detected autocorrelation.</p>
<div class="callout-note callout callout-style-default callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Statistical test
</div>
</div>
<div class="callout-body-container callout-body">
<p>For each district <span class="math inline">\(i\)</span>, the Local Moran’s I statistics is:</p>
<p><span class="math display">\[I_i = \frac{(Y_i-\bar{Y})}{\sum_{i=1}^N(Y_i-\bar{Y})^2}\sum_{j=1}^Nw_{ij}(Y_j - \bar{Y}) \text{ with }  I = \sum_{i=1}^NI_i/N\]</span></p>
</div>
</div>
<p>The <code>localmoran()</code>function from the package <code>spdep</code> treats the variable of interest as if it was normally distributed. In some cases, this assumption could be reasonable for incidence rate, especially when the areal units of analysis have sufficiently large population count suggesting that the values have similar level of variances. Unfortunately, the local Moran’s test has not been implemented for Poisson distribution (population not large enough in some districts) in <code>spdep</code> package. However, Bivand <em>et al.</em> <span class="citation" data-cites="bivand2008applied">(<a href="references.html#ref-bivand2008applied" role="doc-biblioref">R. S. Bivand et al. 2008</a>)</span> provided some code to manually perform the analysis using Poisson distribution and this code was further implemented in the course “<a href="https://mkram01.github.io/EPI563-SpatialEPI/index.html">Spatial Epidemiology</a>”.</p>
<div class="cell" data-nm="true">
<div class="sourceCode cell-code" id="cb12"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Step 1 - Create the standardized deviation of observed from expected</span></span>
<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a>sd_lm <span class="ot">&lt;-</span> (district<span class="sc">$</span>cases <span class="sc">-</span> district<span class="sc">$</span>expected) <span class="sc">/</span> <span class="fu">sqrt</span>(district<span class="sc">$</span>expected)</span>
<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb12-4"><a href="#cb12-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Step 2 - Create a spatially lagged version of standardized deviation of neighbors</span></span>
<span id="cb12-5"><a href="#cb12-5" aria-hidden="true" tabindex="-1"></a>wsd_lm <span class="ot">&lt;-</span> <span class="fu">lag.listw</span>(q_listw, sd_lm)</span>
<span id="cb12-6"><a href="#cb12-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb12-7"><a href="#cb12-7" aria-hidden="true" tabindex="-1"></a><span class="co"># Step 3 - the local Moran's I is the product of step 1 and step 2</span></span>
<span id="cb12-8"><a href="#cb12-8" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>I_lm <span class="ot">&lt;-</span> sd_lm <span class="sc">*</span> wsd_lm</span>
<span id="cb12-9"><a href="#cb12-9" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb12-10"><a href="#cb12-10" aria-hidden="true" tabindex="-1"></a><span class="co"># Step 4 - setup parameters for simulation of the null distribution</span></span>
<span id="cb12-11"><a href="#cb12-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb12-12"><a href="#cb12-12" aria-hidden="true" tabindex="-1"></a><span class="co"># Specify number of simulations to run</span></span>
<span id="cb12-13"><a href="#cb12-13" aria-hidden="true" tabindex="-1"></a>nsim <span class="ot">&lt;-</span> <span class="dv">499</span></span>
<span id="cb12-14"><a href="#cb12-14" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb12-15"><a href="#cb12-15" aria-hidden="true" tabindex="-1"></a><span class="co"># Specify dimensions of result based on number of regions</span></span>
<span id="cb12-16"><a href="#cb12-16" aria-hidden="true" tabindex="-1"></a>N <span class="ot">&lt;-</span> <span class="fu">length</span>(district<span class="sc">$</span>expected)</span>
<span id="cb12-17"><a href="#cb12-17" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb12-18"><a href="#cb12-18" aria-hidden="true" tabindex="-1"></a><span class="co"># Create a matrix of zeros to hold results, with a row for each county, and a column for each simulation</span></span>
<span id="cb12-19"><a href="#cb12-19" aria-hidden="true" tabindex="-1"></a>sims <span class="ot">&lt;-</span> <span class="fu">matrix</span>(<span class="dv">0</span>, <span class="at">ncol =</span> nsim, <span class="at">nrow =</span> N)</span>
<span id="cb12-20"><a href="#cb12-20" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb12-21"><a href="#cb12-21" aria-hidden="true" tabindex="-1"></a><span class="co"># Step 5 - Start a for-loop to iterate over simulation columns</span></span>
<span id="cb12-22"><a href="#cb12-22" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span>(i <span class="cf">in</span> <span class="dv">1</span><span class="sc">:</span>nsim){</span>
<span id="cb12-23"><a href="#cb12-23" aria-hidden="true" tabindex="-1"></a>  y <span class="ot">&lt;-</span> <span class="fu">rpois</span>(N, <span class="at">lambda =</span> district<span class="sc">$</span>expected) <span class="co"># generate a random event count, given expected</span></span>
<span id="cb12-24"><a href="#cb12-24" aria-hidden="true" tabindex="-1"></a>  sd_lmi <span class="ot">&lt;-</span> (y <span class="sc">-</span> district<span class="sc">$</span>expected) <span class="sc">/</span> <span class="fu">sqrt</span>(district<span class="sc">$</span>expected) <span class="co"># standardized local measure</span></span>
<span id="cb12-25"><a href="#cb12-25" aria-hidden="true" tabindex="-1"></a>  wsd_lmi <span class="ot">&lt;-</span> <span class="fu">lag.listw</span>(q_listw, sd_lmi) <span class="co"># standardized spatially lagged measure</span></span>
<span id="cb12-26"><a href="#cb12-26" aria-hidden="true" tabindex="-1"></a>  sims[, i] <span class="ot">&lt;-</span> sd_lmi <span class="sc">*</span> wsd_lmi <span class="co"># this is the I(i) statistic under this iteration of null</span></span>
<span id="cb12-27"><a href="#cb12-27" aria-hidden="true" tabindex="-1"></a>}</span>
<span id="cb12-28"><a href="#cb12-28" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb12-29"><a href="#cb12-29" aria-hidden="true" tabindex="-1"></a><span class="co"># Step 6 - For each county, test where the observed value ranks with respect to the null simulations</span></span>
<span id="cb12-30"><a href="#cb12-30" aria-hidden="true" tabindex="-1"></a>xrank <span class="ot">&lt;-</span> <span class="fu">apply</span>(<span class="fu">cbind</span>(district<span class="sc">$</span>I_lm, sims), <span class="dv">1</span>, <span class="cf">function</span>(x) <span class="fu">rank</span>(x)[<span class="dv">1</span>])</span>
<span id="cb12-31"><a href="#cb12-31" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb12-32"><a href="#cb12-32" aria-hidden="true" tabindex="-1"></a><span class="co"># Step 7 - Calculate the difference between observed rank and total possible (nsim)</span></span>
<span id="cb12-33"><a href="#cb12-33" aria-hidden="true" tabindex="-1"></a>diff <span class="ot">&lt;-</span> nsim <span class="sc">-</span> xrank</span>
<span id="cb12-34"><a href="#cb12-34" aria-hidden="true" tabindex="-1"></a>diff <span class="ot">&lt;-</span> <span class="fu">ifelse</span>(diff <span class="sc">&gt;</span> <span class="dv">0</span>, diff, <span class="dv">0</span>)</span>
<span id="cb12-35"><a href="#cb12-35" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb12-36"><a href="#cb12-36" aria-hidden="true" tabindex="-1"></a><span class="co"># Step 8 - Assuming a uniform distribution of ranks, calculate p-value for observed</span></span>
<span id="cb12-37"><a href="#cb12-37" aria-hidden="true" tabindex="-1"></a><span class="co"># given the null distribution generate from simulations</span></span>
<span id="cb12-38"><a href="#cb12-38" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>pval_lm <span class="ot">&lt;-</span> <span class="fu">punif</span>((diff <span class="sc">+</span> <span class="dv">1</span>) <span class="sc">/</span> (nsim <span class="sc">+</span> <span class="dv">1</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Briefly, the process consist on 1) computing the I statistics for the observed data, 2) estimating the null distribution of the I statistics by performing random sampling into a poisson distribution and 3) comparing the observed I statistic with the null distribution to determine the probability to observe such value if the number of cases were spatially independent. For each district, we obtain a p-value based on the comparison of the observed value and the null distribution.</p>
<p>A conventional way of plotting these results is to classify the districts into 5 classes based on local Moran’s I output. The classification of cluster that are significantly autocorrelated to their neighbors is performed based on a comparison of the scaled incidence in the district compared to the scaled weighted averaged incidence of it neighboring districts (computed with <code>lag.listw()</code>):</p>
<ul>
<li><p>Districts that have higher-than-average rates in both index regions and their neighbors and showing statistically significant positive values for the local <span class="math inline">\(I_i\)</span> statistic are defined as <strong>High-High</strong> (hotspot of the disease)</p></li>
<li><p>Districts that have lower-than-average rates in both index regions and their neighbors and showing statistically significant positive values for the local <span class="math inline">\(I_i\)</span> statistic are defined as <strong>Low-Low</strong> (cold spot of the disease).</p></li>
<li><p>Districts that have higher-than-average rates in the index regions and lower-than-average rates in their neighbors, and showing statistically significant negative values for the local <span class="math inline">\(I_i\)</span> statistic are defined as <strong>High-Low</strong>(outlier with high incidence in an area with low incidence).</p></li>
<li><p>Districts that have lower-than-average rates in the index regions and higher-than-average rates in their neighbors, and showing statistically significant negative values for the local <span class="math inline">\(I_i\)</span> statistic are defined as <strong>Low-High</strong> (outlier of low incidence in area with high incidence).</p></li>
<li><p>Districts with non-significant values for the <span class="math inline">\(I_i\)</span> statistic are defined as <strong>Non-significant</strong>.</p></li>
</ul>
<div class="cell" data-nm="true">
<div class="sourceCode cell-code" id="cb13"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="co"># create lagged local raw_rate - in other words the average of the queen neighbors value</span></span>
<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a><span class="co"># values are scaled (centered and reduced) to be compared to average</span></span>
<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>lag_std   <span class="ot">&lt;-</span> <span class="fu">scale</span>(<span class="fu">lag.listw</span>(q_listw, <span class="at">var =</span> district<span class="sc">$</span>incidence))</span>
<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>incidence_std <span class="ot">&lt;-</span> <span class="fu">scale</span>(district<span class="sc">$</span>incidence)</span>
<span id="cb13-5"><a href="#cb13-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a><span class="co"># extract pvalues</span></span>
<span id="cb13-7"><a href="#cb13-7" aria-hidden="true" tabindex="-1"></a><span class="co"># district$lm_pv &lt;- lm_test[,5]</span></span>
<span id="cb13-8"><a href="#cb13-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb13-9"><a href="#cb13-9" aria-hidden="true" tabindex="-1"></a><span class="co"># Classify local moran's outputs</span></span>
<span id="cb13-10"><a href="#cb13-10" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>lm_class <span class="ot">&lt;-</span> <span class="cn">NA</span></span>
<span id="cb13-11"><a href="#cb13-11" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>lm_class[district<span class="sc">$</span>incidence_std <span class="sc">&gt;=</span><span class="dv">0</span> <span class="sc">&amp;</span> district<span class="sc">$</span>lag_std <span class="sc">&gt;=</span><span class="dv">0</span>] <span class="ot">&lt;-</span> <span class="st">'High-High'</span></span>
<span id="cb13-12"><a href="#cb13-12" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>lm_class[district<span class="sc">$</span>incidence_std <span class="sc">&lt;=</span><span class="dv">0</span> <span class="sc">&amp;</span> district<span class="sc">$</span>lag_std <span class="sc">&lt;=</span><span class="dv">0</span>] <span class="ot">&lt;-</span> <span class="st">'Low-Low'</span></span>
<span id="cb13-13"><a href="#cb13-13" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>lm_class[district<span class="sc">$</span>incidence_std <span class="sc">&lt;=</span><span class="dv">0</span> <span class="sc">&amp;</span> district<span class="sc">$</span>lag_std <span class="sc">&gt;=</span><span class="dv">0</span>] <span class="ot">&lt;-</span> <span class="st">'Low-High'</span></span>
<span id="cb13-14"><a href="#cb13-14" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>lm_class[district<span class="sc">$</span>incidence_std <span class="sc">&gt;=</span><span class="dv">0</span> <span class="sc">&amp;</span> district<span class="sc">$</span>lag_std <span class="sc">&lt;=</span><span class="dv">0</span>] <span class="ot">&lt;-</span> <span class="st">'High-Low'</span></span>
<span id="cb13-15"><a href="#cb13-15" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>lm_class[district<span class="sc">$</span>pval_lm <span class="sc">&gt;=</span> <span class="fl">0.05</span>] <span class="ot">&lt;-</span> <span class="st">'Non-significant'</span></span>
<span id="cb13-16"><a href="#cb13-16" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb13-17"><a href="#cb13-17" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>lm_class <span class="ot">&lt;-</span> <span class="fu">factor</span>(district<span class="sc">$</span>lm_class, <span class="at">levels=</span><span class="fu">c</span>(<span class="st">"High-High"</span>, <span class="st">"Low-Low"</span>, <span class="st">"High-Low"</span>,  <span class="st">"Low-High"</span>, <span class="st">"Non-significant"</span>) )</span>
<span id="cb13-18"><a href="#cb13-18" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb13-19"><a href="#cb13-19" aria-hidden="true" tabindex="-1"></a><span class="co"># create map</span></span>
<span id="cb13-20"><a href="#cb13-20" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(<span class="at">x =</span> district,</span>
<span id="cb13-21"><a href="#cb13-21" aria-hidden="true" tabindex="-1"></a>       <span class="at">var =</span> <span class="st">"lm_class"</span>,</span>
<span id="cb13-22"><a href="#cb13-22" aria-hidden="true" tabindex="-1"></a>       <span class="at">type =</span> <span class="st">"typo"</span>,</span>
<span id="cb13-23"><a href="#cb13-23" aria-hidden="true" tabindex="-1"></a>       <span class="at">cex =</span> <span class="dv">2</span>,</span>
<span id="cb13-24"><a href="#cb13-24" aria-hidden="true" tabindex="-1"></a>       <span class="at">col_na =</span> <span class="st">"white"</span>,</span>
<span id="cb13-25"><a href="#cb13-25" aria-hidden="true" tabindex="-1"></a>       <span class="co">#val_order = c("High-High", "Low-Low", "High-Low",  "Low-High", "Non-significant") ,</span></span>
<span id="cb13-26"><a href="#cb13-26" aria-hidden="true" tabindex="-1"></a>       <span class="at">pal =</span> <span class="fu">c</span>(<span class="st">"#6D0026"</span> , <span class="st">"blue"</span>,  <span class="st">"white"</span>) , <span class="co"># "#FF755F","#7FABD3" ,</span></span>
<span id="cb13-27"><a href="#cb13-27" aria-hidden="true" tabindex="-1"></a>       <span class="at">leg_title =</span> <span class="st">"Clusters"</span>)</span>
<span id="cb13-28"><a href="#cb13-28" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb13-29"><a href="#cb13-29" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_layout</span>(<span class="at">title =</span> <span class="st">"Cluster using Local Moran's I statistic"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="07-basic_statistics_files/figure-html/LocalMoransI_plt-1.png" class="img-fluid" width="768"></p>
</div>
</div>
</section>
</section>
<section id="spatial-scan-statistics" class="level3" data-number="6.2.3">
<h3 data-number="6.2.3" class="anchored" data-anchor-id="spatial-scan-statistics"><span class="header-section-number">6.2.3</span> Spatial scan statistics</h3>
<p>While Moran’s indices focus on testing for autocorrelation between neighboring polygons (under the null assumption of spatial independence), the spatial scan statistic aims at identifying an abnormal higher risk in a given region compared to the risk outside of this region (under the null assumption of homogeneous distribution). The conception of a cluster is therefore different between the two methods.</p>
<p>The function <code>kulldorff</code> from the package <code>SpatialEpi</code> <span class="citation" data-cites="SpatialEpi">(<a href="references.html#ref-SpatialEpi" role="doc-biblioref">Kim and Wakefield 2010</a>)</span> is a simple tool to implement spatial-only scan statistics.</p>
<div class="callout-note callout callout-style-default callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Kulldorf test
</div>
</div>
<div class="callout-body-container callout-body">
<p>Under the kulldorff test, the statistics hypotheses are:</p>
<ul>
<li><p><strong>H0</strong>: the risk is constant over the area, i.e., there is a spatial homogeneity of the incidence.</p></li>
<<<<<<< HEAD
<li><p><strong>H1</strong>: The observed window have higher incidence than the rest of the area , i.e., there is a spatial heterogeneity of incidence.</p></li>
=======
<li><p><strong>H1</strong>: the observed window have higher incidence than the rest of the area , i.e., there is a spatial heterogeneity of incidence.</p></li>
>>>>>>> refs/remotes/origin/main
</ul>
</div>
</div>
<p>Briefly, the kulldorff scan statistics scan the area for clusters using several steps:</p>
<ol type="1">
<li><p>It create a circular window of observation by defining a single location and an associated radius of the windows varying from 0 to a large number that depends on population distribution (largest radius could include 50% of the population).</p></li>
<li><p>It aggregates the count of events and the population at risk (or an expected count of events) inside and outside the window of observation.</p></li>
<li><p>Finally, it computes the likelihood ratio and test whether the risk is equal inside versus outside the windows (H0) or greater inside the observed window (H1). The H0 distribution is estimated by simulating the distribution of counts under the null hypothesis (homogeneous risk).</p></li>
<li><p>These 3 steps are repeated for each location and each possible windows-radii.</p></li>
</ol>
<p>While we test the significance of a large number of observation windows, one can raise concern about multiple testing and Type I error. This approach however suggest that we are not interest in a set of signifiant cluster but only in a most-likely cluster. This <em>a priori</em> restriction eliminate concern for multpile comparison since the test is simplified to a statistically significance of one single most-likely cluster.</p>
<p>Because we tested all-possible locations and window-radius, we can also choose to look at secondary clusters. In this case, you should keep in mind that increasing the number of secondary cluster you select, increases the risk for Type I error.</p>
<div class="cell" data-nm="true">
<div class="sourceCode cell-code" id="cb14"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="co">#install.packages("SpatialEpi")</span></span>
<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"SpatialEpi"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>The use of R spatial object is not implements in <code>kulldorff()</code> function. It uses instead matrix of xy coordinates that represents the centroids of the districts. A given district is included into the observed circular window if its centroids fall into the circle.</p>
<div class="cell" data-nm="true">
<div class="sourceCode cell-code" id="cb15"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a>district_xy <span class="ot">&lt;-</span> <span class="fu">st_centroid</span>(district) <span class="sc">%&gt;%</span> </span>
<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">st_coordinates</span>()</span>
<span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb15-4"><a href="#cb15-4" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(district_xy)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre class="code-out"><code>         X       Y
1 330823.3 1464560
2 749758.3 1541787
3 468384.0 1277007
4 494548.2 1215261
5 459644.2 1194615
6 360528.3 1516339</code></pre>
</div>
</div>
<p>We can then call kulldorff function (you are strongly encouraged to call <code>?kulldorff</code> to properly call the function). The <code>alpha.level</code> threshold filter for the secondary clusters that will be retained. The most-likely cluster will be saved whatever its significance.</p>
<div class="cell" data-nm="true">
<div class="sourceCode cell-code" id="cb17"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a>kd_Wfever <span class="ot">&lt;-</span> <span class="fu">kulldorff</span>(district_xy, </span>
<span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a>                <span class="at">cases =</span> district<span class="sc">$</span>cases,</span>
<span id="cb17-3"><a href="#cb17-3" aria-hidden="true" tabindex="-1"></a>                <span class="at">population =</span> district<span class="sc">$</span>T_POP,</span>
<span id="cb17-4"><a href="#cb17-4" aria-hidden="true" tabindex="-1"></a>                <span class="at">expected.cases =</span> district<span class="sc">$</span>expected,</span>
<span id="cb17-5"><a href="#cb17-5" aria-hidden="true" tabindex="-1"></a>                <span class="at">pop.upper.bound =</span> <span class="fl">0.5</span>, <span class="co"># include maximum 50% of the population in a windows</span></span>
<span id="cb17-6"><a href="#cb17-6" aria-hidden="true" tabindex="-1"></a>                <span class="at">n.simulations =</span> <span class="dv">499</span>,</span>
<span id="cb17-7"><a href="#cb17-7" aria-hidden="true" tabindex="-1"></a>                <span class="at">alpha.level =</span> <span class="fl">0.2</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="07-basic_statistics_files/figure-html/kd_test-1.png" class="img-fluid" width="576"></p>
</div>
</div>
<p>The function plot the histogram of the distribution of log-likelihood ratio simulated under the null hypothesis that is estimated based on Monte Carlo simulations. The observed value of the most significant cluster identified from all possible scans is compared to the distribution to determine significance. All outputs are saved into an R object, here called <code>kd_Wfever</code>. Unfortunately, the package did not develop any summary and visualization of the results but we can explore the output object.</p>
<div class="cell" data-nm="true">
<div class="sourceCode cell-code" id="cb18"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="fu">names</span>(kd_Wfever)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre class="code-out"><code>[1] "most.likely.cluster" "secondary.clusters"  "type"               
[4] "log.lkhd"            "simulated.log.lkhd" </code></pre>
</div>
</div>
<p>First, we can focus on the most likely cluster and explore its characteristics.</p>
<div class="cell" data-nm="true">
<div class="sourceCode cell-code" id="cb20"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="co"># We can see which districts (r number) belong to this cluster</span></span>
<span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a>kd_Wfever<span class="sc">$</span>most.likely.cluster<span class="sc">$</span>location.IDs.included</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre class="code-out"><code> [1]  48  93  66 180 133  29 194 118  50 144  31 141   3 117  22  43 142</code></pre>
</div>
<div class="sourceCode cell-code" id="cb22"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="co"># standardized incidence ratio</span></span>
<span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a>kd_Wfever<span class="sc">$</span>most.likely.cluster<span class="sc">$</span>SMR</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre class="code-out"><code>[1] 2.303106</code></pre>
</div>
<div class="sourceCode cell-code" id="cb24"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a><span class="co"># number of observed and expected cases in this cluster</span></span>
<span id="cb24-2"><a href="#cb24-2" aria-hidden="true" tabindex="-1"></a>kd_Wfever<span class="sc">$</span>most.likely.cluster<span class="sc">$</span>number.of.cases</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre class="code-out"><code>[1] 122</code></pre>
</div>
<div class="sourceCode cell-code" id="cb26"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb26-1"><a href="#cb26-1" aria-hidden="true" tabindex="-1"></a>kd_Wfever<span class="sc">$</span>most.likely.cluster<span class="sc">$</span>expected.cases</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre class="code-out"><code>[1] 52.97195</code></pre>
</div>
</div>
<p>17 districts belong to the cluster and its number of cases is 2.3 times higher than the expected number of cases.</p>
<p>Similarly, we could study the secondary clusters. Results are saved in a list.</p>
<div class="cell" data-nm="true">
<div class="sourceCode cell-code" id="cb28"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb28-1"><a href="#cb28-1" aria-hidden="true" tabindex="-1"></a><span class="co"># We can see which districts (r number) belong to this cluster</span></span>
<span id="cb28-2"><a href="#cb28-2" aria-hidden="true" tabindex="-1"></a><span class="fu">length</span>(kd_Wfever<span class="sc">$</span>secondary.clusters)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre class="code-out"><code>[1] 1</code></pre>
</div>
<div class="sourceCode cell-code" id="cb30"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb30-1"><a href="#cb30-1" aria-hidden="true" tabindex="-1"></a><span class="co"># retrieve data for all secondary clusters into a table</span></span>
<span id="cb30-2"><a href="#cb30-2" aria-hidden="true" tabindex="-1"></a>df_secondary_clusters <span class="ot">&lt;-</span> <span class="fu">data.frame</span>(<span class="at">SMR =</span> <span class="fu">sapply</span>(kd_Wfever<span class="sc">$</span>secondary.clusters, <span class="st">'[['</span>, <span class="dv">5</span>),  </span>
<span id="cb30-3"><a href="#cb30-3" aria-hidden="true" tabindex="-1"></a>                          <span class="at">number.of.cases =</span> <span class="fu">sapply</span>(kd_Wfever<span class="sc">$</span>secondary.clusters, <span class="st">'[['</span>, <span class="dv">3</span>),</span>
<span id="cb30-4"><a href="#cb30-4" aria-hidden="true" tabindex="-1"></a>                          <span class="at">expected.cases =</span> <span class="fu">sapply</span>(kd_Wfever<span class="sc">$</span>secondary.clusters, <span class="st">'[['</span>, <span class="dv">4</span>),</span>
<span id="cb30-5"><a href="#cb30-5" aria-hidden="true" tabindex="-1"></a>                          <span class="at">p.value =</span> <span class="fu">sapply</span>(kd_Wfever<span class="sc">$</span>secondary.clusters, <span class="st">'[['</span>, <span class="dv">8</span>))</span>
<span id="cb30-6"><a href="#cb30-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb30-7"><a href="#cb30-7" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(df_secondary_clusters)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre class="code-out"><code>       SMR number.of.cases expected.cases p.value
1 3.767698              16       4.246625   0.008</code></pre>
</div>
</div>
<p>We only have one secondary cluster composed of one district.</p>
<div class="cell" data-nm="true">
<div class="sourceCode cell-code" id="cb32"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb32-1"><a href="#cb32-1" aria-hidden="true" tabindex="-1"></a><span class="co"># create empty column to store cluster informations</span></span>
<span id="cb32-2"><a href="#cb32-2" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>k_cluster <span class="ot">&lt;-</span> <span class="cn">NA</span></span>
<span id="cb32-3"><a href="#cb32-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb32-4"><a href="#cb32-4" aria-hidden="true" tabindex="-1"></a><span class="co"># save cluster information from kulldorff outputs</span></span>
<span id="cb32-5"><a href="#cb32-5" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>k_cluster[kd_Wfever<span class="sc">$</span>most.likely.cluster<span class="sc">$</span>location.IDs.included] <span class="ot">&lt;-</span> <span class="st">'Most likely cluster'</span></span>
<span id="cb32-6"><a href="#cb32-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb32-7"><a href="#cb32-7" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span>(i <span class="cf">in</span> <span class="dv">1</span><span class="sc">:</span><span class="fu">length</span>(kd_Wfever<span class="sc">$</span>secondary.clusters)){</span>
<span id="cb32-8"><a href="#cb32-8" aria-hidden="true" tabindex="-1"></a>district<span class="sc">$</span>k_cluster[kd_Wfever<span class="sc">$</span>secondary.clusters[[i]]<span class="sc">$</span>location.IDs.included] <span class="ot">&lt;-</span> <span class="fu">paste</span>(</span>
<span id="cb32-9"><a href="#cb32-9" aria-hidden="true" tabindex="-1"></a>  <span class="st">'Secondary cluster'</span>, i, <span class="at">sep =</span> <span class="st">''</span>)</span>
<span id="cb32-10"><a href="#cb32-10" aria-hidden="true" tabindex="-1"></a>}</span>
<span id="cb32-11"><a href="#cb32-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb32-12"><a href="#cb32-12" aria-hidden="true" tabindex="-1"></a><span class="co">#district$k_cluster[is.na(district$k_cluster)] &lt;- "No cluster"</span></span>
<span id="cb32-13"><a href="#cb32-13" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb32-14"><a href="#cb32-14" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb32-15"><a href="#cb32-15" aria-hidden="true" tabindex="-1"></a><span class="co"># create map</span></span>
<span id="cb32-16"><a href="#cb32-16" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(<span class="at">x =</span> district,</span>
<span id="cb32-17"><a href="#cb32-17" aria-hidden="true" tabindex="-1"></a>       <span class="at">var =</span> <span class="st">"k_cluster"</span>,</span>
<span id="cb32-18"><a href="#cb32-18" aria-hidden="true" tabindex="-1"></a>       <span class="at">type =</span> <span class="st">"typo"</span>,</span>
<span id="cb32-19"><a href="#cb32-19" aria-hidden="true" tabindex="-1"></a>       <span class="at">cex =</span> <span class="dv">2</span>,</span>
<span id="cb32-20"><a href="#cb32-20" aria-hidden="true" tabindex="-1"></a>       <span class="at">col_na =</span> <span class="st">"white"</span>,</span>
<span id="cb32-21"><a href="#cb32-21" aria-hidden="true" tabindex="-1"></a>       <span class="at">pal =</span> <span class="fu">mf_get_pal</span>(<span class="at">palette =</span> <span class="st">"Reds"</span>, <span class="at">n =</span> <span class="dv">3</span>)[<span class="dv">1</span><span class="sc">:</span><span class="dv">2</span>],</span>
<span id="cb32-22"><a href="#cb32-22" aria-hidden="true" tabindex="-1"></a>       <span class="at">leg_title =</span> <span class="st">"Clusters"</span>)</span>
<span id="cb32-23"><a href="#cb32-23" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb32-24"><a href="#cb32-24" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_layout</span>(<span class="at">title =</span> <span class="st">"Cluster using kulldorf scan statistic"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="07-basic_statistics_files/figure-html/plt_clusters-1.png" class="img-fluid" width="768"></p>
</div>
</div>
<div class="callout-tip callout callout-style-default callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
To go further …
</div>
</div>
<div class="callout-body-container callout-body">
<p>In this example, the expected number of cases was defined using the population count but note that standardization over other variables as age could also be implemented with the <code>strata</code> parameter in the <code>kulldorff()</code> function.</p>
<p>In addition, this cluster analysis was performed solely using the spatial scan but you should keep in mind that this method of cluster detection can be implemented for spatio-temporal data as well where the cluster definition is an abnormal number of cases in a delimited spatial area and during a given period of time. The windows of observation are therefore defined for a different center, radius and time-period. You should take a look at the function <code>scan_ep_poisson()</code> function in the package <code>scanstatistic</code> <span class="citation" data-cites="scanstatistics">(<a href="references.html#ref-scanstatistics" role="doc-biblioref">Allévius 2018</a>)</span> for this analysis.</p>
</div>
</div>
</section>
</section>
<section id="conclusion" class="level2" data-number="6.3">
<h2 data-number="6.3" class="anchored" data-anchor-id="conclusion"><span class="header-section-number">6.3</span> Conclusion</h2>
<div class="cell" data-nm="true">
<div class="sourceCode cell-code" id="cb33"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb33-1"><a href="#cb33-1" aria-hidden="true" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">2</span>))</span>
<span id="cb33-2"><a href="#cb33-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb33-3"><a href="#cb33-3" aria-hidden="true" tabindex="-1"></a><span class="co"># create map</span></span>
<span id="cb33-4"><a href="#cb33-4" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(<span class="at">x =</span> district,</span>
<span id="cb33-5"><a href="#cb33-5" aria-hidden="true" tabindex="-1"></a>       <span class="at">var =</span> <span class="st">"lm_class"</span>,</span>
<span id="cb33-6"><a href="#cb33-6" aria-hidden="true" tabindex="-1"></a>       <span class="at">type =</span> <span class="st">"typo"</span>,</span>
<span id="cb33-7"><a href="#cb33-7" aria-hidden="true" tabindex="-1"></a>       <span class="at">cex =</span> <span class="dv">2</span>,</span>
<span id="cb33-8"><a href="#cb33-8" aria-hidden="true" tabindex="-1"></a>       <span class="at">col_na =</span> <span class="st">"white"</span>,</span>
<span id="cb33-9"><a href="#cb33-9" aria-hidden="true" tabindex="-1"></a>       <span class="at">pal =</span> <span class="fu">c</span>(<span class="st">"#6D0026"</span> , <span class="st">"blue"</span>,  <span class="st">"white"</span>) , <span class="co"># "#FF755F","#7FABD3" ,</span></span>
<span id="cb33-10"><a href="#cb33-10" aria-hidden="true" tabindex="-1"></a>       <span class="at">leg_title =</span> <span class="st">"Clusters"</span>)</span>
<span id="cb33-11"><a href="#cb33-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb33-12"><a href="#cb33-12" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_layout</span>(<span class="at">title =</span> <span class="st">"Cluster using Local Moran's I statistic"</span>)</span>
<span id="cb33-13"><a href="#cb33-13" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb33-14"><a href="#cb33-14" aria-hidden="true" tabindex="-1"></a><span class="co"># create map</span></span>
<span id="cb33-15"><a href="#cb33-15" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_map</span>(<span class="at">x =</span> district,</span>
<span id="cb33-16"><a href="#cb33-16" aria-hidden="true" tabindex="-1"></a>       <span class="at">var =</span> <span class="st">"k_cluster"</span>,</span>
<span id="cb33-17"><a href="#cb33-17" aria-hidden="true" tabindex="-1"></a>       <span class="at">type =</span> <span class="st">"typo"</span>,</span>
<span id="cb33-18"><a href="#cb33-18" aria-hidden="true" tabindex="-1"></a>       <span class="at">cex =</span> <span class="dv">2</span>,</span>
<span id="cb33-19"><a href="#cb33-19" aria-hidden="true" tabindex="-1"></a>       <span class="at">col_na =</span> <span class="st">"white"</span>,</span>
<span id="cb33-20"><a href="#cb33-20" aria-hidden="true" tabindex="-1"></a>       <span class="at">pal =</span> <span class="fu">mf_get_pal</span>(<span class="at">palette =</span> <span class="st">"Reds"</span>, <span class="at">n =</span> <span class="dv">3</span>)[<span class="dv">1</span><span class="sc">:</span><span class="dv">2</span>],</span>
<span id="cb33-21"><a href="#cb33-21" aria-hidden="true" tabindex="-1"></a>       <span class="at">leg_title =</span> <span class="st">"Clusters"</span>)</span>
<span id="cb33-22"><a href="#cb33-22" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb33-23"><a href="#cb33-23" aria-hidden="true" tabindex="-1"></a><span class="fu">mf_layout</span>(<span class="at">title =</span> <span class="st">"Cluster using kulldorf scan statistic"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="07-basic_statistics_files/figure-html/conclusion-1.png" class="img-fluid" width="768"></p>
</div>
</div>
<p>Both methods identified significant clusters. The two methods could identify a cluster around Phnom Penh after standardization for population counts. However, the identified clusters does not rely on the same assumption. While the Moran’s test wonder whether their is any autocorrelation between clusters (i.e.&nbsp;second order effects of infection), the Kulldorff scan statistics wonder whether their is any heterogeneity in the case distribution. None of these test can inform on the infection processes (first or second order) for the studied disease and previous knowledge on the disease will help selecting the most accurate test.</p>
<div class="callout-note callout callout-style-default callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Note
</div>
</div>
<div class="callout-body-container callout-body">
<p>In this example, Cambodia is treated as an island, i.e.&nbsp;there is no data outside of its borders. In reality, some clusters can occurs across country’s borders. You should be aware that such district will likely not be detected by these analysis. This border effect is still a hot topic in spatial studies and there is no conventional ways to deal with it. You can find in the literature some suggestion on how to deals with these border effect as assigning weights, or extrapolating data.</p>
</div>
</div>


<div id="refs" class="references csl-bib-body hanging-indent" role="doc-bibliography" style="display: none">
<div id="ref-scanstatistics" class="csl-entry" role="doc-biblioentry">
Allévius, Benjamin. 2018. <span>“Scanstatistics: Space-Time Anomaly Detection Using Scan Statistics.”</span> <em>Journal of Open Source Software</em> 3 (25): 515.
</div>
<div id="ref-bivand2008applied" class="csl-entry" role="doc-biblioentry">
Bivand, Roger S, Edzer J Pebesma, Virgilio Gómez-Rubio, and Edzer Jan Pebesma. 2008. <em>Applied Spatial Data Analysis with r</em>. Vol. 747248717. Springer.
</div>
<div id="ref-spdep" class="csl-entry" role="doc-biblioentry">
Bivand, Roger, Micah Altman, Luc Anselin, Renato Assunção, Olaf Berke, Andrew Bernat, and Guillaume Blanchet. 2015. <span>“Package <span>‘Spdep’</span>.”</span> <em>The Comprehensive R Archive Network</em>.
</div>
<div id="ref-DCluster" class="csl-entry" role="doc-biblioentry">
Gómez-Rubio, Virgilio, Juan Ferrándiz-Ferragud, Antonio López-Quı́lez, et al. 2015. <span>“Package <span>‘DCluster’</span>.”</span>
</div>
<div id="ref-SpatialEpi" class="csl-entry" role="doc-biblioentry">
Kim, Albert Y, and Jon Wakefield. 2010. <span>“R Data and Methods for Spatial Epidemiology: The SpatialEpi Package.”</span> <em>Dept of Statistics, University of Washington</em>.
</div>
</div>
</section>

</main> <!-- /main -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
  const toggleBodyColorMode = (bsSheetEl) => {
    const mode = bsSheetEl.getAttribute("data-mode");
    const bodyEl = window.document.querySelector("body");
    if (mode === "dark") {
      bodyEl.classList.add("quarto-dark");
      bodyEl.classList.remove("quarto-light");
    } else {
      bodyEl.classList.add("quarto-light");
      bodyEl.classList.remove("quarto-dark");
    }
  }
  const toggleBodyColorPrimary = () => {
    const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
    if (bsSheetEl) {
      toggleBodyColorMode(bsSheetEl);
    }
  }
  toggleBodyColorPrimary();  
  const icon = "";
  const anchorJS = new window.AnchorJS();
  anchorJS.options = {
    placement: 'right',
    icon: icon
  };
  anchorJS.add('.anchored');
  const clipboard = new window.ClipboardJS('.code-copy-button', {
    target: function(trigger) {
      return trigger.previousElementSibling;
    }
  });
  clipboard.on('success', function(e) {
    // button target
    const button = e.trigger;
    // don't keep focus
    button.blur();
    // flash "checked"
    button.classList.add('code-copy-button-checked');
    var currentTitle = button.getAttribute("title");
    button.setAttribute("title", "Copied!");
    setTimeout(function() {
      button.setAttribute("title", currentTitle);
      button.classList.remove('code-copy-button-checked');
    }, 1000);
    // clear code selection
    e.clearSelection();
  });
  function tippyHover(el, contentFn) {
    const config = {
      allowHTML: true,
      content: contentFn,
      maxWidth: 500,
      delay: 100,
      arrow: false,
      appendTo: function(el) {
          return el.parentElement;
      },
      interactive: true,
      interactiveBorder: 10,
      theme: 'quarto',
      placement: 'bottom-start'
    };
    window.tippy(el, config); 
  }
  const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
  for (var i=0; i<noterefs.length; i++) {
    const ref = noterefs[i];
    tippyHover(ref, function() {
      // use id or data attribute instead here
      let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
      try { href = new URL(href).hash; } catch {}
      const id = href.replace(/^#\/?/, "");
      const note = window.document.getElementById(id);
      return note.innerHTML;
    });
  }
  var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
  for (var i=0; i<bibliorefs.length; i++) {
    const ref = bibliorefs[i];
    const cites = ref.parentNode.getAttribute('data-cites').split(' ');
    tippyHover(ref, function() {
      var popup = window.document.createElement('div');
      cites.forEach(function(cite) {
        var citeDiv = window.document.createElement('div');
        citeDiv.classList.add('hanging-indent');
        citeDiv.classList.add('csl-entry');
        var biblioDiv = window.document.getElementById('ref-' + cite);
        if (biblioDiv) {
          citeDiv.innerHTML = biblioDiv.innerHTML;
        }
        popup.appendChild(citeDiv);
      });
      return popup.innerHTML;
    });
  }
    var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//);
      var filterRegex = new RegExp('/' + window.location.host + '/');
    var isInternal = (href) => {
        return filterRegex.test(href) || localhostRegex.test(href);
    }
    // Inspect non-navigation links and adorn them if external
    var links = window.document.querySelectorAll('a:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external)');
    for (var i=0; i<links.length; i++) {
      const link = links[i];
      if (!isInternal(link.href)) {
          // target, if specified
          link.setAttribute("target", "_blank");
      }
    }
});
</script>
<nav class="page-navigation">
  <div class="nav-page nav-page-previous">
      <a href="./05-mapping_with_r.html" class="pagination-link">
        <i class="bi bi-arrow-left-short"></i> <span class="nav-page-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Mapping With R</span></span>
      </a>          
  </div>
  <div class="nav-page nav-page-next">
      <a href="./08-exercices.html" class="pagination-link">
        <span class="nav-page-text">Exercices</span> <i class="bi bi-arrow-right-short"></i>
      </a>
  </div>
</nav>
</div> <!-- /content -->
<footer class="footer">
  <div class="nav-footer">
    <div class="nav-footer-left">UMR 228 ESPACE-DEV</div>   
    <div class="nav-footer-right"><img src="img/ird_footer.png" height="50"></div>
  </div>
</footer>



<script src="site_libs/quarto-html/zenscroll-min.js"></script>
</body></html>