From 2a82c3d5e9fa4d3df5334b0f1899c80cc6b4b231 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Thu, 26 Sep 2024 13:52:59 -0700 Subject: [PATCH 1/2] fix: update the book for epiprocess 0.9.0 --- _freeze/archive/execute-results/html.json | 4 +- .../archive/figure-html/unnamed-chunk-8-1.svg | 2 +- _freeze/epidf/execute-results/html.json | 4 +- .../epidf/figure-html/unnamed-chunk-11-1.svg | 941 +- .../epidf/figure-html/unnamed-chunk-13-1.svg | 1423 +- .../epidf/figure-html/unnamed-chunk-15-1.svg | 3989 +-- _freeze/outliers/execute-results/html.json | 4 +- .../figure-html/unnamed-chunk-3-1.svg | 1057 +- .../figure-html/unnamed-chunk-7-1.svg | 2282 +- .../figure-html/unnamed-chunk-7-2.svg | 2246 +- .../figure-html/unnamed-chunk-9-1.svg | 1069 +- _freeze/slide/execute-results/html.json | 4 +- .../slide/figure-html/unnamed-chunk-10-1.svg | 308 + .../slide/figure-html/unnamed-chunk-12-1.svg | 19061 ++++++++++-- .../slide/figure-html/unnamed-chunk-16-1.svg | 2860 ++ .../slide/figure-html/unnamed-chunk-8-1.svg | 24153 ++++++++-------- .../execute-results/html.json | 4 +- .../figure-html/plot-ar-asof-1.svg | 1812 +- .../figure-html/plot-arx-1.svg | 1210 +- .../figure-html/plot-can-fc-boost-1.svg | 5656 ++-- .../figure-html/plot-can-fc-lr-1.svg | 3826 ++- archive.qmd | 38 +- epidf.qmd | 107 +- outliers.qmd | 65 +- renv.lock | 612 +- renv/activate.R | 105 +- slide.qmd | 346 +- sliding-forecasters.qmd | 283 +- 28 files changed, 45664 insertions(+), 27807 deletions(-) create mode 100644 _freeze/slide/figure-html/unnamed-chunk-10-1.svg create mode 100644 _freeze/slide/figure-html/unnamed-chunk-16-1.svg diff --git a/_freeze/archive/execute-results/html.json b/_freeze/archive/execute-results/html.json index 8c57390..c74dd54 100644 --- a/_freeze/archive/execute-results/html.json +++ b/_freeze/archive/execute-results/html.json @@ -1,7 +1,7 @@ { - "hash": "4abbf9d8187ca890d0c13fd0656d50e9", + "hash": "7f45dbf54b783ddcda2ce2462e5f9d69", "result": { - "markdown": "# Work with archive objects and data revisions\n\nIn addition to the `epi_df` data structure, which we have been working with all\nalong in these vignettes, the `epiprocess` package has a companion structure\ncalled `epi_archive`. In comparison to an `epi_df` object, which can be seen as\nstoring a single snapshot of a data set with the most up-to-date signal values\nas of some given time, an `epi_archive` object stores the full version history\nof a data set. Many signals of interest for epidemiological tracking are subject\nto revision (some more than others), and paying attention to data revisions can\nbe important for all sorts of downstream data analysis and modeling tasks.\n\nThis chapter walks through working with `epi_archive` objects and demonstrates\nsome of their key functionality. We'll work with a signal on the percentage of\ndoctor's visits with CLI (COVID-like illness) computed from medical insurance\nclaims, available through the [COVIDcast\nAPI](https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html). This\nsignal is subject to very heavy and regular revision; you can read more about it\non its [API documentation\npage](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html). We'll use the offline version stored in `{epidatasets}`.\n\n\n\n\n\n\n## Getting data into `epi_archive` format\n\nAn `epi_archive` object can be constructed from a data frame, data table, or\ntibble, provided that it has (at least) the following columns:\n\n* `geo_value`: the geographic value associated with each row of measurements.\n* `time_value`: the time value associated with each row of measurements.\n* `version`: the time value specifying the version for each row of measurements.\n For example, if in a given row the `version` is January 15, 2022 and\n `time_value` is January 14, 2022, then this row contains the measurements of\n the data for January 14, 2022 that were available one day later.\n\nAs we can see from the above, the data frame returned by\n`epidatr::covidcast()` has the columns required for the `epi_archive`\nformat, so we use\n`as_epi_archive()` to cast it into `epi_archive` format.[^1]\n\n[^1]: For a discussion of the removal of\nredundant version updates in `as_epi_archive` using compactify, please refer\nto the [compactify vignette](https://cmu-delphi.github.io/epiprocess/articles/compactify.html).\n\n\n\n::: {.cell layout-align=\"center\" hash='archive_cache/html/unnamed-chunk-2_39c5cbdbb56253b327ea66e6ab4e8220'}\n\n```{.r .cell-code}\nx <- archive_cases_dv_subset_dt %>%\n select(geo_value, time_value, version, percent_cli) %>%\n as_epi_archive(compactify = TRUE)\n\nclass(x)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> [1] \"epi_archive\"\n```\n:::\n\n```{.r .cell-code}\nprint(x)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> → An `epi_archive` object, with metadata:\n#> ℹ Min/max time values: 2020-06-01 / 2021-11-30\n#> ℹ First/last version with update: 2020-06-02 / 2021-12-01\n#> ℹ Versions end: 2021-12-01\n#> ℹ A preview of the table (119316 rows x 4 columns):\n#> Key: \n#> geo_value time_value version percent_cli\n#> \n#> 1: ca 2020-06-01 2020-06-02 NA\n#> 2: ca 2020-06-01 2020-06-06 2.140116\n#> 3: ca 2020-06-01 2020-06-08 2.140379\n#> 4: ca 2020-06-01 2020-06-09 2.114430\n#> 5: ca 2020-06-01 2020-06-10 2.133677\n#> --- \n#> 119312: tx 2021-11-26 2021-11-29 1.858596\n#> 119313: tx 2021-11-27 2021-11-28 NA\n#> 119314: tx 2021-11-28 2021-11-29 NA\n#> 119315: tx 2021-11-29 2021-11-30 NA\n#> 119316: tx 2021-11-30 2021-12-01 NA\n```\n:::\n:::\n\n\nAn `epi_archive` is an S3 class. Its primary field is a data table `DT`, which\nis of class `data.table` (from the `data.table` package), and has columns\n`geo_value`, `time_value`, `version`, as well as any number of additional\ncolumns.\n\n\n::: {.cell layout-align=\"center\" hash='archive_cache/html/unnamed-chunk-3_99d23f4e3321a367498344c4b6282562'}\n\n```{.r .cell-code}\nclass(x$DT)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> [1] \"data.table\" \"data.frame\"\n```\n:::\n\n```{.r .cell-code}\nhead(x$DT)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> Key: \n#> geo_value time_value version percent_cli\n#> \n#> 1: ca 2020-06-01 2020-06-02 NA\n#> 2: ca 2020-06-01 2020-06-06 2.140116\n#> 3: ca 2020-06-01 2020-06-08 2.140379\n#> 4: ca 2020-06-01 2020-06-09 2.114430\n#> 5: ca 2020-06-01 2020-06-10 2.133677\n#> 6: ca 2020-06-01 2020-06-11 2.197207\n```\n:::\n:::\n\n\nThe variables `geo_value`, `time_value`, `version` serve as **key variables**\nfor the data table, as well as any other specified in the metadata (described\nbelow). There can only be a single row per unique combination of key variables,\nand therefore the key variables are critical for figuring out how to generate a\nsnapshot of data from the archive, as of a given version (also described below).\n\n\n::: {.cell layout-align=\"center\" hash='archive_cache/html/unnamed-chunk-4_8b3712fe1140194d1eb702521cf15238'}\n\n```{.r .cell-code}\nkey(x$DT)\n```\n\n::: {.cell-output .cell-output-error}\n```\n#> Error in key(x$DT): could not find function \"key\"\n```\n:::\n:::\n\n\nIn general, the last version of each observation is carried forward (LOCF) to\nfill in data between recorded versions.\n\n## Some details on metadata\n\nThe following pieces of metadata are included as fields in an `epi_archive`\nobject:\n\n* `geo_type`: the type for the geo values.\n* `time_type`: the type for the time values.\n* `additional_metadata`: list of additional metadata for the data archive.\n\nMetadata for an `epi_archive` object `x` can be accessed (and altered) directly,\nas in `x$geo_type` or `x$time_type`, etc. Just like `as_epi_df()`, the function\n`as_epi_archive()` attempts to guess metadata fields when an `epi_archive`\nobject is instantiated, if they are not explicitly specified in the function\ncall (as it did in the case above).\n\n## Producing snapshots in `epi_df` form\n\nA key method of an `epi_archive` class is `as_of()`, which generates a snapshot\nof the archive in `epi_df` format. This represents the most up-to-date values of\nthe signal variables as of a given version. This can be accessed via\n`epix_as_of()`.\n\n\n::: {.cell layout-align=\"center\" hash='archive_cache/html/unnamed-chunk-5_d82372bbd8143517377c9afe9103cce8'}\n\n```{.r .cell-code}\nx_snapshot <- epix_as_of(x, max_version = as.Date(\"2021-06-01\"))\nclass(x_snapshot)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> [1] \"epi_df\" \"tbl_df\" \"tbl\" \"data.frame\"\n```\n:::\n\n```{.r .cell-code}\nx_snapshot\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` object, 1,460 x 3 with metadata:\n#> * geo_type = state\n#> * time_type = day\n#> * as_of = 2021-06-01\n#> \n#> # A tibble: 1,460 × 3\n#> geo_value time_value percent_cli\n#> * \n#> 1 ca 2020-06-01 2.75\n#> 2 ca 2020-06-02 2.57\n#> 3 ca 2020-06-03 2.48\n#> 4 ca 2020-06-04 2.41\n#> 5 ca 2020-06-05 2.57\n#> 6 ca 2020-06-06 2.63\n#> # ℹ 1,454 more rows\n```\n:::\n\n```{.r .cell-code}\nmax(x_snapshot$time_value)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> [1] \"2021-05-31\"\n```\n:::\n\n```{.r .cell-code}\nattributes(x_snapshot)$metadata$as_of\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> [1] \"2021-06-01\"\n```\n:::\n:::\n\n\nWe can see that the max time value in the `epi_df` object `x_snapshot` that was\ngenerated from the archive is May 29, 2021, even though the specified version\ndate was June 1, 2021. From this we can infer that the doctor's visits signal\nwas 2 days latent on June 1. Also, we can see that the metadata in the `epi_df`\nobject has the version date recorded in the `as_of` field.\n\nBy default, using the maximum of the `version` column in the underlying data table in an\n`epi_archive` object itself generates a snapshot of the latest values of signal\nvariables in the entire archive. The `epix_as_of()` function issues a warning in\nthis case, since updates to the current version may still come in at a later\npoint in time, due to various reasons, such as synchronization issues.\n\n\n::: {.cell layout-align=\"center\" hash='archive_cache/html/unnamed-chunk-6_ae9ceb907b24026cb708ea184ff52cc4'}\n\n```{.r .cell-code}\nx_latest <- epix_as_of(x, max_version = max(x$DT$version))\n```\n:::\n\n\nBelow, we pull several snapshots from the archive, spaced one month apart. We\noverlay the corresponding signal curves as colored lines, with the version dates\nmarked by dotted vertical lines, and draw the latest curve in black (from the\nlatest snapshot `x_latest` that the archive can provide).\n\n\n::: {.cell layout-align=\"center\" hash='archive_cache/html/unnamed-chunk-7_7b41b3bd4e404515018a4c8d6293057d'}\n\n```{.r .cell-code}\nself_max <- max(x$DT$version)\nversions <- seq(as.Date(\"2020-06-01\"), self_max - 1, by = \"1 month\")\nsnapshots <- map(\n versions,\n function(v) {\n epix_as_of(x, max_version = v) %>% mutate(version = v)\n }\n) %>%\n list_rbind() %>%\n bind_rows(x_latest %>% mutate(version = self_max)) %>%\n mutate(latest = version == self_max)\n```\n:::\n\n::: {.cell layout-align=\"center\" hash='archive_cache/html/unnamed-chunk-8_8625834090bf668df1c1c2bcae527e81'}\n\n```{.r .cell-code code-fold=\"true\"}\nggplot(\n snapshots %>% filter(!latest),\n aes(x = time_value, y = percent_cli)\n) +\n geom_line(aes(color = factor(version)), na.rm = TRUE) +\n geom_vline(aes(color = factor(version), xintercept = version), lty = 2) +\n facet_wrap(~geo_value, scales = \"free_y\", ncol = 1) +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %Y\") +\n scale_color_viridis_d(option = \"A\", end = .9) +\n labs(x = \"Date\", y = \"% of doctor's visits with CLI\") +\n theme(legend.position = \"none\") +\n geom_line(\n data = snapshots %>% filter(latest),\n aes(x = time_value, y = percent_cli),\n inherit.aes = FALSE, color = \"black\", na.rm = TRUE\n )\n```\n\n::: {.cell-output-display}\n![](archive_files/figure-html/unnamed-chunk-8-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nWe can see some interesting and highly nontrivial revision behavior: at some\npoints in time the provisional data snapshots grossly underestimate the latest\ncurve (look in particular at Florida close to the end of 2021), and at others\nthey overestimate it (both states towards the beginning of 2021), though not\nquite as dramatically. Modeling the revision process, which is often called\n*backfill modeling*, is an important statistical problem in it of itself.\n\n\n## Merging `epi_archive` objects\n\nNow we demonstrate how to merge two `epi_archive` objects together, e.g., so\nthat grabbing data from multiple sources as of a particular version can be\nperformed with a single `as_of` call. The `epiprocess` packages provides\n`epix_merge()` for this purpose. Below we merge the working `epi_archive` of\nversioned percentage CLI from outpatient visits to another one of versioned\nCOVID-19 case reporting data, which we fetch the from the [COVIDcast\nAPI](https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html/), on the\nrate scale (counts per 100,000 people in the population).\n\nWhen merging archives, unless the archives have identical data release patterns,\n`NA`s can be introduced in the non-key variables for a few reasons:\n- to represent the \"value\" of an observation before its initial release (when we\n need to pair it with additional observations from the other archive that have\n been released)\n- to represent the \"value\" of an observation that has no recorded versions at\n all (in the same sort of situation)\n- if requested via `sync = \"na\"`, to represent potential update data that we do\n not yet have access to (e.g., due to encountering issues while attempting to\n download the currently available version data for one of the archives, but not\n the other).\n\n\n::: {.cell layout-align=\"center\" hash='archive_cache/html/unnamed-chunk-9_dd8b9566b93ee92760a9756afba10db6'}\n\n```{.r .cell-code}\n# This code is for illustration and doesn't run.\n# The result is saved/loaded in the (hidden) next chunk from `{epidatasets}`\ny <- pub_covidcast(\n source = \"jhu-csse\",\n signals = \"confirmed_7dav_incidence_prop\",\n time_type = \"day\",\n geo_type = \"state\",\n time_values = epirange(20200601, 20211201),\n geo_values = \"ca,fl,ny,tx\",\n issues = epirange(20200601, 20211201)\n) %>%\n select(geo_value, time_value, version = issue, case_rate_7d_av = value) %>%\n as_epi_archive(compactify = TRUE)\n\nx <- epix_merge(x, y, sync = \"locf\", compactify = FALSE)\nprint(x)\nhead(x$DT)\n```\n:::\n\n\n## Sliding version-aware computations\n\n::: {.callout-note}\nTODO: need a simple example here.\n:::\n", + "markdown": "# Work with archive objects and data revisions\n\nIn addition to the `epi_df` data structure, which we have been working with all\nalong in these vignettes, the `epiprocess` package has a companion structure\ncalled `epi_archive`. In comparison to an `epi_df` object, which can be seen as\nstoring a single snapshot of a data set with the most up-to-date signal values\nas of some given time, an `epi_archive` object stores the full version history\nof a data set. Many signals of interest for epidemiological tracking are subject\nto revision (some more than others), and paying attention to data revisions can\nbe important for all sorts of downstream data analysis and modeling tasks.\n\nThis chapter walks through working with `epi_archive` objects and demonstrates\nsome of their key functionality. We'll work with a signal on the percentage of\ndoctor's visits with CLI (COVID-like illness) computed from medical insurance\nclaims, available through the [COVIDcast\nAPI](https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html). This\nsignal is subject to very heavy and regular revision; you can read more about it\non its [API documentation\npage](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html).\nWe'll use the offline version stored in `{epidatasets}`.\n\n\n\n\n\n## Getting data into `epi_archive` format\n\nAn `epi_archive` object can be constructed from a data frame, data table, or\ntibble, provided that it has (at least) the following columns:\n\n* `geo_value`: the geographic value associated with each row of measurements.\n* `time_value`: the time value associated with each row of measurements.\n* `version`: the time value specifying the version for each row of measurements.\n For example, if in a given row the `version` is January 15, 2022 and\n `time_value` is January 14, 2022, then this row contains the measurements of\n the data for January 14, 2022 that were available one day later.\n\nAs we can see from the above, the data frame returned by\n`epidatr::pub_covidcast()` has the columns required for the `epi_archive`\nformat, so we use\n`as_epi_archive()` to cast it into `epi_archive` format.[^1]\n\n[^1]: For a discussion of the removal of\nredundant version updates in `as_epi_archive` using compactify, please refer\nto the [compactify vignette](https://cmu-delphi.github.io/epiprocess/articles/compactify.html).\n\n\n\n::: {.cell layout-align=\"center\" hash='archive_cache/html/unnamed-chunk-2_b11f7e2e1cfae56bec7a39b13bb5558c'}\n\n```{.r .cell-code}\nx <- archive_cases_dv_subset_dt %>%\n select(geo_value, time_value, version, percent_cli) %>%\n as_epi_archive(compactify = TRUE)\n\nclass(x)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> [1] \"epi_archive\"\n```\n:::\n\n```{.r .cell-code}\nprint(x)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> → An `epi_archive` object, with metadata:\n#> ℹ Min/max time values: 2020-06-01 / 2021-11-30\n#> ℹ First/last version with update: 2020-06-02 / 2021-12-01\n#> ℹ Versions end: 2021-12-01\n#> ℹ A preview of the table (119316 rows x 4 columns):\n#> Key: \n#> geo_value time_value version percent_cli\n#> \n#> 1: ca 2020-06-01 2020-06-02 NA\n#> 2: ca 2020-06-01 2020-06-06 2.140116\n#> 3: ca 2020-06-01 2020-06-08 2.140379\n#> 4: ca 2020-06-01 2020-06-09 2.114430\n#> 5: ca 2020-06-01 2020-06-10 2.133677\n#> --- \n#> 119312: tx 2021-11-26 2021-11-29 1.858596\n#> 119313: tx 2021-11-27 2021-11-28 NA\n#> 119314: tx 2021-11-28 2021-11-29 NA\n#> 119315: tx 2021-11-29 2021-11-30 NA\n#> 119316: tx 2021-11-30 2021-12-01 NA\n```\n:::\n:::\n\n\nAn `epi_archive` is an S3 class. Its primary field is a data table `DT`, which\nis of class `data.table` (from the `{data.table}` package), and has columns\n`geo_value`, `time_value`, `version`, as well as any number of additional\ncolumns.\n\n\n::: {.cell layout-align=\"center\" hash='archive_cache/html/unnamed-chunk-3_99d23f4e3321a367498344c4b6282562'}\n\n```{.r .cell-code}\nclass(x$DT)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> [1] \"data.table\" \"data.frame\"\n```\n:::\n\n```{.r .cell-code}\nhead(x$DT)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> Key: \n#> geo_value time_value version percent_cli\n#> \n#> 1: ca 2020-06-01 2020-06-02 NA\n#> 2: ca 2020-06-01 2020-06-06 2.140116\n#> 3: ca 2020-06-01 2020-06-08 2.140379\n#> 4: ca 2020-06-01 2020-06-09 2.114430\n#> 5: ca 2020-06-01 2020-06-10 2.133677\n#> 6: ca 2020-06-01 2020-06-11 2.197207\n```\n:::\n:::\n\n\nThe variables `geo_value`, `time_value`, `version` serve as **key variables**\nfor the data table, as well as any other specified in the metadata (described\nbelow). There can only be a single row per unique combination of key variables,\nand therefore the key variables are critical for figuring out how to generate a\nsnapshot of data from the archive, as of a given version (also described below).\n\n\n::: {.cell layout-align=\"center\" hash='archive_cache/html/unnamed-chunk-4_77a1a144e9c650c7e533687354a3a0de'}\n\n```{.r .cell-code}\ndata.table::key(x$DT)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> [1] \"geo_value\" \"time_value\" \"version\"\n```\n:::\n:::\n\n\nIn general, the last version of each observation is carried forward (LOCF) to\nfill in data between recorded versions.\n\n## Some details on metadata\n\nThe following pieces of metadata are included as fields in an `epi_archive`\nobject:\n\n* `geo_type`: the type for the geo values.\n* `time_type`: the type for the time values.\n* `additional_metadata`: list of additional metadata for the data archive.\n\nMetadata for an `epi_archive` object `x` can be accessed (and altered) directly,\nas in `x$geo_type` or `x$time_type`, etc. Just like `as_epi_df()`, the function\n`as_epi_archive()` attempts to guess metadata fields when an `epi_archive`\nobject is instantiated, if they are not explicitly specified in the function\ncall (as it did in the case above).\n\n## Producing snapshots in `epi_df` form\n\nA key method of an `epi_archive` class is `as_of()`, which generates a snapshot\nof the archive in `epi_df` format. This represents the most up-to-date values of\nthe signal variables as of a given version. This can be accessed via\n`epix_as_of()`.\n\n\n::: {.cell layout-align=\"center\" hash='archive_cache/html/unnamed-chunk-5_1836424cccdf3d1cf0472182f7c859f6'}\n\n```{.r .cell-code}\nx_snapshot <- epix_as_of(x, version = as.Date(\"2021-06-01\"))\nclass(x_snapshot)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> [1] \"epi_df\" \"tbl_df\" \"tbl\" \"data.frame\"\n```\n:::\n\n```{.r .cell-code}\nx_snapshot\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` object, 1,460 x 3 with metadata:\n#> * geo_type = state\n#> * time_type = day\n#> * as_of = 2021-06-01\n#> \n#> # A tibble: 1,460 × 3\n#> geo_value time_value percent_cli\n#> * \n#> 1 ca 2020-06-01 2.75\n#> 2 ca 2020-06-02 2.57\n#> 3 ca 2020-06-03 2.48\n#> 4 ca 2020-06-04 2.41\n#> 5 ca 2020-06-05 2.57\n#> 6 ca 2020-06-06 2.63\n#> # ℹ 1,454 more rows\n```\n:::\n\n```{.r .cell-code}\nmax(x_snapshot$time_value)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> [1] \"2021-05-31\"\n```\n:::\n\n```{.r .cell-code}\nattributes(x_snapshot)$metadata$as_of\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> [1] \"2021-06-01\"\n```\n:::\n:::\n\n\nWe can see that the max time value in the `epi_df` object `x_snapshot` that was\ngenerated from the archive is May 29, 2021, even though the specified version\ndate was June 1, 2021. From this we can infer that the doctor's visits signal\nwas 2 days latent on June 1. Also, we can see that the metadata in the `epi_df`\nobject has the version date recorded in the `as_of` field.\n\nBy default, using the maximum of the `version` column in the underlying data table in an\n`epi_archive` object itself generates a snapshot of the latest values of signal\nvariables in the entire archive. The `epix_as_of()` function issues a warning in\nthis case, since updates to the current version may still come in at a later\npoint in time, due to various reasons, such as synchronization issues.\n\n\n::: {.cell layout-align=\"center\" hash='archive_cache/html/unnamed-chunk-6_ce529f6da4e30a0f740715bc4b9d054c'}\n\n```{.r .cell-code}\nx_latest <- epix_as_of(x, version = max(x$DT$version))\n```\n:::\n\n\nBelow, we pull several snapshots from the archive, spaced one month apart. We\noverlay the corresponding signal curves as colored lines, with the version dates\nmarked by dotted vertical lines, and draw the latest curve in black (from the\nlatest snapshot `x_latest` that the archive can provide).\n\n\n::: {.cell layout-align=\"center\" hash='archive_cache/html/unnamed-chunk-7_58040707d267aa6c13723eb08f4789fa'}\n\n```{.r .cell-code}\nself_max <- max(x$DT$version)\nversions <- seq(as.Date(\"2020-06-01\"), self_max - 1, by = \"1 month\")\nsnapshots <- map(\n versions,\n function(v) {\n epix_as_of(x, version = v) %>% mutate(version = v)\n }\n) %>%\n list_rbind() %>%\n bind_rows(x_latest %>% mutate(version = self_max)) %>%\n mutate(latest = version == self_max)\n```\n:::\n\n::: {.cell layout-align=\"center\" hash='archive_cache/html/unnamed-chunk-8_b16b9b5e7b728035f69a078cb0fc40db'}\n\n```{.r .cell-code code-fold=\"true\"}\nggplot(\n snapshots %>% filter(!latest),\n aes(x = time_value, y = percent_cli)\n) +\n geom_line(aes(color = factor(version)), na.rm = TRUE) +\n geom_vline(aes(color = factor(version), xintercept = version), lty = 2) +\n facet_wrap(~geo_value, scales = \"free_y\", ncol = 1) +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %Y\") +\n scale_color_viridis_d(option = \"A\", end = .9) +\n labs(x = \"Date\", y = \"% of doctor's visits with CLI\") +\n theme(legend.position = \"none\") +\n geom_line(\n data = snapshots %>% filter(latest),\n aes(x = time_value, y = percent_cli),\n inherit.aes = FALSE, color = \"black\", na.rm = TRUE\n )\n```\n\n::: {.cell-output-display}\n![](archive_files/figure-html/unnamed-chunk-8-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nWe can see some interesting and highly nontrivial revision behavior: at some\npoints in time the provisional data snapshots grossly underestimate the latest\ncurve (look in particular at Florida close to the end of 2021), and at others\nthey overestimate it (both states towards the beginning of 2021), though not\nquite as dramatically. Modeling the revision process, which is often called\n*backfill modeling*, is an important statistical problem in it of itself.\n\n## Merging `epi_archive` objects\n\nNow we demonstrate how to merge two `epi_archive` objects together, e.g., so\nthat grabbing data from multiple sources as of a particular version can be\nperformed with a single `as_of` call. The `epiprocess` packages provides\n`epix_merge()` for this purpose. Below we merge the working `epi_archive` of\nversioned percentage CLI from outpatient visits to another one of versioned\nCOVID-19 case reporting data, which we fetch the from the [COVIDcast\nAPI](https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html/), on the\nrate scale (counts per 100,000 people in the population).\n\nWhen merging archives, unless the archives have identical data release patterns,\n`NA`s can be introduced in the non-key variables for a few reasons:\n- to represent the \"value\" of an observation before its initial release (when we\n need to pair it with additional observations from the other archive that have\n been released)\n- to represent the \"value\" of an observation that has no recorded versions at\n all (in the same sort of situation)\n- if requested via `sync = \"na\"`, to represent potential update data that we do\n not yet have access to (e.g., due to encountering issues while attempting to\n download the currently available version data for one of the archives, but not\n the other).\n\n\n::: {.cell layout-align=\"center\" hash='archive_cache/html/unnamed-chunk-9_dd8b9566b93ee92760a9756afba10db6'}\n\n```{.r .cell-code}\n# This code is for illustration and doesn't run.\n# The result is saved/loaded in the (hidden) next chunk from `{epidatasets}`\ny <- pub_covidcast(\n source = \"jhu-csse\",\n signals = \"confirmed_7dav_incidence_prop\",\n time_type = \"day\",\n geo_type = \"state\",\n time_values = epirange(20200601, 20211201),\n geo_values = \"ca,fl,ny,tx\",\n issues = epirange(20200601, 20211201)\n) %>%\n select(geo_value, time_value, version = issue, case_rate_7d_av = value) %>%\n as_epi_archive(compactify = TRUE)\n\nx <- epix_merge(x, y, sync = \"locf\", compactify = FALSE)\nprint(x)\nhead(x$DT)\n```\n:::\n\n\n## Sliding version-aware computations\n\n::: {.callout-note}\nTODO: need a simple example here.\n:::\n", "supporting": [], "filters": [ "rmarkdown/pagebreak.lua" diff --git a/_freeze/archive/figure-html/unnamed-chunk-8-1.svg b/_freeze/archive/figure-html/unnamed-chunk-8-1.svg index 9c9e89c..81b91f1 100644 --- a/_freeze/archive/figure-html/unnamed-chunk-8-1.svg +++ b/_freeze/archive/figure-html/unnamed-chunk-8-1.svg @@ -679,7 +679,7 @@ - + diff --git a/_freeze/epidf/execute-results/html.json b/_freeze/epidf/execute-results/html.json index b23b744..b9dd9cc 100644 --- a/_freeze/epidf/execute-results/html.json +++ b/_freeze/epidf/execute-results/html.json @@ -1,7 +1,7 @@ { - "hash": "ef06ff93fa44c25cdac272e9bf1aeb9b", + "hash": "eac6de9244d86d626bcdbb2945c1fe97", "result": { - "markdown": "# Getting data into epi_df format\n\n\n\n\n\nWe'll start by showing how to get data into \n`epi_df`, which is just\na tibble with a bit of special structure, and is the format assumed by all of\nthe functions in the `epiprocess` package. An `epi_df` object has (at least) the\nfollowing columns:\n\n* `geo_value`: the geographic value associated with each row of measurements.\n* `time_value`: the time value associated with each row of measurements.\n\nIt can have any number of other columns which can serve as measured variables,\nwhich we also broadly refer to as signal variables. The documentation for\n gives more details about this data format.\n\nA data frame or tibble that has `geo_value` and `time_value` columns can be\nconverted into an `epi_df` object, using the function `as_epi_df()`. As an\nexample, we'll work with daily cumulative COVID-19 cases from four U.S. states:\nCA, FL, NY, and TX, over time span from mid 2020 to early 2022, and we'll use\nthe [`epidatr`](https://github.com/cmu-delphi/epidatr) package\nto fetch this data from the [COVIDcast\nAPI](https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html).\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-2_a8b0ce831d237748edcef31c420862a2'}\n\n```{.r .cell-code}\nlibrary(epidatr)\nlibrary(epiprocess)\nlibrary(withr)\n\ncases <- pub_covidcast(\n source = \"jhu-csse\",\n signals = \"confirmed_cumulative_num\",\n time_type = \"day\",\n geo_type = \"state\",\n time_values = epirange(20200301, 20220131),\n geo_values = \"ca,fl,ny,tx\"\n)\n\ncolnames(cases)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> [1] \"geo_value\" \"signal\" \"source\" \n#> [4] \"geo_type\" \"time_type\" \"time_value\" \n#> [7] \"direction\" \"issue\" \"lag\" \n#> [10] \"missing_value\" \"missing_stderr\" \"missing_sample_size\"\n#> [13] \"value\" \"stderr\" \"sample_size\"\n```\n:::\n:::\n\n\nAs we can see, a data frame returned by `epidatr::covidcast()` has the\ncolumns required for an `epi_df` object (along with many others). We can use\n`as_epi_df()`, with specification of some relevant metadata, to bring the data\nframe into `epi_df` format.\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-3_634293240d733bec84dd8b6a5c74e634'}\n\n```{.r .cell-code}\nx <- as_epi_df(cases,\n geo_type = \"state\",\n time_type = \"day\",\n as_of = max(cases$issue)\n) %>%\n select(geo_value, time_value, total_cases = value)\n\nclass(x)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> [1] \"epi_df\" \"tbl_df\" \"tbl\" \"data.frame\"\n```\n:::\n\n```{.r .cell-code}\nsummary(x)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` x, with metadata:\n#> * geo_type = state\n#> * time_type = day\n#> * as_of = 2023-03-10\n#> ----------\n#> * min time value = 2020-03-01\n#> * max time value = 2022-01-31\n#> * average rows per time value = 4\n```\n:::\n\n```{.r .cell-code}\nhead(x)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` object, 6 x 3 with metadata:\n#> * geo_type = state\n#> * time_type = day\n#> * as_of = 2023-03-10\n#> \n#> # A tibble: 6 × 3\n#> geo_value time_value total_cases\n#> * \n#> 1 ca 2020-03-01 19\n#> 2 fl 2020-03-01 0\n#> 3 ny 2020-03-01 0\n#> 4 tx 2020-03-01 0\n#> 5 ca 2020-03-02 23\n#> 6 fl 2020-03-02 1\n```\n:::\n\n```{.r .cell-code}\nattributes(x)$metadata\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> $geo_type\n#> [1] \"state\"\n#> \n#> $time_type\n#> [1] \"day\"\n#> \n#> $as_of\n#> [1] \"2023-03-10\"\n```\n:::\n:::\n\n\n## Some details on metadata\n\nIn general, an `epi_df` object has the following fields in its metadata:\n \n* `geo_type`: the type for the geo values.\n* `time_type`: the type for the time values.\n* `as_of`: the time value at which the given data were available.\n\nMetadata for an `epi_df` object `x` can be accessed (and altered) via\n`attributes(x)$metadata`. The first two fields here, `geo_type` and `time_type`,\nare not currently used by any downstream functions in the `epiprocess` package,\nand serve only as useful bits of information to convey about the data set at\nhand. The last field here, `as_of`, is one of the most unique aspects of an\n`epi_df` object.\n\nIn brief, we can think of an `epi_df` object as a single snapshot of a data set\nthat contains the most up-to-date values of some signals of interest, as of the\ntime specified `as_of`. For example, if `as_of` is January 31, 2022, then the\n`epi_df` object has the most up-to-date version of the data available as of\nJanuary 31, 2022. The `epiprocess` package also provides a companion data\nstructure called `epi_archive`, which stores the full version history of a given\ndata set. See the [archive\nvignette](https://cmu-delphi.github.io/epiprocess/articles/archive.html) for\nmore.\n\nIf any of the `geo_type`, `time_type`, or `as_of` arguments are missing in a \ncall to `as_epi_df()`, then this function will try to infer them from the passed\nobject. Usually, `geo_type` and `time_type` can be inferred from the `geo_value`\nand `time_value` columns, respectively, but inferring the `as_of` field is not \nas easy. See the documentation for `as_epi_df()` more details.\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-4_1c364218e936aa6527bd0675ab37d455'}\n\n```{.r .cell-code}\nx <- as_epi_df(cases) %>%\n select(geo_value, time_value, total_cases = value)\n\nattributes(x)$metadata\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> $geo_type\n#> [1] \"state\"\n#> \n#> $time_type\n#> [1] \"day\"\n#> \n#> $as_of\n#> [1] \"2023-03-10\"\n```\n:::\n:::\n\n\n## Using additional key columns in `epi_df` {#sec-additional-keys}\n\nIn the following examples we will show how to create an `epi_df` with additional keys.\n\n### Converting a `tsibble` that has county code as an extra key\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-5_28361d3ac565b78677e217c86faf03cc'}\n\n```{.r .cell-code}\nset.seed(12345)\nex1 <- tibble(\n geo_value = rep(c(\"ca\", \"fl\", \"pa\"), each = 3),\n county_code = c(\n \"06059\", \"06061\", \"06067\", \"12111\", \"12113\", \"12117\",\n \"42101\", \"42103\", \"42105\"\n ),\n time_value = rep(\n seq(as.Date(\"2020-06-01\"), as.Date(\"2020-06-03\"), by = \"1 day\"),\n length.out = 9\n ),\n value = rpois(9, 5)\n) %>%\n as_tsibble(index = time_value, key = c(geo_value, county_code))\n\nex1 <- as_epi_df(x = ex1, geo_type = \"state\", time_type = \"day\", as_of = \"2020-06-03\")\n```\n:::\n\n\nThe metadata now includes `county_code` as an extra key.\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-6_1c760ce7c25a1f6867568618118bb7ac'}\n\n```{.r .cell-code}\nattr(ex1, \"metadata\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> $geo_type\n#> [1] \"state\"\n#> \n#> $time_type\n#> [1] \"day\"\n#> \n#> $as_of\n#> [1] \"2020-06-03\"\n#> \n#> $other_keys\n#> [1] \"county_code\"\n```\n:::\n:::\n\n\n\n### Dealing with misspecified column names \n\n`epi_df` requires there to be columns `geo_value` and `time_value`, if they do not exist then `as_epi_df()` throws an error.\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-7_52307fa1e07fa21173de3e9416897483'}\n\n```{.r .cell-code}\nex2 <- data.frame(\n state = rep(c(\"ca\", \"fl\", \"pa\"), each = 3), # misnamed\n pol = rep(c(\"blue\", \"swing\", \"swing\"), each = 3), # extra key\n reported_date = rep(\n seq(as.Date(\"2020-06-01\"), as.Date(\"2020-06-03\"), by = \"day\"),\n length.out = 9\n ), # misnamed\n value = rpois(9, 5)\n)\nex2 %>% as_epi_df()\n```\n\n::: {.cell-output .cell-output-error}\n```\n#> Error in `Abort()`:\n#> ! `x` must contain a `geo_value` column.\n```\n:::\n:::\n\n\nThe columns should be renamed to match `epi_df` format. \n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-8_eea2403289899a6533606cf4f555d400'}\n\n```{.r .cell-code}\nex2 <- ex2 %>%\n rename(geo_value = state, time_value = reported_date) %>%\n as_epi_df(\n geo_type = \"state\",\n as_of = \"2020-06-03\",\n additional_metadata = list(other_keys = \"pol\")\n )\n\nattr(ex2, \"metadata\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> $geo_type\n#> [1] \"state\"\n#> \n#> $time_type\n#> [1] \"day\"\n#> \n#> $as_of\n#> [1] \"2020-06-03\"\n#> \n#> $other_keys\n#> [1] \"pol\"\n```\n:::\n:::\n\n\n\n### Adding additional keys to an `epi_df` object\n\nIn the above examples, all the keys are added to objects prior to conversion to\n`epi_df` objects. But this can also be accomplished afterward.\nWe'll look at an included dataset and filter to a single state for simplicity.\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-9_fc0625e5160d2a01eb47d18c346874ed'}\n\n```{.r .cell-code}\nex3 <- jhu_csse_county_level_subset %>%\n filter(time_value > \"2021-12-01\", state_name == \"Massachusetts\") %>%\n slice_tail(n = 6)\n\nattr(ex3, \"metadata\") # geo_type is county currently\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> $geo_type\n#> [1] \"county\"\n#> \n#> $time_type\n#> [1] \"day\"\n#> \n#> $as_of\n#> [1] \"2022-05-23 14:35:45 PDT\"\n```\n:::\n:::\n\n\nNow we add `state` (MA) and `pol` as new columns to the data and as new keys to the metadata. The \"state\" `geo_type` anticipates lower-case abbreviations, so we'll match that. \n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-10_fe2c6e15016b44b9220d5fc4f6b51049'}\n\n```{.r .cell-code}\nex3 <- ex3 %>%\n as_tibble() %>% # drop the `epi_df` class before adding additional metadata\n mutate(\n state = rep(tolower(\"MA\"), 6),\n pol = rep(c(\"blue\", \"swing\", \"swing\"), each = 2)\n ) %>%\n as_epi_df(additional_metadata = list(other_keys = c(\"state\", \"pol\")))\n\nattr(ex3, \"metadata\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> $geo_type\n#> [1] \"county\"\n#> \n#> $time_type\n#> [1] \"day\"\n#> \n#> $as_of\n#> [1] \"2023-12-15 04:50:49 PST\"\n#> \n#> $other_keys\n#> [1] \"state\" \"pol\"\n```\n:::\n:::\n\n\nNote that the two additional keys we added, `state` and `pol`, are specified as a character vector in the `other_keys` component of the `additional_metadata` list. They must be specified in this manner so that downstream actions on the `epi_df`, like model fitting and prediction, can recognize and use these keys.\n\n\n\n## Working with `epi_df` objects downstream\n\nData in `epi_df` format should be easy to work with downstream, since it is a\nvery standard tabular data format; in the other vignettes, we'll walk through\nsome basic signal processing tasks using functions provided in the `epiprocess`\npackage. Of course, we can also write custom code for other downstream uses,\nlike plotting, which is pretty easy to do `ggplot2`.\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-11_cf02eb699138d3d8365b66804d295fde'}\n\n```{.r .cell-code}\nggplot(x, aes(x = time_value, y = total_cases, color = geo_value)) +\n geom_line() +\n scale_color_brewer(palette = \"Set1\") +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %Y\") +\n labs(x = \"Date\", y = \"Cumulative COVID-19 cases\", color = \"State\")\n```\n\n::: {.cell-output-display}\n![](epidf_files/figure-html/unnamed-chunk-11-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nFinally, we'll examine some data from other packages just to show how \nwe might get them into `epi_df` format. \nThe first is data on daily new (not cumulative) SARS \ncases in Canada in 2003, from the \n[outbreaks](https://github.com/reconverse/outbreaks) package. New cases are\nbroken into a few categories by provenance.\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-12_f4dc254695766edbb2625b67c42932b7'}\n\n```{.r .cell-code}\nx <- outbreaks::sars_canada_2003 %>%\n mutate(geo_value = \"ca\") %>%\n select(geo_value, time_value = date, starts_with(\"cases\")) %>%\n as_epi_df(geo_type = \"nation\")\n\nhead(x)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` object, 6 x 6 with metadata:\n#> * geo_type = nation\n#> * time_type = day\n#> * as_of = 2023-12-15 04:50:50\n#> \n#> # A tibble: 6 × 6\n#> geo_value time_value cases_travel cases_household cases_healthcare\n#> * \n#> 1 ca 2003-02-23 1 0 0\n#> 2 ca 2003-02-24 0 0 0\n#> 3 ca 2003-02-25 0 0 0\n#> 4 ca 2003-02-26 0 1 0\n#> 5 ca 2003-02-27 0 0 0\n#> 6 ca 2003-02-28 1 0 0\n#> # ℹ 1 more variable: cases_other \n```\n:::\n:::\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-13_af68bf6df70c76b27435c6f2822266e9'}\n\n```{.r .cell-code code-fold=\"true\"}\nx <- x %>%\n pivot_longer(starts_with(\"cases\"), names_to = \"type\") %>%\n mutate(type = substring(type, 7))\n\nggplot(x, aes(x = time_value, y = value)) +\n geom_col(aes(fill = type), just = 0.5) +\n scale_y_continuous(breaks = 0:4 * 2, expand = expansion(c(0, 0.05))) +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %Y\") +\n labs(x = \"Date\", y = \"SARS cases in Canada\", fill = \"Type\")\n```\n\n::: {.cell-output-display}\n![](epidf_files/figure-html/unnamed-chunk-13-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nThis next example examines data on new cases of Ebola in Sierra Leone in 2014 (from the same package).\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-14_09c7102254a1a233a78be842fcaf2096'}\n\n```{.r .cell-code}\nx <- outbreaks::ebola_sierraleone_2014 %>%\n mutate(\n cases = ifelse(status == \"confirmed\", 1, 0),\n province = case_when(\n district %in% c(\"Kailahun\", \"Kenema\", \"Kono\") ~ \"Eastern\",\n district %in% c(\n \"Bombali\", \"Kambia\", \"Koinadugu\",\n \"Port Loko\", \"Tonkolili\"\n ) ~ \"Northern\",\n district %in% c(\"Bo\", \"Bonthe\", \"Moyamba\", \"Pujehun\") ~ \"Sourthern\",\n district %in% c(\"Western Rural\", \"Western Urban\") ~ \"Western\"\n )\n ) %>%\n select(geo_value = province, time_value = date_of_onset, cases) %>%\n filter(cases == 1) %>%\n group_by(geo_value, time_value) %>%\n summarise(cases = sum(cases)) %>%\n as_epi_df(geo_type = \"province\")\n```\n:::\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-15_7b787f995e155e919b8f184101e75f87'}\n\n```{.r .cell-code code-fold=\"true\"}\nggplot(x, aes(x = time_value, y = cases)) +\n geom_col(aes(fill = geo_value), show.legend = FALSE) +\n facet_wrap(~geo_value, scales = \"free_y\") +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %Y\") +\n labs(x = \"Date\", y = \"Confirmed cases of Ebola in Sierra Leone\")\n```\n\n::: {.cell-output-display}\n![](epidf_files/figure-html/unnamed-chunk-15-1.svg){fig-align='center' width=90%}\n:::\n:::\n", + "markdown": "# Getting data into epi_df format\n\n\n\n\n\nWe'll start by showing how to get data into\n`epi_df`, which is just\na tibble with a bit of special structure, and is the format assumed by all of\nthe functions in the `epiprocess` package. An `epi_df` object has (at least) the\nfollowing columns:\n\n* `geo_value`: the geographic value associated with each row of measurements.\n* `time_value`: the time value associated with each row of measurements.\n\nIt can have any number of other columns which can serve as measured variables,\nwhich we also broadly refer to as signal variables. The documentation for\n gives more details about this data format.\n\nA data frame or tibble that has `geo_value` and `time_value` columns can be\nconverted into an `epi_df` object, using the function `as_epi_df()`. As an\nexample, we'll work with daily cumulative COVID-19 cases from four U.S. states:\nCA, FL, NY, and TX, over time span from mid 2020 to early 2022, and we'll use\nthe [`epidatr`](https://github.com/cmu-delphi/epidatr) package\nto fetch this data from the [COVIDcast\nAPI](https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html).\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-2_a8b0ce831d237748edcef31c420862a2'}\n\n```{.r .cell-code}\nlibrary(epidatr)\nlibrary(epiprocess)\nlibrary(withr)\n\ncases <- pub_covidcast(\n source = \"jhu-csse\",\n signals = \"confirmed_cumulative_num\",\n time_type = \"day\",\n geo_type = \"state\",\n time_values = epirange(20200301, 20220131),\n geo_values = \"ca,fl,ny,tx\"\n)\n\ncolnames(cases)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> [1] \"geo_value\" \"signal\" \"source\" \n#> [4] \"geo_type\" \"time_type\" \"time_value\" \n#> [7] \"direction\" \"issue\" \"lag\" \n#> [10] \"missing_value\" \"missing_stderr\" \"missing_sample_size\"\n#> [13] \"value\" \"stderr\" \"sample_size\"\n```\n:::\n:::\n\n\nAs we can see, a data frame returned by `epidatr::pub_covidcast()` has the\ncolumns required for an `epi_df` object (along with many others). We can use\n`as_epi_df()`, with specification of some relevant metadata, to bring the data\nframe into `epi_df` format.\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-3_424072c00e940fde0fbfdef1fb6c56b1'}\n\n```{.r .cell-code}\nx <- as_epi_df(cases, as_of = max(cases$issue)) %>%\n select(geo_value, time_value, total_cases = value)\n\nclass(x)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> [1] \"epi_df\" \"tbl_df\" \"tbl\" \"data.frame\"\n```\n:::\n\n```{.r .cell-code}\nsummary(x)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` x, with metadata:\n#> * geo_type = state\n#> * as_of = 2023-03-10\n#> ----------\n#> * min time value = 2020-03-01\n#> * max time value = 2022-01-31\n#> * average rows per time value = 4\n```\n:::\n\n```{.r .cell-code}\nhead(x)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` object, 6 x 3 with metadata:\n#> * geo_type = state\n#> * time_type = day\n#> * as_of = 2023-03-10\n#> \n#> # A tibble: 6 × 3\n#> geo_value time_value total_cases\n#> * \n#> 1 ca 2020-03-01 19\n#> 2 fl 2020-03-01 0\n#> 3 ny 2020-03-01 0\n#> 4 tx 2020-03-01 0\n#> 5 ca 2020-03-02 23\n#> 6 fl 2020-03-02 1\n```\n:::\n\n```{.r .cell-code}\nattributes(x)$metadata\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> $geo_type\n#> [1] \"state\"\n#> \n#> $time_type\n#> [1] \"day\"\n#> \n#> $as_of\n#> [1] \"2023-03-10\"\n#> \n#> $other_keys\n#> character(0)\n```\n:::\n:::\n\n\n## Some details on metadata\n\nIn general, an `epi_df` object has the following fields in its metadata:\n\n* `geo_type`: the type for the geo values.\n* `time_type`: the type for the time values.\n* `as_of`: the time value at which the given data were available.\n\nMetadata for an `epi_df` object `x` can be accessed (and altered) via\n`attributes(x)$metadata`. The first two fields here, `geo_type` and `time_type`,\nare not currently used by any downstream functions in the `epiprocess` package,\nand serve only as useful bits of information to convey about the data set at\nhand. The last field here, `as_of`, is one of the most unique aspects of an\n`epi_df` object.\n\nIn brief, we can think of an `epi_df` object as a single snapshot of a data set\nthat contains the most up-to-date values of some signals of interest, as of the\ntime specified `as_of`. For example, if `as_of` is January 31, 2022, then the\n`epi_df` object has the most up-to-date version of the data available as of\nJanuary 31, 2022. The `epiprocess` package also provides a companion data\nstructure called `epi_archive`, which stores the full version history of a given\ndata set. See the [archive\nvignette](https://cmu-delphi.github.io/epiprocess/articles/archive.html) for\nmore.\n\nIf any of the `geo_type`, `time_type`, or `as_of` arguments are missing in a\ncall to `as_epi_df()`, then this function will try to infer them from the passed\nobject. Usually, `geo_type` and `time_type` can be inferred from the `geo_value`\nand `time_value` columns, respectively, but inferring the `as_of` field is not\nas easy. See the documentation for `as_epi_df()` more details.\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-4_1c364218e936aa6527bd0675ab37d455'}\n\n```{.r .cell-code}\nx <- as_epi_df(cases) %>%\n select(geo_value, time_value, total_cases = value)\n\nattributes(x)$metadata\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> $geo_type\n#> [1] \"state\"\n#> \n#> $time_type\n#> [1] \"day\"\n#> \n#> $as_of\n#> [1] \"2023-03-10\"\n#> \n#> $other_keys\n#> character(0)\n```\n:::\n:::\n\n\n## Using additional key columns in `epi_df` {#sec-additional-keys}\n\nIn the following examples we will show how to create an `epi_df` with additional keys.\n\n### Converting a `tsibble` that has county code as an extra key\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-5_13ed9b2e299a25ed17b6425a15addc48'}\n\n```{.r .cell-code}\nset.seed(12345)\nex1 <- tibble(\n geo_value = rep(c(\"ca\", \"fl\", \"pa\"), each = 3),\n county_code = c(\n \"06059\", \"06061\", \"06067\", \"12111\", \"12113\", \"12117\",\n \"42101\", \"42103\", \"42105\"\n ),\n time_value = rep(\n seq(as.Date(\"2020-06-01\"), as.Date(\"2020-06-03\"), by = \"1 day\"),\n length.out = 9\n ),\n value = rpois(9, 5)\n) %>%\n as_tsibble(index = time_value, key = c(geo_value, county_code))\n\nex1 <- as_epi_df(x = ex1, as_of = \"2020-06-03\")\n```\n:::\n\n\nThe metadata now includes `county_code` as an extra key.\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-6_1c760ce7c25a1f6867568618118bb7ac'}\n\n```{.r .cell-code}\nattr(ex1, \"metadata\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> $geo_type\n#> [1] \"state\"\n#> \n#> $time_type\n#> [1] \"day\"\n#> \n#> $as_of\n#> [1] \"2020-06-03\"\n#> \n#> $other_keys\n#> [1] \"county_code\"\n```\n:::\n:::\n\n\n\n### Dealing with misspecified column names\n\n`epi_df` requires there to be columns `geo_value` and `time_value`, if they do not exist then `as_epi_df()` throws an error.\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-7_ed6a7fdb22f3e76485027e927056c297'}\n\n```{.r .cell-code}\nex2 <- data.frame(\n state = rep(c(\"ca\", \"fl\", \"pa\"), each = 3), # misnamed\n pol = rep(c(\"blue\", \"swing\", \"swing\"), each = 3), # extra key\n reported_date = rep(\n seq(as.Date(\"2020-06-01\"), as.Date(\"2020-06-03\"), by = \"day\"),\n length.out = 9\n ), # misnamed\n value = rpois(9, 5)\n)\nex2 %>% as_epi_df()\n```\n\n::: {.cell-output .cell-output-error}\n```\n#> Error in `guess_column_name()` at epiprocess/R/epi_df.R:233:3:\n#> ! There is no time_value column or similar name. See e.g.\n#> [`time_column_name()`] for a complete list\n```\n:::\n:::\n\n\nThe columns should be renamed to match `epi_df` format.\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-8_55da8bb70c4f752a59c17e051f950ce5'}\n\n```{.r .cell-code}\nex2 <- ex2 %>%\n rename(geo_value = state, time_value = reported_date) %>%\n as_epi_df(\n as_of = \"2020-06-03\",\n other_keys = \"pol\"\n )\n\nattr(ex2, \"metadata\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> $geo_type\n#> [1] \"state\"\n#> \n#> $time_type\n#> [1] \"day\"\n#> \n#> $as_of\n#> [1] \"2020-06-03\"\n#> \n#> $other_keys\n#> [1] \"pol\"\n```\n:::\n:::\n\n\n### Adding additional keys to an `epi_df` object\n\nIn the above examples, all the keys are added to objects prior to conversion to\n`epi_df` objects. But this can also be accomplished afterward.\nWe'll look at an included dataset and filter to a single state for simplicity.\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-9_c65c3cc6cc393f7a99385702f28b05ad'}\n\n```{.r .cell-code}\nex3 <- jhu_csse_county_level_subset %>%\n filter(time_value > \"2021-12-01\", state_name == \"Massachusetts\") %>%\n slice_tail(n = 6)\n\nattr(ex3, \"metadata\") # geo_type is county currently\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> $geo_type\n#> [1] \"county\"\n#> \n#> $time_type\n#> [1] \"day\"\n#> \n#> $as_of\n#> [1] \"2024-08-22 19:40:32 PDT\"\n#> \n#> $other_keys\n#> character(0)\n```\n:::\n:::\n\n\nNow we add `state` (MA) and `pol` as new columns to the data and as new keys to the metadata. The \"state\" `geo_type` anticipates lower-case abbreviations, so we'll match that.\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-10_6335a687a09e5e296e78d4e2c39b281e'}\n\n```{.r .cell-code}\nex3 <- ex3 %>%\n as_tibble() %>% # drop the `epi_df` class before adding additional metadata\n mutate(\n state = rep(tolower(\"MA\"), 6),\n pol = rep(c(\"blue\", \"swing\", \"swing\"), each = 2)\n ) %>%\n as_epi_df(other_keys = c(\"state\", \"pol\"))\n\nattr(ex3, \"metadata\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> $geo_type\n#> [1] \"county\"\n#> \n#> $time_type\n#> [1] \"day\"\n#> \n#> $as_of\n#> [1] \"2024-09-30 16:41:57 PDT\"\n#> \n#> $other_keys\n#> [1] \"state\" \"pol\"\n```\n:::\n:::\n\n\nNote that the two additional keys we added, `state` and `pol`, are specified as a character vector in the `other_keys` component of the `additional_metadata` list. They must be specified in this manner so that downstream actions on the `epi_df`, like model fitting and prediction, can recognize and use these keys.\n\n\n\n## Working with `epi_df` objects downstream\n\nData in `epi_df` format should be easy to work with downstream, since it is a\nvery standard tabular data format; in the other vignettes, we'll walk through\nsome basic signal processing tasks using functions provided in the `epiprocess`\npackage. Of course, we can also write custom code for other downstream uses,\nlike plotting, which is pretty easy to do `ggplot2`.\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-11_355de57e64e10837f1f429b4603f237d'}\n\n```{.r .cell-code}\nggplot(x, aes(x = time_value, y = total_cases, color = geo_value)) +\n geom_line() +\n scale_color_brewer(palette = \"Set1\") +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %Y\") +\n labs(x = \"Date\", y = \"Cumulative COVID-19 cases\", color = \"State\")\n```\n\n::: {.cell-output-display}\n![](epidf_files/figure-html/unnamed-chunk-11-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nFinally, we'll examine some data from other packages just to show how\nwe might get them into `epi_df` format.\nThe first is data on daily new (not cumulative) SARS\ncases in Canada in 2003, from the\n[outbreaks](https://github.com/reconverse/outbreaks) package. New cases are\nbroken into a few categories by provenance.\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-12_45ec873c79b7f5c7b5732235ee7e34ff'}\n\n```{.r .cell-code}\nx <- outbreaks::sars_canada_2003 %>%\n mutate(geo_value = \"ca\") %>%\n select(geo_value, time_value = date, starts_with(\"cases\")) %>%\n as_epi_df()\n\nhead(x)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` object, 6 x 6 with metadata:\n#> * geo_type = state\n#> * time_type = day\n#> * as_of = 2024-09-30 16:41:57.652717\n#> \n#> # A tibble: 6 × 6\n#> geo_value time_value cases_travel cases_household cases_healthcare\n#> * \n#> 1 ca 2003-02-23 1 0 0\n#> 2 ca 2003-02-24 0 0 0\n#> 3 ca 2003-02-25 0 0 0\n#> 4 ca 2003-02-26 0 1 0\n#> 5 ca 2003-02-27 0 0 0\n#> 6 ca 2003-02-28 1 0 0\n#> # ℹ 1 more variable: cases_other \n```\n:::\n:::\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-13_dfc91abe2d4b1af7653920626ab2e6d8'}\n\n```{.r .cell-code code-fold=\"true\"}\nx <- x %>%\n pivot_longer(starts_with(\"cases\"), names_to = \"type\") %>%\n mutate(type = substring(type, 7))\n\nggplot(x, aes(x = time_value, y = value)) +\n geom_col(aes(fill = type), just = 0.5) +\n scale_y_continuous(breaks = 0:4 * 2, expand = expansion(c(0, 0.05))) +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %Y\") +\n labs(x = \"Date\", y = \"SARS cases in Canada\", fill = \"Type\")\n```\n\n::: {.cell-output-display}\n![](epidf_files/figure-html/unnamed-chunk-13-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nThis next example examines data on new cases of Ebola in Sierra Leone in 2014 (from the same package).\n\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-14_ed7ca3d4b9c9eebb3ec4511a1f697a14'}\n\n```{.r .cell-code}\nx <- outbreaks::ebola_sierraleone_2014 %>%\n mutate(\n cases = ifelse(status == \"confirmed\", 1, 0),\n province = case_when(\n district %in% c(\"Kailahun\", \"Kenema\", \"Kono\") ~ \"Eastern\",\n district %in% c(\n \"Bombali\", \"Kambia\", \"Koinadugu\",\n \"Port Loko\", \"Tonkolili\"\n ) ~ \"Northern\",\n district %in% c(\"Bo\", \"Bonthe\", \"Moyamba\", \"Pujehun\") ~ \"Sourthern\",\n district %in% c(\"Western Rural\", \"Western Urban\") ~ \"Western\"\n )\n ) %>%\n select(geo_value = province, time_value = date_of_onset, cases) %>%\n filter(cases == 1) %>%\n group_by(geo_value, time_value) %>%\n summarise(cases = sum(cases)) %>%\n as_epi_df()\n```\n:::\n\n::: {.cell layout-align=\"center\" hash='epidf_cache/html/unnamed-chunk-15_43837d6f58c4eaea8f6160ffffe59d5b'}\n\n```{.r .cell-code code-fold=\"true\"}\nggplot(x, aes(x = time_value, y = cases)) +\n geom_col(aes(fill = geo_value), show.legend = FALSE) +\n facet_wrap(~geo_value, scales = \"free_y\") +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %Y\") +\n labs(x = \"Date\", y = \"Confirmed cases of Ebola in Sierra Leone\")\n```\n\n::: {.cell-output-display}\n![](epidf_files/figure-html/unnamed-chunk-15-1.svg){fig-align='center' width=90%}\n:::\n:::\n", "supporting": [], "filters": [ "rmarkdown/pagebreak.lua" diff --git a/_freeze/epidf/figure-html/unnamed-chunk-11-1.svg b/_freeze/epidf/figure-html/unnamed-chunk-11-1.svg index dea577d..6619e3f 100644 --- a/_freeze/epidf/figure-html/unnamed-chunk-11-1.svg +++ b/_freeze/epidf/figure-html/unnamed-chunk-11-1.svg @@ -1,516 +1,523 @@ - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - - - + + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/_freeze/epidf/figure-html/unnamed-chunk-13-1.svg b/_freeze/epidf/figure-html/unnamed-chunk-13-1.svg index 86623eb..0ceab63 100644 --- a/_freeze/epidf/figure-html/unnamed-chunk-13-1.svg +++ b/_freeze/epidf/figure-html/unnamed-chunk-13-1.svg @@ -1,880 +1,887 @@ - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - - - + + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - - + + - - + + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + + - - - - + + - - + + + + - - - - + + + + + - - - - - + + - - + + + + - - - - + + + + - - - - + + + - - - + + - - + + + - - - + + - - + + - - + + - - + + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - - + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/_freeze/epidf/figure-html/unnamed-chunk-15-1.svg b/_freeze/epidf/figure-html/unnamed-chunk-15-1.svg index c196909..7193232 100644 --- a/_freeze/epidf/figure-html/unnamed-chunk-15-1.svg +++ b/_freeze/epidf/figure-html/unnamed-chunk-15-1.svg @@ -1,2094 +1,2101 @@ - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - - - - - - - - - - - - - - - - - - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/_freeze/outliers/execute-results/html.json b/_freeze/outliers/execute-results/html.json index bcf951b..f8c6338 100644 --- a/_freeze/outliers/execute-results/html.json +++ b/_freeze/outliers/execute-results/html.json @@ -1,7 +1,7 @@ { - "hash": "2c87fd6b2160a0e6e82f132ff958832d", + "hash": "5bc615a2bdf00f9db22f9d3adb7a86e2", "result": { - "markdown": "# Detect and correct outliers in signals\n\nThis chapter describes functionality for detecting and correcting outliers in\nsignals in the `detect_outlr()` and `correct_outlr()` functions provided in the\n`epiprocess` package. These functions is designed to be modular and extendable,\nso that you can define your own outlier detection and correction routines and\napply them to `epi_df` objects. We'll demonstrate this using state-level daily\nreported COVID-19 case counts from FL and NJ.\n\n\n\n\n::: {.cell layout-align=\"center\" hash='outliers_cache/html/unnamed-chunk-2_a04e38e37e2a0cee4145786b428621e0'}\n\n```{.r .cell-code}\nx <- incidence_num_outlier_example\n```\n:::\n\n::: {.cell layout-align=\"center\" hash='outliers_cache/html/unnamed-chunk-3_eeb1c583efb1d858ceb57a9c288edf2e'}\n\n```{.r .cell-code code-fold=\"true\"}\nggplot(x, aes(x = time_value, y = cases, color = geo_value)) +\n geom_line() +\n scale_color_manual(values = c(3, 6)) +\n geom_hline(yintercept = 0, linetype = 3) +\n facet_wrap(~geo_value, scales = \"free_y\", ncol = 1) +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %Y\") +\n labs(x = \"Date\", y = \"Reported COVID-19 counts\")\n```\n\n::: {.cell-output-display}\n![](outliers_files/figure-html/unnamed-chunk-3-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nThere are multiple outliers in these data that a modeler may want to detect and\ncorrect. We'll discuss those two tasks in turn.\n\n## Outlier detection\n\nThe `detect_outlr()` function allows us to run multiple outlier detection\nmethods on a given signal, and then (optionally) combine the results from those\nmethods. Here, we'll investigate outlier detection results from the following\nmethods.\n\n1. Detection based on a rolling median, using `detect_outlr_rm()`, which \n computes a rolling median on with a default window size of `n` time points \n centered at the time point under consideration, and then computes thresholds \n based on a multiplier times a rolling IQR computed on the residuals. \n2. Detection based on a seasonal-trend decomposition using LOESS (STL), using\n `detect_outlr_stl()`, which is similar to the rolling median method but \n replaces the rolling median with fitted values from STL. \n3. Detection based on an STL decomposition, but without seasonality term, which\n amounts to smoothing using LOESS.\n\nThe outlier detection methods are specified using a `tibble` that is passed to\n`detect_outlr()`, with one row per method, and whose columms specify the\noutlier detection function, any input arguments (only nondefault values need to\nbe supplied), and an abbreviated name for the method used in tracking results.\nAbbreviations \"rm\" and \"stl\" can be used for the built-in detection functions \n`detect_outlr_rm()` and `detect_outlr_stl()`, respectively.\n\n\n::: {.cell layout-align=\"center\" hash='outliers_cache/html/unnamed-chunk-4_d718f9b3ce1f2b62cbbefda0c73956dc'}\n\n```{.r .cell-code}\ndetection_methods <- bind_rows(\n tibble(\n method = \"rm\",\n args = list(list(\n detect_negatives = TRUE,\n detection_multiplier = 2.5\n )),\n abbr = \"rm\"\n ),\n tibble(\n method = \"stl\",\n args = list(list(\n detect_negatives = TRUE,\n detection_multiplier = 2.5,\n seasonal_period = 7\n )),\n abbr = \"stl_seasonal\"\n ),\n tibble(\n method = \"stl\",\n args = list(list(\n detect_negatives = TRUE,\n detection_multiplier = 2.5,\n seasonal_period = NULL\n )),\n abbr = \"stl_nonseasonal\"\n )\n)\n\ndetection_methods\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> # A tibble: 3 × 3\n#> method args abbr \n#> \n#> 1 rm rm \n#> 2 stl stl_seasonal \n#> 3 stl stl_nonseasonal\n```\n:::\n:::\n\n\nAdditionally, we'll form combined lower and upper thresholds, calculated as the\nmedian of the lower and upper thresholds from the methods at each time point.\nNote that using this combined median threshold is equivalent to using a majority\nvote across the base methods to determine whether a value is an outlier.\n\n\n::: {.cell layout-align=\"center\" hash='outliers_cache/html/unnamed-chunk-5_8b0c1909c0789a5ed4ad41dc03bdbcc0'}\n\n```{.r .cell-code}\nx <- x %>%\n group_by(geo_value) %>%\n mutate(\n outlier_info = detect_outlr(\n x = time_value, y = cases,\n methods = detection_methods,\n combiner = \"median\"\n )\n ) %>%\n ungroup() %>%\n unnest(outlier_info)\n\nx\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` object, 730 x 15 with metadata:\n#> * geo_type = state\n#> * time_type = day\n#> * as_of = 2022-05-21 15:17:14\n#> \n#> # A tibble: 730 × 15\n#> geo_value time_value cases rm_lower rm_upper rm_replacement\n#> * \n#> 1 fl 2020-06-01 667 345 2195 667\n#> 2 nj 2020-06-01 486 64.4 926. 486\n#> 3 fl 2020-06-02 617 406. 2169. 617\n#> 4 nj 2020-06-02 658 140. 841. 658\n#> 5 fl 2020-06-03 1317 468. 2142. 1317\n#> 6 nj 2020-06-03 541 216 756 541\n#> # ℹ 724 more rows\n#> # ℹ 9 more variables: stl_seasonal_lower , stl_seasonal_upper , …\n```\n:::\n:::\n\n\nTo visualize the results, we define a convenience function for and call it on \neach state separately (hidden below the fold).\n\n\n::: {.cell layout-align=\"center\" hash='outliers_cache/html/unnamed-chunk-6_b18b51621bf6de1276da27cedba0106c'}\n\n```{.r .cell-code code-fold=\"true\"}\n# Plot outlier detection bands and/or points identified as outliers\nplot_outlr <- function(\n x, signal, method_abbr, bands = TRUE, points = TRUE,\n facet_vars = vars(geo_value), nrow = NULL, ncol = NULL,\n scales = \"fixed\") {\n # Convert outlier detection results to long format\n signal <- rlang::enquo(signal)\n x_long <- x %>%\n pivot_longer(\n cols = starts_with(method_abbr),\n names_to = c(\"method\", \".value\"),\n names_pattern = \"(.+)_(.+)\"\n )\n\n # Start of plot with observed data\n p <- ggplot() +\n geom_line(data = x, mapping = aes(x = time_value, y = !!signal))\n\n # If requested, add bands\n if (bands) {\n p <- p + geom_ribbon(\n data = x_long,\n aes(\n x = time_value, ymin = lower, ymax = upper,\n color = method\n ), fill = NA\n )\n }\n\n # If requested, add points\n if (points) {\n x_detected <- x_long %>% filter((!!signal < lower) | (!!signal > upper))\n p <- p + geom_point(\n data = x_detected,\n aes(\n x = time_value, y = !!signal, color = method,\n shape = method\n )\n )\n }\n\n # If requested, add faceting\n if (!is.null(facet_vars)) {\n p <- p + facet_wrap(facet_vars, nrow = nrow, ncol = ncol, scales = scales)\n }\n\n return(p)\n}\n```\n:::\n\n\nNow we produce plots for each state at a time, faceting by the detection method.\n\n\n::: {.cell layout-align=\"center\" hash='outliers_cache/html/unnamed-chunk-7_5405f07bf3cd51cfe2ca67bccef65fe0'}\n\n```{.r .cell-code code-fold=\"true\"}\nmethod_abbr <- c(detection_methods$abbr, \"combined\")\n\nplot_outlr(x %>% filter(geo_value == \"fl\"), cases, method_abbr,\n facet_vars = vars(method), scales = \"free_y\", ncol = 2\n) +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %Y\") +\n labs(\n x = \"Date\", y = \"Reported COVID-19 counts\", color = \"Method\",\n shape = \"Method\"\n ) +\n scale_color_brewer(palette = \"Set1\") +\n ggtitle(\"Florida\") +\n theme(legend.position = \"bottom\")\n```\n\n::: {.cell-output-display}\n![](outliers_files/figure-html/unnamed-chunk-7-1.svg){fig-align='center' width=90%}\n:::\n\n```{.r .cell-code code-fold=\"true\"}\nplot_outlr(x %>% filter(geo_value == \"nj\"), cases, method_abbr,\n facet_vars = vars(method), scales = \"free_y\", ncol = 2\n) +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %Y\") +\n labs(\n x = \"Date\", y = \"Reported COVID-19 counts\", color = \"Method\",\n shape = \"Method\"\n ) +\n scale_color_brewer(palette = \"Set1\") +\n ggtitle(\"New Jersey\") +\n theme(legend.position = \"bottom\")\n```\n\n::: {.cell-output-display}\n![](outliers_files/figure-html/unnamed-chunk-7-2.svg){fig-align='center' width=90%}\n:::\n:::\n\n\n## Outlier correction\n\nFinally, in order to correct outliers, we can use the posited replacement values\nreturned by each outlier detection method. Below we use the replacement value\nfrom the combined method, which is defined by the median of replacement values \nfrom the base methods at each time point.\n\n\n::: {.cell layout-align=\"center\" hash='outliers_cache/html/unnamed-chunk-8_747bff7bae49f5f0304632fd3b1558a9'}\n\n```{.r .cell-code}\ny <- x %>%\n mutate(cases_corrected = combined_replacement) %>%\n select(geo_value, time_value, cases, cases_corrected)\n\ny %>% filter(cases != cases_corrected)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` object, 22 x 4 with metadata:\n#> * geo_type = state\n#> * time_type = day\n#> * as_of = 2022-05-21 15:17:14\n#> \n#> # A tibble: 22 × 4\n#> geo_value time_value cases cases_corrected\n#> * \n#> 1 fl 2020-07-12 15300 10181 \n#> 2 nj 2020-07-19 -8 320.\n#> 3 nj 2020-08-13 694 404.\n#> 4 nj 2020-08-14 619 397.\n#> 5 nj 2020-08-16 40 366 \n#> 6 nj 2020-08-22 555 360 \n#> # ℹ 16 more rows\n```\n:::\n:::\n\n::: {.cell layout-align=\"center\" hash='outliers_cache/html/unnamed-chunk-9_d88f9e8692dcd1d4f7b70de883a83a80'}\n\n```{.r .cell-code code-fold=\"true\"}\ny %>%\n pivot_longer(starts_with(\"cases\")) %>%\n ggplot(aes(x = time_value)) +\n geom_line(aes(y = value, color = name, linetype = name)) +\n scale_color_brewer(palette = \"Set1\") +\n scale_linetype_manual(values = c(2, 1)) +\n geom_hline(yintercept = 0) +\n facet_wrap(vars(geo_value), scales = \"free_y\", ncol = 1) +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %Y\") +\n labs(x = \"Date\", y = \"Reported COVID-19 counts\") +\n theme(legend.position = \"bottom\", legend.title = element_blank())\n```\n\n::: {.cell-output-display}\n![](outliers_files/figure-html/unnamed-chunk-9-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nMore advanced correction functionality will be coming at some point in the \nfuture. \n\n", + "markdown": "# Detect and correct outliers in signals\n\nThis chapter describes functionality for detecting and correcting outliers in\nsignals in the `detect_outlr()` and `correct_outlr()` functions provided in the\n`epiprocess` package. These functions is designed to be modular and extendable,\nso that you can define your own outlier detection and correction routines and\napply them to `epi_df` objects. We'll demonstrate this using state-level daily\nreported COVID-19 case counts from FL and NJ.\n\n\n\n\n::: {.cell layout-align=\"center\" hash='outliers_cache/html/unnamed-chunk-2_fce8088b6b75a2935c681ab7585edf92'}\n\n```{.r .cell-code}\nincidence_num_outlier_example\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` object, 730 x 3 with metadata:\n#> * geo_type = state\n#> * time_type = day\n#> * as_of = 2024-08-22 19:40:18.860591\n#> \n#> # A tibble: 730 × 3\n#> geo_value time_value cases\n#> * \n#> 1 fl 2020-06-01 667\n#> 2 nj 2020-06-01 486\n#> 3 fl 2020-06-02 617\n#> 4 nj 2020-06-02 658\n#> 5 fl 2020-06-03 1317\n#> 6 nj 2020-06-03 541\n#> # ℹ 724 more rows\n```\n:::\n:::\n\n::: {.cell layout-align=\"center\" hash='outliers_cache/html/unnamed-chunk-3_c1719525e7d4141c953ea1f5919394b6'}\n\n```{.r .cell-code code-fold=\"true\"}\nggplot(incidence_num_outlier_example, aes(x = time_value, y = cases, color = geo_value)) +\n geom_line() +\n scale_color_manual(values = c(3, 6)) +\n geom_hline(yintercept = 0, linetype = 3) +\n facet_wrap(~geo_value, scales = \"free_y\", ncol = 1) +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %Y\") +\n labs(x = \"Date\", y = \"Reported COVID-19 counts\")\n```\n\n::: {.cell-output-display}\n![](outliers_files/figure-html/unnamed-chunk-3-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nThere are multiple outliers in these data that a modeler may want to detect and\ncorrect. We'll discuss those two tasks in turn.\n\n## Outlier detection\n\nThe `detect_outlr()` function allows us to run multiple outlier detection\nmethods on a given signal, and then (optionally) combine the results from those\nmethods. Here, we'll investigate outlier detection results from the following\nmethods.\n\n1. Detection based on a rolling median, using `detect_outlr_rm()`, which\n computes a rolling median on with a default window size of `n` time points\n centered at the time point under consideration, and then computes thresholds\n based on a multiplier times a rolling IQR computed on the residuals.\n2. Detection based on a seasonal-trend decomposition using LOESS (STL), using\n `detect_outlr_stl()`, which is similar to the rolling median method but\n replaces the rolling median with fitted values from STL.\n3. Detection based on an STL decomposition, but without seasonality term, which\n amounts to smoothing using LOESS.\n\nThe outlier detection methods are specified using a `tibble` that is passed to\n`detect_outlr()`, with one row per method, and whose columms specify the\noutlier detection function, any input arguments (only nondefault values need to\nbe supplied), and an abbreviated name for the method used in tracking results.\nAbbreviations \"rm\" and \"stl\" can be used for the built-in detection functions\n`detect_outlr_rm()` and `detect_outlr_stl()`, respectively.\n\n\n::: {.cell layout-align=\"center\" hash='outliers_cache/html/unnamed-chunk-4_04ab87a0055f799565c3334dbeab9ff6'}\n\n```{.r .cell-code}\ndetection_methods <- bind_rows(\n tibble(\n method = \"rm\",\n args = list(list(\n detect_negatives = TRUE,\n detection_multiplier = 2.5\n )),\n abbr = \"rm\"\n ),\n tibble(\n method = \"stl\",\n args = list(list(\n detect_negatives = TRUE,\n detection_multiplier = 2.5,\n seasonal_period = 7\n )),\n abbr = \"stl_seasonal\"\n )\n)\n\ndetection_methods\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> # A tibble: 2 × 3\n#> method args abbr \n#> \n#> 1 rm rm \n#> 2 stl stl_seasonal\n```\n:::\n:::\n\n\nAdditionally, we'll form combined lower and upper thresholds, calculated as the\nmedian of the lower and upper thresholds from the methods at each time point.\nNote that using this combined median threshold is equivalent to using a majority\nvote across the base methods to determine whether a value is an outlier.\n\n\n::: {.cell layout-align=\"center\" hash='outliers_cache/html/unnamed-chunk-5_ad8a07e4e67cba15a5f0d0cff3d14cb7'}\n\n```{.r .cell-code}\nx <- incidence_num_outlier_example %>%\n group_by(geo_value) %>%\n mutate(\n outlier_info = detect_outlr(\n x = time_value, y = cases,\n methods = detection_methods,\n combiner = \"median\"\n )\n ) %>%\n unpack(outlier_info) %>%\n ungroup()\n\nx\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> # A tibble: 730 × 14\n#> geo_value time_value cases rm_geo_value rm_lower rm_upper rm_replacement\n#> \n#> 1 fl 2020-06-01 667 0 345 2195 667\n#> 2 nj 2020-06-01 486 0 64.4 926. 486\n#> 3 fl 2020-06-02 617 0 406. 2169. 617\n#> 4 nj 2020-06-02 658 0 140. 841. 658\n#> 5 fl 2020-06-03 1317 0 468. 2142. 1317\n#> 6 nj 2020-06-03 541 0 216 756 541\n#> # ℹ 724 more rows\n#> # ℹ 7 more variables: stl_seasonal_geo_value , …\n```\n:::\n:::\n\n\nTo visualize the results, we define a convenience function for and call it on\neach state separately (hidden below the fold).\n\n\n::: {.cell layout-align=\"center\" hash='outliers_cache/html/unnamed-chunk-6_a45482fb876b98553f0f4e0046624c6a'}\n\n```{.r .cell-code code-fold=\"true\"}\n# Plot outlier detection bands and/or points identified as outliers\nplot_outlr <- function(\n x, signal, method_abbr, bands = TRUE, points = TRUE,\n facet_vars = vars(geo_value), nrow = NULL, ncol = NULL,\n scales = \"fixed\") {\n # Convert outlier detection results to long format\n signal <- rlang::enquo(signal)\n x_long <- x %>%\n pivot_longer(\n cols = starts_with(method_abbr),\n names_to = c(\"method\", \".value\"),\n names_pattern = \"(.+)_(.+)\"\n )\n\n # Start of plot with observed data\n p <- ggplot() +\n geom_line(data = x, mapping = aes(x = time_value, y = !!signal))\n\n # If requested, add bands\n if (bands) {\n p <- p + geom_ribbon(\n data = x_long,\n aes(\n x = time_value, ymin = lower, ymax = upper,\n color = method\n ), fill = NA\n )\n }\n\n # If requested, add points\n if (points) {\n x_detected <- x_long %>% filter((!!signal < lower) | (!!signal > upper))\n p <- p + geom_point(\n data = x_detected,\n aes(\n x = time_value, y = !!signal, color = method,\n shape = method\n )\n )\n }\n\n # If requested, add faceting\n if (!is.null(facet_vars)) {\n p <- p + facet_wrap(facet_vars, nrow = nrow, ncol = ncol, scales = scales)\n }\n\n return(p)\n}\n```\n:::\n\n\nNow we produce plots for each state at a time, faceting by the detection method.\n\n\n::: {.cell layout-align=\"center\" hash='outliers_cache/html/unnamed-chunk-7_6e64854f9f9a422f95e2e6614fec9c3b'}\n\n```{.r .cell-code code-fold=\"true\"}\nmethod_abbr <- c(detection_methods$abbr, \"combined\")\n\nplot_outlr(x %>% filter(geo_value == \"fl\"), cases, method_abbr,\n facet_vars = vars(method), scales = \"free_y\", ncol = 2\n) +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %Y\") +\n labs(\n x = \"Date\", y = \"Reported COVID-19 counts\", color = \"Method\",\n shape = \"Method\"\n ) +\n scale_color_brewer(palette = \"Set1\") +\n ggtitle(\"Florida\") +\n theme(legend.position = \"bottom\")\n```\n\n::: {.cell-output-display}\n![](outliers_files/figure-html/unnamed-chunk-7-1.svg){fig-align='center' width=90%}\n:::\n\n```{.r .cell-code code-fold=\"true\"}\nplot_outlr(x %>% filter(geo_value == \"nj\"), cases, method_abbr,\n facet_vars = vars(method), scales = \"free_y\", ncol = 2\n) +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %Y\") +\n labs(\n x = \"Date\", y = \"Reported COVID-19 counts\", color = \"Method\",\n shape = \"Method\"\n ) +\n scale_color_brewer(palette = \"Set1\") +\n ggtitle(\"New Jersey\") +\n theme(legend.position = \"bottom\")\n```\n\n::: {.cell-output-display}\n![](outliers_files/figure-html/unnamed-chunk-7-2.svg){fig-align='center' width=90%}\n:::\n:::\n\n\n## Outlier correction\n\nFinally, in order to correct outliers, we can use the posited replacement values\nreturned by each outlier detection method. Below we use the replacement value\nfrom the combined method, which is defined by the median of replacement values\nfrom the base methods at each time point.\n\n\n::: {.cell layout-align=\"center\" hash='outliers_cache/html/unnamed-chunk-8_dbd71109e1d8e19afa0883a5b3087f24'}\n\n```{.r .cell-code}\ny <- x %>%\n mutate(cases_corrected = combined_replacement) %>%\n select(geo_value, time_value, cases, cases_corrected)\n\ny %>% filter(cases != cases_corrected)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> # A tibble: 34 × 4\n#> geo_value time_value cases cases_corrected\n#> \n#> 1 fl 2020-07-12 15300 10181 \n#> 2 nj 2020-07-19 -8 249.\n#> 3 nj 2020-07-31 748 405 \n#> 4 fl 2020-08-12 8109 5803.\n#> 5 nj 2020-08-13 694 381 \n#> 6 nj 2020-08-14 619 381 \n#> # ℹ 28 more rows\n```\n:::\n:::\n\n::: {.cell layout-align=\"center\" hash='outliers_cache/html/unnamed-chunk-9_d88f9e8692dcd1d4f7b70de883a83a80'}\n\n```{.r .cell-code code-fold=\"true\"}\ny %>%\n pivot_longer(starts_with(\"cases\")) %>%\n ggplot(aes(x = time_value)) +\n geom_line(aes(y = value, color = name, linetype = name)) +\n scale_color_brewer(palette = \"Set1\") +\n scale_linetype_manual(values = c(2, 1)) +\n geom_hline(yintercept = 0) +\n facet_wrap(vars(geo_value), scales = \"free_y\", ncol = 1) +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %Y\") +\n labs(x = \"Date\", y = \"Reported COVID-19 counts\") +\n theme(legend.position = \"bottom\", legend.title = element_blank())\n```\n\n::: {.cell-output-display}\n![](outliers_files/figure-html/unnamed-chunk-9-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nMore advanced correction functionality will be coming at some point in the\nfuture.\n\n", "supporting": [], "filters": [ "rmarkdown/pagebreak.lua" diff --git a/_freeze/outliers/figure-html/unnamed-chunk-3-1.svg b/_freeze/outliers/figure-html/unnamed-chunk-3-1.svg index c6617b1..be28060 100644 --- a/_freeze/outliers/figure-html/unnamed-chunk-3-1.svg +++ b/_freeze/outliers/figure-html/unnamed-chunk-3-1.svg @@ -1,606 +1,613 @@ - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - - - - - - - - - - - - - - - - - - - - + + - - - - + + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/_freeze/outliers/figure-html/unnamed-chunk-7-1.svg b/_freeze/outliers/figure-html/unnamed-chunk-7-1.svg index 32ce016..e805ef5 100644 --- a/_freeze/outliers/figure-html/unnamed-chunk-7-1.svg +++ b/_freeze/outliers/figure-html/unnamed-chunk-7-1.svg @@ -1,1201 +1,1407 @@ - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + + + + + + + + + + + - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - - - - - - - - - - - - - - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - - - - - - - - - - - - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + + + + + + + + + + + + + + - - + + - - + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/_freeze/outliers/figure-html/unnamed-chunk-7-2.svg b/_freeze/outliers/figure-html/unnamed-chunk-7-2.svg index d2e3de1..ed9e9a6 100644 --- a/_freeze/outliers/figure-html/unnamed-chunk-7-2.svg +++ b/_freeze/outliers/figure-html/unnamed-chunk-7-2.svg @@ -1,1194 +1,1392 @@ - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + + + + + - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - - - - - - - - - - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - - - - - - - - - - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + + + + + + + + + + + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/_freeze/outliers/figure-html/unnamed-chunk-9-1.svg b/_freeze/outliers/figure-html/unnamed-chunk-9-1.svg index c42bd19..1d4053a 100644 --- a/_freeze/outliers/figure-html/unnamed-chunk-9-1.svg +++ b/_freeze/outliers/figure-html/unnamed-chunk-9-1.svg @@ -1,611 +1,618 @@ - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - - - - - - - - - - - - - - - - - - - - + + - - - - + + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + - - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/_freeze/slide/execute-results/html.json b/_freeze/slide/execute-results/html.json index 70b9578..cef2d48 100644 --- a/_freeze/slide/execute-results/html.json +++ b/_freeze/slide/execute-results/html.json @@ -1,7 +1,7 @@ { - "hash": "74d3db577831e2a6d86c6a6c2939a7d3", + "hash": "1c6c5e6e0f48d1e2eba243ce492d70c0", "result": { - "markdown": "# Sliding computations {#sec-sliding}\n\nA central tool in the `{epiprocess}` package is `epi_slide()`, which is based\non the powerful functionality provided in the \n[`slider`](https://cran.r-project.org/web/packages/slider) package. In\n`{epiprocess}`, to \"slide\" means to apply a computation---represented as a\nfunction or formula---over a sliding/rolling data window. Suitable\ngroupings can always be achieved by a preliminary call to `group_by()`.\n\nBy default, the meaning of one time step is inferred from the `time_value`\ncolumn of the `epi_df` object under consideration, based on the way this column\nunderstands addition and subtraction. For example, if the time values are coded\nas `Date` objects, then one time step is one day, since \n`as.Date(\"2022-01-01\") + 1` equals `as.Date(\"2022-01-02\")`. Alternatively, the time step can be specified\nmanually in the call to `epi_slide()`; you can read the documentation for more\ndetails. Furthermore, the alignment of the running window used in `epi_slide()`\ncan be \"right\", \"center\", or \"left\"; the default is \"right\", and is what we use\nin this vignette.\n\nAs in getting started guide, we'll fetch daily reported COVID-19 cases from CA,\nFL, NY, and TX (note: here we're using new, not cumulative cases) using the\n[`epidatr`](https://github.com/cmu-delphi/epidatr) package,\nand then convert this to `epi_df` format.\n\n\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-2_feb3ab09af2a656b7552aabd4fb92768'}\n\n```{.r .cell-code}\nlibrary(epidatr)\nlibrary(epiprocess)\nlibrary(epipredict)\n```\n:::\n\n\nThe example data we'll use is part of the package and has 2,684 rows and 3 columns.\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-3_de5ebab547ecc5d1e32e4f6b65aac60b'}\n\n```{.r .cell-code}\ndata(jhu_csse_daily_subset)\nx <- jhu_csse_daily_subset %>%\n select(geo_value, time_value, cases) %>%\n arrange(geo_value, time_value) %>%\n as_epi_df()\n```\n:::\n\n\n\n## Slide with a formula\n\nWe first demonstrate how to apply a 7-day trailing average to the daily cases in\norder to smooth the signal, by passing in a formula for the first argument of\n`epi_slide()`. To do this computation per state, we first call `group_by()`.\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-4_13b28969f566d77bd0c5e1e88a551491'}\n\n```{.r .cell-code}\nx %>%\n group_by(geo_value) %>%\n epi_slide(~ mean(.x$cases), before = 6) %>%\n ungroup()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` object, 4,026 x 4 with metadata:\n#> * geo_type = state\n#> * time_type = day\n#> * as_of = 2022-05-23 13:17:07\n#> \n#> # A tibble: 4,026 × 4\n#> geo_value time_value cases slide_value\n#> * \n#> 1 ca 2020-03-01 6 6 \n#> 2 ca 2020-03-02 4 5 \n#> 3 ca 2020-03-03 6 5.33\n#> 4 ca 2020-03-04 11 6.75\n#> 5 ca 2020-03-05 10 7.4 \n#> 6 ca 2020-03-06 18 9.17\n#> # ℹ 4,020 more rows\n```\n:::\n:::\n\n\nThe formula specified has access to all non-grouping columns present in the\noriginal `epi_df` object (and must refer to them with the prefix `.x$`). As we\ncan see, the function `epi_slide()` returns an `epi_df` object with a new column\nappended that contains the results (from sliding), named `slide_value` as the\ndefault. We can of course change this post hoc, or we can instead specify a new\nname up front using the `new_col_name` argument:\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-5_cf02a2675d6bbdf3eb316e16406a82e5'}\n\n```{.r .cell-code}\nx %>%\n group_by(geo_value) %>%\n epi_slide(~ mean(.x$cases), before = 6, new_col_name = \"cases_7dav\") %>%\n ungroup()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` object, 4,026 x 4 with metadata:\n#> * geo_type = state\n#> * time_type = day\n#> * as_of = 2022-05-23 13:17:07\n#> \n#> # A tibble: 4,026 × 4\n#> geo_value time_value cases cases_7dav\n#> * \n#> 1 ca 2020-03-01 6 6 \n#> 2 ca 2020-03-02 4 5 \n#> 3 ca 2020-03-03 6 5.33\n#> 4 ca 2020-03-04 11 6.75\n#> 5 ca 2020-03-05 10 7.4 \n#> 6 ca 2020-03-06 18 9.17\n#> # ℹ 4,020 more rows\n```\n:::\n:::\n\n\nSome other information is available in additional variables:\n\n* `.group_key` is a one-row tibble containing the values of the grouping\n variables for the associated group\n* `.ref_time_value` is the reference time value the time window was based on\n\nLike in `group_modify()`, there are alternative names for these variables as\nwell: `.` can be used instead of `.x`, `.y` instead of `.group_key`, and `.z`\ninstead of `.ref_time_value`.\n\n## Slide with a function \n\nWe can also pass a function for the first argument in `epi_slide()`. In this\ncase, the passed function must accept the following arguments:\n\nIn this case, the passed function `f` must accept the following arguments: a\ndata frame with the same column names as the original object, minus any grouping\nvariables, containing the time window data for one group-`ref_time_value`\ncombination; followed by a one-row tibble containing the values of the grouping\nvariables for the associated group; followed by the associated `ref_time_value`.\nIt can accept additional arguments; `epi_slide()` will forward any `...` args it\nreceives to `f`.\n\nRecreating the last example of a 7-day trailing average:\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-6_63c4174606b3c7249ee9ddd5f3171d78'}\n\n```{.r .cell-code}\nx %>%\n group_by(geo_value) %>%\n epi_slide(function(x, gk, rtv) mean(x$cases),\n before = 6, new_col_name = \"cases_7dav\"\n ) %>%\n ungroup()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` object, 4,026 x 4 with metadata:\n#> * geo_type = state\n#> * time_type = day\n#> * as_of = 2022-05-23 13:17:07\n#> \n#> # A tibble: 4,026 × 4\n#> geo_value time_value cases cases_7dav\n#> * \n#> 1 ca 2020-03-01 6 6 \n#> 2 ca 2020-03-02 4 5 \n#> 3 ca 2020-03-03 6 5.33\n#> 4 ca 2020-03-04 11 6.75\n#> 5 ca 2020-03-05 10 7.4 \n#> 6 ca 2020-03-06 18 9.17\n#> # ℹ 4,020 more rows\n```\n:::\n:::\n\n\n## Slide the tidy way\n\nPerhaps the most convenient way to setup a computation in `epi_slide()` is to\npass in an expression for tidy evaluation. In this case, we can simply define\nthe name of the new column directly as part of the expression, setting it equal\nto a computation in which we can access any columns of `x` by name, just as we\nwould in a call to `dplyr::mutate()`, or any of the `dplyr` verbs. For example:\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-7_86937bdc4f9b436be5721bf89cb48542'}\n\n```{.r .cell-code}\nx <- x %>%\n group_by(geo_value) %>%\n epi_slide(cases_7dav = mean(cases), before = 6) %>%\n ungroup()\n```\n:::\n\nIn addition to referring to individual columns by name, you can refer to the\ntime window data as an `epi_df` or `tibble` using `.x`. Similarly, the other arguments of the function format are available through the magic names `.group_key` and `.ref_time_value`, and the tidyverse \"pronouns\" `.data` and `.env` can also be used.\n\nAs a simple sanity check, we visualize the 7-day trailing averages computed on\ntop of the original counts.\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-8_4be7d7ffd8b84de93dbeff6c68bf1113'}\n\n```{.r .cell-code code-fold=\"true\"}\ncols <- RColorBrewer::brewer.pal(7, \"Set1\")[-6]\nggplot(x, aes(x = time_value)) +\n geom_col(aes(y = cases, fill = geo_value),\n alpha = 0.5,\n show.legend = FALSE\n ) +\n scale_y_continuous(expand = expansion(c(0, 0.05))) +\n geom_line(aes(y = cases_7dav, col = geo_value), show.legend = FALSE) +\n scale_fill_manual(values = cols) +\n scale_color_manual(values = cols) +\n facet_wrap(~geo_value, scales = \"free_y\") +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %Y\") +\n labs(x = \"Date\", y = \"Reported COVID-19 cases\")\n```\n\n::: {.cell-output-display}\n![](slide_files/figure-html/unnamed-chunk-8-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nAs we can see from the center top panel, it looks like Florida moved to weekly \nreporting of COVID-19 cases in summer of 2021, while California occasionally reported negative cases counts!\n\n## Running a local forecaster {#sec-local-forecaster}\n\nAs a more complex example, we preview some of the functionality of `{epipredict}` described in future chapters, and use a forecaster based on a\nlocal (in time)\nautoregression or \"AR model\". AR models can be fit in numerous ways \n(using base R\nfunctions and various packages), but here we the `arx_forecaster()`, implemented in `{epipredict}` both\nprovides a more advanced example of sliding a function over an `epi_df` object,\nand it allows us to be a bit more flexible in defining a *probabilistic*\nforecaster: one that outputs not just a point prediction, but a notion of\nuncertainty around this. In particular, our forecaster will output a point\nprediction along with an 90\\% uncertainty band, represented by a predictive\nquantiles at the 5\\% and 95\\% levels (lower and upper endpoints of the\nuncertainty band).\n\nThe function signature below, is a probabilistic AR forecaster. The\n`lags` argument indicates which lags to use in the model, and `ahead` indicates\nhow far ahead in the future to make forecasts (both are encoded in terms of the\nunits of the `time_value` column; so, days, in the working `epi_df` being\nconsidered in this vignette).\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-9_079e5420d9e5d2f5501eb74de8b45cb6'}\n\n```{.r .cell-code}\narx_forecaster <- function(\n epi_df, \n outcome, # the outcome column name in `epi_df`\n predictors, # a character vector, containing 1 or more predictors in `epi_df`\n trainer = quantile_reg(), \n args_list = arx_args_list(\n lags = c(0, 7, 14), \n ahead = 7,\n quantile_levels = c(0.05, 0.95)\n )\n)\n```\n:::\n\n\nWe go ahead and slide this AR forecaster over the working `epi_df` of COVID-19 \ncases. Note that we actually model the `cases_7dav` column, to operate on the \nscale of smoothed COVID-19 cases. This is clearly equivalent, up to a constant,\nto modeling weekly sums of COVID-19 cases.\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-10_9e2bba94dc13dd185ad30b365f0a4eb4'}\n\n```{.r .cell-code}\nfc_time_values <- seq(\n from = as.Date(\"2020-06-01\"),\n to = as.Date(\"2021-12-01\"),\n by = \"1 months\"\n)\n\nfcasts <- epi_slide(\n x,\n ~ arx_forecaster(\n epi_data = .x,\n outcome = \"cases_7dav\",\n predictors = \"cases_7dav\",\n trainer = quantile_reg(),\n args_list = arx_args_list(ahead = 7)\n )$predictions,\n before = 119,\n ref_time_values = fc_time_values,\n new_col_name = \"fc\"\n)\n\n# grab just the relevant columns, and make them easier to plot\nfcasts <- fcasts %>%\n select(\n geo_value, time_value, cases_7dav,\n contains(\"_distn\"), fc_target_date\n ) %>%\n pivot_quantiles_wider(contains(\"_distn\"))\nfcasts\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> # A tibble: 114 × 7\n#> geo_value time_value cases_7dav fc_target_date `0.05` `0.5` `0.95`\n#> \n#> 1 ca 2020-06-01 2655. 2020-06-08 1940. 2694. 3840.\n#> 2 fl 2020-06-01 726. 2020-06-08 558. 747. 1290.\n#> 3 ga 2020-06-01 643. 2020-06-08 520. 638. 1083.\n#> 4 ny 2020-06-01 1278. 2020-06-08 821. 1044. 1864.\n#> 5 pa 2020-06-01 603. 2020-06-08 450. 570. 1080.\n#> 6 tx 2020-06-01 1002. 2020-06-08 716. 1134. 1950.\n#> # ℹ 108 more rows\n```\n:::\n:::\n\n\nNote that here we have used an argument `ref_time_values` to perform the\nsliding computation (here, compute a forecast) at a specific subset of reference\ntime values. We get out 4 new columns: `fc_target_date`, `0.05`, `0.5`, `0.95`\nthat correspond to the date the forecast is for (rather than the date it was made on, the point forecast, and the lower and upper endpoints of the\n95\\% prediction band.[^1]\n\n[^1]: If instead we had set `as_list_col = TRUE`\nin the call to `epi_slide()`, then we would have gotten a list column `fc`, \nwhere each element of `fc` contains these results.\n\nTo finish off, we plot the forecasts at some times (spaced out by a few months)\nover the last year, at multiple horizons: 7, 14, 21, and 28 days ahead. To do \nso, we encapsulate the process of generating forecasts into a simple function, \nso that we can call it a few times.\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-11_d30fdf1ff99b2e470d215f81656d5b01'}\n\n```{.r .cell-code}\nk_week_ahead <- function(ahead = 7) {\n epi_slide(\n x,\n ~ arx_forecaster(\n epi_data = .x,\n outcome = \"cases_7dav\",\n predictors = \"cases_7dav\",\n trainer = quantile_reg(),\n args_list = arx_args_list(ahead = ahead)\n )$predictions,\n before = 119,\n ref_time_values = fc_time_values,\n new_col_name = \"fc\"\n ) %>%\n select(\n geo_value, time_value, cases_7dav, contains(\"_distn\"),\n fc_target_date\n ) %>%\n pivot_quantiles_wider(contains(\"_distn\"))\n}\n\n# First generate the forecasts, and bind them together\nz <- map(c(7, 14, 21, 28), k_week_ahead) %>% list_rbind()\n```\n:::\n\n\nThen we can plot the on top of the observed data\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-12_f17b1e21df0fa2849ed240533f7e168f'}\n\n```{.r .cell-code code-fold=\"true\"}\nggplot(z) +\n geom_line(data = x, aes(x = time_value, y = cases_7dav), color = \"gray50\") +\n geom_ribbon(aes(\n x = fc_target_date, ymin = `0.05`, ymax = `0.95`,\n group = time_value, fill = geo_value\n ), alpha = 0.4) +\n geom_line(aes(x = fc_target_date, y = `0.5`, group = time_value)) +\n geom_point(aes(x = fc_target_date, y = `0.5`, group = time_value), size = 0.5) +\n # geom_vline(data = tibble(x = fc_time_values), aes(xintercept = x),\n # linetype = 2, alpha = 0.5) +\n facet_wrap(vars(geo_value), scales = \"free_y\", nrow = 3) +\n scale_y_continuous(expand = expansion(c(0, 0.05))) +\n scale_x_date(minor_breaks = \"1 months\", date_labels = \"%b %Y\") +\n scale_fill_viridis_d(guide = \"none\", end = .9) +\n labs(x = \"Date\", y = \"Reported COVID-19 cases\")\n```\n\n::: {.cell-output-display}\n![](slide_files/figure-html/unnamed-chunk-12-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nTwo points are worth making. First, the AR model's performance here is pretty\nspotty. At various points in time, we can see that its forecasts are volatile\n(its point predictions are all over the place), or overconfident (its bands are\ntoo narrow), or both at the same time. This is only meant as a simple demo and\nnot entirely unexpected given the way the AR model is set up. The\n[`epipredict`](https://cmu-delphi.github.io/epipredict) package, \noffers a suite of predictive modeling tools \nthat improve on many of the shortcomings of the above simple AR model (simply \nusing all states for training rather than 6 is a huge improvement).\n\nSecond, the AR forecaster here is using finalized data, meaning, it uses the\nlatest versions of signal values (reported COVID-19 cases) available, for both\ntraining models and making predictions historically. However, this is not\nreflective of the provisional nature of the data that it must cope with in a\ntrue forecast task. Training and making predictions on finalized data can lead\nto an overly optimistic sense of accuracy; see, for example, \n[@McDonaldBien2021] and references\ntherein. Fortunately, the `epiprocess` package provides a data structure called\n`epi_archive` that can be used to store all data revisions, and furthermore, an\n`epi_archive` object knows how to slide computations in the correct\nversion-aware sense (for the computation at each reference time $t$, it uses\nonly data that would have been available as of $t$). We will revisit this \nexample in the [archive \nvignette](https://cmu-delphi.github.io/epiprocess/articles/archive.html).\n", + "markdown": "# Sliding computations {#sec-sliding}\n\nA central tool in the `{epiprocess}` package is `epi_slide()`, which is based on\nthe powerful functionality provided in the\n[`slider`](https://cran.r-project.org/web/packages/slider) package. In\n`epiprocess`, to \"slide\" means to apply a computation---represented as a\nfunction or formula---over a sliding/rolling data window. The function always\napplies the slide inside each group and the grouping is assumed to be across all\ngroup keys of the `epi_df` (this is the grouping used by default if you do not\ngroup the `epi_df` with a `group_by()`).\n\nBy default, the `.window_size` units depend on the `time_type` of the `epi_df`,\nwhich is determined from the types in the `time_value` column of the `epi_df`.\nSee the \"Details\" in `epi_slide()` for more.\n\nAs in getting started guide, we'll fetch daily reported COVID-19 cases from CA,\nFL, NY, and TX (note: here we're using new, not cumulative cases) using the\n[`epidatr`](https://github.com/cmu-delphi/epidatr) package, and then convert\nthis to `epi_df` format.\n\n\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-2_feb3ab09af2a656b7552aabd4fb92768'}\n\n```{.r .cell-code}\nlibrary(epidatr)\nlibrary(epiprocess)\nlibrary(epipredict)\n```\n:::\n\n\nThe example data we'll use is part of the package and has 2,684 rows and 3 columns.\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-3_b89fbf7446d0088fa754a437bf6f03e8'}\n\n```{.r .cell-code}\ndata(jhu_csse_daily_subset)\nedf <- jhu_csse_daily_subset %>%\n select(geo_value, time_value, cases) %>%\n arrange(geo_value, time_value) %>%\n as_epi_df()\n```\n:::\n\n\n## Optimized rolling mean and sums\n\nFor the two most common sliding operations, we offer two optimized versions:\n`epi_slide_mean()` and `epi_slide_sum()`. This example gets the 7-day trailing\naverage of the daily cases. Note that the name of the column(s) that we want to\naverage is specified as the first argument of `epi_slide_mean()`.\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-4_a2804c3f9814a21c5786987a3f2838e3'}\n\n```{.r .cell-code}\nedf %>%\n group_by(geo_value) %>%\n epi_slide_mean(\"cases\", .window_size = 7, na.rm = TRUE) %>%\n ungroup() %>%\n head(10)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` object, 10 x 4 with metadata:\n#> * geo_type = state\n#> * time_type = day\n#> * as_of = 2024-08-22 19:40:48.296938\n#> \n#> # A tibble: 10 × 4\n#> geo_value time_value cases slide_value_cases\n#> * \n#> 1 ca 2020-03-01 6 6 \n#> 2 ca 2020-03-02 4 5 \n#> 3 ca 2020-03-03 6 5.33\n#> 4 ca 2020-03-04 11 6.75\n#> 5 ca 2020-03-05 10 7.4 \n#> 6 ca 2020-03-06 18 9.17\n#> # ℹ 4 more rows\n```\n:::\n:::\n\n\nNote that we passed `na.rm = TRUE` to `data.table::frollmean()` via `...` to\n`epi_slide_mean`.\n\nThe following computes the 7-day trailing sum of daily cases (and passed `na.rm`\nto `data.table::frollsum()` similarly):\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-5_5f3bbe76c94ac276d887fce7ad154af5'}\n\n```{.r .cell-code}\nedf %>%\n group_by(geo_value) %>%\n epi_slide_sum(\"cases\", .window_size = 7, na.rm = TRUE) %>%\n ungroup() %>%\n head(10)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` object, 10 x 4 with metadata:\n#> * geo_type = state\n#> * time_type = day\n#> * as_of = 2024-08-22 19:40:48.296938\n#> \n#> # A tibble: 10 × 4\n#> geo_value time_value cases slide_value_cases\n#> * \n#> 1 ca 2020-03-01 6 6\n#> 2 ca 2020-03-02 4 10\n#> 3 ca 2020-03-03 6 16\n#> 4 ca 2020-03-04 11 27\n#> 5 ca 2020-03-05 10 37\n#> 6 ca 2020-03-06 18 55\n#> # ℹ 4 more rows\n```\n:::\n:::\n\n\n## General sliding with a formula\n\nThe previous computations can also be performed using `epi_slide()`, which can\nbe used for more general sliding computations (but is much slower for the\nspecific cases of mean and sum).\n\nThe same 7-day trailing average of daily cases can be computed by passing in a\nformula for the first argument of `epi_slide()`:\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-6_a5a7685ae8752595826c00ed5173f752'}\n\n```{.r .cell-code}\nedf %>%\n group_by(geo_value) %>%\n epi_slide(~ mean(.x$cases, na.rm = TRUE), .window_size = 7) %>%\n ungroup() %>%\n head(10)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` object, 10 x 4 with metadata:\n#> * geo_type = state\n#> * time_type = day\n#> * as_of = 2024-08-22 19:40:48.296938\n#> \n#> # A tibble: 10 × 4\n#> geo_value time_value cases slide_value\n#> * \n#> 1 ca 2020-03-01 6 6 \n#> 2 ca 2020-03-02 4 5 \n#> 3 ca 2020-03-03 6 5.33\n#> 4 ca 2020-03-04 11 6.75\n#> 5 ca 2020-03-05 10 7.4 \n#> 6 ca 2020-03-06 18 9.17\n#> # ℹ 4 more rows\n```\n:::\n:::\n\n\nIf your formula returns a data.frame, then the columns of the data.frame\nwill be unpacked into the resulting `epi_df`. For example, the following\ncomputes the 7-day trailing average of daily cases and the 7-day trailing sum of\ndaily cases:\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-7_3f4404872d142bf1fb425e712231a143'}\n\n```{.r .cell-code}\nedf %>%\n group_by(geo_value) %>%\n epi_slide(\n ~ data.frame(cases_mean = mean(.x$cases, na.rm = TRUE), cases_sum = sum(.x$cases, na.rm = TRUE)),\n .window_size = 7\n ) %>%\n ungroup() %>%\n head(10)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` object, 10 x 5 with metadata:\n#> * geo_type = state\n#> * time_type = day\n#> * as_of = 2024-08-22 19:40:48.296938\n#> \n#> # A tibble: 10 × 5\n#> geo_value time_value cases cases_mean cases_sum\n#> * \n#> 1 ca 2020-03-01 6 6 6\n#> 2 ca 2020-03-02 4 5 10\n#> 3 ca 2020-03-03 6 5.33 16\n#> 4 ca 2020-03-04 11 6.75 27\n#> 5 ca 2020-03-05 10 7.4 37\n#> 6 ca 2020-03-06 18 9.17 55\n#> # ℹ 4 more rows\n```\n:::\n:::\n\n\nNote that this formula has access to all non-grouping columns present in the\noriginal `epi_df` object and must refer to them with the prefix `.x$...`. As we\ncan see, the function `epi_slide()` returns an `epi_df` object with a new column\nappended that contains the results (from sliding), named `slide_value` as the\ndefault.\n\nSome other information is available in additional variables:\n\n* `.group_key` is a one-row tibble containing the values of the grouping\n variables for the associated group\n* `.ref_time_value` is the reference time value the time window was based on\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-8_6fef540b7403e96758a8385c45ac0b68'}\n\n```{.r .cell-code}\n# Returning geo_value in the formula\nedf %>%\n group_by(geo_value) %>%\n epi_slide(~ .x$geo_value[[1]], .window_size = 7) %>%\n ungroup() %>%\n head(10)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` object, 10 x 4 with metadata:\n#> * geo_type = state\n#> * time_type = day\n#> * as_of = 2024-08-22 19:40:48.296938\n#> \n#> # A tibble: 10 × 4\n#> geo_value time_value cases slide_value\n#> * \n#> 1 ca 2020-03-01 6 ca \n#> 2 ca 2020-03-02 4 ca \n#> 3 ca 2020-03-03 6 ca \n#> 4 ca 2020-03-04 11 ca \n#> 5 ca 2020-03-05 10 ca \n#> 6 ca 2020-03-06 18 ca \n#> # ℹ 4 more rows\n```\n:::\n\n```{.r .cell-code}\n# Returning time_value in the formula\nedf %>%\n group_by(geo_value) %>%\n epi_slide(~ .x$time_value[[1]], .window_size = 7) %>%\n ungroup() %>%\n head(10)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` object, 10 x 4 with metadata:\n#> * geo_type = state\n#> * time_type = day\n#> * as_of = 2024-08-22 19:40:48.296938\n#> \n#> # A tibble: 10 × 4\n#> geo_value time_value cases slide_value\n#> * \n#> 1 ca 2020-03-01 6 2020-02-24 \n#> 2 ca 2020-03-02 4 2020-02-25 \n#> 3 ca 2020-03-03 6 2020-02-26 \n#> 4 ca 2020-03-04 11 2020-02-27 \n#> 5 ca 2020-03-05 10 2020-02-28 \n#> 6 ca 2020-03-06 18 2020-02-29 \n#> # ℹ 4 more rows\n```\n:::\n:::\n\n\nWhile the computations above do not look very useful, these can be used as\nbuilding blocks for computations that do something different depending on the\ngeo_value or ref_time_value.\n\n## Slide the tidy way\n\nPerhaps the most convenient way to setup a computation in `epi_slide()` is to\npass in an expression for tidy evaluation. In this case, we can simply define\nthe name of the new column directly as part of the expression, setting it equal\nto a computation in which we can access any columns of `.x` by name, just as we\nwould in a call to `dplyr::mutate()`, or any of the `dplyr` verbs. For example:\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-9_099c56974dc1b1b9333fc886ce5e0236'}\n\n```{.r .cell-code}\nslide_output <- edf %>%\n group_by(geo_value) %>%\n epi_slide(cases_7dav = mean(cases, na.rm = TRUE), .window_size = 7) %>%\n ungroup() %>%\n head(10)\n```\n:::\n\n\nIn addition to referring to individual columns by name, you can refer to\n`epi_df` time window as `.x` (`.group_key` and `.ref_time_value` are still\navailable). Also, the tidyverse \"pronouns\" `.data` and `.env` can also be used\nif you need distinguish between the data and environment.\n\nAs a simple sanity check, we visualize the 7-day trailing averages computed on\ntop of the original counts:\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-10_4334cefaa041ba7576fa06309b28992d'}\n\n```{.r .cell-code}\nlibrary(ggplot2)\ntheme_set(theme_bw())\n\nggplot(slide_output, aes(x = time_value)) +\n geom_col(aes(y = cases, fill = geo_value), alpha = 0.5, show.legend = FALSE) +\n geom_line(aes(y = cases_7dav, col = geo_value), show.legend = FALSE) +\n facet_wrap(~geo_value, scales = \"free_y\") +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %y\") +\n labs(x = \"Date\", y = \"Reported COVID-19 cases\")\n```\n\n::: {.cell-output-display}\n![](slide_files/figure-html/unnamed-chunk-10-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nAs we can see from the top right panel, it looks like Texas moved to weekly\nreporting of COVID-19 cases in summer of 2021.\n\n## Slide with a function\n\nWe can also pass a function to the second argument in `epi_slide()`. In this\ncase, the passed function `.f` must have the form `function(x, g, t, ...)`,\nwhere\n\n- \"x\" is an epi_df with the same column names as the archive's `DT`, minus\n the `version` column\n- \"g\" is a one-row tibble containing the values of the grouping variables\nfor the associated group\n- \"t\" is the ref_time_value for the current window\n- \"...\" are additional arguments\n\nRecreating the last example of a 7-day trailing average:\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-11_12a17a34c13a7e8307744518a41d53a4'}\n\n```{.r .cell-code}\nx <- edf %>%\n group_by(geo_value) %>%\n epi_slide(function(x, g, t) mean(x$cases, na.rm = TRUE), .window_size = 7, .new_col_name = \"cases_7dav\") %>%\n ungroup()\nx %>%\n head(10)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> An `epi_df` object, 10 x 4 with metadata:\n#> * geo_type = state\n#> * time_type = day\n#> * as_of = 2024-08-22 19:40:48.296938\n#> \n#> # A tibble: 10 × 4\n#> geo_value time_value cases cases_7dav\n#> * \n#> 1 ca 2020-03-01 6 6 \n#> 2 ca 2020-03-02 4 5 \n#> 3 ca 2020-03-03 6 5.33\n#> 4 ca 2020-03-04 11 6.75\n#> 5 ca 2020-03-05 10 7.4 \n#> 6 ca 2020-03-06 18 9.17\n#> # ℹ 4 more rows\n```\n:::\n:::\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-12_34e93e3b2dc10094fb15530dac8e7f0f'}\n\n```{.r .cell-code code-fold=\"true\"}\ncols <- RColorBrewer::brewer.pal(7, \"Set1\")[-6]\nggplot(x, aes(x = time_value)) +\n geom_col(aes(y = cases, fill = geo_value),\n alpha = 0.5,\n show.legend = FALSE\n ) +\n scale_y_continuous(expand = expansion(c(0, 0.05))) +\n geom_line(aes(y = cases_7dav, col = geo_value), show.legend = FALSE) +\n scale_fill_manual(values = cols) +\n scale_color_manual(values = cols) +\n facet_wrap(~geo_value, scales = \"free_y\") +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %Y\") +\n labs(x = \"Date\", y = \"Reported COVID-19 cases\")\n```\n\n::: {.cell-output-display}\n![](slide_files/figure-html/unnamed-chunk-12-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nAs we can see from the center top panel, it looks like Florida moved to weekly\nreporting of COVID-19 cases in summer of 2021, while California occasionally\nreported negative cases counts!\n\n## Running a local forecaster {#sec-local-forecaster}\n\nAs a more complex example, we preview some of the functionality of\n`{epipredict}` described in future chapters, and use a forecaster based on a\nlocal (in time) autoregression or \"AR model\". AR models can be fit in numerous\nways (using base R functions and various packages), but here we the\n`arx_forecaster()`, implemented in `{epipredict}` both provides a more advanced\nexample of sliding a function over an `epi_df` object, and it allows us to be a\nbit more flexible in defining a *probabilistic* forecaster: one that outputs not\njust a point prediction, but a notion of uncertainty around this. In particular,\nour forecaster will output a point prediction along with an 90\\% uncertainty\nband, represented by a predictive quantiles at the 5\\% and 95\\% levels (lower\nand upper endpoints of the uncertainty band).\n\nThe function signature below, is a probabilistic AR forecaster. The\n`lags` argument indicates which lags to use in the model, and `ahead` indicates\nhow far ahead in the future to make forecasts (both are encoded in terms of the\nunits of the `time_value` column; so, days, in the working `epi_df` being\nconsidered in this vignette).\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-13_190292c89295d7aec9312bafe5b82171'}\n\n```{.r .cell-code}\narx_forecaster <- function(\n epi_df,\n outcome, # the outcome column name in `epi_df`\n predictors, # a character vector, containing 1 or more predictors in `epi_df`\n trainer = quantile_reg(),\n args_list = arx_args_list(\n lags = c(0, 7, 14),\n ahead = 7,\n quantile_levels = c(0.05, 0.95)\n )) {\n ...\n}\n```\n:::\n\n\nWe go ahead and slide this AR forecaster over the working `epi_df` of COVID-19\ncases. Note that we actually model the `cases_7dav` column, to operate on the\nscale of smoothed COVID-19 cases. This is clearly equivalent, up to a constant,\nto modeling weekly sums of COVID-19 cases.\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-14_122fc4479578275735303f9c21ce9c5e'}\n\n```{.r .cell-code}\nfc_time_values <- seq(\n from = as.Date(\"2020-06-01\"),\n to = as.Date(\"2021-12-01\"),\n by = \"1 months\"\n)\n\nfcasts <- epi_slide(\n x,\n .f = ~ arx_forecaster(\n epi_data = .x,\n outcome = \"cases_7dav\",\n predictors = \"cases_7dav\",\n trainer = quantile_reg(),\n args_list = arx_args_list(ahead = 7)\n )$predictions,\n .window_size = 120,\n .ref_time_values = fc_time_values\n)\n\n# grab just the relevant columns, and make them easier to plot\nfcasts <- fcasts %>%\n select(geo_value, time_value, cases_7dav, .pred, .pred_distn) %>%\n pivot_quantiles_wider(\".pred_distn\")\nfcasts\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n#> # A tibble: 114 × 7\n#> # Groups: geo_value [6]\n#> geo_value time_value cases_7dav .pred `0.05` `0.5` `0.95`\n#> \n#> 1 ca 2020-06-01 2694 2332. 2266. 2332. 2957.\n#> 2 ca 2020-07-01 6722 7979. 7081. 7979. 8999.\n#> 3 ca 2020-08-01 8284. 7339. 6745. 7339. 7630.\n#> 4 ca 2020-09-01 4707. 3291. 3264. 3291. 7571.\n#> 5 ca 2020-10-01 3360. 4270. 3213. 4270. 5714.\n#> 6 ca 2020-11-01 4441. 4172. 4028. 4172. 5491.\n#> # ℹ 108 more rows\n```\n:::\n:::\n\n\nNote that we have used the argument `.ref_time_values` to compute the forecast\nat a specific subset of reference time values. We get out 4 new columns:\n`fc_target_date`, `0.05`, `0.5`, `0.95` that correspond to the date the forecast\nis for (rather than the date it was made on), the point forecast, and the lower\nand upper endpoints of the 95\\% prediction band.[^1]\n\n[^1]: If instead we had set `as_list_col = TRUE` in the call to `epi_slide()`,\nthen we would have gotten a list column `fc`, where each element of `fc`\ncontains these results.\n\nTo finish off, we plot the forecasts at some times (spaced out by a few months)\nover the last year, at multiple horizons: 7, 14, 21, and 28 days ahead. To do\nso, we encapsulate the process of generating forecasts into a simple function,\nso that we can call it a few times.\n\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-15_70aa61239ab94d7efb5385e12b4cccb7'}\n\n```{.r .cell-code}\nk_week_ahead <- function(ahead = 7) {\n epi_slide(\n x,\n ~ arx_forecaster(\n epi_data = .x,\n outcome = \"cases_7dav\",\n predictors = \"cases_7dav\",\n trainer = quantile_reg(),\n args_list = arx_args_list(ahead = ahead)\n )$predictions,\n .window_size = 120,\n .ref_time_values = fc_time_values\n ) %>%\n select(geo_value, time_value, cases_7dav, .pred, .pred_distn) %>%\n pivot_quantiles_wider(\".pred_distn\")\n}\n\n# First generate the forecasts, and bind them together\nz <- map(c(7, 14, 21, 28), k_week_ahead) %>% list_rbind()\n```\n:::\n\n::: {.cell layout-align=\"center\" hash='slide_cache/html/unnamed-chunk-16_b4bf5f3aa24918e1188f8225b7963187'}\n\n```{.r .cell-code code-fold=\"true\"}\nggplot(z) +\n geom_line(data = x, aes(x = time_value, y = cases_7dav), color = \"gray50\") +\n geom_ribbon(aes(\n x = time_value, ymin = `0.05`, ymax = `0.95`,\n group = time_value, fill = geo_value\n ), alpha = 0.4) +\n geom_line(aes(x = time_value, y = `0.5`, group = time_value)) +\n geom_point(aes(x = time_value, y = `0.5`, group = time_value), size = 0.5) +\n # geom_vline(data = tibble(x = fc_time_values), aes(xintercept = x),\n # linetype = 2, alpha = 0.5) +\n facet_wrap(vars(geo_value), scales = \"free_y\", nrow = 3) +\n scale_y_continuous(expand = expansion(c(0, 0.05))) +\n scale_x_date(minor_breaks = \"1 months\", date_labels = \"%b %Y\") +\n scale_fill_viridis_d(guide = \"none\", end = .9) +\n labs(x = \"Date\", y = \"Reported COVID-19 cases\")\n```\n\n::: {.cell-output-display}\n![](slide_files/figure-html/unnamed-chunk-16-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nTwo points are worth making. First, the AR model's performance here is pretty\nspotty. At various points in time, we can see that its forecasts are volatile\n(its point predictions are all over the place), or overconfident (its bands are\ntoo narrow), or both at the same time. This is only meant as a simple demo and\nnot entirely unexpected given the way the AR model is set up. The\n[`epipredict`](https://cmu-delphi.github.io/epipredict) package,\noffers a suite of predictive modeling tools\nthat improve on many of the shortcomings of the above simple AR model (simply\nusing all states for training rather than 6 is a huge improvement).\n\nSecond, the AR forecaster here is using finalized data, meaning, it uses the\nlatest versions of signal values (reported COVID-19 cases) available, for both\ntraining models and making predictions historically. However, this is not\nreflective of the provisional nature of the data that it must cope with in a\ntrue forecast task. Training and making predictions on finalized data can lead\nto an overly optimistic sense of accuracy; see, for example,\n[@McDonaldBien2021] and references\ntherein. Fortunately, the `epiprocess` package provides a data structure called\n`epi_archive` that can be used to store all data revisions, and furthermore, an\n`epi_archive` object knows how to slide computations in the correct\nversion-aware sense (for the computation at each reference time $t$, it uses\nonly data that would have been available as of $t$). We will revisit this\nexample in the [archive\nvignette](https://cmu-delphi.github.io/epiprocess/articles/archive.html).\n", "supporting": [], "filters": [ "rmarkdown/pagebreak.lua" diff --git a/_freeze/slide/figure-html/unnamed-chunk-10-1.svg b/_freeze/slide/figure-html/unnamed-chunk-10-1.svg new file mode 100644 index 0000000..5345f6d --- /dev/null +++ b/_freeze/slide/figure-html/unnamed-chunk-10-1.svg @@ -0,0 +1,308 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/_freeze/slide/figure-html/unnamed-chunk-12-1.svg b/_freeze/slide/figure-html/unnamed-chunk-12-1.svg index 1c5a420..3df4513 100644 --- a/_freeze/slide/figure-html/unnamed-chunk-12-1.svg +++ b/_freeze/slide/figure-html/unnamed-chunk-12-1.svg @@ -1,2781 +1,16886 @@ - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/_freeze/slide/figure-html/unnamed-chunk-16-1.svg b/_freeze/slide/figure-html/unnamed-chunk-16-1.svg new file mode 100644 index 0000000..59929b8 --- /dev/null +++ b/_freeze/slide/figure-html/unnamed-chunk-16-1.svg @@ -0,0 +1,2860 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/_freeze/slide/figure-html/unnamed-chunk-8-1.svg b/_freeze/slide/figure-html/unnamed-chunk-8-1.svg index ad8c8f0..9cc05d8 100644 --- a/_freeze/slide/figure-html/unnamed-chunk-8-1.svg +++ b/_freeze/slide/figure-html/unnamed-chunk-8-1.svg @@ -1,16903 +1,16886 @@ - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + + - - + + - - + + + + + - - + + - - - - - + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + + - - + + - - - + + - - + + - - + + - - - + + - - + + - - + + - - - + + - - + + - - - + + - - + + - - - - + + - - + + - - + + - - - - + + - - + + + - - - - - + + - - + + - - - - - + + - - + + - - + + - - - - + + - - + + - - - - - + + + - - + + + - - - - - + + - - + + - - + + - - + + + - - - + + - - + + - - + + + - - - - + + - - + + - - - - - + + - - + + + - - + + - - + + - - - - - - - - - - - - + + - - - - - - + + + - - - - - + + - - + + - - - - - + + - - + + + - - - - - + + + - - + + - - - - - + + - - + + + + - - + + - - + + - - - + + + + - - + + - - - - + + + + + - - + + - - + + + + + - - - - - + + - - + + + + + - - - - + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - - - - + + - - + + + + + - - + + - - - + + + + + - - + + - - + + - - - + + + + - - + + - - + + + + + - - + + - - - - - + + - - + + + + - - - - - + + - - - - + + - - + + + - - + + - - - - - + + - - + + + - - - + + + - - - + + - - + + - - + + + - - - - + + - - + + - - - + + - - - + + + - - + + - - - + + - - - + + - - + + + - - - + + - - + + - - + + - - + + + - - - + + + - - - + + - - + + + - - - + + + - - + + - - + + + - - - + + + - - - + + - - + + - - + + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + - - + + + - - + + - - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - - + + - - + + - - - - + + + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - - + + + + - - + + - - + + - - + + - - + + + - - + + - - + + + + - - + + - - - - + + - - + + + - - + + + - - - + + - - + + + - - + + - - + + - - - + + - - + + + - - + + + - - + + - - - + + - - + + - - + + + - - - + + - - + + + + - - + + - - + + - - - - - + + + + + + - - + + + + + - - - - - + + - - + + - - - - - + + + - - + + - - - - - + + - - + + - - - - - + + + + + - - - - - + + - - + + + + + - - - - - + + - - + + + - - + + - - - - + + - - + + - - + + + + - - - - + + - - + + - - - - + + + + + - - + + - - + + - - - - - + + - - + + + - - - + + - - + + + - - + + + - - + + - - - + + - - - + + - - + + + - - - + + - - - + + - - + + - - + + + - - + + - - + + - - + + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + + - - + + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - + + - - + + - - + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - - - - + + - - + + - - - - + + + + - - + + - - - + + + + - - - + + - - + + + + + - - + + - - - - + + + - - + + + - - + + - - + + + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + + - - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - + + - - + + - - + + + + + - - - - + + - - + + - - + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - - + + + - - + + - - + + + - - + + - - + + - - - - + + - - + + - - + + + + - - - - + + - - + + - - - + + + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - + + - - + + - - + + + + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - - + + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - - + + - - + + - - - - - + + + - - + + - - + + - - - + + - - + + + - - + + + - - - + + - - + + - - + + + + + - - + + - - - + + + + + - - + + - - + + + - - + + - - + + - - + + - - - + + + - - + + + - - - + + - - - + + - - + + + + - - - + + - - - + + - - + + + + - - - + + - - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + - - - + + + + + + - - - - - + + + + + + - - + + - - - - - + + + + - - + + - - + + - - + + - - - + + + - - + + - - - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + + - - + + + - - - + + - - + + + + - - + + - - + + - - - + + + + + - - + + - - + + - - - + + - - + + - - + + - - + + - - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + + - - + + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + + - - + + - - + + - - - + + - - + + - - - + + - - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + + - - + + - - + + - - + + - - + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + + - - + + - - + + - - - + + - - + + - - + + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + + + - - - + + - - + + - - + + - - + + - - + + - - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + + - - + + - - + + + + - - + + - - + + - - + + - - - + + + + - - - + + - - + + - - - - + + + + - - + + - - + + + + - - - - + + - - + + - - + + - - + + - - - - + + + - - + + - - - - + + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + + - - + + - - + + + - - + + - - + + - - + + - - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + + + - - + + + + + + - - + + + + + - - - - + + - - + + - - - - - + + - - + + - - - - - + + + - - + + - - - - + + + - - + + - - + + - - - - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + + + - - + + - - + + + - - + + - - - + + - - + + - - + + + + - - + + - - - - + + - - + + + + + - - + + - - - + + - - + + - - + + - - - - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + + + - - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + + - - - + + - - + + + - - + + - - + + - - - - + + - - + + - - + + - - + + + - - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + - - + + - - - + + - - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - - + + - - + + - - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + - - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - - + + - - + + - - + + + - - - + + + - - - + + - - + + - - + + - - + + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + - - - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + + - - + + - - - + + - - + + - - + + - - + + - - + + + - - + + - - + + + - - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - - + + + - - + + - - + + - - - + + - - + + - - + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - - + + - - + + - - + + - - + + - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - + + - - + + - - + + + - - + + - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + - - - + + - - + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/_freeze/sliding-forecasters/execute-results/html.json b/_freeze/sliding-forecasters/execute-results/html.json index 474dbe2..74fa0c2 100644 --- a/_freeze/sliding-forecasters/execute-results/html.json +++ b/_freeze/sliding-forecasters/execute-results/html.json @@ -1,7 +1,7 @@ { - "hash": "bcaf5b1ddd41612a320d992385364b2b", + "hash": "a74b5381d41b5132ec6860cad9e9e252", "result": { - "markdown": "# Pseudo-prospective forecast inspection\n\n\n::: {.cell}\n\n:::\n\n\n\nA key function from the epiprocess package is `epi_slide()`, which allows the\nuser to apply a function or formula-based computation over variables in an\n`epi_df` over a running window of `n` time steps (see the following `epiprocess`\nvignette to go over the basics of the function: [\"Slide a computation over\nsignal values\"](https://cmu-delphi.github.io/epiprocess/articles/slide.html)).\nThe equivalent sliding method for an `epi_archive` object can be called by using\nthe wrapper function `epix_slide()` (refer to the following vignette for the\nbasics of the function: [\"Work with archive objects and data\nrevisions\"](https://cmu-delphi.github.io/epiprocess/articles/archive.html)). The\nkey difference from `epi_slide()` is that it performs version-aware\ncomputations. That is, the function only uses data that would have been\navailable as of time t for that reference time.\n\nIn this vignette, we use `epi_slide()` and `epix_slide()` for backtesting our\n`arx_forecaster` on historical COVID-19 case data from the US and from Canada.\nMore precisely, we first demonstrate using `epi_slide()` to slide ARX\nforecasters over an `epi_df` object and compare the results obtained from using\ndifferent forecasting engines. We then compare these simple retrospective\nforecasts to more proper \"pseudoprospective\" forecasts generated using snapshots\nof the data that was available in real time, using `epix_slide()`.\n\n## Comparing different forecasting engines\n\n### Example using CLI and case data from US states\n\nFirst, we download the version history (i.e. archive) of the percentage of\ndoctor’s visits with CLI (COVID-like illness) computed from medical insurance\nclaims and the number of new confirmed COVID-19 cases per 100,000 population\n(daily) for all 50 states from the COVIDcast API. We process as before, with the\nmodification that we use `sync = \"locf\"` in `epix_merge()` so that the last\nversion of each observation can be carried forward to extrapolate unavailable\nversions for the less up-to-date input archive.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/grab-epi-data_89a9d4079f8ffc6080f83369668b2316'}\n\n```{.r .cell-code}\nus_raw_history_dfs <- readRDS(url(\n \"https://github.com/cmu-delphi/epipredict/raw/dev/vignettes/articles/all_states_covidcast_signals.rds\"\n))\n\nus_cli_archive <- us_raw_history_dfs[[1]] %>%\n select(geo_value, time_value, version = issue, percent_cli = value) %>%\n as_epi_archive(compactify = TRUE)\nus_cases_archive <- us_raw_history_dfs[[2]] %>%\n select(geo_value, time_value, version = issue, case_rate = value) %>%\n as_epi_archive(compactify = TRUE)\n\nus_archive <- epix_merge(\n us_cli_archive, us_cases_archive,\n sync = \"locf\", compactify = TRUE\n)\n```\n:::\n\n\nAfter obtaining the latest snapshot of the data, we produce forecasts on that\ndata using the default engine of simple linear regression and compare to a\nrandom forest.\n\nNote that all of the warnings about the forecast date being less than the most\nrecent update date of the data have been suppressed to avoid cluttering the\noutput.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/make-arx-kweek_404b2b076aee47ac4ed54f2b9ba369d2'}\n\n```{.r .cell-code}\n# Latest snapshot of data, and forecast dates\nus_latest <- epix_as_of(us_archive, max_version = max(us_archive$versions_end))\nfc_time_values <- seq(\n from = as.Date(\"2020-08-01\"),\n to = as.Date(\"2021-11-01\"),\n by = \"1 month\"\n)\naheads <- c(7, 14, 21, 28)\n\nk_week_ahead <- function(epi_df, outcome, predictors, ahead = 7, engine) {\n epi_slide(epi_df, ~ arx_forecaster(\n .x, outcome, predictors, engine,\n args_list = arx_args_list(ahead = ahead)\n )$predictions %>%\n select(-geo_value),\n before = 120L - 1L,\n ref_time_values = fc_time_values,\n new_col_name = \"fc\"\n ) %>%\n select(geo_value, time_value, starts_with(\"fc\")) %>%\n mutate(engine_type = engine$engine)\n}\n\n# Generate the forecasts and bind them together\nfc <- bind_rows(\n map(aheads, ~ k_week_ahead(\n us_latest, \"case_rate\", c(\"case_rate\", \"percent_cli\"), .x,\n engine = linear_reg()\n )) %>%\n list_rbind(),\n map(aheads, ~ k_week_ahead(\n us_latest, \"case_rate\", c(\"case_rate\", \"percent_cli\"), .x,\n engine = rand_forest(mode = \"regression\")\n )) %>%\n list_rbind()\n) %>%\n pivot_quantiles_wider(contains(\"_distn\"))\n```\n:::\n\n\nHere, `arx_forecaster()` does all the heavy lifting. It creates leads of the\ntarget (respecting time stamps and locations) along with lags of the features\n(here, the response and doctors visits), estimates a forecasting model using the\nspecified engine, creates predictions, and non-parametric confidence bands.\n\nTo see how the predictions compare, we plot them on top of the latest case\nrates. Note that even though we've fitted the model on all states,\nwe'll just display the\nresults for two states, California (CA) and Florida (FL), to get a sense of the\nmodel performance while keeping the graphic simple.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/plot-arx_c99e5bb41b438e998b3904771b96568d'}\n\n```{.r .cell-code code-fold=\"true\"}\nfc_cafl <- fc %>% filter(geo_value %in% c(\"ca\", \"fl\"))\nlatest_cafl <- us_latest %>% filter(geo_value %in% c(\"ca\", \"fl\"))\n\nggplot(fc_cafl, aes(fc_target_date, group = time_value, fill = engine_type)) +\n geom_line(\n data = latest_cafl, aes(x = time_value, y = case_rate),\n inherit.aes = FALSE, color = \"gray50\"\n ) +\n geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`), alpha = 0.4) +\n geom_line(aes(y = fc_.pred)) +\n geom_point(aes(y = fc_.pred), size = 0.5) +\n geom_vline(aes(xintercept = time_value), linetype = 2, alpha = 0.5) +\n facet_grid(engine_type ~ geo_value, scales = \"free\") +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %y\") +\n scale_fill_brewer(palette = \"Set1\") +\n scale_y_continuous(expand = expansion(c(0, 0.05))) +\n labs(x = \"Date\", y = \"Reported COVID-19 case rates\") +\n theme(legend.position = \"none\")\n```\n\n::: {.cell-output-display}\n![](sliding-forecasters_files/figure-html/plot-arx-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nFor the two states of interest, simple linear regression clearly performs better\nthan random forest in terms of accuracy of the predictions and does not\nresult in such in overconfident predictions (overly narrow confidence bands).\nThough, in general, neither approach produces amazingly accurate forecasts.\nThis could be because\nthe behaviour is rather different across states and the effects of other notable\nfactors such as age and public health measures may be important to account for\nin such forecasting. Including such factors as well as making enhancements such\nas correcting for outliers are some improvements one could make to this simple\nmodel.[^1]\n\n[^1]: Note that, despite the above caveats, simple models like this tend to out-perform many far more complicated models in the online Covid forecasting due to those models high variance predictions.\n\n### Example using case data from Canada\n\nBy leveraging the flexibility of `epiprocess`, we can apply the same techniques\nto data from other sources. Since some collaborators are in British Columbia,\nCanada, we'll do essentially the same thing for Canada as we did above.\n\nThe [COVID-19 Canada Open Data Working Group](https://opencovid.ca/) collects\ndaily time series data on COVID-19 cases, deaths, recoveries, testing and\nvaccinations at the health region and province levels. Data are collected from\npublicly available sources such as government datasets and news releases.\nUnfortunately, there is no simple versioned source, so we have created our own\nfrom the Github commit history.\n\nFirst, we load versioned case rates at the provincial level. After converting\nthese to 7-day averages (due to highly variable provincial reporting\nmismatches), we then convert the data to an `epi_archive` object, and extract\nthe latest version from it. Finally, we run the same forcasting exercise as for\nthe American data, but here we compare the forecasts produced from using simple\nlinear regression with those from using boosted regression trees.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/get-can-fc_731a38443595452707f8f146a49636dd'}\n\n```{.r .cell-code}\n# source(\"drafts/canada-case-rates.R)\ncan <- epidatasets::can_prov_cases\ncan <- can %>%\n group_by(version, geo_value) %>%\n arrange(time_value) %>%\n mutate(cr_7dav = RcppRoll::roll_meanr(case_rate, n = 7L)) %>%\n as_epi_archive(compactify = TRUE)\n\ncan_latest <- epix_as_of(can, max_version = max(can$DT$version))\n\n# Generate the forecasts, and bind them together\ncan_fc <- bind_rows(\n map(aheads, ~ k_week_ahead(\n can_latest, \"cr_7dav\", \"cr_7dav\", .x, linear_reg()\n )) %>%\n list_rbind(),\n map(aheads, ~ k_week_ahead(\n can_latest, \"cr_7dav\", \"cr_7dav\", .x,\n boost_tree(mode = \"regression\", trees = 20)\n )) %>%\n list_rbind()\n) %>%\n pivot_quantiles_wider(contains(\"_distn\"))\n```\n:::\n\n\nThe first figure shows the results for all of the provinces using linear regression.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/plot-can-fc-lr_a609b4a2e0dd0f49e145eb8a6b3ff50e'}\n\n```{.r .cell-code code-fold=\"true\"}\nggplot(\n can_fc %>% filter(engine_type == \"lm\"),\n aes(x = fc_target_date, group = time_value)\n) +\n coord_cartesian(xlim = lubridate::ymd(c(\"2020-12-01\", NA))) +\n geom_line(\n data = can_latest, aes(x = time_value, y = cr_7dav),\n inherit.aes = FALSE, color = \"gray50\"\n ) +\n geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`, fill = geo_value),\n alpha = 0.4\n ) +\n geom_line(aes(y = fc_.pred)) +\n geom_point(aes(y = fc_.pred), size = 0.5) +\n geom_vline(aes(xintercept = time_value), linetype = 2, alpha = 0.5) +\n facet_wrap(~geo_value, scales = \"free_y\", ncol = 3) +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %y\") +\n scale_y_continuous(expand = expansion(c(0, 0.05))) +\n labs(\n title = \"Using simple linear regression\", x = \"Date\",\n y = \"Reported COVID-19 case rates\"\n ) +\n theme(legend.position = \"none\")\n```\n\n::: {.cell-output-display}\n![](sliding-forecasters_files/figure-html/plot-can-fc-lr-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nCompare those forecasts with a related set using Gradient Boosting.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/plot-can-fc-boost_c606f4fff3ff4eb8919817c6e7100441'}\n\n```{.r .cell-code code-fold=\"true\"}\nggplot(\n can_fc %>% filter(engine_type == \"xgboost\"),\n aes(x = fc_target_date, group = time_value)\n) +\n coord_cartesian(xlim = lubridate::ymd(c(\"2020-12-01\", NA))) +\n geom_line(\n data = can_latest, aes(x = time_value, y = cr_7dav),\n inherit.aes = FALSE, color = \"gray50\"\n ) +\n geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`, fill = geo_value),\n alpha = 0.4\n ) +\n geom_line(aes(y = fc_.pred)) +\n geom_point(aes(y = fc_.pred), size = 0.5) +\n geom_vline(aes(xintercept = time_value), linetype = 2, alpha = 0.5) +\n facet_wrap(~geo_value, scales = \"free_y\", ncol = 3) +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %y\") +\n scale_y_continuous(expand = expansion(c(0, 0.05))) +\n labs(\n title = \"Using boosted regression trees\", x = \"Date\",\n y = \"Reported COVID-19 case rates\"\n ) +\n theme(legend.position = \"none\")\n```\n\n::: {.cell-output-display}\n![](sliding-forecasters_files/figure-html/plot-can-fc-boost-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nBoth approaches tend to produce quite volatile forecasts (point predictions)\nand/or are overly confident (very narrow bands), particularly when boosted\nregression trees are used. But as this is meant to be a simple demonstration of\nsliding with different engines in `arx_forecaster`, we may devote another\nvignette to work on improving the predictive modelling using the suite of tools\navailable in epipredict.\n\n## Pseudoprospective vs. unfaithful retrospective forecasting\n\n### Example using case data from US states\n\nWe will now run pseudoprospective forecasts based on properly-versioned data\n(that would have been available in real-time) to forecast future COVID-19 case\nrates from current and past COVID-19 case rates for all states. That is, we can\nmake forecasts on the archive, `us_archive`, and compare those to forecasts on\n(time windows of) the latest data, `us_latest`, using the same general set-up as\nabove. For pseudoprospective forecasting, note that `us_archive` is fed into\n`epix_slide()`, while for simpler (unfaithful) retrospective forecasting,\n`us_latest` is fed into `epi_slide()`. #%% update to include percent_cli after\nthat issue is fixed?\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/make-ar-kweek-asof_bb2b98563c892027582a2aa804662dcc'}\n\n```{.r .cell-code}\nk_week_versioning <- function(ahead, version = c(\"faithful\", \"unfaithful\")) {\n version <- match.arg(version)\n if (version == \"faithful\") {\n epix_slide(\n us_archive,\n ~ arx_forecaster(\n .x, \"case_rate\", c(\"case_rate\", \"percent_cli\"),\n args_list = arx_args_list(ahead = ahead)\n )$predictions,\n before = 120 - 1,\n ref_time_values = fc_time_values,\n new_col_name = \"fc\"\n ) %>%\n mutate(version = \"version faithful\") %>%\n rename(geo_value = \"fc_geo_value\")\n } else {\n k_week_ahead(\n us_latest, \"case_rate\", c(\"case_rate\", \"percent_cli\"),\n ahead, linear_reg()\n ) %>% mutate(version = \"not version faithful\")\n }\n}\n\n# Generate the forecasts, and bind them together\nfc <- bind_rows(\n map(aheads, ~ k_week_versioning(.x, \"faithful\")) %>% list_rbind(),\n map(aheads, ~ k_week_versioning(.x, \"unfaithful\")) %>% list_rbind()\n) %>% pivot_quantiles_wider(fc_.pred_distn)\n```\n:::\n\n\nNow we can plot the results on top of the latest case rates. As before, we will only display and focus on the results for FL and CA for simplicity.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/plot-ar-asof_e484487727d34e88d3d51c13eeb6cdaa'}\n\n```{.r .cell-code code-fold=\"true\"}\nfc_cafl <- fc %>% filter(geo_value %in% c(\"ca\", \"fl\"))\nlatest_cafl <- us_latest %>% filter(geo_value %in% c(\"ca\", \"fl\"))\n\nggplot(fc_cafl, aes(x = fc_target_date, group = time_value)) +\n geom_line(\n data = latest_cafl, aes(x = time_value, y = case_rate),\n inherit.aes = FALSE, color = \"gray50\"\n ) +\n geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`, fill = version), alpha = 0.4) +\n geom_line(aes(y = fc_.pred)) +\n geom_point(aes(y = fc_.pred), size = 0.5) +\n geom_vline(aes(xintercept = time_value), linetype = 2, alpha = 0.5) +\n facet_grid(version ~ geo_value, scales = \"free\") +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %y\") +\n scale_y_continuous(expand = expansion(c(0, 0.05))) +\n labs(x = \"Date\", y = \"Reported COVID-19 case rates\") +\n scale_fill_brewer(palette = \"Set1\") +\n theme(legend.position = \"none\")\n```\n\n::: {.cell-output-display}\n![](sliding-forecasters_files/figure-html/plot-ar-asof-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nAgain, we observe that the results are not great for these two states, but\nthat's likely due to the simplicity of the model (ex. the omission of key\nfactors such as age and public health measures) and the quality of the data (ex.\nwe have not personally corrected for anomalies in the data).\n\nWe shall leave it to the reader to try the above version aware and unaware\nforecasting exercise on the Canadian case rate data. The above code for the\nAmerican state data should be readily adaptable for this purpose.\n", + "markdown": "# Sliding version-unaware and version-aware ARX forecasters across dates\n\n\n::: {.cell}\n\n:::\n\n\nA key function from the epiprocess package is `epix_slide()` (refer to the\nfollowing vignette for the basics of the function: [\"Work with archive objects\nand data\nrevisions\"](https://cmu-delphi.github.io/epiprocess/articles/archive.html))\nwhich allows performing version-aware computations. That is, the function only\nuses data that would have been available as of time t for that reference time.\n\nIn this vignette, we use `epix_slide()` for backtesting our `arx_forecaster` on\nhistorical COVID-19 case data from the US and from Canada. We first examine the\nresults from a version-unaware forecaster, comparing two different fitting\nengines and then we contrast this with version-aware forecasting. The former\nwill proceed by constructing an `epi_archive` that erases its version\ninformation and then use `epix_slide()` to forecast the future. The latter will\nkeep the versioned data and proceed similarly by using `epix_slide()` to\nforecast the future.\n\n## Version-unaware forecasting\n\n### Example using CLI and case data from US states\n\nFirst, we download the version history (i.e. archive) of the percentage of\ndoctor’s visits with CLI (COVID-like illness) computed from medical insurance\nclaims and the number of new confirmed COVID-19 cases per 100,000 population\n(daily) for all 50 states from the COVIDcast API. We process as before, with the\nmodification that we use `sync = \"locf\"` in `epix_merge()` so that the last\nversion of each observation can be carried forward to extrapolate unavailable\nversions for the less up-to-date input archive.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/grab-epi-data_89a9d4079f8ffc6080f83369668b2316'}\n\n```{.r .cell-code}\nus_raw_history_dfs <- readRDS(url(\n \"https://github.com/cmu-delphi/epipredict/raw/dev/vignettes/articles/all_states_covidcast_signals.rds\"\n))\n\nus_cli_archive <- us_raw_history_dfs[[1]] %>%\n select(geo_value, time_value, version = issue, percent_cli = value) %>%\n as_epi_archive(compactify = TRUE)\nus_cases_archive <- us_raw_history_dfs[[2]] %>%\n select(geo_value, time_value, version = issue, case_rate = value) %>%\n as_epi_archive(compactify = TRUE)\n\nus_archive <- epix_merge(\n us_cli_archive, us_cases_archive,\n sync = \"locf\", compactify = TRUE\n)\n```\n:::\n\n\nWe then get latest snapshot of the data from the archive by using\n`epix_as_of()`. We then create fake version information by setting `version =\ntime_value`. This creates an archive that pretends to have the latest data\navailable (since at version time `x` it has all the data up to time_value `x`,\nwhich in reality is unrealistic because the time values of the data received at\nversion time `x` often lags by a few days, not to mention the later corrections\nthat are amended to the data).\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/make-arx-kweek_6ff04c287f0d7a0f9d4503649e56bd3a'}\n\n```{.r .cell-code}\n# Latest snapshot of data, and forecast dates\nus_latest <- us_archive %>%\n epix_as_of(version = max(.$versions_end)) %>%\n mutate(version = time_value) %>%\n as_epi_archive()\nfc_time_values <- seq(\n from = as.Date(\"2020-08-01\"),\n to = as.Date(\"2021-11-01\"),\n by = \"1 month\"\n)\naheads <- c(7, 14, 21, 28)\n\nforecast_k_week_ahead <- function(epi_archive, outcome, predictors, ahead = 7, engine) {\n epi_archive %>%\n epix_slide(\n .f = function(x, gk, rtv) {\n arx_forecaster(\n x, outcome, predictors, engine,\n args_list = arx_args_list(ahead = ahead)\n )$predictions %>%\n mutate(engine_type = engine$engine) %>%\n pivot_quantiles_wider(.pred_distn)\n },\n .before = 120,\n .versions = fc_time_values\n )\n}\n\n# Generate the forecasts and bind them together\nforecasts_version_unaware <- bind_rows(\n map(aheads, ~ forecast_k_week_ahead(\n us_latest,\n outcome = \"case_rate\",\n predictors = c(\"case_rate\", \"percent_cli\"),\n ahead = .x,\n engine = linear_reg()\n )),\n map(aheads, ~ forecast_k_week_ahead(\n us_latest,\n outcome = \"case_rate\",\n predictors = c(\"case_rate\", \"percent_cli\"),\n ahead = .x,\n engine = rand_forest(mode = \"regression\")\n ))\n)\n```\n:::\n\n\nHere, `arx_forecaster()` does all the heavy lifting. It creates leads of the\ntarget (respecting time stamps and locations) along with lags of the features\n(here, the response and doctors visits), estimates a forecasting model using the\nspecified engine, creates predictions, and non-parametric confidence bands.\n\nTo see how the predictions compare, we plot them on top of the latest case\nrates. Note that even though we've fitted the model on all states, we'll just\ndisplay the results for two states, California (CA) and Florida (FL), to get a\nsense of the model performance while keeping the graphic simple.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/plot-arx_2918e9947b5ecabca1115f6cc2d8eb62'}\n\n```{.r .cell-code code-fold=\"true\"}\nforecasts_filtered <- forecasts_version_unaware %>%\n tibble() %>%\n filter(geo_value %in% c(\"ca\", \"fl\"))\nlatest_data_filtered <- us_latest$DT %>%\n tibble() %>%\n filter(geo_value %in% c(\"ca\", \"fl\"))\n\nggplot(forecasts_filtered, aes(x = target_date, group = forecast_date, fill = engine_type)) +\n geom_line(\n data = latest_data_filtered, aes(x = time_value, y = case_rate),\n inherit.aes = FALSE, color = \"gray50\"\n ) +\n geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`), alpha = 0.4) +\n geom_line(aes(y = .pred)) +\n geom_point(aes(y = .pred), size = 0.5) +\n geom_vline(aes(xintercept = forecast_date), linetype = 2, alpha = 0.5) +\n facet_grid(engine_type ~ geo_value, scales = \"free\") +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %y\") +\n scale_fill_brewer(palette = \"Set1\") +\n scale_y_continuous(expand = expansion(c(0, 0.05))) +\n labs(x = \"Date\", y = \"Reported COVID-19 case rates\") +\n theme(legend.position = \"none\")\n```\n\n::: {.cell-output-display}\n![](sliding-forecasters_files/figure-html/plot-arx-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nFor the two states of interest, simple linear regression clearly performs better\nthan random forest in terms of accuracy of the predictions and does not result\nin such in overconfident predictions (overly narrow confidence bands). Though,\nin general, neither approach produces amazingly accurate forecasts. This could\nbe because the behaviour is rather different across states and the effects of\nother notable factors such as age and public health measures may be important to\naccount for in such forecasting. Including such factors as well as making\nenhancements such as correcting for outliers are some improvements one could\nmake to this simple model.[^1]\n\n[^1]: Note that, despite the above caveats, simple models like this tend to\nout-perform many far more complicated models in the online Covid forecasting due\nto those models high variance predictions.\n\n### Example using case data from Canada\n\nBy leveraging the flexibility of `epiprocess`, we can apply the same techniques\nto data from other sources. Since some collaborators are in British Columbia,\nCanada, we'll do essentially the same thing for Canada as we did above.\n\nThe [COVID-19 Canada Open Data Working Group](https://opencovid.ca/) collects\ndaily time series data on COVID-19 cases, deaths, recoveries, testing and\nvaccinations at the health region and province levels. Data are collected from\npublicly available sources such as government datasets and news releases.\nUnfortunately, there is no simple versioned source, so we have created our own\nfrom the Github commit history.\n\nFirst, we load versioned case rates at the provincial level. After converting\nthese to 7-day averages (due to highly variable provincial reporting\nmismatches), we then convert the data to an `epi_archive` object, and extract\nthe latest version from it. Finally, we run the same forcasting exercise as for\nthe American data, but here we compare the forecasts produced from using simple\nlinear regression with those from using boosted regression trees.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/get-can-fc_2457088f4bfc3bada5f7c38814504be7'}\n\n```{.r .cell-code}\n# source(\"drafts/canada-case-rates.R)\ncan <- epidatasets::can_prov_cases\ncan <- can %>%\n group_by(version, geo_value) %>%\n arrange(time_value) %>%\n mutate(cr_7dav = RcppRoll::roll_meanr(case_rate, n = 7L)) %>%\n as_epi_archive(compactify = TRUE)\n\ncan_latest <- epix_as_of(can, max_version = max(can$DT$version)) %>%\n mutate(version = time_value) %>%\n as_epi_archive()\n\n# Generate the forecasts, and bind them together\ncan_fc <- bind_rows(\n map(\n aheads,\n ~ forecast_k_week_ahead(can_latest, \"cr_7dav\", \"cr_7dav\", .x, linear_reg())\n ),\n map(\n aheads,\n ~ forecast_k_week_ahead(can_latest, \"cr_7dav\", \"cr_7dav\", .x, boost_tree(mode = \"regression\", trees = 20))\n )\n)\n```\n:::\n\n\nThe first figure shows the results for all of the provinces using linear regression.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/plot-can-fc-lr_f7e7878c3f1a72f4cb9216d68aa63292'}\n\n```{.r .cell-code code-fold=\"true\"}\nggplot(\n can_fc %>% filter(engine_type == \"lm\"),\n aes(x = target_date, group = forecast_date)\n) +\n coord_cartesian(xlim = lubridate::ymd(c(\"2020-12-01\", NA))) +\n geom_line(\n data = can_latest$DT %>% tibble(), aes(x = time_value, y = cr_7dav),\n inherit.aes = FALSE, color = \"gray50\"\n ) +\n geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`, fill = geo_value),\n alpha = 0.4\n ) +\n geom_line(aes(y = .pred)) +\n geom_point(aes(y = .pred), size = 0.5) +\n geom_vline(aes(xintercept = forecast_date), linetype = 2, alpha = 0.5) +\n facet_wrap(~geo_value, scales = \"free_y\", ncol = 3) +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %y\") +\n scale_y_continuous(expand = expansion(c(0, 0.05))) +\n labs(\n title = \"Using simple linear regression\", x = \"Date\",\n y = \"Reported COVID-19 case rates\"\n ) +\n theme(legend.position = \"none\")\n```\n\n::: {.cell-output-display}\n![](sliding-forecasters_files/figure-html/plot-can-fc-lr-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nCompare those forecasts with a related set using Gradient Boosting.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/plot-can-fc-boost_58502edbabde6914bca10e407c6f445f'}\n\n```{.r .cell-code code-fold=\"true\"}\nggplot(\n can_fc %>% filter(engine_type == \"xgboost\"),\n aes(x = target_date, group = forecast_date)\n) +\n coord_cartesian(xlim = lubridate::ymd(c(\"2020-12-01\", NA))) +\n geom_line(\n data = can_latest$DT %>% tibble(), aes(x = time_value, y = cr_7dav),\n inherit.aes = FALSE, color = \"gray50\"\n ) +\n geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`, fill = geo_value),\n alpha = 0.4\n ) +\n geom_line(aes(y = .pred)) +\n geom_point(aes(y = .pred), size = 0.5) +\n geom_vline(aes(xintercept = forecast_date), linetype = 2, alpha = 0.5) +\n facet_wrap(~geo_value, scales = \"free_y\", ncol = 3) +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %y\") +\n scale_y_continuous(expand = expansion(c(0, 0.05))) +\n labs(\n title = \"Using boosted regression trees\", x = \"Date\",\n y = \"Reported COVID-19 case rates\"\n ) +\n theme(legend.position = \"none\")\n```\n\n::: {.cell-output-display}\n![](sliding-forecasters_files/figure-html/plot-can-fc-boost-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nBoth approaches tend to produce quite volatile forecasts (point predictions)\nand/or are overly confident (very narrow bands), particularly when boosted\nregression trees are used. But as this is meant to be a simple demonstration of\nsliding with different engines in `arx_forecaster`, we may devote another\nvignette to work on improving the predictive modelling using the suite of tools\navailable in epipredict.\n\n## Version-aware forecasting\n\n### Example using case data from US states\n\nWe will now run pseudoprospective forecasts based on properly-versioned data\n(that would have been available in real-time) to forecast future COVID-19 case\nrates from current and past COVID-19 case rates for all states. All we have to\ndo is use the historical archive of the data with version information,\n`us_archive`, instead of `us_latest` like we did above, in the argument to our\nforecaster wrapper `forecast_k_week_ahead()`. Below we do that computation, tag\nit, and combine it with the forecasts from one of the engines made above.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/make-ar-kweek-asof_4b8247ffc26ffb4f84a5646852522f3b'}\n\n```{.r .cell-code}\n# Generate the forecasts, and bind them together\nforecasts_version_aware <- bind_rows(\n map(aheads, ~ forecast_k_week_ahead(\n us_archive,\n outcome = \"case_rate\",\n predictors = c(\"case_rate\", \"percent_cli\"),\n ahead = .x,\n engine = linear_reg()\n )) %>%\n bind_rows() %>%\n mutate(version = \"version faithful\"),\n forecasts_version_unaware %>% filter(engine_type == \"lm\") %>% mutate(version = \"version unfaithful\")\n)\n```\n:::\n\n\nNow we can plot the results on top of the latest case rates. As before, we will\nonly display and focus on the results for FL and CA for simplicity.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/plot-ar-asof_884abfb3ac2d5a8dfe9bc404ff34c5d4'}\n\n```{.r .cell-code code-fold=\"true\"}\nforecasts_filtered <- forecasts_version_aware %>%\n tibble() %>%\n filter(geo_value %in% c(\"ca\", \"fl\"))\nlatest_data_filtered <- us_latest$DT %>%\n tibble() %>%\n select(-version) %>%\n filter(geo_value %in% c(\"ca\", \"fl\"))\n\nggplot(forecasts_filtered, aes(x = target_date, group = forecast_date, fill = version)) +\n geom_line(\n data = latest_data_filtered, aes(x = time_value, y = case_rate),\n inherit.aes = FALSE, color = \"gray50\"\n ) +\n geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`), alpha = 0.4) +\n geom_line(aes(y = .pred)) +\n geom_point(aes(y = .pred), size = 0.5) +\n geom_vline(aes(xintercept = forecast_date), linetype = 2, alpha = 0.5) +\n facet_grid(version ~ geo_value, scales = \"free\") +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %y\") +\n scale_fill_brewer(palette = \"Set1\") +\n scale_y_continuous(expand = expansion(c(0, 0.05))) +\n labs(x = \"Date\", y = \"Reported COVID-19 case rates\") +\n theme(legend.position = \"none\")\n```\n\n::: {.cell-output-display}\n![](sliding-forecasters_files/figure-html/plot-ar-asof-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nAgain, we observe that the results are not great for these two states, but\nthat's likely due to the simplicity of the model (ex. the omission of key\nfactors such as age and public health measures) and the quality of the data (ex.\nwe have not personally corrected for anomalies in the data).\n\nWe shall leave it to the reader to try the above version aware and unaware\nforecasting exercise on the Canadian case rate data. The above code for the\nAmerican state data should be readily adaptable for this purpose.\n", "supporting": [], "filters": [ "rmarkdown/pagebreak.lua" diff --git a/_freeze/sliding-forecasters/figure-html/plot-ar-asof-1.svg b/_freeze/sliding-forecasters/figure-html/plot-ar-asof-1.svg index 7ffb5a0..c21dcfc 100644 --- a/_freeze/sliding-forecasters/figure-html/plot-ar-asof-1.svg +++ b/_freeze/sliding-forecasters/figure-html/plot-ar-asof-1.svg @@ -48,37 +48,37 @@ - + - + - + - + - + - + - + - + - + - + - + @@ -169,607 +169,607 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -835,968 +835,1096 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + + + - + + + + + - + + - + + - + - + - - + + - + - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - - + - + - - - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + + + - + + + + + - + - - + - - - - - + - + - + - - + + + - + - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + + - - + + + + + + + + + + - + - + + + - + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - + - + - - - - - + - - - + - - - + - + - - - - - - - - - - - - - - - + + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - + - - - - - - - - - - - - - - - - + + - + - + - + - - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + + - + - + + + + + - + + + - + - + + - + - + - - + + + + + + + + + + + + + + + - + - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + - + - - - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -1865,47 +1993,45 @@ - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + @@ -1965,48 +2091,48 @@ - - + + - - + + + + + + - - + - - - + + - - - + + - - - - - - - + + - - + + + - - - + + + - - + + + + + diff --git a/_freeze/sliding-forecasters/figure-html/plot-arx-1.svg b/_freeze/sliding-forecasters/figure-html/plot-arx-1.svg index 6c1d4fc..a33628d 100644 --- a/_freeze/sliding-forecasters/figure-html/plot-arx-1.svg +++ b/_freeze/sliding-forecasters/figure-html/plot-arx-1.svg @@ -244,16 +244,16 @@ - + - + - + - + @@ -406,406 +406,412 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + + + + + + + - + @@ -906,114 +912,114 @@ - - - - + + + + - + - + - + - + - + - + - - + + - + - + - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - + + + + + + + + + + + + + - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - + + + + + + + + + + + + + + @@ -1161,688 +1167,692 @@ - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - + - - - - - + - - + + + + - + - - + + - + - + + - + - - - - - - - - - - - - - - + - + + + + + + + + + + + + + + + + + + + + - + - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - + + + + + + + + + + + + + - + - + - - - - - - - - - - - - - + + + + + + + + + + + + + - - + + - + - - + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + diff --git a/_freeze/sliding-forecasters/figure-html/plot-can-fc-boost-1.svg b/_freeze/sliding-forecasters/figure-html/plot-can-fc-boost-1.svg index 2c407fd..63e1091 100644 --- a/_freeze/sliding-forecasters/figure-html/plot-can-fc-boost-1.svg +++ b/_freeze/sliding-forecasters/figure-html/plot-can-fc-boost-1.svg @@ -126,17 +126,23 @@ - + - + - + + + + + + + @@ -253,5615 +259,5685 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - - - - - - - - - + + + + + + + + + - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + - + - - + + + + + + + + + + + + + + - + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - - - - - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - + + - + - - - - + - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + - - + + + + + + + + + + + + + + + + + + - + - + + + + + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - - - + + - - - - - - - - - - - - - - - - - - - + + - - - - - - - - - - + + - + - - + - - + + - - - - - - - - - - - - - - - + + + + + + + + - - - - - - - - + + + + + + + + + + + + + + + + - + - - - - - + + + - - + + + + + + + + + + + - + + + + - - + + + + + + + + + + + + + + + + + + + + + - + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - - - - - - - - + + - - - - - - - - - - - + + - - - - - - - - - - - - - - + + - - - - - - - - - - - - - - + + - - - - - - - - - - - - - - + + - + - + - + - + + + - - + + + + + + + + + + + + + - + - - + + + + + + + + + + + - - + + + + + + + + + + + + + + - - + + + + + + + + + + + + - + + + - - + + + + + + + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - + - + - + - + - + - + - - - + - - - + - - - - + - + - - - - - - - - - - - + + + - + - + - - + - + - - - - - - - - + + - + + + - + - - - + - - + - + + - + + - - - - - - - - + + - + - - - - - - + + + + + + + + + + + - + - - - - - - - - + + + - + - - - + - - - - + + + + + + + + - + - + + - - - - - - - - + + - + + + - + - + - + - - + + + + + + + + + + + + + - + - + - - + + + + + + + + + + - + + + - + - - + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - + + - + - + - + - + - - + - - - - - - - - - - - - - - - - - - - - - - - - - - + + - + - - - - - - - - - + + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + - + - + - + - + + - + - - + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - + + + + + + + + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - + - - - - - - - - - - + + - - - - - - - - - - - - - - + + - + - - - - - - - - - - - - - + + - + - + - - - - - - - - - - - - - + + + - + - - - - - + + + + + + + + + + + - - + + + + + + + + + + + + + - - - - - - - - - - - - - - + + - + + + - - + + + + + + + + + + + + + + - - + + + + + + + + + + + + + + - + - - + + + + + + + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - + - + - + - + - - + + - + - + - + - + + - + - + - - + - - - + - - + - + - + + - + - - - + + - + - - + - + - - - + - + - + + - + - + - + - + + + - + - - - - - - - - + + + + - + - + - - - - - + - + + - + - - - - - - - - - - + - - - - - + + + + + + + + + + + + + + + - + - - - - - - - - - - + - - - - - + + + + + + + + + - + + + + + + - + - + + + + - + + + + - + - + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - + + - + - + - + - + - + - - + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - + - + - + - - + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - - + + - + - + - - - + - - - - - - - - - + + - - - - - - - - - - - - - - - - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + - + - + - - - - + + - + - - + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - - - - - - - + + - - - - - - - - - - - - - - - + + - + - - + - - + - - - + + + + + + + + + + + + + + - + - + + + - - - + + + + + + + + + + + + - + - - - + - + + + - + - + - - + + - + - - - + + + - - - - - - - - + + + - + + - + - - - + - - - - + - - - - - - - - + + + + + - + + + + + + - + - + - - - - - - - - - - - - + + + + - + + + + - + - + + + + + + - + - + - + - + - + - + + + + + - - + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - + + - + - + - + - - + - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - + - + - - + + + + + + + + - + - + - + + - + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - - + + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - + - + - - + + + + + + + + + - + - + + + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + - + - + - - - - - - - - + - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - + - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - + - - - - - - - - - - + - - - - - - - - - - + - - - - - - - - - - - - - - - - + - + - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + - + - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - + - - - - - - - + + + + + + - + - - - - - - - - - - - - - - - - + + + + + + + + + + + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - + + + + + + - - - - - - + + + + + + - - - - - - + + + + + + - - - - - - - - - - - + + + + + + + + + + + - - - - - - + + + + + + - - - - - - + + + + + + - - - - - - + + + + + + - - - - - - - - - - - + + + + + + + + + + + - - - - - - + + + + + + - - - - - - + + + + + + - - - - - - + + + + + + - - - - - - + + + + + + - + - - + + - - + + - - + + - - - - - - + + + + + + + + - - + - - + + - - + + - - - - - + + + + + + - - + - - + + - - + + - - - - - + + + + + + - - + - - + + - - + + - - + + - - - - - - - - - + + + + + + + + - + - - + + - - + + - - + + + + + + - - - - - - - - - - + - + - - + + - - + + + + + + - - - - - - - - - - + - - + + - - + + - - + + - - - - - - - - + + + + + + + + - + - - + + - - + + - - + + - - - - - - - - + + + + + + + + - + - - + + - - + + - - + + - - - - - - - - + + + + + + + + + + + + + - + - + - - + + - - + + - - - - - + + + + + + + + + + - + - - + + - - + + - - - - + + + + + + - + + + - - + + + - - - - + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - + + - - + + - - - - + + + + - - - - + + + + @@ -5894,36 +5970,36 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/_freeze/sliding-forecasters/figure-html/plot-can-fc-lr-1.svg b/_freeze/sliding-forecasters/figure-html/plot-can-fc-lr-1.svg index f082558..5df52bb 100644 --- a/_freeze/sliding-forecasters/figure-html/plot-can-fc-lr-1.svg +++ b/_freeze/sliding-forecasters/figure-html/plot-can-fc-lr-1.svg @@ -337,10 +337,10 @@ - + - + @@ -397,10 +397,10 @@ - + - + @@ -433,13 +433,13 @@ - + - + - + @@ -454,7 +454,7 @@ - + @@ -463,7 +463,7 @@ - + @@ -517,13 +517,13 @@ - + - + - + @@ -562,13 +562,13 @@ - + - + - + @@ -601,22 +601,22 @@ - + - + - + - + @@ -676,16 +676,16 @@ - + - + - + - + @@ -721,13 +721,13 @@ - + - + - + @@ -751,34 +751,34 @@ - + - + - + - + - + - + - + @@ -910,7 +910,7 @@ - + @@ -925,16 +925,16 @@ - + - + - + @@ -1069,7 +1069,7 @@ - + @@ -1090,25 +1090,25 @@ - + - + - + - + - + - + - + @@ -1117,769 +1117,769 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -1954,341 +1954,335 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -2321,13 +2315,13 @@ - + - + - + @@ -2342,106 +2336,106 @@ - + - + - - + + - + - + - - + + - + - + - + - - + + - + - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + - - - - - - - - - + + + + + + + + + - + - - - - - - - - - + + + + + + + + + - - - - - - - - - - - - + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + @@ -2485,13 +2479,13 @@ - + - + - + @@ -2521,16 +2515,16 @@ - + - + - + - + @@ -2545,94 +2539,94 @@ - + - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -2676,19 +2670,19 @@ - + - + - + - + - + @@ -2721,19 +2715,19 @@ - + - + - + - + - + @@ -2748,116 +2742,116 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -2901,19 +2895,19 @@ - + - + - + - + - + @@ -2946,16 +2940,16 @@ - + - + - + - + @@ -2970,122 +2964,122 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - + + + + + + + + + + + + + + @@ -3195,139 +3189,139 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - + + + - - - - - - - - - - - - + + + + + + + + + + + + - + - + - + - + - + - + - + - + - - - - - + + + + + - + - - - - + + + + - - - - - - - - - - + + + + + + + + + + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -3371,13 +3365,13 @@ - + - + - + @@ -3410,13 +3404,13 @@ - + - + - + @@ -3431,1219 +3425,1219 @@ - + - + - - + + - + - + - - + + - + - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - + + - + - + - + - - + + - + - + - + - + - + - - - - - - - - + + - - - - - - - - - - - - - + + + + + + + + - - + + + + + + + + + + + + + - + - - + - - - - + - - - - - - - - - - - - - - + + + + + + - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - + + + + + + + + + + + + + + - + - - - - - - - - - - - - - + + + + + + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - + - - + + + - + + + - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - + + + - + - + - - - + + + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - + + + + + + + + - - - + + + + + + + + - + - + - + + - + - - + - + - + + - + - - - + - - - + + + + + - - - - - - - - - - - - - + + + + + + + + + + + + + - + - + - - - + + + - - - - - - - + + + + + + + - + - + - + - - - - - - - - - - - - - + + + + + + + + + + + + + @@ -4687,427 +4681,421 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + + + - + - - - + + + - + - - - + - + - + - + - + - - + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + + + + - + + + - + - - - - + - - - + + + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -5117,7 +5105,7 @@ - + @@ -5142,7 +5130,7 @@ - + @@ -5153,7 +5141,7 @@ - + @@ -5170,7 +5158,7 @@ - + @@ -5186,7 +5174,7 @@ - + @@ -5198,7 +5186,7 @@ - + @@ -5210,7 +5198,7 @@ - + @@ -5228,7 +5216,7 @@ - + @@ -5258,7 +5246,7 @@ - + @@ -5284,7 +5272,7 @@ - + @@ -5296,7 +5284,7 @@ - + @@ -5317,7 +5305,7 @@ - + @@ -5442,26 +5430,26 @@ - - + + - - + + - - + + - - + + - - - - + + + + @@ -5485,234 +5473,234 @@ - - + + - - + + - - + + - - - + + + - - + + - - + + - - + + - - + + - - + + - - - - - + + + + + - - + + - - + + - - + + - - - + + + - + - - + + - - + + - - + + - - + + - - - - - + + + + + - - + + - - + + - - + + - - + + - - - - + + + + - - + + - - + + - - + + - - + + - - - - + + + + - - + + - - + + - - + + - - - + + + - + - - + + - - + + - - + + - - - - + + + + - + - - + + - - + + - - + + - - + + - - - - - + + + + + - + - - + + - - + + - - + + - - - - + + + + diff --git a/archive.qmd b/archive.qmd index 36e24f0..2a552c6 100644 --- a/archive.qmd +++ b/archive.qmd @@ -16,8 +16,8 @@ claims, available through the [COVIDcast API](https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html). This signal is subject to very heavy and regular revision; you can read more about it on its [API documentation -page](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html). We'll use the offline version stored in `{epidatasets}`. - +page](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html). +We'll use the offline version stored in `{epidatasets}`. ```{r, include=FALSE} source("_common.R") @@ -36,7 +36,7 @@ tibble, provided that it has (at least) the following columns: the data for January 14, 2022 that were available one day later. As we can see from the above, the data frame returned by -`epidatr::covidcast()` has the columns required for the `epi_archive` +`epidatr::pub_covidcast()` has the columns required for the `epi_archive` format, so we use `as_epi_archive()` to cast it into `epi_archive` format.[^1] @@ -47,7 +47,7 @@ to the [compactify vignette](https://cmu-delphi.github.io/epiprocess/articles/co ```{r} x <- archive_cases_dv_subset_dt %>% - select(geo_value, time_value, version, percent_cli) %>% + select(geo_value, time_value, version, percent_cli) %>% as_epi_archive(compactify = TRUE) class(x) @@ -70,8 +70,8 @@ below). There can only be a single row per unique combination of key variables, and therefore the key variables are critical for figuring out how to generate a snapshot of data from the archive, as of a given version (also described below). -```{r, error=TRUE} -key(x$DT) +```{r} +data.table::key(x$DT) ``` In general, the last version of each observation is carried forward (LOCF) to @@ -100,7 +100,7 @@ the signal variables as of a given version. This can be accessed via `epix_as_of()`. ```{r} -x_snapshot <- epix_as_of(x, max_version = as.Date("2021-06-01")) +x_snapshot <- epix_as_of(x, version = as.Date("2021-06-01")) class(x_snapshot) x_snapshot max(x_snapshot$time_value) @@ -120,7 +120,7 @@ this case, since updates to the current version may still come in at a later point in time, due to various reasons, such as synchronization issues. ```{r} -x_latest <- epix_as_of(x, max_version = max(x$DT$version)) +x_latest <- epix_as_of(x, version = max(x$DT$version)) ``` Below, we pull several snapshots from the archive, spaced one month apart. We @@ -134,8 +134,9 @@ versions <- seq(as.Date("2020-06-01"), self_max - 1, by = "1 month") snapshots <- map( versions, function(v) { - epix_as_of(x, max_version = v) %>% mutate(version = v) - }) %>% + epix_as_of(x, version = v) %>% mutate(version = v) + } +) %>% list_rbind() %>% bind_rows(x_latest %>% mutate(version = self_max)) %>% mutate(latest = version == self_max) @@ -143,18 +144,22 @@ snapshots <- map( ```{r, fig.height=7} #| code-fold: true -ggplot(snapshots %>% filter(!latest), - aes(x = time_value, y = percent_cli)) + +ggplot( + snapshots %>% filter(!latest), + aes(x = time_value, y = percent_cli) +) + geom_line(aes(color = factor(version)), na.rm = TRUE) + geom_vline(aes(color = factor(version), xintercept = version), lty = 2) + - facet_wrap(~ geo_value, scales = "free_y", ncol = 1) + + facet_wrap(~geo_value, scales = "free_y", ncol = 1) + scale_x_date(minor_breaks = "month", date_labels = "%b %Y") + scale_color_viridis_d(option = "A", end = .9) + labs(x = "Date", y = "% of doctor's visits with CLI") + theme(legend.position = "none") + - geom_line(data = snapshots %>% filter(latest), - aes(x = time_value, y = percent_cli), - inherit.aes = FALSE, color = "black", na.rm = TRUE) + geom_line( + data = snapshots %>% filter(latest), + aes(x = time_value, y = percent_cli), + inherit.aes = FALSE, color = "black", na.rm = TRUE + ) ``` We can see some interesting and highly nontrivial revision behavior: at some @@ -164,7 +169,6 @@ they overestimate it (both states towards the beginning of 2021), though not quite as dramatically. Modeling the revision process, which is often called *backfill modeling*, is an important statistical problem in it of itself. - ## Merging `epi_archive` objects Now we demonstrate how to merge two `epi_archive` objects together, e.g., so diff --git a/epidf.qmd b/epidf.qmd index 73d17bc..0e1f654 100644 --- a/epidf.qmd +++ b/epidf.qmd @@ -5,7 +5,7 @@ source("_common.R") ``` -We'll start by showing how to get data into +We'll start by showing how to get data into `epi_df`, which is just a tibble with a bit of special structure, and is the format assumed by all of the functions in the `epiprocess` package. An `epi_df` object has (at least) the @@ -43,16 +43,13 @@ cases <- pub_covidcast( colnames(cases) ``` -As we can see, a data frame returned by `epidatr::covidcast()` has the +As we can see, a data frame returned by `epidatr::pub_covidcast()` has the columns required for an `epi_df` object (along with many others). We can use `as_epi_df()`, with specification of some relevant metadata, to bring the data frame into `epi_df` format. ```{r, message = FALSE} -x <- as_epi_df(cases, - geo_type = "state", - time_type = "day", - as_of = max(cases$issue)) %>% +x <- as_epi_df(cases, as_of = max(cases$issue)) %>% select(geo_value, time_value, total_cases = value) class(x) @@ -64,7 +61,7 @@ attributes(x)$metadata ## Some details on metadata In general, an `epi_df` object has the following fields in its metadata: - + * `geo_type`: the type for the geo values. * `time_type`: the type for the time values. * `as_of`: the time value at which the given data were available. @@ -86,10 +83,10 @@ data set. See the [archive vignette](https://cmu-delphi.github.io/epiprocess/articles/archive.html) for more. -If any of the `geo_type`, `time_type`, or `as_of` arguments are missing in a +If any of the `geo_type`, `time_type`, or `as_of` arguments are missing in a call to `as_epi_df()`, then this function will try to infer them from the passed object. Usually, `geo_type` and `time_type` can be inferred from the `geo_value` -and `time_value` columns, respectively, but inferring the `as_of` field is not +and `time_value` columns, respectively, but inferring the `as_of` field is not as easy. See the documentation for `as_epi_df()` more details. ```{r} @@ -109,25 +106,29 @@ In the following examples we will show how to create an `epi_df` with additional set.seed(12345) ex1 <- tibble( geo_value = rep(c("ca", "fl", "pa"), each = 3), - county_code = c("06059", "06061", "06067", "12111", "12113", "12117", - "42101", "42103", "42105"), + county_code = c( + "06059", "06061", "06067", "12111", "12113", "12117", + "42101", "42103", "42105" + ), time_value = rep( - seq(as.Date("2020-06-01"), as.Date("2020-06-03"), by = "1 day"), - length.out = 9), + seq(as.Date("2020-06-01"), as.Date("2020-06-03"), by = "1 day"), + length.out = 9 + ), value = rpois(9, 5) -) %>% +) %>% as_tsibble(index = time_value, key = c(geo_value, county_code)) -ex1 <- as_epi_df(x = ex1, geo_type = "state", time_type = "day", as_of = "2020-06-03") +ex1 <- as_epi_df(x = ex1, as_of = "2020-06-03") ``` The metadata now includes `county_code` as an extra key. + ```{r} attr(ex1, "metadata") ``` -### Dealing with misspecified column names +### Dealing with misspecified column names `epi_df` requires there to be columns `geo_value` and `time_value`, if they do not exist then `as_epi_df()` throws an error. @@ -136,27 +137,27 @@ ex2 <- data.frame( state = rep(c("ca", "fl", "pa"), each = 3), # misnamed pol = rep(c("blue", "swing", "swing"), each = 3), # extra key reported_date = rep( - seq(as.Date("2020-06-01"), as.Date("2020-06-03"), by = "day"), - length.out = 9), # misnamed + seq(as.Date("2020-06-01"), as.Date("2020-06-03"), by = "day"), + length.out = 9 + ), # misnamed value = rpois(9, 5) -) -ex2 %>% as_epi_df() +) +ex2 %>% as_epi_df() ``` -The columns should be renamed to match `epi_df` format. +The columns should be renamed to match `epi_df` format. ```{r} -ex2 <- ex2 %>% +ex2 <- ex2 %>% rename(geo_value = state, time_value = reported_date) %>% - as_epi_df(geo_type = "state", - as_of = "2020-06-03", - additional_metadata = list(other_keys = "pol") + as_epi_df( + as_of = "2020-06-03", + other_keys = "pol" ) attr(ex2, "metadata") ``` - ### Adding additional keys to an `epi_df` object In the above examples, all the keys are added to objects prior to conversion to @@ -166,22 +167,23 @@ We'll look at an included dataset and filter to a single state for simplicity. ```{r} ex3 <- jhu_csse_county_level_subset %>% filter(time_value > "2021-12-01", state_name == "Massachusetts") %>% - slice_tail(n = 6) - + slice_tail(n = 6) + attr(ex3, "metadata") # geo_type is county currently ``` -Now we add `state` (MA) and `pol` as new columns to the data and as new keys to the metadata. The "state" `geo_type` anticipates lower-case abbreviations, so we'll match that. +Now we add `state` (MA) and `pol` as new columns to the data and as new keys to the metadata. The "state" `geo_type` anticipates lower-case abbreviations, so we'll match that. ```{r} -ex3 <- ex3 %>% +ex3 <- ex3 %>% as_tibble() %>% # drop the `epi_df` class before adding additional metadata mutate( state = rep(tolower("MA"), 6), - pol = rep(c("blue", "swing", "swing"), each = 2)) %>% - as_epi_df(additional_metadata = list(other_keys = c("state", "pol"))) + pol = rep(c("blue", "swing", "swing"), each = 2) + ) %>% + as_epi_df(other_keys = c("state", "pol")) -attr(ex3,"metadata") +attr(ex3, "metadata") ``` Note that the two additional keys we added, `state` and `pol`, are specified as a character vector in the `other_keys` component of the `additional_metadata` list. They must be specified in this manner so that downstream actions on the `epi_df`, like model fitting and prediction, can recognize and use these keys. @@ -199,17 +201,17 @@ package. Of course, we can also write custom code for other downstream uses, like plotting, which is pretty easy to do `ggplot2`. ```{r, message = FALSE, warning = FALSE} -ggplot(x, aes(x = time_value, y = total_cases, color = geo_value)) + +ggplot(x, aes(x = time_value, y = total_cases, color = geo_value)) + geom_line() + scale_color_brewer(palette = "Set1") + scale_x_date(minor_breaks = "month", date_labels = "%b %Y") + labs(x = "Date", y = "Cumulative COVID-19 cases", color = "State") ``` -Finally, we'll examine some data from other packages just to show how -we might get them into `epi_df` format. -The first is data on daily new (not cumulative) SARS -cases in Canada in 2003, from the +Finally, we'll examine some data from other packages just to show how +we might get them into `epi_df` format. +The first is data on daily new (not cumulative) SARS +cases in Canada in 2003, from the [outbreaks](https://github.com/reconverse/outbreaks) package. New cases are broken into a few categories by provenance. @@ -217,20 +219,20 @@ broken into a few categories by provenance. x <- outbreaks::sars_canada_2003 %>% mutate(geo_value = "ca") %>% select(geo_value, time_value = date, starts_with("cases")) %>% - as_epi_df(geo_type = "nation") + as_epi_df() head(x) ``` ```{r} #| code-fold: true -x <- x %>% +x <- x %>% pivot_longer(starts_with("cases"), names_to = "type") %>% mutate(type = substring(type, 7)) ggplot(x, aes(x = time_value, y = value)) + geom_col(aes(fill = type), just = 0.5) + - scale_y_continuous(breaks = 0:4*2, expand = expansion(c(0, 0.05))) + + scale_y_continuous(breaks = 0:4 * 2, expand = expansion(c(0, 0.05))) + scale_x_date(minor_breaks = "month", date_labels = "%b %Y") + labs(x = "Date", y = "SARS cases in Canada", fill = "Type") ``` @@ -243,27 +245,30 @@ x <- outbreaks::ebola_sierraleone_2014 %>% cases = ifelse(status == "confirmed", 1, 0), province = case_when( district %in% c("Kailahun", "Kenema", "Kono") ~ "Eastern", - district %in% c("Bombali", "Kambia", "Koinadugu", - "Port Loko", "Tonkolili") ~ "Northern", + district %in% c( + "Bombali", "Kambia", "Koinadugu", + "Port Loko", "Tonkolili" + ) ~ "Northern", district %in% c("Bo", "Bonthe", "Moyamba", "Pujehun") ~ "Sourthern", - district %in% c("Western Rural", "Western Urban") ~ "Western") - ) %>% + district %in% c("Western Rural", "Western Urban") ~ "Western" + ) + ) %>% select(geo_value = province, time_value = date_of_onset, cases) %>% filter(cases == 1) %>% - group_by(geo_value, time_value) %>% + group_by(geo_value, time_value) %>% summarise(cases = sum(cases)) %>% - as_epi_df(geo_type = "province") + as_epi_df() ``` ```{r} #| code-fold: true #| fig-width: 8 #| fig-height: 6 -ggplot(x, aes(x = time_value, y = cases)) + - geom_col(aes(fill = geo_value), show.legend = FALSE) + - facet_wrap(~ geo_value, scales = "free_y") + +ggplot(x, aes(x = time_value, y = cases)) + + geom_col(aes(fill = geo_value), show.legend = FALSE) + + facet_wrap(~geo_value, scales = "free_y") + scale_x_date(minor_breaks = "month", date_labels = "%b %Y") + - labs(x = "Date", y = "Confirmed cases of Ebola in Sierra Leone") + labs(x = "Date", y = "Confirmed cases of Ebola in Sierra Leone") ``` diff --git a/outliers.qmd b/outliers.qmd index 16c66e8..dc68d52 100644 --- a/outliers.qmd +++ b/outliers.qmd @@ -12,12 +12,12 @@ source("_common.R") ``` ```{r} -x <- incidence_num_outlier_example +incidence_num_outlier_example ``` ```{r, warning=FALSE, message=FALSE} #| code-fold: true -ggplot(x, aes(x = time_value, y = cases, color = geo_value)) + +ggplot(incidence_num_outlier_example, aes(x = time_value, y = cases, color = geo_value)) + geom_line() + scale_color_manual(values = c(3, 6)) + geom_hline(yintercept = 0, linetype = 3) + @@ -36,13 +36,13 @@ methods on a given signal, and then (optionally) combine the results from those methods. Here, we'll investigate outlier detection results from the following methods. -1. Detection based on a rolling median, using `detect_outlr_rm()`, which - computes a rolling median on with a default window size of `n` time points - centered at the time point under consideration, and then computes thresholds - based on a multiplier times a rolling IQR computed on the residuals. +1. Detection based on a rolling median, using `detect_outlr_rm()`, which + computes a rolling median on with a default window size of `n` time points + centered at the time point under consideration, and then computes thresholds + based on a multiplier times a rolling IQR computed on the residuals. 2. Detection based on a seasonal-trend decomposition using LOESS (STL), using - `detect_outlr_stl()`, which is similar to the rolling median method but - replaces the rolling median with fitted values from STL. + `detect_outlr_stl()`, which is similar to the rolling median method but + replaces the rolling median with fitted values from STL. 3. Detection based on an STL decomposition, but without seasonality term, which amounts to smoothing using LOESS. @@ -50,7 +50,7 @@ The outlier detection methods are specified using a `tibble` that is passed to `detect_outlr()`, with one row per method, and whose columms specify the outlier detection function, any input arguments (only nondefault values need to be supplied), and an abbreviated name for the method used in tracking results. -Abbreviations "rm" and "stl" can be used for the built-in detection functions +Abbreviations "rm" and "stl" can be used for the built-in detection functions `detect_outlr_rm()` and `detect_outlr_stl()`, respectively. ```{r} @@ -63,12 +63,7 @@ detection_methods = bind_rows( args = list(list(detect_negatives = TRUE, detection_multiplier = 2.5, seasonal_period = 7)), - abbr = "stl_seasonal"), - tibble(method = "stl", - args = list(list(detect_negatives = TRUE, - detection_multiplier = 2.5, - seasonal_period = NULL)), - abbr = "stl_nonseasonal")) + abbr = "stl_seasonal")) detection_methods ``` @@ -79,7 +74,7 @@ Note that using this combined median threshold is equivalent to using a majority vote across the base methods to determine whether a value is an outlier. ```{r} -x <- x %>% +x <- incidence_num_outlier_example %>% group_by(geo_value) %>% mutate( outlier_info = detect_outlr( @@ -87,51 +82,51 @@ x <- x %>% methods = detection_methods, combiner = "median") ) %>% - ungroup() %>% - unnest(outlier_info) + unpack(outlier_info) %>% + ungroup() x ``` -To visualize the results, we define a convenience function for and call it on +To visualize the results, we define a convenience function for and call it on each state separately (hidden below the fold). ```{r} #| code-fold: true # Plot outlier detection bands and/or points identified as outliers plot_outlr <- function( - x, signal, method_abbr, bands = TRUE, points = TRUE, + x, signal, method_abbr, bands = TRUE, points = TRUE, facet_vars = vars(geo_value), nrow = NULL, ncol = NULL, scales = "fixed") { - - # Convert outlier detection results to long format + + # Convert outlier detection results to long format signal <- rlang::enquo(signal) x_long <- x %>% pivot_longer( cols = starts_with(method_abbr), names_to = c("method", ".value"), names_pattern = "(.+)_(.+)") - + # Start of plot with observed data p <- ggplot() + geom_line(data = x, mapping = aes(x = time_value, y = !!signal)) # If requested, add bands - if (bands) - p <- p + geom_ribbon(data = x_long, - aes(x = time_value, ymin = lower, ymax = upper, + if (bands) + p <- p + geom_ribbon(data = x_long, + aes(x = time_value, ymin = lower, ymax = upper, color = method), fill = NA) # If requested, add points if (points) { x_detected <- x_long %>% filter((!!signal < lower) | (!!signal > upper)) - p <- p + geom_point(data = x_detected, - aes(x = time_value, y = !!signal, color = method, + p <- p + geom_point(data = x_detected, + aes(x = time_value, y = !!signal, color = method, shape = method)) } # If requested, add faceting - if (!is.null(facet_vars)) + if (!is.null(facet_vars)) p <- p + facet_wrap(facet_vars, nrow = nrow, ncol = ncol, scales = scales) return(p) @@ -159,7 +154,7 @@ plot_outlr(x %>% filter(geo_value == "nj"), cases, method_abbr, labs(x = "Date", y = "Reported COVID-19 counts", color = "Method", shape = "Method") + scale_color_brewer(palette = "Set1") + - ggtitle("New Jersey") + + ggtitle("New Jersey") + theme(legend.position = "bottom") ``` @@ -167,13 +162,13 @@ plot_outlr(x %>% filter(geo_value == "nj"), cases, method_abbr, Finally, in order to correct outliers, we can use the posited replacement values returned by each outlier detection method. Below we use the replacement value -from the combined method, which is defined by the median of replacement values +from the combined method, which is defined by the median of replacement values from the base methods at each time point. ```{r, fig.width = 8, fig.height = 7} -y <- x %>% +y <- x %>% mutate(cases_corrected = combined_replacement) %>% - select(geo_value, time_value, cases, cases_corrected) + select(geo_value, time_value, cases, cases_corrected) y %>% filter(cases != cases_corrected) ``` @@ -193,6 +188,6 @@ y %>% theme(legend.position = "bottom", legend.title = element_blank()) ``` -More advanced correction functionality will be coming at some point in the -future. +More advanced correction functionality will be coming at some point in the +future. diff --git a/renv.lock b/renv.lock index ad975e4..59ee76b 100644 --- a/renv.lock +++ b/renv.lock @@ -18,14 +18,14 @@ }, "DBI": { "Package": "DBI", - "Version": "1.2.2", + "Version": "1.2.3", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "R", "methods" ], - "Hash": "164809cd72e1d5160b4cb3aa57f510fe" + "Hash": "065ae649b05f1ff66bb0c793107508f5" }, "DiceDesign": { "Package": "DiceDesign", @@ -50,14 +50,14 @@ }, "KernSmooth": { "Package": "KernSmooth", - "Version": "2.23-22", + "Version": "2.23-24", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Requirements": [ "R", "stats" ], - "Hash": "2fecebc3047322fa5930f74fae5de70f" + "Hash": "9f33a1ee37bbe8919eb2ec4b9f2473a5" }, "MASS": { "Package": "MASS", @@ -98,19 +98,6 @@ ], "Hash": "8c7115cd3a0e048bda2a7cd110549f7a" }, - "MatrixModels": { - "Package": "MatrixModels", - "Version": "0.5-3", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "Matrix", - "R", - "methods", - "stats" - ], - "Hash": "0776bf7526869e0286b0463cb72fb211" - }, "R6": { "Package": "R6", "Version": "2.5.1", @@ -133,18 +120,18 @@ }, "Rcpp": { "Package": "Rcpp", - "Version": "1.0.12", + "Version": "1.0.13", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "methods", "utils" ], - "Hash": "5ea2700d21e038ace58269ecdbeb9ec0" + "Hash": "f27411eb6d9c3dada5edd444b8416675" }, "RcppEigen": { "Package": "RcppEigen", - "Version": "0.3.4.0.0", + "Version": "0.3.4.0.2", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -153,18 +140,18 @@ "stats", "utils" ], - "Hash": "df49e3306f232ec28f1604e36a202847" + "Hash": "4ac8e423216b8b70cb9653d1b3f71eb9" }, "RcppRoll": { "Package": "RcppRoll", - "Version": "0.3.0", + "Version": "0.3.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Requirements": [ "R", "Rcpp" ], - "Hash": "84a03997fbb5acfb3c9b43bad88fea1f" + "Hash": "6659c0ecb7b85f322f93e7f1e6ac7d35" }, "SQUAREM": { "Package": "SQUAREM", @@ -176,20 +163,6 @@ ], "Hash": "0cf10dab0d023d5b46a5a14387556891" }, - "SparseM": { - "Package": "SparseM", - "Version": "1.81", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "graphics", - "methods", - "stats", - "utils" - ], - "Hash": "2042cd9759cc89a453c4aefef0ce9aae" - }, "anytime": { "Package": "anytime", "Version": "0.3.9", @@ -214,13 +187,13 @@ }, "backports": { "Package": "backports", - "Version": "1.4.1", + "Version": "1.5.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Requirements": [ "R" ], - "Hash": "c39fbec8a30d23e721980b8afb31984c" + "Hash": "e1e1b9d75c37401117b636b7ae50827a" }, "base64enc": { "Package": "base64enc", @@ -234,7 +207,7 @@ }, "bayestestR": { "Package": "bayestestR", - "Version": "0.13.2", + "Version": "0.14.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -246,29 +219,23 @@ "stats", "utils" ], - "Hash": "4a6a2eebe2db1dfb1c792c4ed91e73dc" + "Hash": "71e7da5d38487173de67a1f0d763ceef" }, "bit": { "Package": "bit", - "Version": "4.0.5", + "Version": "4.5.0", "Source": "Repository", "Repository": "RSPM", - "RemoteType": "standard", - "RemotePkgRef": "bit", - "RemoteRef": "bit", - "RemoteRepos": "https://packagemanager.rstudio.com/all/latest", - "RemotePkgPlatform": "source", - "RemoteSha": "4.0.5", "Requirements": [ "R" ], - "Hash": "d242abec29412ce988848d0294b208fd" + "Hash": "5dc7b2677d65d0e874fc4aaf0e879987" }, "bit64": { "Package": "bit64", - "Version": "4.0.5", + "Version": "4.5.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Requirements": [ "R", "bit", @@ -276,19 +243,13 @@ "stats", "utils" ], - "Hash": "9fe98599ca456d6552421db0d6772d8f" + "Hash": "e84984bf5f12a18628d9a02322128dfd" }, "blob": { "Package": "blob", "Version": "1.2.4", "Source": "Repository", "Repository": "RSPM", - "RemoteType": "standard", - "RemotePkgRef": "blob", - "RemoteRef": "blob", - "RemoteRepos": "https://packagemanager.rstudio.com/all/latest", - "RemotePkgPlatform": "source", - "RemoteSha": "1.2.4", "Requirements": [ "methods", "rlang", @@ -298,14 +259,13 @@ }, "broom": { "Package": "broom", - "Version": "1.0.5", + "Version": "1.0.7", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "R", "backports", "dplyr", - "ellipsis", "generics", "glue", "lifecycle", @@ -315,11 +275,11 @@ "tibble", "tidyr" ], - "Hash": "fd25391c3c4f6ecf0fa95f1e6d15378c" + "Hash": "8fcc818f3b9887aebaf206f141437cc9" }, "bslib": { "Package": "bslib", - "Version": "0.7.0", + "Version": "0.8.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -337,18 +297,18 @@ "rlang", "sass" ], - "Hash": "8644cc53f43828f19133548195d7e59e" + "Hash": "b299c6741ca9746fb227debcb0f9fb6c" }, "cachem": { "Package": "cachem", - "Version": "1.0.8", + "Version": "1.1.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "fastmap", "rlang" ], - "Hash": "c35768291560ce302c0a6589f92e837d" + "Hash": "cd9a672193789068eb5a2aad65a0dedf" }, "callr": { "Package": "callr", @@ -377,7 +337,7 @@ }, "checkmate": { "Package": "checkmate", - "Version": "2.3.1", + "Version": "2.3.2", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -385,7 +345,7 @@ "backports", "utils" ], - "Hash": "c01cab1cb0f9125211a6fc99d540e315" + "Hash": "0e14e01ce07e7c88fd25de6d4260d26b" }, "class": { "Package": "class", @@ -402,14 +362,14 @@ }, "cli": { "Package": "cli", - "Version": "3.6.2", + "Version": "3.6.3", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "R", "utils" ], - "Hash": "1216ac65ac55ec0058a6f75d7ca0fd52" + "Hash": "b21916dd77a27642b447374a5d30ecf3" }, "clipr": { "Package": "clipr", @@ -423,7 +383,7 @@ }, "clock": { "Package": "clock", - "Version": "0.7.0", + "Version": "0.7.1", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -435,7 +395,7 @@ "tzdb", "vctrs" ], - "Hash": "3d8a84cdf9f6f8564531c49b70f3833d" + "Hash": "3dcaebd52554438d12989e5061e15de8" }, "codetools": { "Package": "codetools", @@ -449,7 +409,7 @@ }, "colorspace": { "Package": "colorspace", - "Version": "2.1-0", + "Version": "2.1-1", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -459,7 +419,7 @@ "methods", "stats" ], - "Hash": "f20c47fd52fae58b4e377c37bb8c335b" + "Hash": "d954cb1c57e8d8b756165d7ba18aa55a" }, "conflicted": { "Package": "conflicted", @@ -476,7 +436,7 @@ }, "correlation": { "Package": "correlation", - "Version": "0.8.4", + "Version": "0.8.5", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -488,35 +448,29 @@ "parameters", "stats" ], - "Hash": "d8bd29a9abda6eed9aaab3ba5769f231" + "Hash": "0995955fd59a01caf80918913bc5066c" }, "cpp11": { "Package": "cpp11", - "Version": "0.4.7", + "Version": "0.5.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "R" ], - "Hash": "5a295d7d963cc5035284dcdbaf334f4e" + "Hash": "91570bba75d0c9d3f1040c835cee8fba" }, "crayon": { "Package": "crayon", - "Version": "1.5.2", + "Version": "1.5.3", "Source": "Repository", "Repository": "RSPM", - "RemoteType": "standard", - "RemotePkgRef": "crayon", - "RemoteRef": "crayon", - "RemoteRepos": "https://packagemanager.rstudio.com/all/latest", - "RemotePkgPlatform": "source", - "RemoteSha": "1.5.2", "Requirements": [ "grDevices", "methods", "utils" ], - "Hash": "e8a1e41acf02548751f45c718d55aa6a" + "Hash": "859d96e65ef198fd43e82b9628d593ef" }, "credentials": { "Package": "credentials", @@ -534,28 +488,28 @@ }, "curl": { "Package": "curl", - "Version": "5.2.1", + "Version": "5.2.3", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "R" ], - "Hash": "411ca2c03b1ce5f548345d2fc2685f7a" + "Hash": "d91263322a58af798f6cf3b13fd56dde" }, "data.table": { "Package": "data.table", - "Version": "1.15.4", + "Version": "1.16.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "R", "methods" ], - "Hash": "8ee9ac56ef633d0c7cab8b2ca87d683e" + "Hash": "fb24e05d4a91d8b1c7ff8e284bde834a" }, "datawizard": { "Package": "datawizard", - "Version": "0.10.0", + "Version": "0.12.3", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -564,7 +518,7 @@ "stats", "utils" ], - "Hash": "62d6ec10346d3302a1299e1c54641d83" + "Hash": "611537168bbb78b57720de109ec1ad19" }, "dbplyr": { "Package": "dbplyr", @@ -612,12 +566,6 @@ "Version": "1.6.5", "Source": "Repository", "Repository": "RSPM", - "RemoteType": "standard", - "RemotePkgRef": "diagram", - "RemoteRef": "diagram", - "RemoteRepos": "https://packagemanager.rstudio.com/all/latest", - "RemotePkgPlatform": "source", - "RemoteSha": "1.6.5", "Requirements": [ "R", "graphics", @@ -628,7 +576,7 @@ }, "dials": { "Package": "dials", - "Version": "1.2.1", + "Version": "1.3.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -643,39 +591,56 @@ "purrr", "rlang", "scales", + "sfd", "tibble", "utils", "vctrs", "withr" ], - "Hash": "999e5fa12058a2bb3a8c204e637e4707" + "Hash": "f2fbe4e90fab23fc1f95bffcd3662878" + }, + "diffobj": { + "Package": "diffobj", + "Version": "0.3.5", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "crayon", + "methods", + "stats", + "tools", + "utils" + ], + "Hash": "bcaa8b95f8d7d01a5dedfd959ce88ab8" }, "digest": { "Package": "digest", - "Version": "0.6.35", + "Version": "0.6.37", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "R", "utils" ], - "Hash": "698ece7ba5a4fa4559e3d537e7ec3d31" + "Hash": "33698c4b3127fc9f506654607fb73676" }, "distributional": { "Package": "distributional", - "Version": "0.4.0", + "Version": "0.5.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "generics", "lifecycle", "numDeriv", + "pillar", "rlang", "stats", "utils", "vctrs" ], - "Hash": "3bad76869f2257ea4fd00a3c08c2bcce" + "Hash": "76e94de462aa18ea966a38956ecf4497" }, "doFuture": { "Package": "doFuture", @@ -721,12 +686,6 @@ "Version": "1.3.1", "Source": "Repository", "Repository": "RSPM", - "RemoteType": "standard", - "RemotePkgRef": "dtplyr", - "RemoteRef": "dtplyr", - "RemoteRepos": "https://packagemanager.rstudio.com/all/latest", - "RemotePkgPlatform": "source", - "RemoteSha": "1.3.1", "Requirements": [ "R", "cli", @@ -743,7 +702,7 @@ }, "effectsize": { "Package": "effectsize", - "Version": "0.8.7", + "Version": "0.8.9", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -756,7 +715,7 @@ "stats", "utils" ], - "Hash": "a6900d0b5bdcbb956dbb843643279e7c" + "Hash": "7aceb5e07b6d48171c6b56714cc305ea" }, "ellipsis": { "Package": "ellipsis", @@ -774,36 +733,33 @@ "Version": "0.0.1", "Source": "GitHub", "RemoteType": "github", - "RemoteHost": "api.github.com", "RemoteUsername": "cmu-delphi", "RemoteRepo": "epidatasets", "RemoteRef": "main", - "RemoteSha": "ca86f0326e4eb08316b40972c7d3c98217e9941e", - "Remotes": "cmu-delphi/epidatr, cmu-delphi/epiprocess", + "RemoteSha": "0632a77dc30655bbbb8c9667d7365f99ad0d5622", + "RemoteHost": "api.github.com", "Requirements": [ "R" ], - "Hash": "3deba70da0ce06354cbd3206b16e36a2" + "Hash": "d8715113bd6e6fbbddb664144d999dd0" }, "epidatr": { "Package": "epidatr", - "Version": "1.1.5", + "Version": "1.2.0", "Source": "GitHub", "RemoteType": "github", - "RemoteHost": "api.github.com", "RemoteUsername": "cmu-delphi", "RemoteRepo": "epidatr", "RemoteRef": "dev", - "RemoteSha": "626c30bc07f4aae3c3e6a6c6b825a6cd5eee1ce7", + "RemoteSha": "0b3480889091063e5b03358cea10670292a803e6", + "RemoteHost": "api.github.com", "Requirements": [ "MMWRweek", "R", "cachem", - "cachem", "checkmate", "cli", "glue", - "glue", "httr", "jsonlite", "magrittr", @@ -813,22 +769,20 @@ "readr", "tibble", "usethis", - "usethis", "xml2" ], - "Hash": "869d57a2ad4002670ad28939fe050e82" + "Hash": "3ad6e3cc0f0a1ff4b1e976b00ba3654d" }, "epipredict": { "Package": "epipredict", - "Version": "0.0.14", + "Version": "0.0.24", "Source": "GitHub", "RemoteType": "github", "RemoteHost": "api.github.com", "RemoteUsername": "cmu-delphi", "RemoteRepo": "epipredict", "RemoteRef": "dev", - "RemoteSha": "5e50a5a112b663eff85fcac5586875352157a5c4", - "Remotes": "cmu-delphi/epidatr, cmu-delphi/epiprocess, dajmcdon/smoothqr", + "RemoteSha": "36c4c0a88f77861302b35e95b815609f9014e90d", "Requirements": [ "R", "checkmate", @@ -843,10 +797,8 @@ "lifecycle", "magrittr", "parsnip", - "quantreg", "recipes", "rlang", - "smoothqr", "stats", "tibble", "tidyr", @@ -855,31 +807,27 @@ "vctrs", "workflows" ], - "Hash": "4531cf03e3c8955857df663d7366a8f4" + "Hash": "2015c74d601879eaeb391c269cb7551d" }, "epiprocess": { "Package": "epiprocess", - "Version": "0.7.7", + "Version": "0.9.0", "Source": "GitHub", "RemoteType": "github", "RemoteHost": "api.github.com", "RemoteUsername": "cmu-delphi", "RemoteRepo": "epiprocess", "RemoteRef": "dev", - "RemoteSha": "4e65e51bb56ab70cc98fa2d37dd35a4ab2336620", - "Remotes": "cmu-delphi/epidatr, reconverse/outbreaks, glmgen/genlasso", + "RemoteSha": "44e70950a0e3c3c2bd8da52e5234dc505d99bb00", "Requirements": [ "R", - "R6", "checkmate", "cli", "data.table", "dplyr", - "fabletools", - "feasts", - "generics", "genlasso", "ggplot2", + "glue", "lifecycle", "lubridate", "magrittr", @@ -891,47 +839,20 @@ "tidyselect", "tsibble", "utils", - "vctrs" + "vctrs", + "waldo" ], - "Hash": "998ba22373923380e1ce7e787d11af18" + "Hash": "057dc098224b8288fdbaf47e9fd5ed86" }, "evaluate": { "Package": "evaluate", - "Version": "0.23", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "methods" - ], - "Hash": "daf4a1246be12c1fa8c7705a0935c1a0" - }, - "fabletools": { - "Package": "fabletools", - "Version": "0.4.2", + "Version": "1.0.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ - "R", - "R6", - "distributional", - "dplyr", - "generics", - "ggdist", - "ggplot2", - "lifecycle", - "progressr", - "rlang", - "scales", - "stats", - "tibble", - "tidyr", - "tidyselect", - "tsibble", - "utils", - "vctrs" + "R" ], - "Hash": "005e92a674b01825e0feb29931c03c5e" + "Hash": "6b567375113ceb7d9f800de4dd42218e" }, "fansi": { "Package": "fansi", @@ -947,42 +868,17 @@ }, "farver": { "Package": "farver", - "Version": "2.1.1", + "Version": "2.1.2", "Source": "Repository", "Repository": "RSPM", - "Hash": "8106d78941f34855c440ddb946b8f7a5" + "Hash": "680887028577f3fa2a81e410ed0d6e42" }, "fastmap": { "Package": "fastmap", - "Version": "1.1.1", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "f7736a18de97dea803bde0a2daaafb27" - }, - "feasts": { - "Package": "feasts", - "Version": "0.3.2", + "Version": "1.2.0", "Source": "Repository", "Repository": "RSPM", - "Requirements": [ - "R", - "dplyr", - "fabletools", - "ggplot2", - "grid", - "gtable", - "lifecycle", - "lubridate", - "rlang", - "scales", - "slider", - "tibble", - "tidyr", - "tsibble", - "utils", - "vctrs" - ], - "Hash": "d15631c019c27e50b1a99e3e9b3b53e1" + "Hash": "aa5e1cd11c2d15497494c5292d7ffcc8" }, "fontawesome": { "Package": "fontawesome", @@ -1001,12 +897,6 @@ "Version": "1.0.0", "Source": "Repository", "Repository": "RSPM", - "RemoteType": "standard", - "RemotePkgRef": "forcats", - "RemoteRef": "forcats", - "RemoteRepos": "https://packagemanager.rstudio.com/all/latest", - "RemotePkgPlatform": "source", - "RemoteSha": "1.0.0", "Requirements": [ "R", "cli", @@ -1047,12 +937,6 @@ "Version": "0.3.1", "Source": "Repository", "Repository": "RSPM", - "RemoteType": "standard", - "RemotePkgRef": "furrr", - "RemoteRef": "furrr", - "RemoteRepos": "https://packagemanager.rstudio.com/all/latest", - "RemotePkgPlatform": "source", - "RemoteSha": "0.3.1", "Requirements": [ "R", "future", @@ -1066,7 +950,7 @@ }, "future": { "Package": "future", - "Version": "1.33.2", + "Version": "1.34.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -1077,7 +961,7 @@ "parallelly", "utils" ], - "Hash": "fd7b1d69d16d0d114e4fa82db68f184c" + "Hash": "475771e3edb711591476be387c9a8c2e" }, "future.apply": { "Package": "future.apply", @@ -1145,7 +1029,7 @@ }, "gert": { "Package": "gert", - "Version": "2.0.1", + "Version": "2.1.2", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -1156,31 +1040,7 @@ "sys", "zip" ], - "Hash": "f70d3fe2d9e7654213a946963d1591eb" - }, - "ggdist": { - "Package": "ggdist", - "Version": "3.3.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "Rcpp", - "cli", - "distributional", - "ggplot2", - "glue", - "grid", - "gtable", - "numDeriv", - "quadprog", - "rlang", - "scales", - "tibble", - "vctrs", - "withr" - ], - "Hash": "86ebb3543cdad6520be9bf8863167a9a" + "Hash": "347d104ed332650b737f509a703c9c7f" }, "ggplot2": { "Package": "ggplot2", @@ -1315,12 +1175,6 @@ "Version": "1.0.1", "Source": "Repository", "Repository": "RSPM", - "RemoteType": "standard", - "RemotePkgRef": "gower", - "RemoteRef": "gower", - "RemoteRepos": "https://packagemanager.rstudio.com/all/latest", - "RemotePkgPlatform": "source", - "RemoteSha": "1.0.1", "Hash": "7a0051eef852c301b5efe2f7913dd45f" }, "gtable": { @@ -1340,7 +1194,7 @@ }, "hardhat": { "Package": "hardhat", - "Version": "1.3.1", + "Version": "1.4.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -1351,7 +1205,7 @@ "tibble", "vctrs" ], - "Hash": "921fd010cd788de75a9c71c2c3aee1f2" + "Hash": "e7aabf81944f6c6cbbcec1f85827a279" }, "haven": { "Package": "haven", @@ -1376,26 +1230,20 @@ }, "highr": { "Package": "highr", - "Version": "0.10", + "Version": "0.11", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "R", "xfun" ], - "Hash": "06230136b2d2b9ba5805e1963fa6e890" + "Hash": "d65ba49117ca223614f71b60d85b8ab7" }, "hms": { "Package": "hms", "Version": "1.1.3", "Source": "Repository", "Repository": "RSPM", - "RemoteType": "standard", - "RemotePkgRef": "hms", - "RemoteRef": "hms", - "RemoteRepos": "https://packagemanager.rstudio.com/all/latest", - "RemotePkgPlatform": "source", - "RemoteSha": "1.1.3", "Requirements": [ "lifecycle", "methods", @@ -1438,7 +1286,7 @@ }, "httr2": { "Package": "httr2", - "Version": "1.0.1", + "Version": "1.0.5", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -1455,7 +1303,7 @@ "vctrs", "withr" ], - "Hash": "03d741c92fda96d98c3a3f22494e3b4a" + "Hash": "d84e4c33206aaace37714901ac2b00c3" }, "ids": { "Package": "ids", @@ -1526,7 +1374,7 @@ }, "insight": { "Package": "insight", - "Version": "0.19.10", + "Version": "0.20.4", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -1535,19 +1383,13 @@ "stats", "utils" ], - "Hash": "c15a38c9655cba66f5f5537a14c1bef4" + "Hash": "8457d6e682a49f2c87b698a830527b09" }, "ipred": { "Package": "ipred", - "Version": "0.9-14", + "Version": "0.9-15", "Source": "Repository", "Repository": "RSPM", - "RemoteType": "standard", - "RemotePkgRef": "ipred", - "RemoteRef": "ipred", - "RemoteRepos": "https://packagemanager.rstudio.com/all/latest", - "RemotePkgPlatform": "source", - "RemoteSha": "0.9-14", "Requirements": [ "MASS", "R", @@ -1557,7 +1399,7 @@ "rpart", "survival" ], - "Hash": "b25a108cbf4834be7c1b1f46ff30f888" + "Hash": "3c3e02183ef7b9225213b531d0ce43f5" }, "isoband": { "Package": "isoband", @@ -1593,17 +1435,17 @@ }, "jsonlite": { "Package": "jsonlite", - "Version": "1.8.8", + "Version": "1.8.9", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "methods" ], - "Hash": "e1b9c55281c5adc4dd113652d9e26768" + "Hash": "4e993b65c2c3ffbffce7bb3e2c6f832b" }, "knitr": { "Package": "knitr", - "Version": "1.46", + "Version": "1.48", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -1615,7 +1457,7 @@ "xfun", "yaml" ], - "Hash": "6e008ab1d696a5283c79765fa7b56b47" + "Hash": "acf380f300c721da9fde7df115a5f86f" }, "labeling": { "Package": "labeling", @@ -1666,20 +1508,14 @@ }, "lhs": { "Package": "lhs", - "Version": "1.1.6", + "Version": "1.2.0", "Source": "Repository", "Repository": "RSPM", - "RemoteType": "standard", - "RemotePkgRef": "lhs", - "RemoteRef": "lhs", - "RemoteRepos": "https://packagemanager.rstudio.com/all/latest", - "RemotePkgPlatform": "source", - "RemoteSha": "1.1.6", "Requirements": [ "R", "Rcpp" ], - "Hash": "a007ff66aa9d478e220bf0493a7b1d95" + "Hash": "6d18e58d3d1de31b6e5415c1fe291113" }, "lifecycle": { "Package": "lifecycle", @@ -1767,7 +1603,7 @@ }, "modelbased": { "Package": "modelbased", - "Version": "0.8.7", + "Version": "0.8.8", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -1782,11 +1618,11 @@ "stats", "utils" ], - "Hash": "857859a5dd55f53a2c6ab14fbdb6acc1" + "Hash": "be0465e9a8078f1c5a15344a2c130266" }, "modeldata": { "Package": "modeldata", - "Version": "1.3.0", + "Version": "1.4.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -1797,7 +1633,7 @@ "rlang", "tibble" ], - "Hash": "6ac8ee87ffebd14b29586fce684c14cc" + "Hash": "a88b3cef9f6a41e075163e767ad8c8fa" }, "modelenv": { "Package": "modelenv", @@ -1817,12 +1653,6 @@ "Version": "0.1.11", "Source": "Repository", "Repository": "RSPM", - "RemoteType": "standard", - "RemotePkgRef": "modelr", - "RemoteRef": "modelr", - "RemoteRepos": "https://packagemanager.rstudio.com/all/latest", - "RemotePkgPlatform": "source", - "RemoteSha": "0.1.11", "Requirements": [ "R", "broom", @@ -1849,7 +1679,7 @@ }, "nlme": { "Package": "nlme", - "Version": "3.1-164", + "Version": "3.1-166", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -1859,7 +1689,7 @@ "stats", "utils" ], - "Hash": "a623a2239e642806158bc4dc3f51565d" + "Hash": "ccbb8846be320b627e6aa2b4616a2ded" }, "nnet": { "Package": "nnet", @@ -1885,13 +1715,13 @@ }, "openssl": { "Package": "openssl", - "Version": "2.1.2", + "Version": "2.2.2", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "askpass" ], - "Hash": "ea2475b073243d9d338aa8f086ce973e" + "Hash": "d413e0fef796c9401a4419485f709ca1" }, "outbreaks": { "Package": "outbreaks", @@ -1905,7 +1735,7 @@ }, "parallelly": { "Package": "parallelly", - "Version": "1.37.1", + "Version": "1.38.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -1913,11 +1743,11 @@ "tools", "utils" ], - "Hash": "5410df8d22bd36e616f2a2343dbb328c" + "Hash": "6e8b139c1904f5e9e14c69db64453bbe" }, "parameters": { "Package": "parameters", - "Version": "0.21.6", + "Version": "0.22.2", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -1930,7 +1760,7 @@ "stats", "utils" ], - "Hash": "1f1bf75cb49c61df8287a0fa3b68126f" + "Hash": "ee0115da94a9cf7c451615415ce65c03" }, "parsnip": { "Package": "parsnip", @@ -1963,11 +1793,12 @@ }, "patchwork": { "Package": "patchwork", - "Version": "1.2.0", + "Version": "1.3.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "cli", + "farver", "ggplot2", "grDevices", "graphics", @@ -1977,11 +1808,11 @@ "stats", "utils" ], - "Hash": "9c8ab14c00ac07e9e04d1664c0b74486" + "Hash": "e23fb9ecb1258207bcb763d78d513439" }, "performance": { "Package": "performance", - "Version": "0.11.0", + "Version": "0.12.3", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -1992,7 +1823,7 @@ "stats", "utils" ], - "Hash": "eb8ecde248cd610ae3097f5d00718cbd" + "Hash": "92be9503bc3394c464688fb6b03002e3" }, "pillar": { "Package": "pillar", @@ -2065,7 +1896,7 @@ }, "prodlim": { "Package": "prodlim", - "Version": "2023.08.28", + "Version": "2024.06.25", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -2080,7 +1911,7 @@ "stats", "survival" ], - "Hash": "c73e09a2039a0f75ac0a1e5454b39993" + "Hash": "d1e73a231e9442c29e21876f303382fc" }, "progress": { "Package": "progress", @@ -2110,14 +1941,14 @@ }, "ps": { "Package": "ps", - "Version": "1.7.6", + "Version": "1.8.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "R", "utils" ], - "Hash": "dd2b9319ee0656c8acf45c7f40c59de7" + "Hash": "4b9c8485b0c7eecdf0a9ba5132a45576" }, "purrr": { "Package": "purrr", @@ -2134,44 +1965,16 @@ ], "Hash": "1cba04a4e9414bdefc9dcaa99649a8dc" }, - "quadprog": { - "Package": "quadprog", - "Version": "1.5-8", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R" - ], - "Hash": "5f919ae5e7f83a6f91dcf2288943370d" - }, - "quantreg": { - "Package": "quantreg", - "Version": "5.97", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "MASS", - "Matrix", - "MatrixModels", - "R", - "SparseM", - "graphics", - "methods", - "stats", - "survival" - ], - "Hash": "1bbc97f7d637ab3917c514a69047b2c1" - }, "ragg": { "Package": "ragg", - "Version": "1.3.0", + "Version": "1.3.3", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "systemfonts", "textshaping" ], - "Hash": "082e1a198e3329d571f4448ef0ede4bc" + "Hash": "0595fe5e47357111f29ad19101c7d271" }, "ranger": { "Package": "ranger", @@ -2236,7 +2039,7 @@ }, "recipes": { "Package": "recipes", - "Version": "1.0.10", + "Version": "1.1.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -2245,7 +2048,6 @@ "cli", "clock", "dplyr", - "ellipsis", "generics", "glue", "gower", @@ -2265,7 +2067,7 @@ "vctrs", "withr" ], - "Hash": "69783cdd607c58fffb21c5c26c6ededf" + "Hash": "fc6672e55fcd1b5c461a3529ff6b1b08" }, "rematch": { "Package": "rematch", @@ -2286,17 +2088,17 @@ }, "renv": { "Package": "renv", - "Version": "1.0.7", + "Version": "1.0.9", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "utils" ], - "Hash": "397b7b2a265bc5a7a06852524dabae20" + "Hash": "ef233f0e9064fc88c898b340c9add5c2" }, "reprex": { "Package": "reprex", - "Version": "2.1.0", + "Version": "2.1.1", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -2314,22 +2116,22 @@ "utils", "withr" ], - "Hash": "1425f91b4d5d9a8f25352c44a3d914ed" + "Hash": "97b1d5361a24d9fb588db7afe3e5bcbf" }, "rlang": { "Package": "rlang", - "Version": "1.1.3", + "Version": "1.1.4", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "R", "utils" ], - "Hash": "42548638fae05fd9a9b5f3f437fbbbe2" + "Hash": "3eec01f8b1dee337674b2e34ab1f9bc1" }, "rmarkdown": { "Package": "rmarkdown", - "Version": "2.26", + "Version": "2.28", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -2348,7 +2150,7 @@ "xfun", "yaml" ], - "Hash": "9b148e7f95d33aac01f31282d49e4f44" + "Hash": "062470668513dcda416927085ee9bdc7" }, "rpart": { "Package": "rpart", @@ -2460,7 +2262,7 @@ }, "see": { "Package": "see", - "Version": "0.8.4", + "Version": "0.9.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -2478,7 +2280,7 @@ "performance", "stats" ], - "Hash": "3d2fd0b72314499e6af4fd20d39309dc" + "Hash": "743de04e180938d89e913f392dc9a104" }, "selectr": { "Package": "selectr", @@ -2506,6 +2308,19 @@ ], "Hash": "3f9796a8d0a0e8c6eb49a4b029359d1f" }, + "sfd": { + "Package": "sfd", + "Version": "0.1.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "rlang", + "tibble" + ], + "Hash": "8798f23058ead1d2ffd1223dfc0c8906" + }, "shape": { "Package": "shape", "Version": "1.4.6.1", @@ -2533,28 +2348,9 @@ ], "Hash": "a584625e2b9e4fad4be135c8ea5c99aa" }, - "smoothqr": { - "Package": "smoothqr", - "Version": "0.1.1", - "Source": "GitHub", - "RemoteType": "github", - "RemoteHost": "api.github.com", - "RemoteUsername": "dajmcdon", - "RemoteRepo": "smoothqr", - "RemoteRef": "main", - "RemoteSha": "3def5f0183671c1974676d08e469d538e15acea8", - "Requirements": [ - "cli", - "dplyr", - "quantreg", - "rlang", - "tibble" - ], - "Hash": "d7b8b29158f8d7a450e539d11c5c667b" - }, "stringi": { "Package": "stringi", - "Version": "1.8.3", + "Version": "1.8.4", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -2563,7 +2359,7 @@ "tools", "utils" ], - "Hash": "058aebddea264f4c99401515182e656a" + "Hash": "39e1144fd75428983dc3f63aa53dfa91" }, "stringr": { "Package": "stringr", @@ -2584,7 +2380,7 @@ }, "survival": { "Package": "survival", - "Version": "3.6-4", + "Version": "3.7-0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -2596,7 +2392,7 @@ "stats", "utils" ], - "Hash": "e6e3071f471513e4b85f98ca041303c7" + "Hash": "5aaa9cbaf4aba20f8e06fdea1850a398" }, "sys": { "Package": "sys", @@ -2607,26 +2403,28 @@ }, "systemfonts": { "Package": "systemfonts", - "Version": "1.0.6", + "Version": "1.1.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "R", - "cpp11" + "cpp11", + "lifecycle" ], - "Hash": "6d538cff441f0f1f36db2209ac7495ac" + "Hash": "213b6b8ed5afbf934843e6c3b090d418" }, "textshaping": { "Package": "textshaping", - "Version": "0.3.7", + "Version": "0.4.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "R", "cpp11", + "lifecycle", "systemfonts" ], - "Hash": "997aac9ad649e0ef3b97f96cddd5622b" + "Hash": "5142f8bc78ed3d819d26461b641627ce" }, "tibble": { "Package": "tibble", @@ -2722,12 +2520,6 @@ "Version": "2.0.0", "Source": "Repository", "Repository": "RSPM", - "RemoteType": "standard", - "RemotePkgRef": "tidyverse", - "RemoteRef": "tidyverse", - "RemoteRepos": "https://packagemanager.rstudio.com/all/latest", - "RemotePkgPlatform": "source", - "RemoteSha": "2.0.0", "Requirements": [ "R", "broom", @@ -2765,7 +2557,7 @@ }, "timeDate": { "Package": "timeDate", - "Version": "4032.109", + "Version": "4041.110", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -2775,7 +2567,7 @@ "stats", "utils" ], - "Hash": "fa276a2ec2555d74b4eabf56fba3d209" + "Hash": "c5e48e8ac24d4472ddb122bcdeb011ad" }, "timechange": { "Package": "timechange", @@ -2790,17 +2582,17 @@ }, "tinytex": { "Package": "tinytex", - "Version": "0.50", + "Version": "0.53", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "xfun" ], - "Hash": "be7a76845222ad20adb761f462eed3ea" + "Hash": "9db859e8aabbb474293dde3097839420" }, "tsibble": { "Package": "tsibble", - "Version": "1.1.4", + "Version": "1.1.5", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -2817,7 +2609,7 @@ "tidyselect", "vctrs" ], - "Hash": "d5da786ac5a28f62ca2eb8255ad7b9f3" + "Hash": "a75e397766b45996310908b5b32557ba" }, "tune": { "Package": "tune", @@ -2866,7 +2658,7 @@ }, "usethis": { "Package": "usethis", - "Version": "2.2.3", + "Version": "3.0.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -2893,7 +2685,7 @@ "withr", "yaml" ], - "Hash": "d524fd42c517035027f866064417d7e6" + "Hash": "b2fbf93c2127bedd2cbe9b799530d5d2" }, "utf8": { "Package": "utf8", @@ -2907,13 +2699,13 @@ }, "uuid": { "Package": "uuid", - "Version": "1.2-0", + "Version": "1.2-1", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "R" ], - "Hash": "303c19bfd970bece872f93a824e323d9" + "Hash": "34e965e62a41fcafb1ca60e9b142085b" }, "vctrs": { "Package": "vctrs", @@ -2965,6 +2757,23 @@ ], "Hash": "390f9315bc0025be03012054103d227c" }, + "waldo": { + "Package": "waldo", + "Version": "0.5.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "diffobj", + "glue", + "methods", + "rematch2", + "rlang", + "tibble" + ], + "Hash": "16aa934a49658677d8041df9017329b9" + }, "warp": { "Package": "warp", "Version": "0.2.1", @@ -2984,7 +2793,7 @@ }, "withr": { "Package": "withr", - "Version": "3.0.0", + "Version": "3.0.1", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -2992,7 +2801,7 @@ "grDevices", "graphics" ], - "Hash": "d31b6c62c10dcf11ec530ca6b0dd5d35" + "Hash": "07909200e8bbe90426fbfeb73e1e27aa" }, "workflows": { "Package": "workflows", @@ -3046,19 +2855,20 @@ }, "xfun": { "Package": "xfun", - "Version": "0.43", + "Version": "0.47", "Source": "Repository", "Repository": "RSPM", "Requirements": [ + "R", "grDevices", "stats", "tools" ], - "Hash": "ab6371d8653ce5f2f9290f4ec7b42a8e" + "Hash": "36ab21660e2d095fef0d83f689e0477c" }, "xgboost": { "Package": "xgboost", - "Version": "1.7.7.1", + "Version": "1.7.8.1", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -3068,7 +2878,7 @@ "jsonlite", "methods" ], - "Hash": "6303e61eac62aef7bd2b396ef7e24386" + "Hash": "f7aa70849f72103d78c99df10eae6164" }, "xml2": { "Package": "xml2", @@ -3085,10 +2895,10 @@ }, "yaml": { "Package": "yaml", - "Version": "2.3.8", + "Version": "2.3.10", "Source": "Repository", "Repository": "RSPM", - "Hash": "29240487a071f535f5e5d5a323b7afbd" + "Hash": "51dab85c6c98e50a18d7551e9d49f76c" }, "yardstick": { "Package": "yardstick", diff --git a/renv/activate.R b/renv/activate.R index d13f993..c360bf2 100644 --- a/renv/activate.R +++ b/renv/activate.R @@ -2,7 +2,7 @@ local({ # the requested version of renv - version <- "1.0.7" + version <- "1.0.9" attr(version, "sha") <- NULL # the project directory @@ -98,6 +98,66 @@ local({ unloadNamespace("renv") # load bootstrap tools + ansify <- function(text) { + if (renv_ansify_enabled()) + renv_ansify_enhanced(text) + else + renv_ansify_default(text) + } + + renv_ansify_enabled <- function() { + + override <- Sys.getenv("RENV_ANSIFY_ENABLED", unset = NA) + if (!is.na(override)) + return(as.logical(override)) + + pane <- Sys.getenv("RSTUDIO_CHILD_PROCESS_PANE", unset = NA) + if (identical(pane, "build")) + return(FALSE) + + testthat <- Sys.getenv("TESTTHAT", unset = "false") + if (tolower(testthat) %in% "true") + return(FALSE) + + iderun <- Sys.getenv("R_CLI_HAS_HYPERLINK_IDE_RUN", unset = "false") + if (tolower(iderun) %in% "false") + return(FALSE) + + TRUE + + } + + renv_ansify_default <- function(text) { + text + } + + renv_ansify_enhanced <- function(text) { + + # R help links + pattern <- "`\\?(renv::(?:[^`])+)`" + replacement <- "`\033]8;;ide:help:\\1\a?\\1\033]8;;\a`" + text <- gsub(pattern, replacement, text, perl = TRUE) + + # runnable code + pattern <- "`(renv::(?:[^`])+)`" + replacement <- "`\033]8;;ide:run:\\1\a\\1\033]8;;\a`" + text <- gsub(pattern, replacement, text, perl = TRUE) + + # return ansified text + text + + } + + renv_ansify_init <- function() { + + envir <- renv_envir_self() + if (renv_ansify_enabled()) + assign("ansify", renv_ansify_enhanced, envir = envir) + else + assign("ansify", renv_ansify_default, envir = envir) + + } + `%||%` <- function(x, y) { if (is.null(x)) y else x } @@ -142,7 +202,10 @@ local({ # compute common indent indent <- regexpr("[^[:space:]]", lines) common <- min(setdiff(indent, -1L)) - leave - paste(substring(lines, common), collapse = "\n") + text <- paste(substring(lines, common), collapse = "\n") + + # substitute in ANSI links for executable renv code + ansify(text) } @@ -306,7 +369,11 @@ local({ ) if ("headers" %in% names(formals(utils::download.file))) - args$headers <- renv_bootstrap_download_custom_headers(url) + { + headers <- renv_bootstrap_download_custom_headers(url) + if (length(headers) && is.character(headers)) + args$headers <- headers + } do.call(utils::download.file, args) @@ -385,10 +452,22 @@ local({ for (type in types) { for (repos in renv_bootstrap_repos()) { + # build arguments for utils::available.packages() call + args <- list(type = type, repos = repos) + + # add custom headers if available -- note that + # utils::available.packages() will pass this to download.file() + if ("headers" %in% names(formals(utils::download.file))) + { + headers <- renv_bootstrap_download_custom_headers(url) + if (length(headers) && is.character(headers)) + args$headers <- headers + } + # retrieve package database db <- tryCatch( as.data.frame( - utils::available.packages(type = type, repos = repos), + do.call(utils::available.packages, args), stringsAsFactors = FALSE ), error = identity @@ -470,6 +549,14 @@ local({ } + renv_bootstrap_github_token <- function() { + for (envvar in c("GITHUB_TOKEN", "GITHUB_PAT", "GH_TOKEN")) { + envval <- Sys.getenv(envvar, unset = NA) + if (!is.na(envval)) + return(envval) + } + } + renv_bootstrap_download_github <- function(version) { enabled <- Sys.getenv("RENV_BOOTSTRAP_FROM_GITHUB", unset = "TRUE") @@ -477,16 +564,16 @@ local({ return(FALSE) # prepare download options - pat <- Sys.getenv("GITHUB_PAT") - if (nzchar(Sys.which("curl")) && nzchar(pat)) { + token <- renv_bootstrap_github_token() + if (nzchar(Sys.which("curl")) && nzchar(token)) { fmt <- "--location --fail --header \"Authorization: token %s\"" - extra <- sprintf(fmt, pat) + extra <- sprintf(fmt, token) saved <- options("download.file.method", "download.file.extra") options(download.file.method = "curl", download.file.extra = extra) on.exit(do.call(base::options, saved), add = TRUE) - } else if (nzchar(Sys.which("wget")) && nzchar(pat)) { + } else if (nzchar(Sys.which("wget")) && nzchar(token)) { fmt <- "--header=\"Authorization: token %s\"" - extra <- sprintf(fmt, pat) + extra <- sprintf(fmt, token) saved <- options("download.file.method", "download.file.extra") options(download.file.method = "wget", download.file.extra = extra) on.exit(do.call(base::options, saved), add = TRUE) diff --git a/slide.qmd b/slide.qmd index 3cd4a4f..f34529c 100644 --- a/slide.qmd +++ b/slide.qmd @@ -1,26 +1,22 @@ # Sliding computations {#sec-sliding} -A central tool in the `{epiprocess}` package is `epi_slide()`, which is based -on the powerful functionality provided in the +A central tool in the `{epiprocess}` package is `epi_slide()`, which is based on +the powerful functionality provided in the [`slider`](https://cran.r-project.org/web/packages/slider) package. In -`{epiprocess}`, to "slide" means to apply a computation---represented as a -function or formula---over a sliding/rolling data window. Suitable -groupings can always be achieved by a preliminary call to `group_by()`. - -By default, the meaning of one time step is inferred from the `time_value` -column of the `epi_df` object under consideration, based on the way this column -understands addition and subtraction. For example, if the time values are coded -as `Date` objects, then one time step is one day, since -`as.Date("2022-01-01") + 1` equals `as.Date("2022-01-02")`. Alternatively, the time step can be specified -manually in the call to `epi_slide()`; you can read the documentation for more -details. Furthermore, the alignment of the running window used in `epi_slide()` -can be "right", "center", or "left"; the default is "right", and is what we use -in this vignette. +`epiprocess`, to "slide" means to apply a computation---represented as a +function or formula---over a sliding/rolling data window. The function always +applies the slide inside each group and the grouping is assumed to be across all +group keys of the `epi_df` (this is the grouping used by default if you do not +group the `epi_df` with a `group_by()`). + +By default, the `.window_size` units depend on the `time_type` of the `epi_df`, +which is determined from the types in the `time_value` column of the `epi_df`. +See the "Details" in `epi_slide()` for more. As in getting started guide, we'll fetch daily reported COVID-19 cases from CA, FL, NY, and TX (note: here we're using new, not cumulative cases) using the -[`epidatr`](https://github.com/cmu-delphi/epidatr) package, -and then convert this to `epi_df` format. +[`epidatr`](https://github.com/cmu-delphi/epidatr) package, and then convert +this to `epi_df` format. ```{r} #| include: false @@ -37,98 +33,174 @@ The example data we'll use is part of the package and has 2,684 rows and 3 colum ```{r} data(jhu_csse_daily_subset) -x <- jhu_csse_daily_subset %>% +edf <- jhu_csse_daily_subset %>% select(geo_value, time_value, cases) %>% arrange(geo_value, time_value) %>% as_epi_df() ``` +## Optimized rolling mean and sums -## Slide with a formula - -We first demonstrate how to apply a 7-day trailing average to the daily cases in -order to smooth the signal, by passing in a formula for the first argument of -`epi_slide()`. To do this computation per state, we first call `group_by()`. +For the two most common sliding operations, we offer two optimized versions: +`epi_slide_mean()` and `epi_slide_sum()`. This example gets the 7-day trailing +average of the daily cases. Note that the name of the column(s) that we want to +average is specified as the first argument of `epi_slide_mean()`. ```{r} -x %>% - group_by(geo_value) %>% - epi_slide(~ mean(.x$cases), before = 6) %>% - ungroup() +edf %>% + group_by(geo_value) %>% + epi_slide_mean("cases", .window_size = 7, na.rm = TRUE) %>% + ungroup() %>% + head(10) ``` -The formula specified has access to all non-grouping columns present in the -original `epi_df` object (and must refer to them with the prefix `.x$`). As we -can see, the function `epi_slide()` returns an `epi_df` object with a new column -appended that contains the results (from sliding), named `slide_value` as the -default. We can of course change this post hoc, or we can instead specify a new -name up front using the `new_col_name` argument: +Note that we passed `na.rm = TRUE` to `data.table::frollmean()` via `...` to +`epi_slide_mean`. + +The following computes the 7-day trailing sum of daily cases (and passed `na.rm` +to `data.table::frollsum()` similarly): ```{r} -x %>% +edf %>% group_by(geo_value) %>% - epi_slide(~ mean(.x$cases), before = 6, new_col_name = "cases_7dav") %>% - ungroup() + epi_slide_sum("cases", .window_size = 7, na.rm = TRUE) %>% + ungroup() %>% + head(10) ``` -Some other information is available in additional variables: +## General sliding with a formula -* `.group_key` is a one-row tibble containing the values of the grouping - variables for the associated group -* `.ref_time_value` is the reference time value the time window was based on +The previous computations can also be performed using `epi_slide()`, which can +be used for more general sliding computations (but is much slower for the +specific cases of mean and sum). -Like in `group_modify()`, there are alternative names for these variables as -well: `.` can be used instead of `.x`, `.y` instead of `.group_key`, and `.z` -instead of `.ref_time_value`. +The same 7-day trailing average of daily cases can be computed by passing in a +formula for the first argument of `epi_slide()`: -## Slide with a function +```{r} +edf %>% + group_by(geo_value) %>% + epi_slide(~ mean(.x$cases, na.rm = TRUE), .window_size = 7) %>% + ungroup() %>% + head(10) +``` -We can also pass a function for the first argument in `epi_slide()`. In this -case, the passed function must accept the following arguments: +If your formula returns a data.frame, then the columns of the data.frame +will be unpacked into the resulting `epi_df`. For example, the following +computes the 7-day trailing average of daily cases and the 7-day trailing sum of +daily cases: -In this case, the passed function `f` must accept the following arguments: a -data frame with the same column names as the original object, minus any grouping -variables, containing the time window data for one group-`ref_time_value` -combination; followed by a one-row tibble containing the values of the grouping -variables for the associated group; followed by the associated `ref_time_value`. -It can accept additional arguments; `epi_slide()` will forward any `...` args it -receives to `f`. +```{r} +edf %>% + group_by(geo_value) %>% + epi_slide( + ~ data.frame(cases_mean = mean(.x$cases, na.rm = TRUE), cases_sum = sum(.x$cases, na.rm = TRUE)), + .window_size = 7 + ) %>% + ungroup() %>% + head(10) +``` -Recreating the last example of a 7-day trailing average: +Note that this formula has access to all non-grouping columns present in the +original `epi_df` object and must refer to them with the prefix `.x$...`. As we +can see, the function `epi_slide()` returns an `epi_df` object with a new column +appended that contains the results (from sliding), named `slide_value` as the +default. + +Some other information is available in additional variables: + +* `.group_key` is a one-row tibble containing the values of the grouping + variables for the associated group +* `.ref_time_value` is the reference time value the time window was based on ```{r} -x %>% - group_by(geo_value) %>% - epi_slide(function(x, gk, rtv) mean(x$cases), - before = 6, new_col_name = "cases_7dav") %>% - ungroup() +# Returning geo_value in the formula +edf %>% + group_by(geo_value) %>% + epi_slide(~ .x$geo_value[[1]], .window_size = 7) %>% + ungroup() %>% + head(10) + +# Returning time_value in the formula +edf %>% + group_by(geo_value) %>% + epi_slide(~ .x$time_value[[1]], .window_size = 7) %>% + ungroup() %>% + head(10) ``` +While the computations above do not look very useful, these can be used as +building blocks for computations that do something different depending on the +geo_value or ref_time_value. + ## Slide the tidy way Perhaps the most convenient way to setup a computation in `epi_slide()` is to pass in an expression for tidy evaluation. In this case, we can simply define the name of the new column directly as part of the expression, setting it equal -to a computation in which we can access any columns of `x` by name, just as we +to a computation in which we can access any columns of `.x` by name, just as we would in a call to `dplyr::mutate()`, or any of the `dplyr` verbs. For example: ```{r} -x <- x %>% - group_by(geo_value) %>% - epi_slide(cases_7dav = mean(cases), before = 6) %>% - ungroup() +slide_output <- edf %>% + group_by(geo_value) %>% + epi_slide(cases_7dav = mean(cases, na.rm = TRUE), .window_size = 7) %>% + ungroup() %>% + head(10) ``` -In addition to referring to individual columns by name, you can refer to the -time window data as an `epi_df` or `tibble` using `.x`. Similarly, the other arguments of the function format are available through the magic names `.group_key` and `.ref_time_value`, and the tidyverse "pronouns" `.data` and `.env` can also be used. + +In addition to referring to individual columns by name, you can refer to +`epi_df` time window as `.x` (`.group_key` and `.ref_time_value` are still +available). Also, the tidyverse "pronouns" `.data` and `.env` can also be used +if you need distinguish between the data and environment. As a simple sanity check, we visualize the 7-day trailing averages computed on -top of the original counts. +top of the original counts: + +```{r, message = FALSE, warning = FALSE, fig.width = 9, fig.height = 6} +library(ggplot2) +theme_set(theme_bw()) + +ggplot(slide_output, aes(x = time_value)) + + geom_col(aes(y = cases, fill = geo_value), alpha = 0.5, show.legend = FALSE) + + geom_line(aes(y = cases_7dav, col = geo_value), show.legend = FALSE) + + facet_wrap(~geo_value, scales = "free_y") + + scale_x_date(minor_breaks = "month", date_labels = "%b %y") + + labs(x = "Date", y = "Reported COVID-19 cases") +``` + +As we can see from the top right panel, it looks like Texas moved to weekly +reporting of COVID-19 cases in summer of 2021. + +## Slide with a function + +We can also pass a function to the second argument in `epi_slide()`. In this +case, the passed function `.f` must have the form `function(x, g, t, ...)`, +where + +- "x" is an epi_df with the same column names as the archive's `DT`, minus + the `version` column +- "g" is a one-row tibble containing the values of the grouping variables +for the associated group +- "t" is the ref_time_value for the current window +- "..." are additional arguments + +Recreating the last example of a 7-day trailing average: + +```{r} +x <- edf %>% + group_by(geo_value) %>% + epi_slide(function(x, g, t) mean(x$cases, na.rm = TRUE), .window_size = 7, .new_col_name = "cases_7dav") %>% + ungroup() +x %>% + head(10) +``` ```{r, message = FALSE, warning = FALSE, fig.width = 8, fig.height = 6} #| code-fold: true cols <- RColorBrewer::brewer.pal(7, "Set1")[-6] ggplot(x, aes(x = time_value)) + - geom_col(aes(y = cases, fill = geo_value), alpha = 0.5, + geom_col(aes(y = cases, fill = geo_value), alpha = 0.5, show.legend = FALSE) + scale_y_continuous(expand = expansion(c(0, 0.05))) + geom_line(aes(y = cases_7dav, col = geo_value), show.legend = FALSE) + @@ -139,23 +211,23 @@ ggplot(x, aes(x = time_value)) + labs(x = "Date", y = "Reported COVID-19 cases") ``` -As we can see from the center top panel, it looks like Florida moved to weekly -reporting of COVID-19 cases in summer of 2021, while California occasionally reported negative cases counts! +As we can see from the center top panel, it looks like Florida moved to weekly +reporting of COVID-19 cases in summer of 2021, while California occasionally +reported negative cases counts! ## Running a local forecaster {#sec-local-forecaster} -As a more complex example, we preview some of the functionality of `{epipredict}` described in future chapters, and use a forecaster based on a -local (in time) -autoregression or "AR model". AR models can be fit in numerous ways -(using base R -functions and various packages), but here we the `arx_forecaster()`, implemented in `{epipredict}` both -provides a more advanced example of sliding a function over an `epi_df` object, -and it allows us to be a bit more flexible in defining a *probabilistic* -forecaster: one that outputs not just a point prediction, but a notion of -uncertainty around this. In particular, our forecaster will output a point -prediction along with an 90\% uncertainty band, represented by a predictive -quantiles at the 5\% and 95\% levels (lower and upper endpoints of the -uncertainty band). +As a more complex example, we preview some of the functionality of +`{epipredict}` described in future chapters, and use a forecaster based on a +local (in time) autoregression or "AR model". AR models can be fit in numerous +ways (using base R functions and various packages), but here we the +`arx_forecaster()`, implemented in `{epipredict}` both provides a more advanced +example of sliding a function over an `epi_df` object, and it allows us to be a +bit more flexible in defining a *probabilistic* forecaster: one that outputs not +just a point prediction, but a notion of uncertainty around this. In particular, +our forecaster will output a point prediction along with an 90\% uncertainty +band, represented by a predictive quantiles at the 5\% and 95\% levels (lower +and upper endpoints of the uncertainty band). The function signature below, is a probabilistic AR forecaster. The `lags` argument indicates which lags to use in the model, and `ahead` indicates @@ -165,97 +237,93 @@ considered in this vignette). ```{r eval=FALSE} arx_forecaster <- function( - epi_df, + epi_df, outcome, # the outcome column name in `epi_df` predictors, # a character vector, containing 1 or more predictors in `epi_df` - trainer = quantile_reg(), + trainer = quantile_reg(), args_list = arx_args_list( - lags = c(0, 7, 14), + lags = c(0, 7, 14), ahead = 7, quantile_levels = c(0.05, 0.95) ) -) - +) { + ... +} ``` -We go ahead and slide this AR forecaster over the working `epi_df` of COVID-19 -cases. Note that we actually model the `cases_7dav` column, to operate on the +We go ahead and slide this AR forecaster over the working `epi_df` of COVID-19 +cases. Note that we actually model the `cases_7dav` column, to operate on the scale of smoothed COVID-19 cases. This is clearly equivalent, up to a constant, to modeling weekly sums of COVID-19 cases. ```{r, warning=FALSE} fc_time_values <- seq( - from = as.Date("2020-06-01"), - to = as.Date("2021-12-01"), + from = as.Date("2020-06-01"), + to = as.Date("2021-12-01"), by = "1 months") fcasts <- epi_slide( - x, - ~ arx_forecaster( - epi_data = .x, - outcome = "cases_7dav", - predictors = "cases_7dav", - trainer = quantile_reg(), - args_list = arx_args_list(ahead = 7))$predictions, - before = 119, - ref_time_values = fc_time_values, - new_col_name = "fc") + x, + .f = ~ arx_forecaster( + epi_data = .x, + outcome = "cases_7dav", + predictors = "cases_7dav", + trainer = quantile_reg(), + args_list = arx_args_list(ahead = 7))$predictions, + .window_size = 120, + .ref_time_values = fc_time_values) # grab just the relevant columns, and make them easier to plot fcasts <- fcasts %>% - select(geo_value, time_value, cases_7dav, - contains("_distn"), fc_target_date) %>% - pivot_quantiles_wider(contains("_distn")) + select(geo_value, time_value, cases_7dav, .pred, .pred_distn) %>% + pivot_quantiles_wider(".pred_distn") fcasts ``` -Note that here we have used an argument `ref_time_values` to perform the -sliding computation (here, compute a forecast) at a specific subset of reference -time values. We get out 4 new columns: `fc_target_date`, `0.05`, `0.5`, `0.95` -that correspond to the date the forecast is for (rather than the date it was made on, the point forecast, and the lower and upper endpoints of the -95\% prediction band.[^1] +Note that we have used the argument `.ref_time_values` to compute the forecast +at a specific subset of reference time values. We get out 4 new columns: +`fc_target_date`, `0.05`, `0.5`, `0.95` that correspond to the date the forecast +is for (rather than the date it was made on), the point forecast, and the lower +and upper endpoints of the 95\% prediction band.[^1] -[^1]: If instead we had set `as_list_col = TRUE` -in the call to `epi_slide()`, then we would have gotten a list column `fc`, -where each element of `fc` contains these results. +[^1]: If instead we had set `as_list_col = TRUE` in the call to `epi_slide()`, +then we would have gotten a list column `fc`, where each element of `fc` +contains these results. To finish off, we plot the forecasts at some times (spaced out by a few months) -over the last year, at multiple horizons: 7, 14, 21, and 28 days ahead. To do -so, we encapsulate the process of generating forecasts into a simple function, +over the last year, at multiple horizons: 7, 14, 21, and 28 days ahead. To do +so, we encapsulate the process of generating forecasts into a simple function, so that we can call it a few times. ```{r, message = FALSE, warning = FALSE} k_week_ahead <- function(ahead = 7) { epi_slide( - x, + x, ~ arx_forecaster( - epi_data = .x, - outcome = "cases_7dav", - predictors = "cases_7dav", - trainer = quantile_reg(), - args_list = arx_args_list(ahead = ahead))$predictions, - before = 119, - ref_time_values = fc_time_values, - new_col_name = "fc") %>% - select(geo_value, time_value, cases_7dav, contains("_distn"), - fc_target_date) %>% - pivot_quantiles_wider(contains("_distn")) + epi_data = .x, + outcome = "cases_7dav", + predictors = "cases_7dav", + trainer = quantile_reg(), + args_list = arx_args_list(ahead = ahead))$predictions, + .window_size = 120, + .ref_time_values = fc_time_values) %>% + select(geo_value, time_value, cases_7dav, .pred, .pred_distn) %>% + pivot_quantiles_wider(".pred_distn") } # First generate the forecasts, and bind them together z <- map(c(7, 14, 21, 28), k_week_ahead) %>% list_rbind() ``` -Then we can plot the on top of the observed data ```{r, fig.width=8, fig.height=9} #| code-fold: true ggplot(z) + - geom_line(data = x, aes(x = time_value, y = cases_7dav), color = "gray50") + - geom_ribbon(aes(x = fc_target_date, ymin = `0.05`, ymax = `0.95`, - group = time_value, fill = geo_value), alpha = 0.4) + - geom_line(aes(x = fc_target_date, y = `0.5`, group = time_value)) + - geom_point(aes(x = fc_target_date, y = `0.5`, group = time_value), size = 0.5) + - #geom_vline(data = tibble(x = fc_time_values), aes(xintercept = x), + geom_line(data = x, aes(x = time_value, y = cases_7dav), color = "gray50") + + geom_ribbon(aes(x = time_value, ymin = `0.05`, ymax = `0.95`, + group = time_value, fill = geo_value), alpha = 0.4) + + geom_line(aes(x = time_value, y = `0.5`, group = time_value)) + + geom_point(aes(x = time_value, y = `0.5`, group = time_value), size = 0.5) + + #geom_vline(data = tibble(x = fc_time_values), aes(xintercept = x), # linetype = 2, alpha = 0.5) + facet_wrap(vars(geo_value), scales = "free_y", nrow = 3) + scale_y_continuous(expand = expansion(c(0, 0.05))) + @@ -269,9 +337,9 @@ spotty. At various points in time, we can see that its forecasts are volatile (its point predictions are all over the place), or overconfident (its bands are too narrow), or both at the same time. This is only meant as a simple demo and not entirely unexpected given the way the AR model is set up. The -[`epipredict`](https://cmu-delphi.github.io/epipredict) package, -offers a suite of predictive modeling tools -that improve on many of the shortcomings of the above simple AR model (simply +[`epipredict`](https://cmu-delphi.github.io/epipredict) package, +offers a suite of predictive modeling tools +that improve on many of the shortcomings of the above simple AR model (simply using all states for training rather than 6 is a huge improvement). Second, the AR forecaster here is using finalized data, meaning, it uses the @@ -279,12 +347,12 @@ latest versions of signal values (reported COVID-19 cases) available, for both training models and making predictions historically. However, this is not reflective of the provisional nature of the data that it must cope with in a true forecast task. Training and making predictions on finalized data can lead -to an overly optimistic sense of accuracy; see, for example, +to an overly optimistic sense of accuracy; see, for example, [@McDonaldBien2021] and references therein. Fortunately, the `epiprocess` package provides a data structure called `epi_archive` that can be used to store all data revisions, and furthermore, an `epi_archive` object knows how to slide computations in the correct version-aware sense (for the computation at each reference time $t$, it uses -only data that would have been available as of $t$). We will revisit this -example in the [archive +only data that would have been available as of $t$). We will revisit this +example in the [archive vignette](https://cmu-delphi.github.io/epiprocess/articles/archive.html). diff --git a/sliding-forecasters.qmd b/sliding-forecasters.qmd index 8aee741..65d8802 100644 --- a/sliding-forecasters.qmd +++ b/sliding-forecasters.qmd @@ -1,33 +1,27 @@ -# Pseudo-prospective forecast inspection +# Sliding version-unaware and version-aware ARX forecasters across dates ```{r} #| echo: false source("_common.R") ``` +A key function from the epiprocess package is `epix_slide()` (refer to the +following vignette for the basics of the function: ["Work with archive objects +and data +revisions"](https://cmu-delphi.github.io/epiprocess/articles/archive.html)) +which allows performing version-aware computations. That is, the function only +uses data that would have been available as of time t for that reference time. -A key function from the epiprocess package is `epi_slide()`, which allows the -user to apply a function or formula-based computation over variables in an -`epi_df` over a running window of `n` time steps (see the following `epiprocess` -vignette to go over the basics of the function: ["Slide a computation over -signal values"](https://cmu-delphi.github.io/epiprocess/articles/slide.html)). -The equivalent sliding method for an `epi_archive` object can be called by using -the wrapper function `epix_slide()` (refer to the following vignette for the -basics of the function: ["Work with archive objects and data -revisions"](https://cmu-delphi.github.io/epiprocess/articles/archive.html)). The -key difference from `epi_slide()` is that it performs version-aware -computations. That is, the function only uses data that would have been -available as of time t for that reference time. - -In this vignette, we use `epi_slide()` and `epix_slide()` for backtesting our -`arx_forecaster` on historical COVID-19 case data from the US and from Canada. -More precisely, we first demonstrate using `epi_slide()` to slide ARX -forecasters over an `epi_df` object and compare the results obtained from using -different forecasting engines. We then compare these simple retrospective -forecasts to more proper "pseudoprospective" forecasts generated using snapshots -of the data that was available in real time, using `epix_slide()`. - -## Comparing different forecasting engines +In this vignette, we use `epix_slide()` for backtesting our `arx_forecaster` on +historical COVID-19 case data from the US and from Canada. We first examine the +results from a version-unaware forecaster, comparing two different fitting +engines and then we contrast this with version-aware forecasting. The former +will proceed by constructing an `epi_archive` that erases its version +information and then use `epix_slide()` to forecast the future. The latter will +keep the versioned data and proceed similarly by using `epix_slide()` to +forecast the future. + +## Version-unaware forecasting ### Example using CLI and case data from US states @@ -57,17 +51,20 @@ us_archive <- epix_merge( ) ``` -After obtaining the latest snapshot of the data, we produce forecasts on that -data using the default engine of simple linear regression and compare to a -random forest. - -Note that all of the warnings about the forecast date being less than the most -recent update date of the data have been suppressed to avoid cluttering the -output. +We then get latest snapshot of the data from the archive by using +`epix_as_of()`. We then create fake version information by setting `version = +time_value`. This creates an archive that pretends to have the latest data +available (since at version time `x` it has all the data up to time_value `x`, +which in reality is unrealistic because the time values of the data received at +version time `x` often lags by a few days, not to mention the later corrections +that are amended to the data). ```{r make-arx-kweek, warning = FALSE} # Latest snapshot of data, and forecast dates -us_latest <- epix_as_of(us_archive, max_version = max(us_archive$versions_end)) +us_latest <- us_archive %>% + epix_as_of(version = max(.$versions_end)) %>% + mutate(version = time_value) %>% + as_epi_archive() fc_time_values <- seq( from = as.Date("2020-08-01"), to = as.Date("2021-11-01"), @@ -75,34 +72,39 @@ fc_time_values <- seq( ) aheads <- c(7, 14, 21, 28) -k_week_ahead <- function(epi_df, outcome, predictors, ahead = 7, engine) { - epi_slide(epi_df, ~ arx_forecaster( - .x, outcome, predictors, engine, - args_list = arx_args_list(ahead = ahead) - )$predictions %>% - select(-geo_value), - before = 120L - 1L, - ref_time_values = fc_time_values, - new_col_name = "fc" - ) %>% - select(geo_value, time_value, starts_with("fc")) %>% - mutate(engine_type = engine$engine) +forecast_k_week_ahead <- function(epi_archive, outcome, predictors, ahead = 7, engine) { + epi_archive %>% + epix_slide( + .f = function(x, gk, rtv) { + arx_forecaster( + x, outcome, predictors, engine, + args_list = arx_args_list(ahead = ahead) + )$predictions %>% + mutate(engine_type = engine$engine) %>% + pivot_quantiles_wider(.pred_distn) + }, + .before = 120, + .versions = fc_time_values + ) } # Generate the forecasts and bind them together -fc <- bind_rows( - map(aheads, ~ k_week_ahead( - us_latest, "case_rate", c("case_rate", "percent_cli"), .x, +forecasts_version_unaware <- bind_rows( + map(aheads, ~ forecast_k_week_ahead( + us_latest, + outcome = "case_rate", + predictors = c("case_rate", "percent_cli"), + ahead = .x, engine = linear_reg() - )) %>% - list_rbind(), - map(aheads, ~ k_week_ahead( - us_latest, "case_rate", c("case_rate", "percent_cli"), .x, + )), + map(aheads, ~ forecast_k_week_ahead( + us_latest, + outcome = "case_rate", + predictors = c("case_rate", "percent_cli"), + ahead = .x, engine = rand_forest(mode = "regression") - )) %>% - list_rbind() -) %>% - pivot_quantiles_wider(contains("_distn")) + )) +) ``` Here, `arx_forecaster()` does all the heavy lifting. It creates leads of the @@ -111,25 +113,28 @@ target (respecting time stamps and locations) along with lags of the features specified engine, creates predictions, and non-parametric confidence bands. To see how the predictions compare, we plot them on top of the latest case -rates. Note that even though we've fitted the model on all states, -we'll just display the -results for two states, California (CA) and Florida (FL), to get a sense of the -model performance while keeping the graphic simple. +rates. Note that even though we've fitted the model on all states, we'll just +display the results for two states, California (CA) and Florida (FL), to get a +sense of the model performance while keeping the graphic simple. ```{r plot-arx, message = FALSE, warning = FALSE, fig.width = 9, fig.height = 6} #| code-fold: true -fc_cafl <- fc %>% filter(geo_value %in% c("ca", "fl")) -latest_cafl <- us_latest %>% filter(geo_value %in% c("ca", "fl")) - -ggplot(fc_cafl, aes(fc_target_date, group = time_value, fill = engine_type)) + +forecasts_filtered <- forecasts_version_unaware %>% + tibble() %>% + filter(geo_value %in% c("ca", "fl")) +latest_data_filtered <- us_latest$DT %>% + tibble() %>% + filter(geo_value %in% c("ca", "fl")) + +ggplot(forecasts_filtered, aes(x = target_date, group = forecast_date, fill = engine_type)) + geom_line( - data = latest_cafl, aes(x = time_value, y = case_rate), + data = latest_data_filtered, aes(x = time_value, y = case_rate), inherit.aes = FALSE, color = "gray50" ) + geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`), alpha = 0.4) + - geom_line(aes(y = fc_.pred)) + - geom_point(aes(y = fc_.pred), size = 0.5) + - geom_vline(aes(xintercept = time_value), linetype = 2, alpha = 0.5) + + geom_line(aes(y = .pred)) + + geom_point(aes(y = .pred), size = 0.5) + + geom_vline(aes(xintercept = forecast_date), linetype = 2, alpha = 0.5) + facet_grid(engine_type ~ geo_value, scales = "free") + scale_x_date(minor_breaks = "month", date_labels = "%b %y") + scale_fill_brewer(palette = "Set1") + @@ -139,17 +144,18 @@ ggplot(fc_cafl, aes(fc_target_date, group = time_value, fill = engine_type)) + ``` For the two states of interest, simple linear regression clearly performs better -than random forest in terms of accuracy of the predictions and does not -result in such in overconfident predictions (overly narrow confidence bands). -Though, in general, neither approach produces amazingly accurate forecasts. -This could be because -the behaviour is rather different across states and the effects of other notable -factors such as age and public health measures may be important to account for -in such forecasting. Including such factors as well as making enhancements such -as correcting for outliers are some improvements one could make to this simple -model.[^1] - -[^1]: Note that, despite the above caveats, simple models like this tend to out-perform many far more complicated models in the online Covid forecasting due to those models high variance predictions. +than random forest in terms of accuracy of the predictions and does not result +in such in overconfident predictions (overly narrow confidence bands). Though, +in general, neither approach produces amazingly accurate forecasts. This could +be because the behaviour is rather different across states and the effects of +other notable factors such as age and public health measures may be important to +account for in such forecasting. Including such factors as well as making +enhancements such as correcting for outliers are some improvements one could +make to this simple model.[^1] + +[^1]: Note that, despite the above caveats, simple models like this tend to +out-perform many far more complicated models in the online Covid forecasting due +to those models high variance predictions. ### Example using case data from Canada @@ -180,21 +186,19 @@ can <- can %>% mutate(cr_7dav = RcppRoll::roll_meanr(case_rate, n = 7L)) %>% as_epi_archive(compactify = TRUE) -can_latest <- epix_as_of(can, max_version = max(can$DT$version)) +can_latest <- epix_as_of(can, max_version = max(can$DT$version)) %>% + mutate(version = time_value) %>% + as_epi_archive() # Generate the forecasts, and bind them together can_fc <- bind_rows( - map(aheads, ~ k_week_ahead( - can_latest, "cr_7dav", "cr_7dav", .x, linear_reg() - )) %>% - list_rbind(), - map(aheads, ~ k_week_ahead( - can_latest, "cr_7dav", "cr_7dav", .x, - boost_tree(mode = "regression", trees = 20) - )) %>% - list_rbind() -) %>% - pivot_quantiles_wider(contains("_distn")) + map(aheads, + ~ forecast_k_week_ahead(can_latest, "cr_7dav", "cr_7dav", .x, linear_reg()) + ), + map(aheads, + ~ forecast_k_week_ahead(can_latest, "cr_7dav", "cr_7dav", .x, boost_tree(mode = "regression", trees = 20)) + ) +) ``` The first figure shows the results for all of the provinces using linear regression. @@ -203,19 +207,19 @@ The first figure shows the results for all of the provinces using linear regress #| code-fold: true ggplot( can_fc %>% filter(engine_type == "lm"), - aes(x = fc_target_date, group = time_value) + aes(x = target_date, group = forecast_date) ) + coord_cartesian(xlim = lubridate::ymd(c("2020-12-01", NA))) + geom_line( - data = can_latest, aes(x = time_value, y = cr_7dav), + data = can_latest$DT %>% tibble(), aes(x = time_value, y = cr_7dav), inherit.aes = FALSE, color = "gray50" ) + geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`, fill = geo_value), alpha = 0.4 ) + - geom_line(aes(y = fc_.pred)) + - geom_point(aes(y = fc_.pred), size = 0.5) + - geom_vline(aes(xintercept = time_value), linetype = 2, alpha = 0.5) + + geom_line(aes(y = .pred)) + + geom_point(aes(y = .pred), size = 0.5) + + geom_vline(aes(xintercept = forecast_date), linetype = 2, alpha = 0.5) + facet_wrap(~geo_value, scales = "free_y", ncol = 3) + scale_x_date(minor_breaks = "month", date_labels = "%b %y") + scale_y_continuous(expand = expansion(c(0, 0.05))) + @@ -232,19 +236,19 @@ Compare those forecasts with a related set using Gradient Boosting. #| code-fold: true ggplot( can_fc %>% filter(engine_type == "xgboost"), - aes(x = fc_target_date, group = time_value) + aes(x = target_date, group = forecast_date) ) + coord_cartesian(xlim = lubridate::ymd(c("2020-12-01", NA))) + geom_line( - data = can_latest, aes(x = time_value, y = cr_7dav), + data = can_latest$DT %>% tibble(), aes(x = time_value, y = cr_7dav), inherit.aes = FALSE, color = "gray50" ) + geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`, fill = geo_value), alpha = 0.4 ) + - geom_line(aes(y = fc_.pred)) + - geom_point(aes(y = fc_.pred), size = 0.5) + - geom_vline(aes(xintercept = time_value), linetype = 2, alpha = 0.5) + + geom_line(aes(y = .pred)) + + geom_point(aes(y = .pred), size = 0.5) + + geom_vline(aes(xintercept = forecast_date), linetype = 2, alpha = 0.5) + facet_wrap(~geo_value, scales = "free_y", ncol = 3) + scale_x_date(minor_breaks = "month", date_labels = "%b %y") + scale_y_continuous(expand = expansion(c(0, 0.05))) + @@ -262,72 +266,61 @@ sliding with different engines in `arx_forecaster`, we may devote another vignette to work on improving the predictive modelling using the suite of tools available in epipredict. -## Pseudoprospective vs. unfaithful retrospective forecasting +## Version-aware forecasting ### Example using case data from US states We will now run pseudoprospective forecasts based on properly-versioned data (that would have been available in real-time) to forecast future COVID-19 case -rates from current and past COVID-19 case rates for all states. That is, we can -make forecasts on the archive, `us_archive`, and compare those to forecasts on -(time windows of) the latest data, `us_latest`, using the same general set-up as -above. For pseudoprospective forecasting, note that `us_archive` is fed into -`epix_slide()`, while for simpler (unfaithful) retrospective forecasting, -`us_latest` is fed into `epi_slide()`. #%% update to include percent_cli after -that issue is fixed? +rates from current and past COVID-19 case rates for all states. All we have to +do is use the historical archive of the data with version information, +`us_archive`, instead of `us_latest` like we did above, in the argument to our +forecaster wrapper `forecast_k_week_ahead()`. Below we do that computation, tag +it, and combine it with the forecasts from one of the engines made above. ```{r make-ar-kweek-asof} -k_week_versioning <- function(ahead, version = c("faithful", "unfaithful")) { - version <- match.arg(version) - if (version == "faithful") { - epix_slide( - us_archive, - ~ arx_forecaster( - .x, "case_rate", c("case_rate", "percent_cli"), - args_list = arx_args_list(ahead = ahead) - )$predictions, - before = 120 - 1, - ref_time_values = fc_time_values, - new_col_name = "fc" - ) %>% - mutate(version = "version faithful") %>% - rename(geo_value = "fc_geo_value") - } else { - k_week_ahead( - us_latest, "case_rate", c("case_rate", "percent_cli"), - ahead, linear_reg() - ) %>% mutate(version = "not version faithful") - } -} - # Generate the forecasts, and bind them together -fc <- bind_rows( - map(aheads, ~ k_week_versioning(.x, "faithful")) %>% list_rbind(), - map(aheads, ~ k_week_versioning(.x, "unfaithful")) %>% list_rbind() -) %>% pivot_quantiles_wider(fc_.pred_distn) +forecasts_version_aware <- bind_rows( + map(aheads, ~ forecast_k_week_ahead( + us_archive, + outcome = "case_rate", + predictors = c("case_rate", "percent_cli"), + ahead = .x, + engine = linear_reg() + )) %>% + bind_rows() %>% + mutate(version = "version faithful"), + forecasts_version_unaware %>% filter(engine_type == "lm") %>% mutate(version = "version unfaithful") +) ``` -Now we can plot the results on top of the latest case rates. As before, we will only display and focus on the results for FL and CA for simplicity. +Now we can plot the results on top of the latest case rates. As before, we will +only display and focus on the results for FL and CA for simplicity. ```{r plot-ar-asof, message = FALSE, warning = FALSE, fig.width = 9, fig.height = 6} #| code-fold: true -fc_cafl <- fc %>% filter(geo_value %in% c("ca", "fl")) -latest_cafl <- us_latest %>% filter(geo_value %in% c("ca", "fl")) - -ggplot(fc_cafl, aes(x = fc_target_date, group = time_value)) + +forecasts_filtered <- forecasts_version_aware %>% + tibble() %>% + filter(geo_value %in% c("ca", "fl")) +latest_data_filtered <- us_latest$DT %>% + tibble() %>% + select(-version) %>% + filter(geo_value %in% c("ca", "fl")) + +ggplot(forecasts_filtered, aes(x = target_date, group = forecast_date, fill = version)) + geom_line( - data = latest_cafl, aes(x = time_value, y = case_rate), + data = latest_data_filtered, aes(x = time_value, y = case_rate), inherit.aes = FALSE, color = "gray50" ) + - geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`, fill = version), alpha = 0.4) + - geom_line(aes(y = fc_.pred)) + - geom_point(aes(y = fc_.pred), size = 0.5) + - geom_vline(aes(xintercept = time_value), linetype = 2, alpha = 0.5) + + geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`), alpha = 0.4) + + geom_line(aes(y = .pred)) + + geom_point(aes(y = .pred), size = 0.5) + + geom_vline(aes(xintercept = forecast_date), linetype = 2, alpha = 0.5) + facet_grid(version ~ geo_value, scales = "free") + scale_x_date(minor_breaks = "month", date_labels = "%b %y") + + scale_fill_brewer(palette = "Set1") + scale_y_continuous(expand = expansion(c(0, 0.05))) + labs(x = "Date", y = "Reported COVID-19 case rates") + - scale_fill_brewer(palette = "Set1") + theme(legend.position = "none") ``` From 1a14295d22d57c961144c17855077d7169e5d880 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 2 Oct 2024 11:06:08 -0700 Subject: [PATCH 2/2] fix: slide plot --- .../execute-results/html.json | 4 +- .../figure-html/plot-ar-asof-1.svg | 278 +++++------------- sliding-forecasters.qmd | 27 +- 3 files changed, 91 insertions(+), 218 deletions(-) diff --git a/_freeze/sliding-forecasters/execute-results/html.json b/_freeze/sliding-forecasters/execute-results/html.json index 74fa0c2..9169e19 100644 --- a/_freeze/sliding-forecasters/execute-results/html.json +++ b/_freeze/sliding-forecasters/execute-results/html.json @@ -1,7 +1,7 @@ { - "hash": "a74b5381d41b5132ec6860cad9e9e252", + "hash": "6068951c462b53dbb580b3d90e9fa2f1", "result": { - "markdown": "# Sliding version-unaware and version-aware ARX forecasters across dates\n\n\n::: {.cell}\n\n:::\n\n\nA key function from the epiprocess package is `epix_slide()` (refer to the\nfollowing vignette for the basics of the function: [\"Work with archive objects\nand data\nrevisions\"](https://cmu-delphi.github.io/epiprocess/articles/archive.html))\nwhich allows performing version-aware computations. That is, the function only\nuses data that would have been available as of time t for that reference time.\n\nIn this vignette, we use `epix_slide()` for backtesting our `arx_forecaster` on\nhistorical COVID-19 case data from the US and from Canada. We first examine the\nresults from a version-unaware forecaster, comparing two different fitting\nengines and then we contrast this with version-aware forecasting. The former\nwill proceed by constructing an `epi_archive` that erases its version\ninformation and then use `epix_slide()` to forecast the future. The latter will\nkeep the versioned data and proceed similarly by using `epix_slide()` to\nforecast the future.\n\n## Version-unaware forecasting\n\n### Example using CLI and case data from US states\n\nFirst, we download the version history (i.e. archive) of the percentage of\ndoctor’s visits with CLI (COVID-like illness) computed from medical insurance\nclaims and the number of new confirmed COVID-19 cases per 100,000 population\n(daily) for all 50 states from the COVIDcast API. We process as before, with the\nmodification that we use `sync = \"locf\"` in `epix_merge()` so that the last\nversion of each observation can be carried forward to extrapolate unavailable\nversions for the less up-to-date input archive.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/grab-epi-data_89a9d4079f8ffc6080f83369668b2316'}\n\n```{.r .cell-code}\nus_raw_history_dfs <- readRDS(url(\n \"https://github.com/cmu-delphi/epipredict/raw/dev/vignettes/articles/all_states_covidcast_signals.rds\"\n))\n\nus_cli_archive <- us_raw_history_dfs[[1]] %>%\n select(geo_value, time_value, version = issue, percent_cli = value) %>%\n as_epi_archive(compactify = TRUE)\nus_cases_archive <- us_raw_history_dfs[[2]] %>%\n select(geo_value, time_value, version = issue, case_rate = value) %>%\n as_epi_archive(compactify = TRUE)\n\nus_archive <- epix_merge(\n us_cli_archive, us_cases_archive,\n sync = \"locf\", compactify = TRUE\n)\n```\n:::\n\n\nWe then get latest snapshot of the data from the archive by using\n`epix_as_of()`. We then create fake version information by setting `version =\ntime_value`. This creates an archive that pretends to have the latest data\navailable (since at version time `x` it has all the data up to time_value `x`,\nwhich in reality is unrealistic because the time values of the data received at\nversion time `x` often lags by a few days, not to mention the later corrections\nthat are amended to the data).\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/make-arx-kweek_6ff04c287f0d7a0f9d4503649e56bd3a'}\n\n```{.r .cell-code}\n# Latest snapshot of data, and forecast dates\nus_latest <- us_archive %>%\n epix_as_of(version = max(.$versions_end)) %>%\n mutate(version = time_value) %>%\n as_epi_archive()\nfc_time_values <- seq(\n from = as.Date(\"2020-08-01\"),\n to = as.Date(\"2021-11-01\"),\n by = \"1 month\"\n)\naheads <- c(7, 14, 21, 28)\n\nforecast_k_week_ahead <- function(epi_archive, outcome, predictors, ahead = 7, engine) {\n epi_archive %>%\n epix_slide(\n .f = function(x, gk, rtv) {\n arx_forecaster(\n x, outcome, predictors, engine,\n args_list = arx_args_list(ahead = ahead)\n )$predictions %>%\n mutate(engine_type = engine$engine) %>%\n pivot_quantiles_wider(.pred_distn)\n },\n .before = 120,\n .versions = fc_time_values\n )\n}\n\n# Generate the forecasts and bind them together\nforecasts_version_unaware <- bind_rows(\n map(aheads, ~ forecast_k_week_ahead(\n us_latest,\n outcome = \"case_rate\",\n predictors = c(\"case_rate\", \"percent_cli\"),\n ahead = .x,\n engine = linear_reg()\n )),\n map(aheads, ~ forecast_k_week_ahead(\n us_latest,\n outcome = \"case_rate\",\n predictors = c(\"case_rate\", \"percent_cli\"),\n ahead = .x,\n engine = rand_forest(mode = \"regression\")\n ))\n)\n```\n:::\n\n\nHere, `arx_forecaster()` does all the heavy lifting. It creates leads of the\ntarget (respecting time stamps and locations) along with lags of the features\n(here, the response and doctors visits), estimates a forecasting model using the\nspecified engine, creates predictions, and non-parametric confidence bands.\n\nTo see how the predictions compare, we plot them on top of the latest case\nrates. Note that even though we've fitted the model on all states, we'll just\ndisplay the results for two states, California (CA) and Florida (FL), to get a\nsense of the model performance while keeping the graphic simple.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/plot-arx_2918e9947b5ecabca1115f6cc2d8eb62'}\n\n```{.r .cell-code code-fold=\"true\"}\nforecasts_filtered <- forecasts_version_unaware %>%\n tibble() %>%\n filter(geo_value %in% c(\"ca\", \"fl\"))\nlatest_data_filtered <- us_latest$DT %>%\n tibble() %>%\n filter(geo_value %in% c(\"ca\", \"fl\"))\n\nggplot(forecasts_filtered, aes(x = target_date, group = forecast_date, fill = engine_type)) +\n geom_line(\n data = latest_data_filtered, aes(x = time_value, y = case_rate),\n inherit.aes = FALSE, color = \"gray50\"\n ) +\n geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`), alpha = 0.4) +\n geom_line(aes(y = .pred)) +\n geom_point(aes(y = .pred), size = 0.5) +\n geom_vline(aes(xintercept = forecast_date), linetype = 2, alpha = 0.5) +\n facet_grid(engine_type ~ geo_value, scales = \"free\") +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %y\") +\n scale_fill_brewer(palette = \"Set1\") +\n scale_y_continuous(expand = expansion(c(0, 0.05))) +\n labs(x = \"Date\", y = \"Reported COVID-19 case rates\") +\n theme(legend.position = \"none\")\n```\n\n::: {.cell-output-display}\n![](sliding-forecasters_files/figure-html/plot-arx-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nFor the two states of interest, simple linear regression clearly performs better\nthan random forest in terms of accuracy of the predictions and does not result\nin such in overconfident predictions (overly narrow confidence bands). Though,\nin general, neither approach produces amazingly accurate forecasts. This could\nbe because the behaviour is rather different across states and the effects of\nother notable factors such as age and public health measures may be important to\naccount for in such forecasting. Including such factors as well as making\nenhancements such as correcting for outliers are some improvements one could\nmake to this simple model.[^1]\n\n[^1]: Note that, despite the above caveats, simple models like this tend to\nout-perform many far more complicated models in the online Covid forecasting due\nto those models high variance predictions.\n\n### Example using case data from Canada\n\nBy leveraging the flexibility of `epiprocess`, we can apply the same techniques\nto data from other sources. Since some collaborators are in British Columbia,\nCanada, we'll do essentially the same thing for Canada as we did above.\n\nThe [COVID-19 Canada Open Data Working Group](https://opencovid.ca/) collects\ndaily time series data on COVID-19 cases, deaths, recoveries, testing and\nvaccinations at the health region and province levels. Data are collected from\npublicly available sources such as government datasets and news releases.\nUnfortunately, there is no simple versioned source, so we have created our own\nfrom the Github commit history.\n\nFirst, we load versioned case rates at the provincial level. After converting\nthese to 7-day averages (due to highly variable provincial reporting\nmismatches), we then convert the data to an `epi_archive` object, and extract\nthe latest version from it. Finally, we run the same forcasting exercise as for\nthe American data, but here we compare the forecasts produced from using simple\nlinear regression with those from using boosted regression trees.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/get-can-fc_2457088f4bfc3bada5f7c38814504be7'}\n\n```{.r .cell-code}\n# source(\"drafts/canada-case-rates.R)\ncan <- epidatasets::can_prov_cases\ncan <- can %>%\n group_by(version, geo_value) %>%\n arrange(time_value) %>%\n mutate(cr_7dav = RcppRoll::roll_meanr(case_rate, n = 7L)) %>%\n as_epi_archive(compactify = TRUE)\n\ncan_latest <- epix_as_of(can, max_version = max(can$DT$version)) %>%\n mutate(version = time_value) %>%\n as_epi_archive()\n\n# Generate the forecasts, and bind them together\ncan_fc <- bind_rows(\n map(\n aheads,\n ~ forecast_k_week_ahead(can_latest, \"cr_7dav\", \"cr_7dav\", .x, linear_reg())\n ),\n map(\n aheads,\n ~ forecast_k_week_ahead(can_latest, \"cr_7dav\", \"cr_7dav\", .x, boost_tree(mode = \"regression\", trees = 20))\n )\n)\n```\n:::\n\n\nThe first figure shows the results for all of the provinces using linear regression.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/plot-can-fc-lr_f7e7878c3f1a72f4cb9216d68aa63292'}\n\n```{.r .cell-code code-fold=\"true\"}\nggplot(\n can_fc %>% filter(engine_type == \"lm\"),\n aes(x = target_date, group = forecast_date)\n) +\n coord_cartesian(xlim = lubridate::ymd(c(\"2020-12-01\", NA))) +\n geom_line(\n data = can_latest$DT %>% tibble(), aes(x = time_value, y = cr_7dav),\n inherit.aes = FALSE, color = \"gray50\"\n ) +\n geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`, fill = geo_value),\n alpha = 0.4\n ) +\n geom_line(aes(y = .pred)) +\n geom_point(aes(y = .pred), size = 0.5) +\n geom_vline(aes(xintercept = forecast_date), linetype = 2, alpha = 0.5) +\n facet_wrap(~geo_value, scales = \"free_y\", ncol = 3) +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %y\") +\n scale_y_continuous(expand = expansion(c(0, 0.05))) +\n labs(\n title = \"Using simple linear regression\", x = \"Date\",\n y = \"Reported COVID-19 case rates\"\n ) +\n theme(legend.position = \"none\")\n```\n\n::: {.cell-output-display}\n![](sliding-forecasters_files/figure-html/plot-can-fc-lr-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nCompare those forecasts with a related set using Gradient Boosting.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/plot-can-fc-boost_58502edbabde6914bca10e407c6f445f'}\n\n```{.r .cell-code code-fold=\"true\"}\nggplot(\n can_fc %>% filter(engine_type == \"xgboost\"),\n aes(x = target_date, group = forecast_date)\n) +\n coord_cartesian(xlim = lubridate::ymd(c(\"2020-12-01\", NA))) +\n geom_line(\n data = can_latest$DT %>% tibble(), aes(x = time_value, y = cr_7dav),\n inherit.aes = FALSE, color = \"gray50\"\n ) +\n geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`, fill = geo_value),\n alpha = 0.4\n ) +\n geom_line(aes(y = .pred)) +\n geom_point(aes(y = .pred), size = 0.5) +\n geom_vline(aes(xintercept = forecast_date), linetype = 2, alpha = 0.5) +\n facet_wrap(~geo_value, scales = \"free_y\", ncol = 3) +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %y\") +\n scale_y_continuous(expand = expansion(c(0, 0.05))) +\n labs(\n title = \"Using boosted regression trees\", x = \"Date\",\n y = \"Reported COVID-19 case rates\"\n ) +\n theme(legend.position = \"none\")\n```\n\n::: {.cell-output-display}\n![](sliding-forecasters_files/figure-html/plot-can-fc-boost-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nBoth approaches tend to produce quite volatile forecasts (point predictions)\nand/or are overly confident (very narrow bands), particularly when boosted\nregression trees are used. But as this is meant to be a simple demonstration of\nsliding with different engines in `arx_forecaster`, we may devote another\nvignette to work on improving the predictive modelling using the suite of tools\navailable in epipredict.\n\n## Version-aware forecasting\n\n### Example using case data from US states\n\nWe will now run pseudoprospective forecasts based on properly-versioned data\n(that would have been available in real-time) to forecast future COVID-19 case\nrates from current and past COVID-19 case rates for all states. All we have to\ndo is use the historical archive of the data with version information,\n`us_archive`, instead of `us_latest` like we did above, in the argument to our\nforecaster wrapper `forecast_k_week_ahead()`. Below we do that computation, tag\nit, and combine it with the forecasts from one of the engines made above.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/make-ar-kweek-asof_4b8247ffc26ffb4f84a5646852522f3b'}\n\n```{.r .cell-code}\n# Generate the forecasts, and bind them together\nforecasts_version_aware <- bind_rows(\n map(aheads, ~ forecast_k_week_ahead(\n us_archive,\n outcome = \"case_rate\",\n predictors = c(\"case_rate\", \"percent_cli\"),\n ahead = .x,\n engine = linear_reg()\n )) %>%\n bind_rows() %>%\n mutate(version = \"version faithful\"),\n forecasts_version_unaware %>% filter(engine_type == \"lm\") %>% mutate(version = \"version unfaithful\")\n)\n```\n:::\n\n\nNow we can plot the results on top of the latest case rates. As before, we will\nonly display and focus on the results for FL and CA for simplicity.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/plot-ar-asof_884abfb3ac2d5a8dfe9bc404ff34c5d4'}\n\n```{.r .cell-code code-fold=\"true\"}\nforecasts_filtered <- forecasts_version_aware %>%\n tibble() %>%\n filter(geo_value %in% c(\"ca\", \"fl\"))\nlatest_data_filtered <- us_latest$DT %>%\n tibble() %>%\n select(-version) %>%\n filter(geo_value %in% c(\"ca\", \"fl\"))\n\nggplot(forecasts_filtered, aes(x = target_date, group = forecast_date, fill = version)) +\n geom_line(\n data = latest_data_filtered, aes(x = time_value, y = case_rate),\n inherit.aes = FALSE, color = \"gray50\"\n ) +\n geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`), alpha = 0.4) +\n geom_line(aes(y = .pred)) +\n geom_point(aes(y = .pred), size = 0.5) +\n geom_vline(aes(xintercept = forecast_date), linetype = 2, alpha = 0.5) +\n facet_grid(version ~ geo_value, scales = \"free\") +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %y\") +\n scale_fill_brewer(palette = \"Set1\") +\n scale_y_continuous(expand = expansion(c(0, 0.05))) +\n labs(x = \"Date\", y = \"Reported COVID-19 case rates\") +\n theme(legend.position = \"none\")\n```\n\n::: {.cell-output-display}\n![](sliding-forecasters_files/figure-html/plot-ar-asof-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nAgain, we observe that the results are not great for these two states, but\nthat's likely due to the simplicity of the model (ex. the omission of key\nfactors such as age and public health measures) and the quality of the data (ex.\nwe have not personally corrected for anomalies in the data).\n\nWe shall leave it to the reader to try the above version aware and unaware\nforecasting exercise on the Canadian case rate data. The above code for the\nAmerican state data should be readily adaptable for this purpose.\n", + "markdown": "# Sliding version-unaware and version-aware ARX forecasters across dates\n\n\n::: {.cell}\n\n:::\n\n\nA key function from the epiprocess package is `epix_slide()` (refer to the\nfollowing vignette for the basics of the function: [\"Work with archive objects\nand data\nrevisions\"](https://cmu-delphi.github.io/epiprocess/articles/archive.html))\nwhich allows performing version-aware computations. That is, the function only\nuses data that would have been available as of time t for that reference time.\n\nIn this vignette, we use `epix_slide()` for backtesting our `arx_forecaster` on\nhistorical COVID-19 case data from the US and from Canada. We first examine the\nresults from a version-unaware forecaster, comparing two different fitting\nengines and then we contrast this with version-aware forecasting. The former\nwill proceed by constructing an `epi_archive` that erases its version\ninformation and then use `epix_slide()` to forecast the future. The latter will\nkeep the versioned data and proceed similarly by using `epix_slide()` to\nforecast the future.\n\n## Version-unaware forecasting\n\n### Example using CLI and case data from US states\n\nFirst, we download the version history (i.e. archive) of the percentage of\ndoctor’s visits with CLI (COVID-like illness) computed from medical insurance\nclaims and the number of new confirmed COVID-19 cases per 100,000 population\n(daily) for all 50 states from the COVIDcast API. We process as before, with the\nmodification that we use `sync = \"locf\"` in `epix_merge()` so that the last\nversion of each observation can be carried forward to extrapolate unavailable\nversions for the less up-to-date input archive.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/grab-epi-data_89a9d4079f8ffc6080f83369668b2316'}\n\n```{.r .cell-code}\nus_raw_history_dfs <- readRDS(url(\n \"https://github.com/cmu-delphi/epipredict/raw/dev/vignettes/articles/all_states_covidcast_signals.rds\"\n))\n\nus_cli_archive <- us_raw_history_dfs[[1]] %>%\n select(geo_value, time_value, version = issue, percent_cli = value) %>%\n as_epi_archive(compactify = TRUE)\nus_cases_archive <- us_raw_history_dfs[[2]] %>%\n select(geo_value, time_value, version = issue, case_rate = value) %>%\n as_epi_archive(compactify = TRUE)\n\nus_archive <- epix_merge(\n us_cli_archive, us_cases_archive,\n sync = \"locf\", compactify = TRUE\n)\n```\n:::\n\n\nWe then get latest snapshot of the data from the archive by using\n`epix_as_of()`. We then create fake version information by setting `version =\ntime_value`. This creates an archive that pretends to have the latest data\navailable (since at version time `x` it has all the data up to time_value `x`,\nwhich in reality is unrealistic because the time values of the data received at\nversion time `x` often lags by a few days, not to mention the later corrections\nthat are amended to the data).\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/make-arx-kweek_c224b23a96f435b4b2bdec18ce48389c'}\n\n```{.r .cell-code}\n# Get latest snapshot of data and pretend it's an archive\nus_latest <- us_archive %>%\n epix_as_of(version = max(.$versions_end)) %>%\n mutate(version = time_value) %>%\n as_epi_archive()\nfc_time_values <- seq(\n from = as.Date(\"2020-08-01\"),\n to = as.Date(\"2021-11-01\"),\n by = \"1 month\"\n)\naheads <- c(7, 14, 21, 28)\n\nforecast_k_week_ahead <- function(epi_archive, outcome, predictors, ahead = 7, engine) {\n epi_archive %>%\n epix_slide(\n .f = function(x, gk, rtv) {\n arx_forecaster(\n x, outcome, predictors, engine,\n args_list = arx_args_list(ahead = ahead)\n )$predictions %>%\n mutate(engine_type = engine$engine) %>%\n pivot_quantiles_wider(.pred_distn)\n },\n .before = 120,\n .versions = fc_time_values\n )\n}\n\n# Generate the forecasts and bind them together\nforecasts_version_unaware <- bind_rows(\n map(aheads, ~ forecast_k_week_ahead(\n us_latest,\n outcome = \"case_rate\",\n predictors = c(\"case_rate\", \"percent_cli\"),\n ahead = .x,\n engine = linear_reg()\n )),\n map(aheads, ~ forecast_k_week_ahead(\n us_latest,\n outcome = \"case_rate\",\n predictors = c(\"case_rate\", \"percent_cli\"),\n ahead = .x,\n engine = rand_forest(mode = \"regression\")\n ))\n)\n```\n:::\n\n\nHere, `arx_forecaster()` does all the heavy lifting. It creates leads of the\ntarget (respecting time stamps and locations) along with lags of the features\n(here, the response and doctors visits), estimates a forecasting model using the\nspecified engine, creates predictions, and non-parametric confidence bands.\n\nTo see how the predictions compare, we plot them on top of the latest case\nrates. Note that even though we've fitted the model on all states, we'll just\ndisplay the results for two states, California (CA) and Florida (FL), to get a\nsense of the model performance while keeping the graphic simple.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/plot-arx_2918e9947b5ecabca1115f6cc2d8eb62'}\n\n```{.r .cell-code code-fold=\"true\"}\nforecasts_filtered <- forecasts_version_unaware %>%\n tibble() %>%\n filter(geo_value %in% c(\"ca\", \"fl\"))\nlatest_data_filtered <- us_latest$DT %>%\n tibble() %>%\n filter(geo_value %in% c(\"ca\", \"fl\"))\n\nggplot(forecasts_filtered, aes(x = target_date, group = forecast_date, fill = engine_type)) +\n geom_line(\n data = latest_data_filtered, aes(x = time_value, y = case_rate),\n inherit.aes = FALSE, color = \"gray50\"\n ) +\n geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`), alpha = 0.4) +\n geom_line(aes(y = .pred)) +\n geom_point(aes(y = .pred), size = 0.5) +\n geom_vline(aes(xintercept = forecast_date), linetype = 2, alpha = 0.5) +\n facet_grid(engine_type ~ geo_value, scales = \"free\") +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %y\") +\n scale_fill_brewer(palette = \"Set1\") +\n scale_y_continuous(expand = expansion(c(0, 0.05))) +\n labs(x = \"Date\", y = \"Reported COVID-19 case rates\") +\n theme(legend.position = \"none\")\n```\n\n::: {.cell-output-display}\n![](sliding-forecasters_files/figure-html/plot-arx-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nFor the two states of interest, simple linear regression clearly performs better\nthan random forest in terms of accuracy of the predictions and does not result\nin such in overconfident predictions (overly narrow confidence bands). Though,\nin general, neither approach produces amazingly accurate forecasts. This could\nbe because the behaviour is rather different across states and the effects of\nother notable factors such as age and public health measures may be important to\naccount for in such forecasting. Including such factors as well as making\nenhancements such as correcting for outliers are some improvements one could\nmake to this simple model.[^1]\n\n[^1]: Note that, despite the above caveats, simple models like this tend to\nout-perform many far more complicated models in the online Covid forecasting due\nto those models high variance predictions.\n\n### Example using case data from Canada\n\nBy leveraging the flexibility of `epiprocess`, we can apply the same techniques\nto data from other sources. Since some collaborators are in British Columbia,\nCanada, we'll do essentially the same thing for Canada as we did above.\n\nThe [COVID-19 Canada Open Data Working Group](https://opencovid.ca/) collects\ndaily time series data on COVID-19 cases, deaths, recoveries, testing and\nvaccinations at the health region and province levels. Data are collected from\npublicly available sources such as government datasets and news releases.\nUnfortunately, there is no simple versioned source, so we have created our own\nfrom the Github commit history.\n\nFirst, we load versioned case rates at the provincial level. After converting\nthese to 7-day averages (due to highly variable provincial reporting\nmismatches), we then convert the data to an `epi_archive` object, and extract\nthe latest version from it. Finally, we run the same forcasting exercise as for\nthe American data, but here we compare the forecasts produced from using simple\nlinear regression with those from using boosted regression trees.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/get-can-fc_2457088f4bfc3bada5f7c38814504be7'}\n\n```{.r .cell-code}\n# source(\"drafts/canada-case-rates.R)\ncan <- epidatasets::can_prov_cases\ncan <- can %>%\n group_by(version, geo_value) %>%\n arrange(time_value) %>%\n mutate(cr_7dav = RcppRoll::roll_meanr(case_rate, n = 7L)) %>%\n as_epi_archive(compactify = TRUE)\n\ncan_latest <- epix_as_of(can, max_version = max(can$DT$version)) %>%\n mutate(version = time_value) %>%\n as_epi_archive()\n\n# Generate the forecasts, and bind them together\ncan_fc <- bind_rows(\n map(\n aheads,\n ~ forecast_k_week_ahead(can_latest, \"cr_7dav\", \"cr_7dav\", .x, linear_reg())\n ),\n map(\n aheads,\n ~ forecast_k_week_ahead(can_latest, \"cr_7dav\", \"cr_7dav\", .x, boost_tree(mode = \"regression\", trees = 20))\n )\n)\n```\n:::\n\n\nThe first figure shows the results for all of the provinces using linear regression.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/plot-can-fc-lr_f7e7878c3f1a72f4cb9216d68aa63292'}\n\n```{.r .cell-code code-fold=\"true\"}\nggplot(\n can_fc %>% filter(engine_type == \"lm\"),\n aes(x = target_date, group = forecast_date)\n) +\n coord_cartesian(xlim = lubridate::ymd(c(\"2020-12-01\", NA))) +\n geom_line(\n data = can_latest$DT %>% tibble(), aes(x = time_value, y = cr_7dav),\n inherit.aes = FALSE, color = \"gray50\"\n ) +\n geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`, fill = geo_value),\n alpha = 0.4\n ) +\n geom_line(aes(y = .pred)) +\n geom_point(aes(y = .pred), size = 0.5) +\n geom_vline(aes(xintercept = forecast_date), linetype = 2, alpha = 0.5) +\n facet_wrap(~geo_value, scales = \"free_y\", ncol = 3) +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %y\") +\n scale_y_continuous(expand = expansion(c(0, 0.05))) +\n labs(\n title = \"Using simple linear regression\", x = \"Date\",\n y = \"Reported COVID-19 case rates\"\n ) +\n theme(legend.position = \"none\")\n```\n\n::: {.cell-output-display}\n![](sliding-forecasters_files/figure-html/plot-can-fc-lr-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nCompare those forecasts with a related set using Gradient Boosting.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/plot-can-fc-boost_58502edbabde6914bca10e407c6f445f'}\n\n```{.r .cell-code code-fold=\"true\"}\nggplot(\n can_fc %>% filter(engine_type == \"xgboost\"),\n aes(x = target_date, group = forecast_date)\n) +\n coord_cartesian(xlim = lubridate::ymd(c(\"2020-12-01\", NA))) +\n geom_line(\n data = can_latest$DT %>% tibble(), aes(x = time_value, y = cr_7dav),\n inherit.aes = FALSE, color = \"gray50\"\n ) +\n geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`, fill = geo_value),\n alpha = 0.4\n ) +\n geom_line(aes(y = .pred)) +\n geom_point(aes(y = .pred), size = 0.5) +\n geom_vline(aes(xintercept = forecast_date), linetype = 2, alpha = 0.5) +\n facet_wrap(~geo_value, scales = \"free_y\", ncol = 3) +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %y\") +\n scale_y_continuous(expand = expansion(c(0, 0.05))) +\n labs(\n title = \"Using boosted regression trees\", x = \"Date\",\n y = \"Reported COVID-19 case rates\"\n ) +\n theme(legend.position = \"none\")\n```\n\n::: {.cell-output-display}\n![](sliding-forecasters_files/figure-html/plot-can-fc-boost-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nBoth approaches tend to produce quite volatile forecasts (point predictions)\nand/or are overly confident (very narrow bands), particularly when boosted\nregression trees are used. But as this is meant to be a simple demonstration of\nsliding with different engines in `arx_forecaster`, we may devote another\nvignette to work on improving the predictive modelling using the suite of tools\navailable in epipredict.\n\n## Version-aware forecasting\n\n### Example using case data from US states\n\nWe will now run pseudoprospective forecasts based on properly-versioned data\n(that would have been available in real-time) to forecast future COVID-19 case\nrates from current and past COVID-19 case rates for all states. All we have to\ndo is use the historical archive of the data with version information,\n`us_archive`, instead of `us_latest` like we did above, in the argument to our\nforecaster wrapper `forecast_k_week_ahead()`. Below we do that computation, tag\nit, and combine it with the forecasts from one of the engines made above.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/make-faithful-forecast_c688db7d4caffffd6af45a48d0cfd7bc'}\n\n```{.r .cell-code}\nforecasts_version_aware <- map(aheads, ~ forecast_k_week_ahead(\n us_archive,\n outcome = \"case_rate\",\n predictors = c(\"case_rate\", \"percent_cli\"),\n ahead = .x,\n engine = linear_reg()\n)) %>%\n bind_rows() %>%\n mutate(version = \"version faithful\")\n```\n:::\n\n\nNow we can plot the results on top of the latest case rates. As before, we will\nonly display and focus on the results for FL and CA for simplicity.\n\n\n::: {.cell layout-align=\"center\" hash='sliding-forecasters_cache/html/plot-ar-asof_9cbc2463d0ba3b648ac129e2d8b9ccbd'}\n\n```{.r .cell-code code-fold=\"true\"}\nforecasts_filtered <- bind_rows(\n forecasts_version_aware,\n forecasts_version_unaware %>%\n filter(engine_type == \"lm\") %>%\n mutate(version = \"version unfaithful\")\n) %>%\n tibble() %>%\n filter(geo_value %in% c(\"ca\", \"fl\"))\nlatest_data_filtered <- us_latest$DT %>%\n tibble() %>%\n select(-version) %>%\n filter(geo_value %in% c(\"ca\", \"fl\"))\n\nggplot(forecasts_filtered, aes(x = target_date, group = forecast_date, fill = version)) +\n geom_line(\n data = latest_data_filtered, aes(x = time_value, y = case_rate),\n inherit.aes = FALSE, color = \"gray50\"\n ) +\n geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`), alpha = 0.4) +\n geom_line(aes(y = .pred)) +\n geom_point(aes(y = .pred), size = 0.5) +\n geom_vline(aes(xintercept = forecast_date), linetype = 2, alpha = 0.5) +\n facet_grid(version ~ geo_value, scales = \"free\") +\n scale_x_date(minor_breaks = \"month\", date_labels = \"%b %y\") +\n scale_fill_brewer(palette = \"Set1\") +\n scale_y_continuous(expand = expansion(c(0, 0.05))) +\n labs(x = \"Date\", y = \"Reported COVID-19 case rates\") +\n theme(legend.position = \"none\")\n```\n\n::: {.cell-output-display}\n![](sliding-forecasters_files/figure-html/plot-ar-asof-1.svg){fig-align='center' width=90%}\n:::\n:::\n\n\nAgain, we observe that the results are not great for these two states, but\nthat's likely due to the simplicity of the model (ex. the omission of key\nfactors such as age and public health measures) and the quality of the data (ex.\nwe have not personally corrected for anomalies in the data).\n\nWe shall leave it to the reader to try the above version aware and unaware\nforecasting exercise on the Canadian case rate data. The above code for the\nAmerican state data should be readily adaptable for this purpose.\n", "supporting": [], "filters": [ "rmarkdown/pagebreak.lua" diff --git a/_freeze/sliding-forecasters/figure-html/plot-ar-asof-1.svg b/_freeze/sliding-forecasters/figure-html/plot-ar-asof-1.svg index c21dcfc..b0ec475 100644 --- a/_freeze/sliding-forecasters/figure-html/plot-ar-asof-1.svg +++ b/_freeze/sliding-forecasters/figure-html/plot-ar-asof-1.svg @@ -409,7 +409,7 @@ - + @@ -427,7 +427,7 @@ - + @@ -727,7 +727,7 @@ - + @@ -742,19 +742,19 @@ - + - + - + - + - + @@ -835,7 +835,7 @@ - + @@ -1172,54 +1172,54 @@ - + - + - - - - + + + + - + - + - + - + - + - + - - + + - + - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + @@ -1284,70 +1284,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -1733,58 +1669,58 @@ - + - + - + - - - - + + + + - + - - + + - + - + - + - + - + - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + - + - + - + @@ -1861,70 +1797,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/sliding-forecasters.qmd b/sliding-forecasters.qmd index 65d8802..f486ce7 100644 --- a/sliding-forecasters.qmd +++ b/sliding-forecasters.qmd @@ -60,7 +60,7 @@ version time `x` often lags by a few days, not to mention the later corrections that are amended to the data). ```{r make-arx-kweek, warning = FALSE} -# Latest snapshot of data, and forecast dates +# Get latest snapshot of data and pretend it's an archive us_latest <- us_archive %>% epix_as_of(version = max(.$versions_end)) %>% mutate(version = time_value) %>% @@ -79,13 +79,13 @@ forecast_k_week_ahead <- function(epi_archive, outcome, predictors, ahead = 7, e arx_forecaster( x, outcome, predictors, engine, args_list = arx_args_list(ahead = ahead) - )$predictions %>% - mutate(engine_type = engine$engine) %>% - pivot_quantiles_wider(.pred_distn) + )$predictions }, .before = 120, .versions = fc_time_values - ) + ) %>% + mutate(engine_type = engine$engine) %>% + pivot_quantiles_wider(.pred_distn) } # Generate the forecasts and bind them together @@ -278,10 +278,8 @@ do is use the historical archive of the data with version information, forecaster wrapper `forecast_k_week_ahead()`. Below we do that computation, tag it, and combine it with the forecasts from one of the engines made above. -```{r make-ar-kweek-asof} -# Generate the forecasts, and bind them together -forecasts_version_aware <- bind_rows( - map(aheads, ~ forecast_k_week_ahead( +```{r make-faithful-forecast} +forecasts_version_aware <- map(aheads, ~ forecast_k_week_ahead( us_archive, outcome = "case_rate", predictors = c("case_rate", "percent_cli"), @@ -289,9 +287,7 @@ forecasts_version_aware <- bind_rows( engine = linear_reg() )) %>% bind_rows() %>% - mutate(version = "version faithful"), - forecasts_version_unaware %>% filter(engine_type == "lm") %>% mutate(version = "version unfaithful") -) + mutate(version = "version faithful") ``` Now we can plot the results on top of the latest case rates. As before, we will @@ -299,7 +295,12 @@ only display and focus on the results for FL and CA for simplicity. ```{r plot-ar-asof, message = FALSE, warning = FALSE, fig.width = 9, fig.height = 6} #| code-fold: true -forecasts_filtered <- forecasts_version_aware %>% +forecasts_filtered <- bind_rows( + forecasts_version_aware, + forecasts_version_unaware %>% + filter(engine_type == "lm") %>% + mutate(version = "version unfaithful") + ) %>% tibble() %>% filter(geo_value %in% c("ca", "fl")) latest_data_filtered <- us_latest$DT %>%