From 28371c806faed30492bff314567615ddf34bd910 Mon Sep 17 00:00:00 2001 From: ChloeYou Date: Tue, 14 Jun 2022 13:51:27 -0700 Subject: [PATCH 1/6] improve document for as_epi_df --- R/epi_df.R | 1 + man/as_epi_df.Rd | 54 ++++++++++++++++++++++ man/rmd/epi_df_example.Rmd | 91 ++++++++++++++++++++++++++++++++++++++ vignettes/epiprocess.Rmd | 5 +++ 4 files changed, 151 insertions(+) create mode 100644 man/rmd/epi_df_example.Rmd diff --git a/R/epi_df.R b/R/epi_df.R index 50b9a898..c3efadff 100644 --- a/R/epi_df.R +++ b/R/epi_df.R @@ -111,6 +111,7 @@ NULL #' @return An `epi_df` object. #' #' @export +#' @includeRmd man/rmd/epi_df_example.Rmd examples as_epi_df = function(x, ...) { UseMethod("as_epi_df") } diff --git a/man/as_epi_df.Rd b/man/as_epi_df.Rd index e092716e..0e48af87 100644 --- a/man/as_epi_df.Rd +++ b/man/as_epi_df.Rd @@ -69,3 +69,57 @@ the \code{tbl_ts} class is dropped, and any key variables (other than \code{other_keys} field. }} +\examples{ +# create an epi_df with additional keys + + ## convert a tsibble that has county code as an extra key + ex1 <- tidyr::tibble( + geo_value = rep(c("ca", "fl", "pa"), each = 3), + county_code = c(06059,06061,06067, + 12111,12113,12117, + 42101, 42103,42105), + time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), + by = "day"), length.out = length(geo_value)), + value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value)), + ) \%>\% + tsibble::as_tsibble(index = time_value, key = c(geo_value, county_code)) + + ex1 <- as_epi_df(x = ex1, geo_type = "state", time_type = "day", as_of = "2020-06-03") + + attr(ex1,"metadata") + + ## misnamed columns such as geo_value or time_value + ## renaming the columns to match epi_df format + ex2 <- tidyr::tibble( + state = rep(c("ca", "fl", "pa"), each = 3), # misnamed + pol = rep(c("blue", "swing", "swing"), each = 3), # extra key + reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), + by = "day"), length.out = length(state)), # misnamed + value = 
1:length(state) + 0.01 * rnorm(length(state)), + ) \%>\% data.frame() + + head(ex2) + + ex2 <- ex2 \%>\% rename(geo_value = state, time_value = reported_date) \%>\% + as_epi_df(geo_type = "state", as_of = "2020-06-03", + additional_metadata = c(other_keys = "pol")) + + attr(ex2,"metadata") + + ## add a key to an epi_df object + + ### dataset from covidcast + ex3 <- jhu_csse_county_level_subset \%>\% + filter(time_value > "2021-12-01", state_name == "Massachusetts") \%>\% + dplyr::slice_tail(n = 6) + + attr(ex3,"metadata") # geo_type is county currently + + ### add state (MA) as a new column and a key to the metadata + ex3 <- ex3 \%>\% + tsibble::as_tsibble() \%>\% # needed to add the additional metadata + mutate(state = rep("MA",6)) \%>\% + as_epi_df(additional_metadata = c(other_keys = "state")) + + attr(ex3,"metadata") +} diff --git a/man/rmd/epi_df_example.Rmd b/man/rmd/epi_df_example.Rmd new file mode 100644 index 00000000..cd6d7e2a --- /dev/null +++ b/man/rmd/epi_df_example.Rmd @@ -0,0 +1,91 @@ +--- +title: "Examples on Additional Keys in epi_df" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Examples on Additional Keys in epi_df} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- +In the following examples we will show how to create an `epi_df` with additional keys. + +# Convert a `tsibble` that has county code as an extra key +```{r} +ex1 <- tibble( + geo_value = rep(c("ca", "fl", "pa"), each = 3), + county_code = c(06059,06061,06067, + 12111,12113,12117, + 42101, 42103,42105), + time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), + by = "day"), length.out = length(geo_value)), + value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value)) + ) %>% + as_tsibble(index = time_value, key = c(geo_value, county_code)) + +ex1 <- as_epi_df(x = ex1, geo_type = "state", time_type = "day", as_of = "2020-06-03") +``` + +The metadata now includes `county_code` as an extra key. 
+```{r} +attr(ex1,"metadata") +``` + + +# Dealing with misspecified column names + +`epi_df` requires there to be columns `geo_value` and `time_value`, if they do not exist then `as_epi_df()` throws an error. +```{r, error = TRUE} +data.frame( + state = rep(c("ca", "fl", "pa"), each = 3), # misnamed + pol = rep(c("blue", "swing", "swing"), each = 3), # extra key + reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), + by = "day"), length.out = length(geo_value)), # misnamed + value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value)) +) %>% as_epi_df() +``` + +The columns can be renamed to match `epi_df` format. In the example below, notice there is also an additional key `pol`. +```{r} +ex2 <- tibble( + state = rep(c("ca", "fl", "pa"), each = 3), # misnamed + pol = rep(c("blue", "swing", "swing"), each = 3), # extra key + reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), + by = "day"), length.out = length(state)), # misnamed + value = 1:length(state) + 0.01 * rnorm(length(state)) +) %>% data.frame() + +head(ex2) + +ex2 <- ex2 %>% rename(geo_value = state, time_value = reported_date) %>% + as_epi_df(geo_type = "state", as_of = "2020-06-03", + additional_metadata = c(other_keys = "pol")) + +attr(ex2,"metadata") +``` + + +## Adding additional keys to an `epi_df` object + +In the above examples, all the keys are added to objects that are not `epi_df` objects. We illustrate how to add keys to an `epi_df` object. + +We use a subset dataset from the the `covidcast` library. + +```{r} +ex3 <- jhu_csse_county_level_subset %>% + filter(time_value > "2021-12-01", state_name == "Massachusetts") %>% + slice_tail(n = 6) + +attr(ex3,"metadata") # geo_type is county currently +``` + +Now we add state (MA) as a new column and a key to the metadata. 
+```{r} + +ex3 <- ex3 %>% + as_tsibble() %>% # needed to add the additional metadata + mutate(state = rep("MA",6)) %>% + as_epi_df(additional_metadata = c(other_keys = "state")) + +attr(ex3,"metadata") +``` + + diff --git a/vignettes/epiprocess.Rmd b/vignettes/epiprocess.Rmd index 1f5e83fe..cff624b1 100644 --- a/vignettes/epiprocess.Rmd +++ b/vignettes/epiprocess.Rmd @@ -198,3 +198,8 @@ ggplot(x, aes(x = time_value, y = cases)) + scale_x_date(minor_breaks = "month", date_labels = "%b %y") + labs(x = "Date", y = "Confirmed cases of Ebola in Sierra Leone") ``` + +## Examples on Additional Keys in epi_df + +```{r child = 'man/rmd/epi_df_example.Rmd'} +``` From 6f2009bdff03660fff1bd5e205720260891f0bd5 Mon Sep 17 00:00:00 2001 From: ChloeYou Date: Tue, 14 Jun 2022 13:52:01 -0700 Subject: [PATCH 2/6] update as_epi_df roxygen --- man/as_epi_df.Rd | 171 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 119 insertions(+), 52 deletions(-) diff --git a/man/as_epi_df.Rd b/man/as_epi_df.Rd index 0e48af87..659e0353 100644 --- a/man/as_epi_df.Rd +++ b/man/as_epi_df.Rd @@ -69,57 +69,124 @@ the \code{tbl_ts} class is dropped, and any key variables (other than \code{other_keys} field. }} +\section{Convert a \code{tsibble} that has county code as an extra key}{ +\if{html}{\out{
}}\preformatted{ex1 <- tibble( + geo_value = rep(c("ca", "fl", "pa"), each = 3), + county_code = c(06059,06061,06067, + 12111,12113,12117, + 42101, 42103,42105), + time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), + by = "day"), length.out = length(geo_value)), + value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value)) + ) \%>\% + as_tsibble(index = time_value, key = c(geo_value, county_code)) + +ex1 <- as_epi_df(x = ex1, geo_type = "state", time_type = "day", as_of = "2020-06-03") +}\if{html}{\out{
}} + +The metadata now includes \code{county_code} as an extra key.\if{html}{\out{
}}\preformatted{attr(ex1,"metadata") +}\if{html}{\out{
}}\preformatted{## $geo_type +## [1] "state" +## +## $time_type +## [1] "day" +## +## $as_of +## [1] "2020-06-03" +## +## $other_keys +## [1] "county_code" +} +} + +\section{Dealing with misspecified column names}{ +\code{epi_df} requires there to be columns \code{geo_value} and \code{time_value}, if +they do not exist then \code{as_epi_df()} throws an error.\if{html}{\out{
}}\preformatted{data.frame( + state = rep(c("ca", "fl", "pa"), each = 3), # misnamed + pol = rep(c("blue", "swing", "swing"), each = 3), # extra key + reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), + by = "day"), length.out = length(geo_value)), # misnamed + value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value)) +) \%>\% as_epi_df() +}\if{html}{\out{
}}\preformatted{## Error in NextMethod(): object 'geo_value' not found +} + +The columns can be renamed to match \code{epi_df} format. In the example +below, notice there is also an additional key \code{pol}.\if{html}{\out{
}}\preformatted{ex2 <- tibble( + state = rep(c("ca", "fl", "pa"), each = 3), # misnamed + pol = rep(c("blue", "swing", "swing"), each = 3), # extra key + reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), + by = "day"), length.out = length(state)), # misnamed + value = 1:length(state) + 0.01 * rnorm(length(state)) +) \%>\% data.frame() + +head(ex2) +}\if{html}{\out{
}}\preformatted{## state pol reported_date value +## 1 ca blue 2020-06-01 1.006594 +## 2 ca blue 2020-06-02 2.020959 +## 3 ca blue 2020-06-03 3.009690 +## 4 fl swing 2020-06-01 4.005818 +## 5 fl swing 2020-06-02 5.007931 +## 6 fl swing 2020-06-03 6.015640 +}\if{html}{\out{
}}\preformatted{ex2 <- ex2 \%>\% rename(geo_value = state, time_value = reported_date) \%>\% + as_epi_df(geo_type = "state", as_of = "2020-06-03", + additional_metadata = c(other_keys = "pol")) + +attr(ex2,"metadata") +}\if{html}{\out{
}}\preformatted{## $geo_type +## [1] "state" +## +## $time_type +## [1] "day" +## +## $as_of +## [1] "2020-06-03" +## +## $other_keys +## [1] "pol" +} +\subsection{Adding additional keys to an \code{epi_df} object}{ + +In the above examples, all the keys are added to objects that are not +\code{epi_df} objects. We illustrate how to add keys to an \code{epi_df} object. + +We use a subset dataset from the the \code{covidcast} library.\if{html}{\out{
}}\preformatted{ex3 <- jhu_csse_county_level_subset \%>\% + filter(time_value > "2021-12-01", state_name == "Massachusetts") \%>\% + slice_tail(n = 6) + +attr(ex3,"metadata") # geo_type is county currently +}\if{html}{\out{
}}\preformatted{## $geo_type +## [1] "county" +## +## $time_type +## [1] "day" +## +## $as_of +## [1] "2022-05-23 14:35:45 PDT" +} + +Now we add state (MA) as a new column and a key to the metadata.\if{html}{\out{
}}\preformatted{ex3 <- ex3 \%>\% + as_tsibble() \%>\% # needed to add the additional metadata + mutate(state = rep("MA",6)) \%>\% + as_epi_df(additional_metadata = c(other_keys = "state")) + +attr(ex3,"metadata") +}\if{html}{\out{
}}\preformatted{## $geo_type +## [1] "county" +## +## $time_type +## [1] "day" +## +## $as_of +## [1] "2022-06-14 13:38:06 PDT" +## +## $other_keys +## [1] "state" +} +} +} + \examples{ -# create an epi_df with additional keys - - ## convert a tsibble that has county code as an extra key - ex1 <- tidyr::tibble( - geo_value = rep(c("ca", "fl", "pa"), each = 3), - county_code = c(06059,06061,06067, - 12111,12113,12117, - 42101, 42103,42105), - time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), - by = "day"), length.out = length(geo_value)), - value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value)), - ) \%>\% - tsibble::as_tsibble(index = time_value, key = c(geo_value, county_code)) - - ex1 <- as_epi_df(x = ex1, geo_type = "state", time_type = "day", as_of = "2020-06-03") - - attr(ex1,"metadata") - - ## misnamed columns such as geo_value or time_value - ## renaming the columns to match epi_df format - ex2 <- tidyr::tibble( - state = rep(c("ca", "fl", "pa"), each = 3), # misnamed - pol = rep(c("blue", "swing", "swing"), each = 3), # extra key - reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), - by = "day"), length.out = length(state)), # misnamed - value = 1:length(state) + 0.01 * rnorm(length(state)), - ) \%>\% data.frame() - - head(ex2) - - ex2 <- ex2 \%>\% rename(geo_value = state, time_value = reported_date) \%>\% - as_epi_df(geo_type = "state", as_of = "2020-06-03", - additional_metadata = c(other_keys = "pol")) - - attr(ex2,"metadata") - - ## add a key to an epi_df object - - ### dataset from covidcast - ex3 <- jhu_csse_county_level_subset \%>\% - filter(time_value > "2021-12-01", state_name == "Massachusetts") \%>\% - dplyr::slice_tail(n = 6) - - attr(ex3,"metadata") # geo_type is county currently - - ### add state (MA) as a new column and a key to the metadata - ex3 <- ex3 \%>\% - tsibble::as_tsibble() \%>\% # needed to add the additional metadata - mutate(state = rep("MA",6)) \%>\% - as_epi_df(additional_metadata = 
c(other_keys = "state")) - - attr(ex3,"metadata") +In the following examples we will show how to create an \code{epi_df} with +additional keys. } From 6d31320aedb5476e66b7c7f9b431ec908120ff54 Mon Sep 17 00:00:00 2001 From: ChloeYou Date: Tue, 14 Jun 2022 15:07:11 -0700 Subject: [PATCH 3/6] remove rmd example to fix error --- R/epi_df.R | 46 ++++++++++++++- man/as_epi_df.Rd | 111 ++++++------------------------------- man/rmd/epi_df_example.Rmd | 91 ------------------------------ vignettes/epiprocess.Rmd | 83 ++++++++++++++++++++++++++- 4 files changed, 145 insertions(+), 186 deletions(-) delete mode 100644 man/rmd/epi_df_example.Rmd diff --git a/R/epi_df.R b/R/epi_df.R index c3efadff..75ea5422 100644 --- a/R/epi_df.R +++ b/R/epi_df.R @@ -111,7 +111,51 @@ NULL #' @return An `epi_df` object. #' #' @export -#' @includeRmd man/rmd/epi_df_example.Rmd examples +#' @examples +#' # Convert a `tsibble` that has county code as an extra key +#' ex1 <- tsibble::tibble( +#' geo_value = rep(c("ca", "fl", "pa"), each = 3), +#' county_code = c(06059,06061,06067, +#' 12111,12113,12117, +#' 42101, 42103,42105), +#' time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), +#' by = "day"), length.out = length(geo_value)), +#' value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value)) +#' ) %>% +#' tsibble::as_tsibble(index = time_value, key = c(geo_value, county_code)) +#' +#' ex1 <- as_epi_df(x = ex1, geo_type = "state", time_type = "day", as_of = "2020-06-03") +#' attr(ex1,"metadata") +#' +#' # Dealing with misspecified column names +#' ex2 <- tsibble::tibble( +#' state = rep(c("ca", "fl", "pa"), each = 3), # misnamed +#' pol = rep(c("blue", "swing", "swing"), each = 3), # extra key +#' reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), +#' by = "day"), length.out = length(state)), # misnamed +#' value = 1:length(state) + 0.01 * rnorm(length(state)) +#' ) %>% data.frame() +#' +#' head(ex2) +#' +#' ex2 <- ex2 %>% dplyr::rename(geo_value = state, 
time_value = reported_date) %>% +#' as_epi_df(geo_type = "state", as_of = "2020-06-03", +#' additional_metadata = c(other_keys = "pol")) +#' +#' attr(ex2,"metadata") +#' +#' # Adding additional keys to an `epi_df` object +#' +#' ex3 <- jhu_csse_county_level_subset %>% +#' filter(time_value > "2021-12-01", state_name == "Massachusetts") %>% +#' dplyr::slice_tail(n = 6) +#' +#' ex3 <- ex3 %>% +#' tsibble::as_tsibble() %>% # needed to add the additional metadata +#' dplyr::mutate(state = rep("MA",6)) %>% +#' as_epi_df(additional_metadata = c(other_keys = "state")) +#' +#' attr(ex3,"metadata") as_epi_df = function(x, ...) { UseMethod("as_epi_df") } diff --git a/man/as_epi_df.Rd b/man/as_epi_df.Rd index 659e0353..bcb2772e 100644 --- a/man/as_epi_df.Rd +++ b/man/as_epi_df.Rd @@ -69,50 +69,24 @@ the \code{tbl_ts} class is dropped, and any key variables (other than \code{other_keys} field. }} -\section{Convert a \code{tsibble} that has county code as an extra key}{ -\if{html}{\out{
}}\preformatted{ex1 <- tibble( +\examples{ +# Convert a `tsibble` that has county code as an extra key +ex1 <- tsibble::tibble( geo_value = rep(c("ca", "fl", "pa"), each = 3), county_code = c(06059,06061,06067, - 12111,12113,12117, - 42101, 42103,42105), + 12111,12113,12117, + 42101, 42103,42105), time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), by = "day"), length.out = length(geo_value)), value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value)) - ) \%>\% - as_tsibble(index = time_value, key = c(geo_value, county_code)) +) \%>\% + tsibble::as_tsibble(index = time_value, key = c(geo_value, county_code)) ex1 <- as_epi_df(x = ex1, geo_type = "state", time_type = "day", as_of = "2020-06-03") -}\if{html}{\out{
}} - -The metadata now includes \code{county_code} as an extra key.\if{html}{\out{
}}\preformatted{attr(ex1,"metadata") -}\if{html}{\out{
}}\preformatted{## $geo_type -## [1] "state" -## -## $time_type -## [1] "day" -## -## $as_of -## [1] "2020-06-03" -## -## $other_keys -## [1] "county_code" -} -} - -\section{Dealing with misspecified column names}{ -\code{epi_df} requires there to be columns \code{geo_value} and \code{time_value}, if -they do not exist then \code{as_epi_df()} throws an error.\if{html}{\out{
}}\preformatted{data.frame( - state = rep(c("ca", "fl", "pa"), each = 3), # misnamed - pol = rep(c("blue", "swing", "swing"), each = 3), # extra key - reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), - by = "day"), length.out = length(geo_value)), # misnamed - value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value)) -) \%>\% as_epi_df() -}\if{html}{\out{
}}\preformatted{## Error in NextMethod(): object 'geo_value' not found -} +attr(ex1,"metadata") -The columns can be renamed to match \code{epi_df} format. In the example -below, notice there is also an additional key \code{pol}.\if{html}{\out{
}}\preformatted{ex2 <- tibble( +# Dealing with misspecified column names +ex2 <- tsibble::tibble( state = rep(c("ca", "fl", "pa"), each = 3), # misnamed pol = rep(c("blue", "swing", "swing"), each = 3), # extra key reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), @@ -121,72 +95,23 @@ below, notice there is also an additional key \code{pol}.\if{html}{\out{
\% data.frame() head(ex2) -}\if{html}{\out{
}}\preformatted{## state pol reported_date value -## 1 ca blue 2020-06-01 1.006594 -## 2 ca blue 2020-06-02 2.020959 -## 3 ca blue 2020-06-03 3.009690 -## 4 fl swing 2020-06-01 4.005818 -## 5 fl swing 2020-06-02 5.007931 -## 6 fl swing 2020-06-03 6.015640 -}\if{html}{\out{
}}\preformatted{ex2 <- ex2 \%>\% rename(geo_value = state, time_value = reported_date) \%>\% + +ex2 <- ex2 \%>\% dplyr::rename(geo_value = state, time_value = reported_date) \%>\% as_epi_df(geo_type = "state", as_of = "2020-06-03", additional_metadata = c(other_keys = "pol")) attr(ex2,"metadata") -}\if{html}{\out{
}}\preformatted{## $geo_type -## [1] "state" -## -## $time_type -## [1] "day" -## -## $as_of -## [1] "2020-06-03" -## -## $other_keys -## [1] "pol" -} -\subsection{Adding additional keys to an \code{epi_df} object}{ -In the above examples, all the keys are added to objects that are not -\code{epi_df} objects. We illustrate how to add keys to an \code{epi_df} object. +# Adding additional keys to an `epi_df` object -We use a subset dataset from the the \code{covidcast} library.\if{html}{\out{
}}\preformatted{ex3 <- jhu_csse_county_level_subset \%>\% +ex3 <- jhu_csse_county_level_subset \%>\% filter(time_value > "2021-12-01", state_name == "Massachusetts") \%>\% - slice_tail(n = 6) - -attr(ex3,"metadata") # geo_type is county currently -}\if{html}{\out{
}}\preformatted{## $geo_type -## [1] "county" -## -## $time_type -## [1] "day" -## -## $as_of -## [1] "2022-05-23 14:35:45 PDT" -} + dplyr::slice_tail(n = 6) -Now we add state (MA) as a new column and a key to the metadata.\if{html}{\out{
}}\preformatted{ex3 <- ex3 \%>\% - as_tsibble() \%>\% # needed to add the additional metadata - mutate(state = rep("MA",6)) \%>\% +ex3 <- ex3 \%>\% + tsibble::as_tsibble() \%>\% # needed to add the additional metadata + dplyr::mutate(state = rep("MA",6)) \%>\% as_epi_df(additional_metadata = c(other_keys = "state")) attr(ex3,"metadata") -}\if{html}{\out{
}}\preformatted{## $geo_type -## [1] "county" -## -## $time_type -## [1] "day" -## -## $as_of -## [1] "2022-06-14 13:38:06 PDT" -## -## $other_keys -## [1] "state" -} -} -} - -\examples{ -In the following examples we will show how to create an \code{epi_df} with -additional keys. } diff --git a/man/rmd/epi_df_example.Rmd b/man/rmd/epi_df_example.Rmd deleted file mode 100644 index cd6d7e2a..00000000 --- a/man/rmd/epi_df_example.Rmd +++ /dev/null @@ -1,91 +0,0 @@ ---- -title: "Examples on Additional Keys in epi_df" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Examples on Additional Keys in epi_df} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- -In the following examples we will show how to create an `epi_df` with additional keys. - -# Convert a `tsibble` that has county code as an extra key -```{r} -ex1 <- tibble( - geo_value = rep(c("ca", "fl", "pa"), each = 3), - county_code = c(06059,06061,06067, - 12111,12113,12117, - 42101, 42103,42105), - time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), - by = "day"), length.out = length(geo_value)), - value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value)) - ) %>% - as_tsibble(index = time_value, key = c(geo_value, county_code)) - -ex1 <- as_epi_df(x = ex1, geo_type = "state", time_type = "day", as_of = "2020-06-03") -``` - -The metadata now includes `county_code` as an extra key. -```{r} -attr(ex1,"metadata") -``` - - -# Dealing with misspecified column names - -`epi_df` requires there to be columns `geo_value` and `time_value`, if they do not exist then `as_epi_df()` throws an error. 
-```{r, error = TRUE} -data.frame( - state = rep(c("ca", "fl", "pa"), each = 3), # misnamed - pol = rep(c("blue", "swing", "swing"), each = 3), # extra key - reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), - by = "day"), length.out = length(geo_value)), # misnamed - value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value)) -) %>% as_epi_df() -``` - -The columns can be renamed to match `epi_df` format. In the example below, notice there is also an additional key `pol`. -```{r} -ex2 <- tibble( - state = rep(c("ca", "fl", "pa"), each = 3), # misnamed - pol = rep(c("blue", "swing", "swing"), each = 3), # extra key - reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), - by = "day"), length.out = length(state)), # misnamed - value = 1:length(state) + 0.01 * rnorm(length(state)) -) %>% data.frame() - -head(ex2) - -ex2 <- ex2 %>% rename(geo_value = state, time_value = reported_date) %>% - as_epi_df(geo_type = "state", as_of = "2020-06-03", - additional_metadata = c(other_keys = "pol")) - -attr(ex2,"metadata") -``` - - -## Adding additional keys to an `epi_df` object - -In the above examples, all the keys are added to objects that are not `epi_df` objects. We illustrate how to add keys to an `epi_df` object. - -We use a subset dataset from the the `covidcast` library. - -```{r} -ex3 <- jhu_csse_county_level_subset %>% - filter(time_value > "2021-12-01", state_name == "Massachusetts") %>% - slice_tail(n = 6) - -attr(ex3,"metadata") # geo_type is county currently -``` - -Now we add state (MA) as a new column and a key to the metadata. 
-```{r} - -ex3 <- ex3 %>% - as_tsibble() %>% # needed to add the additional metadata - mutate(state = rep("MA",6)) %>% - as_epi_df(additional_metadata = c(other_keys = "state")) - -attr(ex3,"metadata") -``` - - diff --git a/vignettes/epiprocess.Rmd b/vignettes/epiprocess.Rmd index cff624b1..695526d6 100644 --- a/vignettes/epiprocess.Rmd +++ b/vignettes/epiprocess.Rmd @@ -200,6 +200,87 @@ ggplot(x, aes(x = time_value, y = cases)) + ``` ## Examples on Additional Keys in epi_df +In the following examples we will show how to create an `epi_df` with additional keys. -```{r child = 'man/rmd/epi_df_example.Rmd'} +### Convert a `tsibble` that has county code as an extra key +```{r} +ex1 <- tibble( + geo_value = rep(c("ca", "fl", "pa"), each = 3), + county_code = c(06059,06061,06067, + 12111,12113,12117, + 42101, 42103,42105), + time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), + by = "day"), length.out = length(geo_value)), + value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value)) + ) %>% + as_tsibble(index = time_value, key = c(geo_value, county_code)) + +ex1 <- as_epi_df(x = ex1, geo_type = "state", time_type = "day", as_of = "2020-06-03") +``` + +The metadata now includes `county_code` as an extra key. +```{r} +attr(ex1,"metadata") +``` + + +### Dealing with misspecified column names + +`epi_df` requires there to be columns `geo_value` and `time_value`, if they do not exist then `as_epi_df()` throws an error. +```{r, error = TRUE} +data.frame( + state = rep(c("ca", "fl", "pa"), each = 3), # misnamed + pol = rep(c("blue", "swing", "swing"), each = 3), # extra key + reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), + by = "day"), length.out = length(geo_value)), # misnamed + value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value)) +) %>% as_epi_df() ``` + +The columns can be renamed to match `epi_df` format. In the example below, notice there is also an additional key `pol`. 
+```{r} +ex2 <- tibble( + state = rep(c("ca", "fl", "pa"), each = 3), # misnamed + pol = rep(c("blue", "swing", "swing"), each = 3), # extra key + reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), + by = "day"), length.out = length(state)), # misnamed + value = 1:length(state) + 0.01 * rnorm(length(state)) +) %>% data.frame() + +head(ex2) + +ex2 <- ex2 %>% rename(geo_value = state, time_value = reported_date) %>% + as_epi_df(geo_type = "state", as_of = "2020-06-03", + additional_metadata = c(other_keys = "pol")) + +attr(ex2,"metadata") +``` + + +### Adding additional keys to an `epi_df` object + +In the above examples, all the keys are added to objects that are not `epi_df` objects. We illustrate how to add keys to an `epi_df` object. + +We use a subset dataset from the the `covidcast` library. + +```{r} +ex3 <- jhu_csse_county_level_subset %>% + filter(time_value > "2021-12-01", state_name == "Massachusetts") %>% + slice_tail(n = 6) + +attr(ex3,"metadata") # geo_type is county currently +``` + +Now we add state (MA) as a new column and a key to the metadata. 
+```{r} + +ex3 <- ex3 %>% + as_tsibble() %>% # needed to add the additional metadata + mutate(state = rep("MA",6)) %>% + as_epi_df(additional_metadata = c(other_keys = "state")) + +attr(ex3,"metadata") +``` + + + From f73a5872ac20c61073b17533553eaa8da41cee84 Mon Sep 17 00:00:00 2001 From: ChloeYou Date: Mon, 4 Jul 2022 11:28:08 -0700 Subject: [PATCH 4/6] update document according to feedback from @brookslogan --- R/epi_df.R | 35 +++++--- man/as_epi_df.Rd | 35 +++++--- vignettes/epiprocess.Rmd | 171 ++++++++++++++++++++------------------- 3 files changed, 134 insertions(+), 107 deletions(-) diff --git a/R/epi_df.R b/R/epi_df.R index 75ea5422..7b9febd6 100644 --- a/R/epi_df.R +++ b/R/epi_df.R @@ -113,32 +113,41 @@ NULL #' @export #' @examples #' # Convert a `tsibble` that has county code as an extra key -#' ex1 <- tsibble::tibble( +#' # Notice that county code should be a character string to preserve any leading zeroes +#' +#' # `other_keys` are specified in the `key` parameter +#' # in the `as_tsibble()` function, along with the primary key +#' ex1_input <- tibble::tibble( #' geo_value = rep(c("ca", "fl", "pa"), each = 3), -#' county_code = c(06059,06061,06067, -#' 12111,12113,12117, -#' 42101, 42103,42105), +#' county_code = c("06059","06061","06067", +#' "12111","12113","12117", +#' "42101", "42103","42105"), #' time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), #' by = "day"), length.out = length(geo_value)), #' value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value)) #' ) %>% #' tsibble::as_tsibble(index = time_value, key = c(geo_value, county_code)) #' -#' ex1 <- as_epi_df(x = ex1, geo_type = "state", time_type = "day", as_of = "2020-06-03") +#' ex1 <- as_epi_df(x = ex1_input, geo_type = "state", time_type = "day", as_of = "2020-06-03") #' attr(ex1,"metadata") #' -#' # Dealing with misspecified column names -#' ex2 <- tsibble::tibble( +#' # Dealing with misspecified column names: +#' # Geographical and temporal information must be provided 
in columns named +#' # `geo_value` and `time_value`; if we start from a data frame with a +#' # different format, it must be converted to use `geo_value` and `time_value` +#' # before calling `as_epi_df`. +#' +#' ex2_input <- tibble::tibble( #' state = rep(c("ca", "fl", "pa"), each = 3), # misnamed #' pol = rep(c("blue", "swing", "swing"), each = 3), # extra key #' reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), #' by = "day"), length.out = length(state)), # misnamed #' value = 1:length(state) + 0.01 * rnorm(length(state)) -#' ) %>% data.frame() +#' ) #' -#' head(ex2) +#' print(ex2_input) #' -#' ex2 <- ex2 %>% dplyr::rename(geo_value = state, time_value = reported_date) %>% +#' ex2 <- ex2_input %>% dplyr::rename(geo_value = state, time_value = reported_date) %>% #' as_epi_df(geo_type = "state", as_of = "2020-06-03", #' additional_metadata = c(other_keys = "pol")) #' @@ -146,11 +155,11 @@ NULL #' #' # Adding additional keys to an `epi_df` object #' -#' ex3 <- jhu_csse_county_level_subset %>% -#' filter(time_value > "2021-12-01", state_name == "Massachusetts") %>% +#' ex3_input <- jhu_csse_county_level_subset %>% +#' dplyr::filter(time_value > "2021-12-01", state_name == "Massachusetts") %>% #' dplyr::slice_tail(n = 6) #' -#' ex3 <- ex3 %>% +#' ex3 <- ex3_input %>% #' tsibble::as_tsibble() %>% # needed to add the additional metadata #' dplyr::mutate(state = rep("MA",6)) %>% #' as_epi_df(additional_metadata = c(other_keys = "state")) diff --git a/man/as_epi_df.Rd b/man/as_epi_df.Rd index bcb2772e..e3991e96 100644 --- a/man/as_epi_df.Rd +++ b/man/as_epi_df.Rd @@ -71,32 +71,41 @@ the \code{tbl_ts} class is dropped, and any key variables (other than \examples{ # Convert a `tsibble` that has county code as an extra key -ex1 <- tsibble::tibble( +# Notice that county code should be a character string to preserve any leading zeroes + +# `other_keys` are specified in the `key` parameter +# in the `as_tsibble()` function, along with the primary key 
+ex1_input <- tibble::tibble( geo_value = rep(c("ca", "fl", "pa"), each = 3), - county_code = c(06059,06061,06067, - 12111,12113,12117, - 42101, 42103,42105), + county_code = c("06059","06061","06067", + "12111","12113","12117", + "42101", "42103","42105"), time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), by = "day"), length.out = length(geo_value)), value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value)) ) \%>\% tsibble::as_tsibble(index = time_value, key = c(geo_value, county_code)) -ex1 <- as_epi_df(x = ex1, geo_type = "state", time_type = "day", as_of = "2020-06-03") +ex1 <- as_epi_df(x = ex1_input, geo_type = "state", time_type = "day", as_of = "2020-06-03") attr(ex1,"metadata") -# Dealing with misspecified column names -ex2 <- tsibble::tibble( +# Dealing with misspecified column names: +# Geographical and temporal information must be provided in columns named +# `geo_value` and `time_value`; if we start from a data frame with a +# different format, it must be converted to use `geo_value` and `time_value` +# before calling `as_epi_df`. 
+ +ex2_input <- tibble::tibble( state = rep(c("ca", "fl", "pa"), each = 3), # misnamed pol = rep(c("blue", "swing", "swing"), each = 3), # extra key reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), by = "day"), length.out = length(state)), # misnamed value = 1:length(state) + 0.01 * rnorm(length(state)) -) \%>\% data.frame() +) -head(ex2) +print(ex2_input) -ex2 <- ex2 \%>\% dplyr::rename(geo_value = state, time_value = reported_date) \%>\% +ex2 <- ex2_input \%>\% dplyr::rename(geo_value = state, time_value = reported_date) \%>\% as_epi_df(geo_type = "state", as_of = "2020-06-03", additional_metadata = c(other_keys = "pol")) @@ -104,11 +113,11 @@ attr(ex2,"metadata") # Adding additional keys to an `epi_df` object -ex3 <- jhu_csse_county_level_subset \%>\% - filter(time_value > "2021-12-01", state_name == "Massachusetts") \%>\% +ex3_input <- jhu_csse_county_level_subset \%>\% + dplyr::filter(time_value > "2021-12-01", state_name == "Massachusetts") \%>\% dplyr::slice_tail(n = 6) -ex3 <- ex3 \%>\% +ex3 <- ex3_input \%>\% tsibble::as_tsibble() \%>\% # needed to add the additional metadata dplyr::mutate(state = rep("MA",6)) \%>\% as_epi_df(additional_metadata = c(other_keys = "state")) diff --git a/vignettes/epiprocess.Rmd b/vignettes/epiprocess.Rmd index 695526d6..31d55d5b 100644 --- a/vignettes/epiprocess.Rmd +++ b/vignettes/epiprocess.Rmd @@ -58,6 +58,7 @@ API](https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html). library(delphi.epidata) library(epiprocess) library(dplyr) +library(withr) cases <- covidcast( data_source = "jhu-csse", @@ -127,6 +128,91 @@ x <- as_epi_df(cases) %>% attributes(x)$metadata ``` +## Using additional key columns in `epi_df` +In the following examples we will show how to create an `epi_df` with additional keys. 
+
+### Converting a `tsibble` that has county code as an extra key
+```{r}
+ex1 <- tibble(
+  geo_value = rep(c("ca", "fl", "pa"), each = 3),
+  county_code = c("06059","06061","06067",
+                  "12111","12113","12117",
+                  "42101","42103","42105"),
+  time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"),
+                       by = "day"), length.out = length(geo_value)),
+  value = 1:length(geo_value) + 0.01 * withr::with_rng_version("3.0.0", withr::with_seed(42, rnorm(length(geo_value)))) # seeded, reproducible noise
+  ) %>%
+  as_tsibble(index = time_value, key = c(geo_value, county_code))
+
+ex1 <- as_epi_df(x = ex1, geo_type = "state", time_type = "day", as_of = "2020-06-03")
+```
+
+The metadata now includes `county_code` as an extra key.
+```{r}
+attr(ex1,"metadata")
+```
+
+
+### Dealing with misspecified column names
+
+`epi_df` requires there to be columns `geo_value` and `time_value`; if they do not exist, then `as_epi_df()` throws an error.
+```{r, error = TRUE}
+tibble(
+  state = rep(c("ca", "fl", "pa"), each = 3), # misnamed
+  pol = rep(c("blue", "swing", "swing"), each = 3), # extra key
+  reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"),
+                          by = "day"), length.out = length(state)), # misnamed
+  value = 1:length(state) + 0.01 * withr::with_rng_version("3.0.0", withr::with_seed(42, rnorm(length(state))))
+) %>% as_epi_df()
+```
+
+The columns can be renamed to match `epi_df` format. In the example below, notice there is also an additional key `pol`.
+```{r}
+ex2 <- tibble(
+  state = rep(c("ca", "fl", "pa"), each = 3), # misnamed
+  pol = rep(c("blue", "swing", "swing"), each = 3), # extra key
+  reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"),
+                          by = "day"), length.out = length(state)), # misnamed
+  value = 1:length(state) + 0.01 * withr::with_rng_version("3.0.0", withr::with_seed(42, rnorm(length(state)))) # seeded, reproducible noise
+) %>% data.frame()
+
+head(ex2)
+
+ex2 <- ex2 %>% rename(geo_value = state, time_value = reported_date) %>%
+  as_epi_df(geo_type = "state", as_of = "2020-06-03",
+            additional_metadata = c(other_keys = "pol"))
+
+attr(ex2,"metadata")
+```
+
+
+### Adding additional keys to an `epi_df` object
+
+In the above examples, all the keys are added to objects that are not `epi_df` objects. We illustrate how to add keys to an `epi_df` object.
+
+We use a toy data set included in `epiprocess` prepared using the `covidcast` library and are filtering to a single state for simplicity.
+
+```{r}
+ex3 <- jhu_csse_county_level_subset %>%
+  filter(time_value > "2021-12-01", state_name == "Massachusetts") %>%
+  slice_tail(n = 6)
+
+attr(ex3,"metadata") # geo_type is county currently
+```
+
+Now we add state (MA) as a new column and a key to the metadata. Reminder that lower case state name abbreviations are what we would expect if this were a `geo_value` column.
+```{r} + +ex3 <- ex3 %>% + as_tibble() %>% # needed to add the additional metadata + mutate(state = rep(tolower("MA"),6)) %>% + as_epi_df(additional_metadata = c(other_keys = "state")) + +attr(ex3,"metadata") +``` + +Setting these other keys affects the default `epi_slide` behavior, since the grouping is + ## Working with `epi_df` objects downstream Data in `epi_df` format should be easy to work with downstream, since it is a @@ -199,88 +285,11 @@ ggplot(x, aes(x = time_value, y = cases)) + labs(x = "Date", y = "Confirmed cases of Ebola in Sierra Leone") ``` -## Examples on Additional Keys in epi_df -In the following examples we will show how to create an `epi_df` with additional keys. - -### Convert a `tsibble` that has county code as an extra key -```{r} -ex1 <- tibble( - geo_value = rep(c("ca", "fl", "pa"), each = 3), - county_code = c(06059,06061,06067, - 12111,12113,12117, - 42101, 42103,42105), - time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), - by = "day"), length.out = length(geo_value)), - value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value)) - ) %>% - as_tsibble(index = time_value, key = c(geo_value, county_code)) - -ex1 <- as_epi_df(x = ex1, geo_type = "state", time_type = "day", as_of = "2020-06-03") -``` - -The metadata now includes `county_code` as an extra key. -```{r} -attr(ex1,"metadata") -``` - - -### Dealing with misspecified column names - -`epi_df` requires there to be columns `geo_value` and `time_value`, if they do not exist then `as_epi_df()` throws an error. -```{r, error = TRUE} -data.frame( - state = rep(c("ca", "fl", "pa"), each = 3), # misnamed - pol = rep(c("blue", "swing", "swing"), each = 3), # extra key - reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), - by = "day"), length.out = length(geo_value)), # misnamed - value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value)) -) %>% as_epi_df() -``` - -The columns can be renamed to match `epi_df` format. 
In the example below, notice there is also an additional key `pol`. -```{r} -ex2 <- tibble( - state = rep(c("ca", "fl", "pa"), each = 3), # misnamed - pol = rep(c("blue", "swing", "swing"), each = 3), # extra key - reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), - by = "day"), length.out = length(state)), # misnamed - value = 1:length(state) + 0.01 * rnorm(length(state)) -) %>% data.frame() - -head(ex2) - -ex2 <- ex2 %>% rename(geo_value = state, time_value = reported_date) %>% - as_epi_df(geo_type = "state", as_of = "2020-06-03", - additional_metadata = c(other_keys = "pol")) - -attr(ex2,"metadata") -``` - - -### Adding additional keys to an `epi_df` object -In the above examples, all the keys are added to objects that are not `epi_df` objects. We illustrate how to add keys to an `epi_df` object. - -We use a subset dataset from the the `covidcast` library. - -```{r} -ex3 <- jhu_csse_county_level_subset %>% - filter(time_value > "2021-12-01", state_name == "Massachusetts") %>% - slice_tail(n = 6) - -attr(ex3,"metadata") # geo_type is county currently -``` - -Now we add state (MA) as a new column and a key to the metadata. -```{r} - -ex3 <- ex3 %>% - as_tsibble() %>% # needed to add the additional metadata - mutate(state = rep("MA",6)) %>% - as_epi_df(additional_metadata = c(other_keys = "state")) - -attr(ex3,"metadata") -``` +## Attribution +This document contains a dataset that is a modified part of the [COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University](https://github.com/CSSEGISandData/COVID-19) as [republished in the COVIDcast Epidata API](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html). This data set is licensed under the terms of the [Creative Commons Attribution 4.0 International license](https://creativecommons.org/licenses/by/4.0/) by the Johns Hopkins University on behalf of its Center for Systems Science in Engineering.
Copyright Johns Hopkins University 2020. +[From the COVIDcast Epidata API](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html): + These signals are taken directly from the JHU CSSE [COVID-19 GitHub repository](https://github.com/CSSEGISandData/COVID-19) without changes. From e1f85048c532c9e3ef7cf6c0207bf7717dce7a33 Mon Sep 17 00:00:00 2001 From: ChloeYou Date: Tue, 5 Jul 2022 09:51:36 -0700 Subject: [PATCH 5/6] add warning of metadata behaviour --- vignettes/epiprocess.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/epiprocess.Rmd b/vignettes/epiprocess.Rmd index 31d55d5b..379423b0 100644 --- a/vignettes/epiprocess.Rmd +++ b/vignettes/epiprocess.Rmd @@ -211,7 +211,7 @@ ex3 <- ex3 %>% attr(ex3,"metadata") ``` -Setting these other keys affects the default `epi_slide` behavior, since the grouping is +Currently `other_keys` metadata in `epi_df` doesn't impact `epi_slide()`, contrary to `other_keys` in `as_epi_archive` which affects how the update data is interpreted. ## Working with `epi_df` objects downstream From 0693a21e74cd39d89d3d3ca6b8347f4118e0374d Mon Sep 17 00:00:00 2001 From: "Logan C. 
Brooks" Date: Thu, 7 Jul 2022 09:40:17 -0700 Subject: [PATCH 6/6] Tweak tsibble -> epi_df example explanation --- R/epi_df.R | 10 +++++++--- man/as_epi_df.Rd | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/R/epi_df.R b/R/epi_df.R index 7b9febd6..db4f4777 100644 --- a/R/epi_df.R +++ b/R/epi_df.R @@ -115,8 +115,6 @@ NULL #' # Convert a `tsibble` that has county code as an extra key #' # Notice that county code should be a character string to preserve any leading zeroes #' -#' # `other_keys` are specified in the `key` parameter -#' # in the `as_tsibble()` function, along with the primary key #' ex1_input <- tibble::tibble( #' geo_value = rep(c("ca", "fl", "pa"), each = 3), #' county_code = c("06059","06061","06067", @@ -128,8 +126,12 @@ NULL #' ) %>% #' tsibble::as_tsibble(index = time_value, key = c(geo_value, county_code)) #' +#' # The `other_keys` metadata (`"county_code"` in this case) is automatically +#' # inferred from the `tsibble`'s `key`: #' ex1 <- as_epi_df(x = ex1_input, geo_type = "state", time_type = "day", as_of = "2020-06-03") -#' attr(ex1,"metadata") +#' attr(ex1,"metadata")[["other_keys"]] +#' +#' #' #' # Dealing with misspecified column names: #' # Geographical and temporal information must be provided in columns named @@ -153,6 +155,8 @@ NULL #' #' attr(ex2,"metadata") #' +#' +#' #' # Adding additional keys to an `epi_df` object #' #' ex3_input <- jhu_csse_county_level_subset %>% diff --git a/man/as_epi_df.Rd b/man/as_epi_df.Rd index e3991e96..5d1b1335 100644 --- a/man/as_epi_df.Rd +++ b/man/as_epi_df.Rd @@ -73,8 +73,6 @@ the \code{tbl_ts} class is dropped, and any key variables (other than # Convert a `tsibble` that has county code as an extra key # Notice that county code should be a character string to preserve any leading zeroes -# `other_keys` are specified in the `key` parameter -# in the `as_tsibble()` function, along with the primary key ex1_input <- tibble::tibble( geo_value = rep(c("ca", "fl", "pa"), each = 
3), county_code = c("06059","06061","06067", @@ -86,8 +84,12 @@ ex1_input <- tibble::tibble( ) \%>\% tsibble::as_tsibble(index = time_value, key = c(geo_value, county_code)) +# The `other_keys` metadata (`"county_code"` in this case) is automatically +# inferred from the `tsibble`'s `key`: ex1 <- as_epi_df(x = ex1_input, geo_type = "state", time_type = "day", as_of = "2020-06-03") -attr(ex1,"metadata") +attr(ex1,"metadata")[["other_keys"]] + + # Dealing with misspecified column names: # Geographical and temporal information must be provided in columns named @@ -111,6 +113,8 @@ ex2 <- ex2_input \%>\% dplyr::rename(geo_value = state, time_value = reported_da attr(ex2,"metadata") + + # Adding additional keys to an `epi_df` object ex3_input <- jhu_csse_county_level_subset \%>\%