From f0d3b56bd06e169f680f025ccd2524fbc8150e91 Mon Sep 17 00:00:00 2001 From: admin Date: Thu, 16 Jun 2022 14:05:14 -0700 Subject: [PATCH 1/4] Added tests to make sure new_epi_df works as intended --- NAMESPACE | 1 + R/epi_df.R | 122 ++++++++++++++++++--------- man/archive_cases_dv_subset.Rd | 5 +- man/incidence_num_outlier_example.Rd | 3 +- man/jhu_csse_county_level_subset.Rd | 3 +- man/jhu_csse_daily_subset.Rd | 37 +++++--- man/new_epi_df.Rd | 46 ++++++++++ tests/testthat/test-epi_df.R | 27 ++++++ 8 files changed, 188 insertions(+), 56 deletions(-) create mode 100644 man/new_epi_df.Rd create mode 100644 tests/testthat/test-epi_df.R diff --git a/NAMESPACE b/NAMESPACE index 11f488e4..0e74d698 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -40,6 +40,7 @@ export(growth_rate) export(is_epi_archive) export(is_epi_df) export(mutate) +export(new_epi_df) export(relocate) export(rename) export(slice) diff --git a/R/epi_df.R b/R/epi_df.R index 50b9a898..932e98b1 100644 --- a/R/epi_df.R +++ b/R/epi_df.R @@ -84,6 +84,85 @@ #' @name epi_df NULL + +#' Creates an `epi_df` object +#' +#' Creates a new `epi_df` object. By default builds an empty tibble with the +#' correct metadata for an `epi_df` object (ie. `geo_type`, `time_type`, and `as_of`). +#' Refer to the below info. about the arguments for more details. +#' +#' @param x A data.frame, [tibble::tibble], or [tsibble::tsibble] to be converted +#' @param geo_type Type for the geo values. If missing, then the function will +#' attempt to infer it from the geo values present; if this fails, then it +#' will be set to "custom". +#' @param time_type Type for the time values. If missing, then the function will +#' attempt to infer it from the time values present; if this fails, then it +#' will be set to "custom". +#' @param as_of Time value representing the time at which the given data were +#' available. For example, if `as_of` is January 31, 2022, then the `epi_df` +#' object that is created would represent the most up-to-date version of the +#' data available as of January 31, 2022. If the `as_of` argument is missing, +#' then the current day-time will be used. +#' @param additional_metadata List of additional metadata to attach to the +#' `epi_df` object. The metadata will have `geo_type`, `time_type`, and +#' `as_of` fields; named entries from the passed list or will be included as +#' well. +#' @param ... Additional arguments passed to methods. +#' @return An `epi_df` object. +#' +#' @export +new_epi_df = function(x = tibble::tibble(), geo_type, time_type, as_of, + additional_metadata = list()) { + # Check that we have a data frame + if (!is.data.frame(x)) { + Abort("`x` must be a data frame.") + } + + # If geo type is missing, then try to guess it + if (missing(geo_type)) { + geo_type = guess_geo_type(x$geo_value) + } + + # If time type is missing, then try to guess it + if (missing(time_type)) { + time_type = guess_time_type(x$time_value) + } + + # If as_of is missing, then try to guess it + if (missing(as_of)) { + # First check the metadata for an as_of field + if ("metadata" %in% names(attributes(x)) && + "as_of" %in% names(attributes(x)$metadata)) { + as_of = attributes(x)$metadata$as_of + } + + # Next check for as_of, issue, or version columns + else if ("as_of" %in% names(x)) as_of = max(x$as_of) + else if ("issue" %in% names(x)) as_of = max(x$issue) + else if ("version" %in% names(x)) as_of = max(x$version) + + # If we got here then we failed + else as_of = Sys.time() # Use the current day-time + } + + # Define metadata fields + metadata = list() + metadata$geo_type = geo_type + metadata$time_type = time_type + metadata$as_of = as_of + metadata = c(metadata, additional_metadata) + + # Reorder columns (geo_value, time_value, ...) + if(sum(dim(x)) != 0){ + x = dplyr::relocate(x, .data$geo_value, .data$time_value) + } + + # Apply epi_df class, attach metadata, and return + class(x) = c("epi_df", class(x)) + attributes(x)$metadata = metadata + return(x) +} + #' Convert to `epi_df` format #' #' Converts a data frame or tibble into an `epi_df` object. See the [getting @@ -142,47 +221,8 @@ as_epi_df.tbl_df = function(x, geo_type, time_type, as_of, Abort("`x` must contain a `time_value` column.") } - # If geo type is missing, then try to guess it - if (missing(geo_type)) { - geo_type = guess_geo_type(x$geo_value) - } - - # If time type is missing, then try to guess it - if (missing(time_type)) { - time_type = guess_time_type(x$time_value) - } - - # If as_of is missing, then try to guess it - if (missing(as_of)) { - # First check the metadata for an as_of field - if ("metadata" %in% names(attributes(x)) && - "as_of" %in% names(attributes(x)$metadata)) { - as_of = attributes(x)$metadata$as_of - } - - # Next check for as_of, issue, or version columns - else if ("as_of" %in% names(x)) as_of = max(x$as_of) - else if ("issue" %in% names(x)) as_of = max(x$issue) - else if ("version" %in% names(x)) as_of = max(x$version) - - # If we got here then we failed - else as_of = Sys.time() # Use the current day-time - } - - # Define metadata fields - metadata = list() - metadata$geo_type = geo_type - metadata$time_type = time_type - metadata$as_of = as_of - metadata = c(metadata, additional_metadata) - - # Reorder columns (geo_value, time_value, ...) - x = dplyr::relocate(x, .data$geo_value, .data$time_value) - - # Apply epi_df class, attach metadata, and return - class(x) = c("epi_df", class(x)) - attributes(x)$metadata = metadata - return(x) + new_epi_df(x, geo_type, time_type, as_of, + additional_metadata = list(), ...) } #' @method as_epi_df data.frame diff --git a/man/archive_cases_dv_subset.Rd b/man/archive_cases_dv_subset.Rd index 3a44ed04..1c6b9eb9 100644 --- a/man/archive_cases_dv_subset.Rd +++ b/man/archive_cases_dv_subset.Rd @@ -3,7 +3,7 @@ \docType{data} \name{archive_cases_dv_subset} \alias{archive_cases_dv_subset} -\title{Subset of daily doctor visits and cases from California, Florida, Texas, and New York in archive format} +\title{Subset of daily doctor visits and cases in archive format} \format{ An \code{epi_archive} data format. The data table DT has 129,638 rows and 5 columns: \describe{ @@ -35,6 +35,7 @@ This data source is based on information about outpatient visits, provided to us by health system partners, and also contains confirmed COVID-19 cases based on reports made available by the Center for Systems Science and Engineering at Johns Hopkins University. -This example data ranges from June 1, 2020 to Dec 1, 2021, and is also limited to California, Florida, Texas, and New York. +This example data ranges from June 1, 2020 to Dec 1, 2021, and +is also limited to California, Florida, Texas, and New York. } \keyword{datasets} diff --git a/man/incidence_num_outlier_example.Rd b/man/incidence_num_outlier_example.Rd index 23afdf51..90275099 100644 --- a/man/incidence_num_outlier_example.Rd +++ b/man/incidence_num_outlier_example.Rd @@ -32,7 +32,8 @@ incidence_num_outlier_example This data source of confirmed COVID-19 cases is based on reports made available by the Center for Systems Science and Engineering at Johns Hopkins University. -This example data is a snapshot as of Oct 28, 2021 and captures the cases from June 1, 2020 to May 31, 2021 +This example data is a snapshot as of Oct 28, 2021 and captures the cases +from June 1, 2020 to May 31, 2021 and is limited to California and Florida. } \keyword{datasets} diff --git a/man/jhu_csse_county_level_subset.Rd b/man/jhu_csse_county_level_subset.Rd index 6ab47a12..dfe8ef8a 100644 --- a/man/jhu_csse_county_level_subset.Rd +++ b/man/jhu_csse_county_level_subset.Rd @@ -33,6 +33,7 @@ jhu_csse_county_level_subset This data source of confirmed COVID-19 cases and deaths is based on reports made available by the Center for Systems Science and Engineering at Johns Hopkins University. -This example data ranges from Mar 1, 2020 to Dec 31, 2021, and is limited to Massachusetts and Vermont. +This example data ranges from Mar 1, 2020 to Dec 31, 2021, +and is limited to Massachusetts and Vermont. } \keyword{datasets} diff --git a/man/jhu_csse_daily_subset.Rd b/man/jhu_csse_daily_subset.Rd index 20c4d909..626bf545 100644 --- a/man/jhu_csse_daily_subset.Rd +++ b/man/jhu_csse_daily_subset.Rd @@ -3,28 +3,42 @@ \docType{data} \name{jhu_csse_daily_subset} \alias{jhu_csse_daily_subset} -\title{Subset of JHU daily cases and deaths from California, Florida, Texas, New York, Georgia, and Pennsylvania} +\title{Subset of JHU daily state cases and deaths} \format{ A tibble with 4026 rows and 6 variables: \describe{ -\item{geo_value}{the geographic value associated with each row of measurements.} +\item{geo_value}{the geographic value associated with each row +of measurements.} \item{time_value}{the time value associated with each row of measurements.} -\item{case_rate_7d_av}{7-day average signal of number of new confirmed COVID-19 cases per 100,000 population, daily} -\item{death_rate_7d_av}{7-day average signal of number of new confirmed deaths due to COVID-19 per 100,000 population, daily} +\item{case_rate_7d_av}{7-day average signal of number of new +confirmed COVID-19 cases per 100,000 population, daily} +\item{death_rate_7d_av}{7-day average signal of number of new confirmed +deaths due to COVID-19 per 100,000 population, daily} \item{cases}{Number of new confirmed COVID-19 cases, daily} -\item{cases_7d_av}{7-day average signal of number of new confirmed COVID-19 cases, daily} +\item{cases_7d_av}{7-day average signal of number of new confirmed +COVID-19 cases, daily} } } \source{ -This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the +This object contains a modified part of the +\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} +as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. +This data set is licensed under the terms of the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license} -by the Johns Hopkins University on behalf of its Center for Systems Science in Engineering. -Copyright Johns Hopkins University 2020. +by the Johns Hopkins University on behalf of its Center for Systems Science +in Engineering. Copyright Johns Hopkins University 2020. Modifications: \itemize{ -\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: These signals are taken directly from the JHU CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository} without changes. The 7-day average signals are computed by Delphi by calculating moving averages of the preceding 7 days, so the signal for June 7 is the average of the underlying data for June 1 through 7, inclusive. -\item Furthermore, the data has been limited to a very small number of rows, the signal names slightly altered, and formatted into a tibble. +\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: +These signals are taken directly from the JHU CSSE +\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository} +without changes. The 7-day average signals are computed by Delphi by +calculating moving averages of the preceding 7 days, so the signal for +June 7 is the average of the underlying data for June 1 through 7, +inclusive. +\item Furthermore, the data has been limited to a very small number of rows, +the signal names slightly altered, and formatted into a tibble. } } \usage{ @@ -34,6 +48,7 @@ jhu_csse_daily_subset This data source of confirmed COVID-19 cases and deaths is based on reports made available by the Center for Systems Science and Engineering at Johns Hopkins University. -This example data ranges from Mar 1, 2020 to Dec 31, 2021, and is limited to California, Florida, Texas, New York, Georgia, and Pennsylvania. +This example data ranges from Mar 1, 2020 to Dec 31, 2021, and is limited to +California, Florida, Texas, New York, Georgia, and Pennsylvania. } \keyword{datasets} diff --git a/man/new_epi_df.Rd b/man/new_epi_df.Rd new file mode 100644 index 00000000..f1062a14 --- /dev/null +++ b/man/new_epi_df.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/epi_df.R +\name{new_epi_df} +\alias{new_epi_df} +\title{Creates an \code{epi_df} object} +\usage{ +new_epi_df( + x = tibble::tibble(), + geo_type, + time_type, + as_of, + additional_metadata = list() +) +} +\arguments{ +\item{x}{A data.frame, \link[tibble:tibble]{tibble::tibble}, or \link[tsibble:tsibble]{tsibble::tsibble} to be converted} + +\item{geo_type}{Type for the geo values. If missing, then the function will +attempt to infer it from the geo values present; if this fails, then it +will be set to "custom".} + +\item{time_type}{Type for the time values. If missing, then the function will +attempt to infer it from the time values present; if this fails, then it +will be set to "custom".} + +\item{as_of}{Time value representing the time at which the given data were +available. For example, if \code{as_of} is January 31, 2022, then the \code{epi_df} +object that is created would represent the most up-to-date version of the +data available as of January 31, 2022. If the \code{as_of} argument is missing, +then the current day-time will be used.} + +\item{additional_metadata}{List of additional metadata to attach to the +\code{epi_df} object. The metadata will have \code{geo_type}, \code{time_type}, and +\code{as_of} fields; named entries from the passed list or will be included as +well.} + +\item{...}{Additional arguments passed to methods.} +} +\value{ +An \code{epi_df} object. +} +\description{ +Creates a new \code{epi_df} object. By default builds an empty tibble with the +correct metadata for an \code{epi_df} object (ie. \code{geo_type}, \code{time_type}, and \code{as_of}). +Refer to the below info. about the arguments for more details. +} diff --git a/tests/testthat/test-epi_df.R b/tests/testthat/test-epi_df.R new file mode 100644 index 00000000..93a5ed71 --- /dev/null +++ b/tests/testthat/test-epi_df.R @@ -0,0 +1,27 @@ +test_that("new_epi_df works as intended", { + + # Empty tibble + wmsg = capture_warnings(a <- new_epi_df()) + expect_match(wmsg[1], + "Unknown or uninitialised column: `geo_value`.") + expect_match(wmsg[2], + "Unknown or uninitialised column: `time_value`.") + expect_true(is_epi_df(a)) + expect_identical(attributes(a)$metadata$geo_type, "custom") + expect_identical(attributes(a)$metadata$time_type, "custom") + expect_true(lubridate::is.POSIXt(attributes(a)$metadata$as_of)) + + # Simple non-empty tibble with geo_value and time_value cols + tib <- tibble::tibble( + x = 1:10, y = 1:10, + time_value = rep(seq(as.Date("2020-01-01"), by = 1, length.out = 5), times = 2), + geo_value = rep(c("ca", "hi"), each = 5) + ) + + epi_tib = new_epi_df(tib) + expect_true(is_epi_df(epi_tib)) + expect_length(epi_tib, 4L) + expect_identical(attributes(epi_tib)$metadata$geo_type, "state") + expect_identical(attributes(epi_tib)$metadata$time_type, "day") + expect_true(lubridate::is.POSIXt(attributes(epi_tib)$metadata$as_of)) +}) From 4d8c47d192df688fe82bf7efcd8f2e994aaafd38 Mon Sep 17 00:00:00 2001 From: admin Date: Thu, 16 Jun 2022 15:07:54 -0700 Subject: [PATCH 2/4] Added ... to new_epi_df --- R/epi_df.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/epi_df.R b/R/epi_df.R index 932e98b1..da77d1bd 100644 --- a/R/epi_df.R +++ b/R/epi_df.R @@ -112,7 +112,7 @@ NULL #' #' @export new_epi_df = function(x = tibble::tibble(), geo_type, time_type, as_of, - additional_metadata = list()) { + additional_metadata = list(), ...) { # Check that we have a data frame if (!is.data.frame(x)) { Abort("`x` must be a data frame.") From 7855ac974e5eae6cf0bc185abd24602f89c84daa Mon Sep 17 00:00:00 2001 From: admin Date: Thu, 16 Jun 2022 15:09:42 -0700 Subject: [PATCH 3/4] Removed ... from new_epi_df() --- R/epi_df.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/epi_df.R b/R/epi_df.R index da77d1bd..92ed96a6 100644 --- a/R/epi_df.R +++ b/R/epi_df.R @@ -87,7 +87,7 @@ NULL #' Creates an `epi_df` object #' -#' Creates a new `epi_df` object. By default builds an empty tibble with the +#' Creates a new `epi_df` object. By default, builds an empty tibble with the #' correct metadata for an `epi_df` object (ie. `geo_type`, `time_type`, and `as_of`). #' Refer to the below info. about the arguments for more details. #' @@ -107,12 +107,11 @@ NULL #' `epi_df` object. The metadata will have `geo_type`, `time_type`, and #' `as_of` fields; named entries from the passed list or will be included as #' well. -#' @param ... Additional arguments passed to methods. #' @return An `epi_df` object. #' #' @export new_epi_df = function(x = tibble::tibble(), geo_type, time_type, as_of, - additional_metadata = list(), ...) { + additional_metadata = list()) { # Check that we have a data frame if (!is.data.frame(x)) { Abort("`x` must be a data frame.") From edb07dbc1a6edd49ff7b46d12e7209d2d9902f85 Mon Sep 17 00:00:00 2001 From: admin Date: Thu, 16 Jun 2022 15:24:58 -0700 Subject: [PATCH 4/4] added ... to new_epi_df() --- R/epi_df.R | 3 ++- man/new_epi_df.Rd | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/R/epi_df.R b/R/epi_df.R index 92ed96a6..92e76271 100644 --- a/R/epi_df.R +++ b/R/epi_df.R @@ -107,11 +107,12 @@ NULL #' `epi_df` object. The metadata will have `geo_type`, `time_type`, and #' `as_of` fields; named entries from the passed list or will be included as #' well. +#' @param ... Additional arguments passed to methods. #' @return An `epi_df` object. #' #' @export new_epi_df = function(x = tibble::tibble(), geo_type, time_type, as_of, - additional_metadata = list()) { + additional_metadata = list(), ...) { # Check that we have a data frame if (!is.data.frame(x)) { Abort("`x` must be a data frame.") diff --git a/man/new_epi_df.Rd b/man/new_epi_df.Rd index f1062a14..95f8dc9f 100644 --- a/man/new_epi_df.Rd +++ b/man/new_epi_df.Rd @@ -9,7 +9,8 @@ new_epi_df( geo_type, time_type, as_of, - additional_metadata = list() + additional_metadata = list(), + ... ) } \arguments{ @@ -40,7 +41,7 @@ well.} An \code{epi_df} object. } \description{ -Creates a new \code{epi_df} object. By default builds an empty tibble with the +Creates a new \code{epi_df} object. By default, builds an empty tibble with the correct metadata for an \code{epi_df} object (ie. \code{geo_type}, \code{time_type}, and \code{as_of}). Refer to the below info. about the arguments for more details. }