Skip to content

Created an epi_df constructor #104

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ export(growth_rate)
export(is_epi_archive)
export(is_epi_df)
export(mutate)
export(new_epi_df)
export(relocate)
export(rename)
export(slice)
Expand Down
122 changes: 81 additions & 41 deletions R/epi_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,85 @@
#' @name epi_df
NULL


#' Creates an `epi_df` object
#'
#' Creates a new `epi_df` object. By default, builds an empty tibble with the
#' correct metadata for an `epi_df` object (ie. `geo_type`, `time_type`, and `as_of`).
#' Refer to the below info. about the arguments for more details.
#'
#' @param x A data.frame, [tibble::tibble], or [tsibble::tsibble] to be converted
#' @param geo_type Type for the geo values. If missing, then the function will
#' attempt to infer it from the geo values present; if this fails, then it
#' will be set to "custom".
#' @param time_type Type for the time values. If missing, then the function will
#' attempt to infer it from the time values present; if this fails, then it
#' will be set to "custom".
#' @param as_of Time value representing the time at which the given data were
#' available. For example, if `as_of` is January 31, 2022, then the `epi_df`
#' object that is created would represent the most up-to-date version of the
#' data available as of January 31, 2022. If the `as_of` argument is missing,
#' then the current day-time will be used.
#' @param additional_metadata List of additional metadata to attach to the
#' `epi_df` object. The metadata will have `geo_type`, `time_type`, and
#' `as_of` fields; named entries from the passed list or will be included as
#' well.
#' @param ... Additional arguments passed to methods.
#' @return An `epi_df` object.
#'
#' @export
new_epi_df = function(x = tibble::tibble(), geo_type, time_type, as_of,
additional_metadata = list(), ...) {
# Check that we have a data frame
if (!is.data.frame(x)) {
Abort("`x` must be a data frame.")
}

# If geo type is missing, then try to guess it
if (missing(geo_type)) {
geo_type = guess_geo_type(x$geo_value)
}

# If time type is missing, then try to guess it
if (missing(time_type)) {
time_type = guess_time_type(x$time_value)
}

# If as_of is missing, then try to guess it
if (missing(as_of)) {
# First check the metadata for an as_of field
if ("metadata" %in% names(attributes(x)) &&
"as_of" %in% names(attributes(x)$metadata)) {
as_of = attributes(x)$metadata$as_of
}

# Next check for as_of, issue, or version columns
else if ("as_of" %in% names(x)) as_of = max(x$as_of)
else if ("issue" %in% names(x)) as_of = max(x$issue)
else if ("version" %in% names(x)) as_of = max(x$version)

# If we got here then we failed
else as_of = Sys.time() # Use the current day-time
}

# Define metadata fields
metadata = list()
metadata$geo_type = geo_type
metadata$time_type = time_type
metadata$as_of = as_of
metadata = c(metadata, additional_metadata)

# Reorder columns (geo_value, time_value, ...)
if(sum(dim(x)) != 0){
x = dplyr::relocate(x, .data$geo_value, .data$time_value)
}

# Apply epi_df class, attach metadata, and return
class(x) = c("epi_df", class(x))
attributes(x)$metadata = metadata
return(x)
}

#' Convert to `epi_df` format
#'
#' Converts a data frame or tibble into an `epi_df` object. See the [getting
Expand Down Expand Up @@ -142,47 +221,8 @@ as_epi_df.tbl_df = function(x, geo_type, time_type, as_of,
Abort("`x` must contain a `time_value` column.")
}

# If geo type is missing, then try to guess it
if (missing(geo_type)) {
geo_type = guess_geo_type(x$geo_value)
}

# If time type is missing, then try to guess it
if (missing(time_type)) {
time_type = guess_time_type(x$time_value)
}

# If as_of is missing, then try to guess it
if (missing(as_of)) {
# First check the metadata for an as_of field
if ("metadata" %in% names(attributes(x)) &&
"as_of" %in% names(attributes(x)$metadata)) {
as_of = attributes(x)$metadata$as_of
}

# Next check for as_of, issue, or version columns
else if ("as_of" %in% names(x)) as_of = max(x$as_of)
else if ("issue" %in% names(x)) as_of = max(x$issue)
else if ("version" %in% names(x)) as_of = max(x$version)

# If we got here then we failed
else as_of = Sys.time() # Use the current day-time
}

# Define metadata fields
metadata = list()
metadata$geo_type = geo_type
metadata$time_type = time_type
metadata$as_of = as_of
metadata = c(metadata, additional_metadata)

# Reorder columns (geo_value, time_value, ...)
x = dplyr::relocate(x, .data$geo_value, .data$time_value)

# Apply epi_df class, attach metadata, and return
class(x) = c("epi_df", class(x))
attributes(x)$metadata = metadata
return(x)
new_epi_df(x, geo_type, time_type, as_of,
additional_metadata = list(), ...)
}

#' @method as_epi_df data.frame
Expand Down
5 changes: 3 additions & 2 deletions man/archive_cases_dv_subset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/incidence_num_outlier_example.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/jhu_csse_county_level_subset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 26 additions & 11 deletions man/jhu_csse_daily_subset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

47 changes: 47 additions & 0 deletions man/new_epi_df.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions tests/testthat/test-epi_df.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
test_that("new_epi_df works as intended", {

# Empty tibble
wmsg = capture_warnings(a <- new_epi_df())
expect_match(wmsg[1],
"Unknown or uninitialised column: `geo_value`.")
expect_match(wmsg[2],
"Unknown or uninitialised column: `time_value`.")
expect_true(is_epi_df(a))
expect_identical(attributes(a)$metadata$geo_type, "custom")
expect_identical(attributes(a)$metadata$time_type, "custom")
expect_true(lubridate::is.POSIXt(attributes(a)$metadata$as_of))

# Simple non-empty tibble with geo_value and time_value cols
tib <- tibble::tibble(
x = 1:10, y = 1:10,
time_value = rep(seq(as.Date("2020-01-01"), by = 1, length.out = 5), times = 2),
geo_value = rep(c("ca", "hi"), each = 5)
)

epi_tib = new_epi_df(tib)
expect_true(is_epi_df(epi_tib))
expect_length(epi_tib, 4L)
expect_identical(attributes(epi_tib)$metadata$geo_type, "state")
expect_identical(attributes(epi_tib)$metadata$time_type, "day")
expect_true(lubridate::is.POSIXt(attributes(epi_tib)$metadata$as_of))
})