diff --git a/DESCRIPTION b/DESCRIPTION index cacd43ba..533d08b1 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,4 +1,4 @@ -Package: epitools +Package: epiprocess Type: Package Title: Tools for basic signal processing in epidemiology Version: 1.0.0 diff --git a/NAMESPACE b/NAMESPACE index 6ee6fd35..7e588c50 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,13 +1,13 @@ # Generated by roxygen2: do not edit by hand -S3method(as.epi_tibble,data.frame) -S3method(as.epi_tibble,epi_tibble) -S3method(as.epi_tibble,tibble) -S3method(group_by,epi_tibble) -S3method(head,epi_tibble) -S3method(print,epi_tibble) -S3method(summary,epi_tibble) -S3method(ungroup,epi_tibble) +S3method(as.epi_df,data.frame) +S3method(as.epi_df,epi_df) +S3method(as.epi_df,tibble) +S3method(group_by,epi_df) +S3method(head,epi_df) +S3method(print,epi_df) +S3method(summary,epi_df) +S3method(ungroup,epi_df) export("%>%") export(End) export(Max) @@ -16,11 +16,11 @@ export(Median) export(Min) export(Start) export(Sum) -export(as.epi_tibble) -export(cor_lagged) +export(as.epi_df) export(detect_outliers) export(detect_outliers_rm) export(detect_outliers_stl) +export(epi_cor) export(epi_slide) export(estimate_deriv) export(pct_change) diff --git a/R/correlation.R b/R/correlation.R index d617bc3d..28c1abd1 100644 --- a/R/correlation.R +++ b/R/correlation.R @@ -1,12 +1,12 @@ -#' Compute lagged correlations between variables in an `epi_tibble` object +#' Compute lagged correlations between variables in an `epi_df` object #' -#' Computes lagged correlations between variables in an `epi_tibble` object, +#' Computes lagged correlations between variables in an `epi_df` object, #' allowing for grouping by geo value, time value, or any other variables. See -#' the [correlations -#' vignette](https://cmu-delphi.github.io/epitools/articles/correlations.html) +#' the [correlation +#' vignette](https://cmu-delphi.github.io/epitools/articles/correlation.html) #' for examples. #' -#' @param x The `epi_tibble` object under consideration. +#' @param x The `epi_df` object under consideration. #' @param var1,var2 The variables in `x` to correlate. #' @param dt1,dt2 Time shifts to consider for the two variables, respectively, #' before computing correlations. Negative shifts translate into in a lag @@ -40,11 +40,11 @@ #' @importFrom stats cor #' @importFrom rlang .data enquo #' @export -cor_lagged = function(x, var1, var2, dt1 = 0, dt2 = 0, - by = geo_value, use = "na.or.complete", - method = c("pearson", "kendall", "spearman")) { - # Check we have an `epi_tibble` object - if (!inherits(x, "epi_tibble")) abort("`x` must be of class `epi_tibble`.") +epi_cor = function(x, var1, var2, dt1 = 0, dt2 = 0, by = geo_value, + use = "na.or.complete", + method = c("pearson", "kendall", "spearman")) { + # Check we have an `epi_df` object + if (!inherits(x, "epi_df")) abort("`x` must be of class `epi_df`.") # Check that we have variables to do computations on if (missing(var1)) abort("`var1` must be specified.") diff --git a/R/derivative.R b/R/derivative.R index 9453c26f..0061b386 100644 --- a/R/derivative.R +++ b/R/derivative.R @@ -1,12 +1,12 @@ -#' Estimate derivatives of a variable in an `epi_tibble` object +#' Estimate derivatives of a variable in an `epi_df` object #' -#' Estimates derivatives of a variable in an `epi_tibble` object, using a local -#' (in time) linear regression or alternative smoother. See the [estimating -#' derivatives -#' vignette](https://cmu-delphi.github.io/epitools/articles/derivatives.html) -#' for examples. 
+#' Estimates derivatives of a variable in an `epi_df` object, using a local (in +#' time) linear regression or alternative smoothing method. See the [estimating +#' derivatives +#' vignette](https://cmu-delphi.github.io/epitools/articles/derivative.html) for +#' examples. #' -#' @param x The `epi_tibble` object under consideration. +#' @param x The `epi_df` object under consideration. #' @param var The variable in `x` whose derivatives are to be estimated. #' @param method One of "lin", "ss", or "tf" indicating the method to use for #' the derivative calculation. To estimate the derivative at any time point, @@ -49,7 +49,7 @@ #' is `TRUE`. #' @param ... Additional arguments to pass to the function that estimates #' derivatives. See details below. -#' @return An `epi_tibble` object given by appending a new column to `x`, named +#' @return An `epi_df` object given by appending a new column to `x`, named #' according to the `new_col_name` argument, containing the derivative values. #' #' @details Derivatives are estimated using: @@ -141,7 +141,7 @@ estimate_deriv = function(x, var, method = c("lin", "ss", "tf"), n = 14, x = select(x, -temp) %>% ungroup() # Attach the class and metadata and return - class(x) = c("epi_tibble", class(x)) + class(x) = c("epi_df", class(x)) attributes(x)$metadata = metadata return(x) } diff --git a/R/epi_tibble.R b/R/epi_df.R similarity index 76% rename from R/epi_tibble.R rename to R/epi_df.R index 8bc97c31..846cb918 100644 --- a/R/epi_tibble.R +++ b/R/epi_df.R @@ -1,8 +1,8 @@ -#' Convert data to `epi_tibble` format +#' Convert data to `epi_df` format #' #' Converts a data frame or tibble into a format that is consistent with the -#' `epi_tibble` class, ensuring that it has a certain minimal set of columns, -#' and that it has certain minimal metadata. +#' `epi_df` class, ensuring that it has a certain minimal set of columns, and +#' that it has certain minimal metadata. #' #' @param x The object to be converted. See the methods section below for #' details on formatting of each input type. @@ -16,20 +16,19 @@ #' attempt to infer it from the passed object `x`; if this fails, then the #' current day-time will be used. #' @param additional_metadata List of additional metadata to attach to the -#' `epi_tibble` object. All objects will have `time_type`, `geo_type`, and -#' `issue` fields; named entries from the passed list or will be included as -#' well. +#' `epi_df` object. All objects will have `time_type`, `geo_type`, and `issue` +#' fields; named entries from the passed list or will be included as well. #' @param ... Additional arguments passed to methods. -#' @return An `epi_tibble` object. +#' @return An `epi_df` object. #' -#' @details An `epi_tibble` is a tibble with (at least) the following columns: +#' @details An `epi_df` is a tibble with (at least) the following columns: #' #' * `geo_value`: the geographic value associated with each measurement. #' * `time_value`: the time value associated with each measurement. #' #' Other columns can be considered as measured variables, which we also broadly -#' refer to as signal variables. An `epi_tibble` object also has metadata with -#' (at least) the following fields: +#' refer to as signal variables. An `epi_df` object also has metadata with (at +#' least) the following fields: #' #' * `geo_type`: the type for the geo values. #' * `time_type`: the type for the time values. 
@@ -37,19 +36,19 @@ #' #' The first two fields above, `geo_type` and `time_type`, can usually be #' inferred from the `geo_value` and `time_value` columns, respectively. The -#' last field above, `issue`, is the most unique to the `epi_tibble` format. -#' In a typical case, this represents the maximum of the issues of individual +#' last field above, `issue`, is the most unique to the `epi_df` format. In a +#' typical case, this represents the maximum of the issues of individual #' signal values measured in the data set; hence we would also say that the #' data set is comprised of all signal values observed "as of" the given issue #' in the metadata. #' -#' Metadata for an `epi_tibble` object `x` can be accessed (and altered) via +#' Metadata for an `epi_df` object `x` can be accessed (and altered) via #' `attributes(x)$metadata`. More information on geo types, time types, and #' issues is given below. #' #' @section Geo types: -#' The following geo types are supported in an `epi_tibble`. Their geo coding -#' (specification of geo values for each geo type) is also described below. +#' The following geo types are supported in an `epi_df`. Their geo coding +#' (specification of geo values for each geo type) is also described below. #' #' * `"county"`: each observation corresponds to a U.S. county; coded by 5-digit #' FIPS code. @@ -69,7 +68,7 @@ #' geo type is labeled as "custom". #' #' @section Time types: -#' The following time types are supported in an `epi_tibble`. Their time coding +#' The following time types are supported in an `epi_df`. Their time coding #' (specification of time values for each time type) is also described below. #' #' * `"day-time"`: each observation corresponds to a time on a given day (measured @@ -88,19 +87,19 @@ #' todo #' #' @export -as.epi_tibble = function(x, ...) { - UseMethod("as.epi_tibble") +as.epi_df = function(x, ...) { + UseMethod("as.epi_df") } -#' @method as.epi_tibble epi_tibble -#' @describeIn as.epi_tibble Simply returns the `epi_tibble` object unchanged. +#' @method as.epi_df epi_df +#' @describeIn as.epi_df Simply returns the `epi_df` object unchanged. #' @export -as.epi_tibble.epi_tibble = function(x, ...) { +as.epi_df.epi_df = function(x, ...) { return(x) } -#' @method as.epi_tibble tibble -#' @describeIn as.epi_tibble The input tibble `x` must contain the columns +#' @method as.epi_df tibble +#' @describeIn as.epi_df The input tibble `x` must contain the columns #' `geo_value` and `time_value`. All other columns will be preserved as is, #' and treated as measured variables. If `issue` is missing, then the function #' will look for `issue` as a column of `x`, or as a field in its metadata @@ -108,8 +107,8 @@ as.epi_tibble.epi_tibble = function(x, ...) { #' current day-time will be used. #' @importFrom rlang .data abort #' @export -as.epi_tibble.tibble = function(x, geo_type, time_type, issue, - additional_metadata = list(), ...) { +as.epi_df.tibble = function(x, geo_type, time_type, issue, + additional_metadata = list(), ...) 
{ # Check that we have geo_value and time_value columns if (!("geo_value" %in% names(x))) { abort("`x` must contain a `geo_value` column.") @@ -198,9 +197,9 @@ as.epi_tibble.tibble = function(x, geo_type, time_type, issue, metadata$issue = issue metadata = c(metadata, additional_metadata) - # Convert to a tibble, apply epi_tibble class, attach metadata + # Convert to a tibble, apply epi_df class, attach metadata if (!inherits(x, "tibble")) x = tibble::as_tibble(x) - class(x) = c("epi_tibble", class(x)) + class(x) = c("epi_df", class(x)) attributes(x)$metadata = metadata # Reorder columns (geo_value, time_value, ...) and return @@ -208,30 +207,30 @@ as.epi_tibble.tibble = function(x, geo_type, time_type, issue, return(x) } -#' @method as.epi_tibble data.frame -#' @describeIn as.epi_tibble The input data frame `x` must contain the columns +#' @method as.epi_df data.frame +#' @describeIn as.epi_df The input data frame `x` must contain the columns #' `geo_value` and `time_value`. All other columns will be preserved as is, #' and treated as measured variables. If `issue` is missing, then the function #' will look for `issue` as a column of `x`, or as a field in its metadata #' (stored in its attributes), to infer the issue; if this fails, then the -#' current day-time will be used. +#' current day-time will be used. #' @export -as.epi_tibble.data.frame = as.epi_tibble.tibble +as.epi_df.data.frame = as.epi_df.tibble -#' Print `epi_tibble` object +#' Print `epi_df` object #' -#' Prints a brief summary of the `epi_tibble` object, then prints the underlying -#' tibble. +#' Prints a brief summary of the `epi_df` object, then prints the underlying +#' tibble. #' -#' @param x The `epi_tibble` object. +#' @param x The `epi_df` object. #' @param ... Additional arguments passed to `print.tibble()` to print the #' data. -#' @return The `epi_tibble` object, unchanged. +#' @return The `epi_df` object, unchanged. #' -#' @method print epi_tibble +#' @method print epi_df #' @export -print.epi_tibble = function(x, ...) { - cat("An `epi_tibble` object, with metadata:\n") +print.epi_df = function(x, ...) { + cat("An `epi_df` object, with metadata:\n") cat(sprintf("* %-10s= %s\n", "geo_type", attributes(x)$metadata$geo_type)) cat(sprintf("* %-10s= %s\n", "time_type", attributes(x)$metadata$time_type)) cat(sprintf("* %-10s= %s\n", "issue", attributes(x)$metadata$issue)) @@ -239,28 +238,28 @@ print.epi_tibble = function(x, ...) { NextMethod() } -#' @method head epi_tibble +#' @method head epi_df #' @importFrom utils head #' @export -head.epi_tibble = function(x, ...) { +head.epi_df = function(x, ...) { head(tibble::as_tibble(x), ...) } -#' Summarize `epi_tibble` object +#' Summarize `epi_df` object #' -#' Prints a variety of summary statistics about the `epi_tibble` object, such as +#' Prints a variety of summary statistics about the `epi_df` object, such as #' the time range included and geographic coverage. #' -#' @param object The `epi_tibble` object. +#' @param object The `epi_df` object. #' @param ... Additional arguments, for compatibility with `summary()`. #' Currently unused. #' @return No return value; called only to print summary statistics. #' -#' @method summary epi_tibble +#' @method summary epi_df #' @importFrom stats median #' @export -summary.epi_tibble = function(object, ...) { - cat("An `epi_tibble` object, with metadata:\n") +summary.epi_df = function(object, ...) 
{ + cat("An `epi_df` object, with metadata:\n") cat(sprintf("* %-10s= %s\n", "geo_type", attributes(x)$metadata$geo_type)) cat(sprintf("* %-10s= %s\n", "time_type", attributes(x)$metadata$time_type)) cat(sprintf("* %-10s= %s\n", "issue", attributes(x)$metadata$issue)) @@ -273,29 +272,29 @@ summary.epi_tibble = function(object, ...) { dplyr::summarize(median(.data$num))))) } -#' Group or ungroup `epi_tibble` object +#' Group or ungroup `epi_df` object #' -#' Groups or ungroups an `epi_tibble`, preserving class and attributes. +#' Groups or ungroups an `epi_df`, preserving class and attributes. #' -#' @method group_by epi_tibble +#' @method group_by epi_df #' @importFrom dplyr group_by #' @export -group_by.epi_tibble = function(x, ...) { +group_by.epi_df = function(x, ...) { metadata = attributes(x)$metadata x = NextMethod() - class(x) = c("epi_tibble", class(x)) + class(x) = c("epi_df", class(x)) attributes(x)$metadata = metadata return(x) } -#' @method ungroup epi_tibble -#' @rdname group_by.epi_tibble +#' @method ungroup epi_df +#' @rdname group_by.epi_df #' @importFrom dplyr ungroup #' @export -ungroup.epi_tibble = function(x, ...) { +ungroup.epi_df = function(x, ...) { metadata = attributes(x)$metadata x = NextMethod() - class(x) = c("epi_tibble", class(x)) + class(x) = c("epi_df", class(x)) attributes(x)$metadata = metadata return(x) } diff --git a/R/epitools.R b/R/epiprocess.R similarity index 72% rename from R/epitools.R rename to R/epiprocess.R index 731fbe04..0749647f 100644 --- a/R/epitools.R +++ b/R/epiprocess.R @@ -1,9 +1,9 @@ -#' epitools: Tools for basic signal processing in epidemiology +#' epiprocess: Tools for basic signal processing in epidemiology #' #' This package introduces a common data structure for epidemiological data sets #' measured over space and time, and offers associated utilities to perform #' basic signal processing tasks. #' #' @docType package -#' @name epitools +#' @name epiprocess NULL diff --git a/R/outliers.R b/R/outliers.R index c1a2572f..0bbcc58d 100644 --- a/R/outliers.R +++ b/R/outliers.R @@ -1,8 +1,10 @@ -#' Detect outliers in a variable in an `epi_tibble` object +#' Detect outliers in a variable in an `epi_df` object #' -#' Applies one or more outlier detection methods to a variable in an -#' `epi_tibble` object, and optionally aggregates the results to create -#' consensus results. +#' Applies one or more outlier detection methods to a variable in an `epi_df` +#' object, and optionally aggregates the results to create consensus results. +#' See the [outliers +#' vignette](https://cmu-delphi.github.io/epitools/articles/outliers.html) for +#' examples. #' #' @details Each outlier detection method, one per row of the passed `methods` #' tibble, is a function that must take as its first two arguments `x` and @@ -19,7 +21,7 @@ #' "stl", shorthand for `detect_outliers_stl()`, which detects outliers via an #' STL decomposition. #' -#' @param x The `epi_tibble` object under consideration. +#' @param x The `epi_df` object under consideration. #' @param var The variable in `x` on which to run outlier detection. #' @param methods A tibble specifying the method(s) to use for outlier #' detection, with one row per method, and the following columns: @@ -41,9 +43,9 @@ #' that setting `new_col_name` equal to an existing column name will overwrite #' this column. 
#' -#' @return An `epi_tibble` object given by appending a new column to `x`, named +#' @return An `epi_df` object given by appending a new column to `x`, named #' according to the `new_col_name` argument, containing the outlier detection -#' thresholds and replacement values from all detection methods. +#' thresholds and replacement values from all detection methods. #' #' @importFrom dplyr group_modify mutate select #' @importFrom purrr map pmap_dfc @@ -57,8 +59,8 @@ detect_outliers = function(x, var, abbr = "rm"), combiner = c("median", "mean", "none"), new_col_name = "outlier_info") { - # Check we have an `epi_tibble` object - if (!inherits(x, "epi_tibble")) abort("`x` must be of class `epi_tibble`.") + # Check we have an `epi_df` object + if (!inherits(x, "epi_df")) abort("`x` must be of class `epi_df`.") # Check that we have a variable to do computations on if (missing(var)) abort("`var` must be specified.") @@ -79,7 +81,7 @@ detect_outliers = function(x, var, new_col_name = new_col_name) # Attach the class and metadata and return -class(x) = c("epi_tibble", class(x)) +class(x) = c("epi_df", class(x)) attributes(x)$metadata = metadata return(x) } @@ -149,7 +151,7 @@ detect_outliers_one_grp = function(.data_group, #' Detects outliers based on a distance from the rolling median specified in #' terms of multiples of the rolling interquartile range (IQR). #' -#' @param x The `epi_tibble` object under consideration. +#' @param x The `epi_df` object under consideration. #' @param var The variable in `x` on which to run outlier detection. #' @param n Number of time steps to use in the rolling window. Default is 21. #' @param log_transform Should a log transform be applied before running outlier @@ -240,7 +242,7 @@ detect_outliers_rolling_median = detect_outliers_rm #' are exactly as in `detect_outliers_rm()`; refer to its help file for their #' description. #' -#' @param x The `epi_tibble` object under consideration. +#' @param x The `epi_df` object under consideration. #' @param var The variable in `x` on which to run outlier detection. #' @param n_trend Number of time steps to use in the rolling window for trend. #' Default is 21. diff --git a/R/pct_change.R b/R/pct_change.R index 99d1c5fe..a2b47830 100644 --- a/R/pct_change.R +++ b/R/pct_change.R @@ -1,11 +1,11 @@ -#' Compute percentage change of a variable in an `epi_tibble` object +#' Compute percentage change of a variable in an `epi_df` object #' -#' Computes the percentage change of a variable in an `epi_tibble` object. See -#' the [percentage change -#' vignette](https://cmu-delphi.github.io/epitools/articles/pct-change.html) for -#' examples. +#' Computes the percentage change of a variable in an `epi_df` object. See the +#' [percentage change +#' vignette](https://cmu-delphi.github.io/epitools/articles/pct_change.html) for +#' examples. #' -#' @param x The `epi_tibble` object under consideration. +#' @param x The `epi_df` object under consideration. #' @param var The variable in `x` whose percentage change values are to be #' computed. #' @param n Number of time steps to use in the running window. For example, if @@ -33,7 +33,7 @@ #' metadata. Read the documentation for [epi_slide()] for more details. #' @param na_rm Should missing values be removed before the computation? Default #' is `TRUE`. 
-#' @return An `epi_tibble` object given by appending a new column to `x`, named +#' @return An `epi_df` object given by appending a new column to `x`, named #' according to the `new_col_name` argument, containing the percentage change #' values. #' diff --git a/R/slide.R b/R/slide.R index 5cce4da0..0b7ef1da 100644 --- a/R/slide.R +++ b/R/slide.R @@ -1,8 +1,8 @@ -#' Slide a function over variables in an `epi_tibble` object +#' Slide a function over variables in an `epi_df` object #' -#' Slides a given function over variables in an `epi_tibble` object. See the -#' [slide vignette](https://cmu-delphi.github.io/epitools/articles/slide.html) -#' for examples. +#' Slides a given function over variables in an `epi_df` object. See the [slide +#' vignette](https://cmu-delphi.github.io/epitools/articles/slide.html) for +#' examples. #' #' @details To "slide" means to apply a function or formula over a running #' window of `n` time steps, where the unit (the meaning of one time step) is @@ -12,7 +12,7 @@ #' `time_step` argument (which if specified would override the default choice #' based on the metadata). #' -#' @param x The `epi_tibble` object under consideration. +#' @param x The `epi_df` object under consideration. #' @param slide_fun Function or formula to slide over variables in `x`. To #' "slide" means to apply a function or formula over a running window of `n` #' time steps (where one time step is typically one day or one week; see @@ -52,7 +52,7 @@ #' `time_type` is "day-time"). #' @param ... Additional arguments to pass to the function or formula specified #' via `slide_fun`. -#' @return An `epi_tibble` object given by appending a new column to `x`, named +#' @return An `epi_df` object given by appending a new column to `x`, named #' according to the `new_col_name` argument, containing the slide values. #' #' @details In order to slide a function or formula in a suitable grouped @@ -67,8 +67,8 @@ epi_slide = function(x, slide_fun, n = 14, align = c("right", "center", "left"), before, complete = FALSE, new_col_name = "slide_value", new_col_type = c("dbl", "int", "lgl", "chr", "list"), time_step, ...) { - # Check we have an `epi_tibble` object - if (!inherits(x, "epi_tibble")) abort("`x` must be of class `epi_tibble`.") + # Check we have an `epi_df` object + if (!inherits(x, "epi_df")) abort("`x` must be of class `epi_df`.") # Which slide_index function? new_col_type = match.arg(new_col_type) @@ -125,7 +125,7 @@ epi_slide = function(x, slide_fun, n = 14, align = c("right", "center", "left"), ...) # Attach the class and metadata and return - class(x) = c("epi_tibble", class(x)) + class(x) = c("epi_df", class(x)) attributes(x)$metadata = metadata return(x) } diff --git a/_pkgdown.yml b/_pkgdown.yml index 87fc22b7..a03fb348 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -13,9 +13,9 @@ articles: desc: Basic usage and examples. navbar: ~ contents: - - epitools + - epiprocess - slide - - pct-change + - pct_change - derivative - correlation - aggregation diff --git a/docs/404.html b/docs/404.html index 7e665f37..84502428 100644 --- a/docs/404.html +++ b/docs/404.html @@ -6,7 +6,7 @@ -Page not found (404) • epitools +Page not found (404) • epiprocess @@ -70,7 +70,7 @@ - epitools + epiprocess 1.0.0 @@ -84,7 +84,7 @@
  • - Get started + Get started
  • Reference @@ -99,6 +99,9 @@
  • 1. Slide a computation over signal values
  • +
  • + 2. Compute percentage change over time +
  • 3. Estimate derivatives of signals
  • @@ -112,7 +115,7 @@ 6. Detect and correct outliers in signals
  • - 6. Work with issue dates and archive objects + 7. Work with issue dates and archive objects
  • diff --git a/docs/articles/aggregation.html b/docs/articles/aggregation.html index d3a174ff..1a8529b3 100644 --- a/docs/articles/aggregation.html +++ b/docs/articles/aggregation.html @@ -5,14 +5,14 @@ -5. Aggregate signals over space and time • epitools +5. Aggregate signals over space and time • epiprocess - + - + - + - - - - - -Get started with `epitools` • epitools - - - - - - - - - - -
    -
    - - - - -
    -
    - - - - -

    This package introduces a common data structure for epidemiological data sets measured over space and time, and offers associated utilities to perform basic signal processing tasks.

    -
    -

    -Installing

    -

    This package is not on CRAN yet, so it can be installed using the devtools package:

    -
    devtools::install_github("cmu-delphi/epitools", ref = "main")
    -

    Building the vignettes, such as this getting started guide, takes a significant amount of time. They are not included in the package by default. If you want to include vignettes, then use this modified command:

    -
    devtools::install_github("cmu-delphi/epitools", ref = "main",
    -                         build_vignettes = TRUE, dependencies = TRUE)
    -
    -
    -

    -Getting data into epi_tibble format

    -

    We’ll start by showing how to get data into epi_tibble format, which is just a tibble with a bit of special structure, and is the format assumed by all of the functions in the epitools package. An epi_tibble object has (at least) the following columns:

    -
      -
    • -geo_value: the geographic value associated with each measurement.
    • -
    • -time_value: the time value associated with each measurement.
    • -
    -

    Other columns can be considered as measured variables, which we also broadly refer to as signal variables. To learn more about the epi_tibble format, you can read the documentation for as.epi_tibble().

    -
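Before turning to a real data set, here is a toy sketch (with made-up values, and a hypothetical cases column chosen purely for illustration) of the minimal structure that as.epi_tibble() expects:

library(epitools)
library(tibble)

# A tibble with the two required columns plus one signal column
toy <- tibble(geo_value  = c("ca", "ca", "ny"),
              time_value = as.Date(c("2020-06-01", "2020-06-02", "2020-06-01")),
              cases      = c(3, 5, 2))

as.epi_tibble(toy, geo_type = "state", time_type = "day")

Since issue is omitted here, it would fall back to the current day-time, as described below.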

    A data frame or tibble that has geo_value and time_value columns can be converted into an epi_tibble object, using the function as.epi_tibble(). As an example, we’ll look at daily cumulative COVID-19 cases for 4 states (CA, FL, NY, and TX) in the U.S., over a year spanning mid 2020 to mid 2021, using the covidcast package to fetch this data from the COVIDcast API.

    -
    library(covidcast)
    -
    -case_data <- covidcast_signal(data_source = "jhu-csse",
    -                              signal = "confirmed_cumulative_num",
    -                              start_day = "2020-06-01",
    -                              end_day = "2021-5-31",
    -                              geo_type = "state",
    -                              geo_values = c("ca", "fl", "ny", "tx"))
    -
    -class(case_data)
    -
    ## [1] "covidcast_signal" "data.frame"
    -
    colnames(case_data)
    -
    ##  [1] "data_source"         "signal"              "geo_value"          
    -##  [4] "time_value"          "source"              "geo_type"           
    -##  [7] "time_type"           "issue"               "lag"                
    -## [10] "missing_value"       "missing_stderr"      "missing_sample_size"
    -## [13] "value"               "stderr"              "sample_size"
    -

As we can see, a data frame returned by covidcast::covidcast_signal() has the columns required for an epi_tibble object (along with many others). A call to as.epi_tibble(), with further specification of some relevant metadata, brings the data frame into epi_tibble format.

    -
    library(epitools)
    -library(dplyr)
    -
    -x <- as.epi_tibble(case_data,
    -                   geo_type = "state",
    -                   time_type = "day",
    -                   issue = max(case_data$issue)) %>%
    -  rename("cases_cum" = value) %>%
    -  select(geo_value, time_value, cases_cum)
    -
    -class(x)
    -
    ## [1] "epi_tibble" "tbl_df"     "tbl"        "data.frame"
    - -
    ## An `epi_tibble` object, with metadata:
    -## * geo_type  = state
    -## * time_type = day
    -## * issue     = 2022-01-04
    -## 
-## Summary of space-time coverage:
    -## * earliest time value              = 2020-06-01
    -## * latest time value                = 2021-05-31
    -## * median geo values per time value = 4
    -
    head(x)
    -
    ## # A tibble: 6 × 3
    -##   geo_value time_value cases_cum
    -##   <chr>     <date>         <dbl>
    -## 1 ca        2020-06-01    116245
    -## 2 fl        2020-06-01     56830
    -## 3 ny        2020-06-01    371711
    -## 4 tx        2020-06-01     65593
    -## 5 ca        2020-06-02    118665
    -## 6 fl        2020-06-02     57447
    -
    attributes(x)$metadata
    -
    ## $geo_type
    -## [1] "state"
    -## 
    -## $time_type
    -## [1] "day"
    -## 
    -## $issue
    -## [1] "2022-01-04"
    -
    -
    -

    -Some info about required metadata

    -

In general, an epi_tibble object must have the following fields in its metadata:

    -
      -
    • -geo_type: the type for the geo values.
    • -
    • -time_type: the type for the time values.
    • -
    • -issue: the time value at which the given data set was issued.
    • -
    -

    In a typical case, the issue field represents the maximum of the issues of individual signal values measured in the data set; hence we would also say that the data set is comprised of all signal values observed “as of” the given issue in the metadata.

    -

If any of the geo_type, time_type, or issue arguments are missing in a call to as.epi_tibble(), then the function will try to infer them from the passed object. (The documentation for as.epi_tibble() gives more details.)

    -
    x <- as.epi_tibble(case_data) %>%
    -  rename("cases_cum" = value) %>%
    -  select(geo_value, time_value, cases_cum)
    -
    -attributes(x)$metadata
    -
    ## $geo_type
    -## [1] "state"
    -## 
    -## $time_type
    -## [1] "day"
    -## 
    -## $issue
    -## [1] "2022-01-04"
    -
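Because the metadata is stored in the object's attributes, it can also be altered directly when needed; a minimal sketch (overwriting the issue field with a made-up date, purely for illustration):

# Overwrite the issue field in the metadata, then inspect it
attributes(x)$metadata$issue <- as.Date("2021-06-15")
attributes(x)$metadata$issue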
    -
    -

    -Working with epi_tibble objects downstream

    -

Data in epi_tibble format should be easy to work with downstream. In the other vignettes, we'll walk through some basic signal processing tasks using functions provided in the epitools package; of course, we can also write custom code for other downstream uses, like plotting, which is pretty easy to do with ggplot2.

    -
    library(ggplot2)
    -theme_set(theme_bw())
    -
    -ggplot(x, aes(x = time_value, y = cases_cum, color = geo_value)) +
    -  geom_line() +
    -  scale_x_date(minor_breaks = "month", date_labels = "%b %y") +
    -  labs(x = "Date", y = "Cumulative COVID-19 cases", color = "State")
    -

(Figure: cumulative COVID-19 cases over time, by state.)

    As a last example, we’ll look at data on daily new (not cumulative) SARS cases in Canada in 2003, from the outbreaks package.

    -
    x <- outbreaks::sars_canada_2003 %>%
    -  mutate(geo_value = "ca") %>%
    -  rename(time_value = date) %>%
    -  select(geo_value, time_value, starts_with("cases")) %>%
    -  as.epi_tibble(geo_type = "nation")
    -
    -head(x)
    -
    ## # A tibble: 6 × 6
    -##   geo_value time_value cases_travel cases_household cases_healthcare cases_other
    -##   <chr>     <date>            <int>           <int>            <int>       <int>
    -## 1 ca        2003-02-23            1               0                0           0
    -## 2 ca        2003-02-24            0               0                0           0
    -## 3 ca        2003-02-25            0               0                0           0
    -## 4 ca        2003-02-26            0               1                0           0
    -## 5 ca        2003-02-27            0               0                0           0
    -## 6 ca        2003-02-28            1               0                0           0
    -
    library(tidyr)
    -x <- x %>%
    -  pivot_longer(starts_with("cases"), names_to = "type") %>%
    -  mutate(type = substring(type, 7))
    -
    -yrange <- range(x %>% group_by(time_value) %>%
    -                  summarize(value = sum(value)) %>% pull(value))
    -
    -ggplot(x, aes(x = time_value, y = value)) +
    -  geom_col(aes(y = value, fill = type)) +
    -  scale_x_date(minor_breaks = "month", date_labels = "%b %y") +
    -  scale_y_continuous(breaks = yrange[1]:yrange[2]) +
    -  labs(x = "Date", y = "SARS cases in Canada", fill = "Type")
    -

(Figure: daily SARS cases in Canada over time, by case type.)
    -
    - - - -
    - - - - -
    - - - - - - diff --git a/docs/articles/epitools_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/epitools_files/figure-html/unnamed-chunk-5-1.png deleted file mode 100644 index 46e9b82c..00000000 Binary files a/docs/articles/epitools_files/figure-html/unnamed-chunk-5-1.png and /dev/null differ diff --git a/docs/articles/epitools_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/epitools_files/figure-html/unnamed-chunk-6-1.png deleted file mode 100644 index da7b14b1..00000000 Binary files a/docs/articles/epitools_files/figure-html/unnamed-chunk-6-1.png and /dev/null differ diff --git a/docs/articles/epitools_files/figure-html/unnamed-chunk-7-1.png b/docs/articles/epitools_files/figure-html/unnamed-chunk-7-1.png deleted file mode 100644 index 551927d0..00000000 Binary files a/docs/articles/epitools_files/figure-html/unnamed-chunk-7-1.png and /dev/null differ diff --git a/docs/articles/epitools_files/header-attrs-2.10/header-attrs.js b/docs/articles/epitools_files/header-attrs-2.10/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/docs/articles/epitools_files/header-attrs-2.10/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/index.html b/docs/articles/index.html index 5ff511ec..db20864f 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -6,7 +6,7 @@ -Articles • epitools +Articles • epiprocess @@ -70,7 +70,7 @@ - epitools + epiprocess 1.0.0 @@ -84,7 +84,7 @@
  • - Get started + Get started
  • Reference @@ -99,6 +99,9 @@
  • 1. Slide a computation over signal values
  • +
  • + 2. Compute percentage change over time +
  • 3. Estimate derivatives of signals
  • @@ -112,7 +115,7 @@ 6. Detect and correct outliers in signals
  • - 6. Work with issue dates and archive objects + 7. Work with issue dates and archive objects
  • @@ -145,10 +148,12 @@

    Using the package

    Basic usage and examples.

    -
    Get started with `epitools`
    +
    Get started with `epiprocess`
    1. Slide a computation over signal values
    +
    2. Compute percentage change over time
    +
    3. Estimate derivatives of signals
    4. Correlate signals over space and time
    @@ -157,7 +162,7 @@

    Using the package

    6. Detect and correct outliers in signals
    -
    6. Work with issue dates and archive objects
    +
    7. Work with issue dates and archive objects
    diff --git a/docs/articles/issues.html b/docs/articles/issues.html index 5e47f66d..9824f2e7 100644 --- a/docs/articles/issues.html +++ b/docs/articles/issues.html @@ -5,14 +5,14 @@ -6. Work with issue dates and archive objects • epitools +7. Work with issue dates and archive objects • epiprocess - - + + - + - + - + @@ -70,7 +70,7 @@ - epitools + epiprocess 1.0.0 @@ -84,7 +84,7 @@
  • - Get started + Get started
  • Reference @@ -99,6 +99,9 @@
  • 1. Slide a computation over signal values
  • +
  • + 2. Compute percentage change over time +
  • 3. Estimate derivatives of signals
  • @@ -112,7 +115,7 @@ 6. Detect and correct outliers in signals
  • - 6. Work with issue dates and archive objects + 7. Work with issue dates and archive objects
  • diff --git a/docs/index.html b/docs/index.html index af1e28fd..da0227da 100644 --- a/docs/index.html +++ b/docs/index.html @@ -5,7 +5,7 @@ -Tools for basic signal processing in epidemiology • epitools +Tools for basic signal processing in epidemiology • epiprocess @@ -32,7 +32,7 @@ - epitools + epiprocess 1.0.0 @@ -46,7 +46,7 @@
  • - Get started + Get started
  • Reference @@ -61,6 +61,9 @@
  • 1. Slide a computation over signal values
  • +
  • + 2. Compute percentage change over time +
  • 3. Estimate derivatives of signals
  • @@ -74,7 +77,7 @@ 6. Detect and correct outliers in signals
  • - 6. Work with issue dates and archive objects + 7. Work with issue dates and archive objects
  • @@ -98,9 +101,9 @@
    -
    +
    +epiprocess

    This package introduces a common data structure for epidemiological data sets measured over space and time, and offers associated utilities to perform basic signal processing tasks.

    @@ -109,8 +112,6 @@ @@ -86,7 +86,7 @@
  • - Get started + Get started
  • Reference @@ -101,6 +101,9 @@
  • 1. Slide a computation over signal values
  • +
  • + 2. Compute percentage change over time +
  • 3. Estimate derivatives of signals
  • @@ -114,7 +117,7 @@ 6. Detect and correct outliers in signals
  • - 6. Work with issue dates and archive objects + 7. Work with issue dates and archive objects
  • diff --git a/docs/reference/cor_lagged.html b/docs/reference/cor_lagged.html deleted file mode 100644 index f6ad1c6a..00000000 --- a/docs/reference/cor_lagged.html +++ /dev/null @@ -1,245 +0,0 @@ - - - - - - - - -Compute lagged correlations between variables in an <code>epi_tibble</code> object — cor_lagged • epitools - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Computes lagged correlations between variables in an epi_tibble object, -allowing for grouping by geo value, time value, or any other variables. See -the correlations vignette -for examples.

    -
    - -
    cor_lagged(
    -  x,
    -  var1,
    -  var2,
    -  dt1 = 0,
    -  dt2 = 0,
    -  by = geo_value,
    -  use = "na.or.complete",
    -  method = c("pearson", "kendall", "spearman")
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - -
    x

    The epi_tibble object under consideration.

    var1, var2

    The variables in x to correlate.

    dt1, dt2

Time shifts to consider for the two variables, respectively, -before computing correlations. Negative shifts translate into a lag -value and positive shifts into a lead value; for example, if dt = -1, -then the new value on June 2 is the original value on June 1; if dt = 1, -then the new value on June 2 is the original value on June 3; if dt = 0, -then the values are left as is. Default is 0 for both dt1 and dt2. Note -that the time shifts are always performed per geo value; see details.

    by

The variable(s) to group by, for the correlation computation. If -geo_value, the default, then correlations are computed for each geo -value, over all time; if time_value, then correlations are computed for -each time, over all geo values. A grouping can also be specified using any -number of columns of x; for example, we can use by = c(geo_value, age_group), assuming x has a column age_group, in order to compute -correlations for each pair of geo value and age group. To omit a grouping -entirely, use by = NULL. Note that the grouping here is always applied -after the time shifts; see details.

    use, method

    Arguments to pass to cor(), with "na.or.complete" the -default for use (different than cor()) and "pearson" the default for -method (same as cor()).

    - -

    Value

    - -

A tibble with the grouping columns first (geo_value, time_value, -or possibly others), and then a column cor, which gives the correlation.

    -

    Details

    - -

    Time shifts are always performed first, grouped by geo value (this -way they amount to shifting each individual time series). After this, the -geo grouping is removed, and the grouping specified in the by argument is -applied. Then, correlations are computed.
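For instance, a minimal sketch (assuming an epi_tibble x with hypothetical signal columns case_rate and death_rate) that correlates each geo value's death rate with its case rate from ten days earlier:

# dt1 = -10 lags case_rate by 10 days before correlating, per geo value
cor_lagged(x, case_rate, death_rate, dt1 = -10, by = geo_value)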

    - -
    - -
    - - -
    - - -
    -


    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/detect_outliers.html b/docs/reference/detect_outliers.html index e2680f5c..4929f5fd 100644 --- a/docs/reference/detect_outliers.html +++ b/docs/reference/detect_outliers.html @@ -6,7 +6,7 @@ -Detect outliers in a variable in an <code>epi_tibble</code> object — detect_outliers • epitools +Detect outliers in a variable in an <code>epi_df</code> object — detect_outliers • epiprocess @@ -39,10 +39,11 @@ - - + + @@ -73,7 +74,7 @@ - epitools + epiprocess 1.0.0
    @@ -87,7 +88,7 @@
  • - Get started + Get started
  • Reference @@ -102,6 +103,9 @@
  • 1. Slide a computation over signal values
  • +
  • + 2. Compute percentage change over time +
  • 3. Estimate derivatives of signals
  • @@ -115,7 +119,7 @@ 6. Detect and correct outliers in signals
  • - 6. Work with issue dates and archive objects + 7. Work with issue dates and archive objects
  • @@ -140,15 +144,16 @@
    -

    Applies one or more outlier detection methods to a variable in an -epi_tibble object, and optionally aggregates the results to create -consensus results.

    +

    Applies one or more outlier detection methods to a variable in an epi_df +object, and optionally aggregates the results to create consensus results. +See the outliers vignette for +examples.

    detect_outliers(
    @@ -164,7 +169,7 @@ 

    Arg x -

    The epi_tibble object under consideration.

    +

    The epi_df object under consideration.

    var @@ -203,7 +208,7 @@

    Arg

    Value

    -

    An epi_tibble object given by appending a new column to x, named +

    An epi_df object given by appending a new column to x, named according to the new_col_name argument, containing the outlier detection thresholds and replacement values from all detection methods.
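For instance, a minimal sketch (assuming an epi_df x with a hypothetical cases column) that runs the default rolling-median method and median combiner:

# All detection settings left at their defaults; results land in "outlier_info"
detect_outliers(x, cases)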

    Details

    diff --git a/docs/reference/detect_outliers_rm.html b/docs/reference/detect_outliers_rm.html index 225a9618..3eb8bedb 100644 --- a/docs/reference/detect_outliers_rm.html +++ b/docs/reference/detect_outliers_rm.html @@ -6,7 +6,7 @@ -Detect outliers based on a rolling median — detect_outliers_rm • epitools +Detect outliers based on a rolling median — detect_outliers_rm • epiprocess @@ -72,7 +72,7 @@ - epitools + epiprocess 1.0.0
    @@ -86,7 +86,7 @@
  • - Get started + Get started
  • Reference @@ -101,6 +101,9 @@
  • 1. Slide a computation over signal values
  • +
  • + 2. Compute percentage change over time +
  • 3. Estimate derivatives of signals
  • @@ -114,7 +117,7 @@ 6. Detect and correct outliers in signals
  • - 6. Work with issue dates and archive objects + 7. Work with issue dates and archive objects
  • @@ -165,7 +168,7 @@

    Arg x -

    The epi_tibble object under consideration.

    +

    The epi_df object under consideration.

    var diff --git a/docs/reference/detect_outliers_stl.html b/docs/reference/detect_outliers_stl.html index 31371b2f..9dbfc692 100644 --- a/docs/reference/detect_outliers_stl.html +++ b/docs/reference/detect_outliers_stl.html @@ -6,7 +6,7 @@ -Detect outliers based on an STL decomposition — detect_outliers_stl • epitools +Detect outliers based on an STL decomposition — detect_outliers_stl • epiprocess @@ -71,7 +71,7 @@ - epitools + epiprocess 1.0.0

    @@ -85,7 +85,7 @@
  • - Get started + Get started
  • Reference @@ -100,6 +100,9 @@
  • 1. Slide a computation over signal values
  • +
  • + 2. Compute percentage change over time +
  • 3. Estimate derivatives of signals
  • @@ -113,7 +116,7 @@ 6. Detect and correct outliers in signals
  • - 6. Work with issue dates and archive objects + 7. Work with issue dates and archive objects
  • @@ -166,7 +169,7 @@

    Arg x -

    The epi_tibble object under consideration.

    +

    The epi_df object under consideration.

    var diff --git a/docs/reference/epi_slide.html b/docs/reference/epi_slide.html index 07abb106..42d7e65f 100644 --- a/docs/reference/epi_slide.html +++ b/docs/reference/epi_slide.html @@ -6,7 +6,7 @@ -Slide a function over variables in an <code>epi_tibble</code> object — epi_slide • epitools +Slide a function over variables in an <code>epi_df</code> object — epi_slide • epiprocess @@ -39,10 +39,9 @@ - - + + @@ -73,7 +72,7 @@ - epitools + epiprocess 1.0.0 @@ -87,7 +86,7 @@
  • - Get started + Get started
  • Reference @@ -102,6 +101,9 @@
  • 1. Slide a computation over signal values
  • +
  • + 2. Compute percentage change over time +
  • 3. Estimate derivatives of signals
  • @@ -115,7 +117,7 @@ 6. Detect and correct outliers in signals
  • - 6. Work with issue dates and archive objects + 7. Work with issue dates and archive objects
  • @@ -140,15 +142,14 @@
    -

    Slides a given function over variables in an epi_tibble object. See the -slide vignette -for examples.

    +

    Slides a given function over variables in an epi_df object. See the slide vignette for +examples.

    epi_slide(
    @@ -169,7 +170,7 @@ 

    Arg x -

    The epi_tibble object under consideration.

    +

    The epi_df object under consideration.

    slide_fun @@ -239,7 +240,7 @@

    Arg

    Value

    -

    An epi_tibble object given by appending a new column to x, named +

    An epi_df object given by appending a new column to x, named according to the new_col_name argument, containing the slide values.

    Details

    diff --git a/docs/reference/epitools.html b/docs/reference/epitools.html deleted file mode 100644 index 238b1c1e..00000000 --- a/docs/reference/epitools.html +++ /dev/null @@ -1,184 +0,0 @@ - - - - - - - - -epitools: Tools for basic signal processing in epidemiology — epitools • epitools - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    This package introduces a common data structure for epidemiological data sets -measured over space and time, and offers associated utilities to perform -basic signal processing tasks.

    -
    - - - - -
    - -
    - - -
    - - -
    -


    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/estimate_deriv.html b/docs/reference/estimate_deriv.html index 567eefc1..555b449f 100644 --- a/docs/reference/estimate_deriv.html +++ b/docs/reference/estimate_deriv.html @@ -6,7 +6,7 @@ -Estimate derivatives of a variable in an <code>epi_tibble</code> object — estimate_deriv • epitools +Estimate derivatives of a variable in an <code>epi_df</code> object — estimate_deriv • epiprocess @@ -39,10 +39,10 @@ - - + + @@ -73,7 +73,7 @@ - epitools + epiprocess 1.0.0
    @@ -87,7 +87,7 @@
  • - Get started + Get started
  • Reference @@ -102,6 +102,9 @@
  • 1. Slide a computation over signal values
  • +
  • + 2. Compute percentage change over time +
  • 3. Estimate derivatives of signals
  • @@ -115,7 +118,7 @@ 6. Detect and correct outliers in signals
  • - 6. Work with issue dates and archive objects + 7. Work with issue dates and archive objects
  • @@ -140,15 +143,15 @@
    -

    Estimates derivatives of a variable in an epi_tibble object, using a local -(in time) linear regression or alternative smoother. See the estimating derivatives vignette -for examples.

    +

    Estimates derivatives of a variable in an epi_df object, using a local (in +time) linear regression or alternative smoothing method. See the estimating derivatives vignette for +examples.

    estimate_deriv(
    @@ -172,7 +175,7 @@ 

    Arg x -

    The epi_tibble object under consideration.

    +

    The epi_df object under consideration.

    var @@ -256,7 +259,7 @@

    Arg

    Value

    -

    An epi_tibble object given by appending a new column to x, named +

    An epi_df object given by appending a new column to x, named according to the new_col_name argument, containing the derivative values.
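For instance, a minimal sketch (assuming an epi_df x with a hypothetical cases column) using the default local linear regression over a 14-step window:

# method = "lin" and n = 14 are the documented defaults
estimate_deriv(x, cases, method = "lin", n = 14)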

    Details

    diff --git a/docs/reference/index.html b/docs/reference/index.html index e96f3757..3e8e7209 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -6,7 +6,7 @@ -Function reference • epitools +Function reference • epiprocess @@ -70,7 +70,7 @@ - epitools + epiprocess 1.0.0
    @@ -84,7 +84,7 @@
  • - Get started + Get started
  • Reference @@ -99,6 +99,9 @@
  • 1. Slide a computation over signal values
  • +
  • + 2. Compute percentage change over time +
  • 3. Estimate derivatives of signals
  • @@ -112,7 +115,7 @@ 6. Detect and correct outliers in signals
  • - 6. Work with issue dates and archive objects + 7. Work with issue dates and archive objects
  • @@ -169,21 +172,15 @@

    as.epi_tibble()

    - -

    Convert data to epi_tibble format

    - - - -

    cor_lagged()

    +

    as.epi_df()

    -

    Compute lagged correlations between variables in an epi_tibble object

    +

    Convert data to epi_df format

    detect_outliers()

    -

    Detect outliers in a variable in an epi_tibble object

    +

    Detect outliers in a variable in an epi_df object

    @@ -198,46 +195,52 @@

    epi_cor()

    + +

    Compute lagged correlations between variables in an epi_df object

    + +

    epi_slide()

    -

    Slide a function over variables in an epi_tibble object

    +

    Slide a function over variables in an epi_df object

    -

    epitools

    +

    epiprocess

    -

    epitools: Tools for basic signal processing in epidemiology

    +

    epiprocess: Tools for basic signal processing in epidemiology

    estimate_deriv()

    -

    Estimate derivatives of a variable in an epi_tibble object

    +

    Estimate derivatives of a variable in an epi_df object

    -

    group_by(<epi_tibble>) ungroup(<epi_tibble>)

    +

    group_by(<epi_df>) ungroup(<epi_df>)

    -

    Group or ungroup epi_tibble object

    +

    Group or ungroup epi_df object

    pct_change()

    -

    Compute percentage change of a variable in an epi_tibble object

    +

    Compute percentage change of a variable in an epi_df object

    -

    print(<epi_tibble>)

    +

    print(<epi_df>)

    -

    Print epi_tibble object

    +

    Print epi_df object

    -

    summary(<epi_tibble>)

    +

    summary(<epi_df>)

    -

    Summarize epi_tibble object

    +

    Summarize epi_df object

    diff --git a/docs/reference/pct_change.html b/docs/reference/pct_change.html index 5a0556c7..4a2cb150 100644 --- a/docs/reference/pct_change.html +++ b/docs/reference/pct_change.html @@ -6,7 +6,7 @@ -Compute percentage change of a variable in an <code>epi_tibble</code> object — pct_change • epitools +Compute percentage change of a variable in an <code>epi_df</code> object — pct_change • epiprocess @@ -39,9 +39,9 @@ - - + @@ -73,7 +73,7 @@ - epitools + epiprocess 1.0.0

    @@ -87,7 +87,7 @@
  • - Get started + Get started
  • Reference @@ -102,6 +102,9 @@
  • 1. Slide a computation over signal values
  • +
  • + 2. Compute percentage change over time +
  • 3. Estimate derivatives of signals
  • @@ -115,7 +118,7 @@ 6. Detect and correct outliers in signals
  • - 6. Work with issue dates and archive objects + 7. Work with issue dates and archive objects
  • @@ -140,14 +143,14 @@
    -

    Computes the percentage change of a variable in an epi_tibble object. See -the percentage change vignette for +

    Computes the percentage change of a variable in an epi_df object. See the +percentage change vignette for examples.

    @@ -168,7 +171,7 @@

    Arg x -

    The epi_tibble object under consideration.

    +

    The epi_df object under consideration.

    var @@ -225,7 +228,7 @@

    Arg

    Value

    -

    An epi_tibble object given by appending a new column to x, named +

    An epi_df object given by appending a new column to x, named according to the new_col_name argument, containing the percentage change values.
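For instance, a minimal sketch (assuming an epi_df x with a hypothetical cases column) that computes percentage change over a 14-step window:

# n = 14 sets the length of the running window described above
pct_change(x, cases, n = 14)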

    See also

    diff --git a/docs/reference/pipe.html b/docs/reference/pipe.html index 333dd5c3..7698cd27 100644 --- a/docs/reference/pipe.html +++ b/docs/reference/pipe.html @@ -6,7 +6,7 @@ -Pipe operator — %>% • epitools +Pipe operator — %>% • epiprocess @@ -71,7 +71,7 @@ - epitools + epiprocess 1.0.0
    @@ -85,7 +85,7 @@
  • - Get started + Get started
  • Reference @@ -100,6 +100,9 @@
  • 1. Slide a computation over signal values
  • +
  • + 2. Compute percentage change over time +
  • 3. Estimate derivatives of signals
  • @@ -113,7 +116,7 @@ 6. Detect and correct outliers in signals
  • - 6. Work with issue dates and archive objects + 7. Work with issue dates and archive objects
  • diff --git a/index.md b/index.md index ae7e95cd..1e4914e0 100644 --- a/index.md +++ b/index.md @@ -1,4 +1,4 @@ -# epitools +# epiprocess This package introduces a common data structure for epidemiological data sets measured over space and time, and offers associated utilities to perform basic diff --git a/man/as.epi_tibble.Rd b/man/as.epi_tibble.Rd deleted file mode 100644 index ff82b568..00000000 --- a/man/as.epi_tibble.Rd +++ /dev/null @@ -1,142 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/epi_tibble.R -\name{as.epi_tibble} -\alias{as.epi_tibble} -\alias{as.epi_tibble.epi_tibble} -\alias{as.epi_tibble.tibble} -\alias{as.epi_tibble.data.frame} -\title{Convert data to \code{epi_tibble} format} -\usage{ -as.epi_tibble(x, ...) - -\method{as.epi_tibble}{epi_tibble}(x, ...) - -\method{as.epi_tibble}{tibble}(x, geo_type, time_type, issue, additional_metadata = list(), ...) - -\method{as.epi_tibble}{data.frame}(x, geo_type, time_type, issue, additional_metadata = list(), ...) -} -\arguments{ -\item{x}{The object to be converted. See the methods section below for -details on formatting of each input type.} - -\item{...}{Additional arguments passed to methods.} - -\item{geo_type}{The type for the geo values. If missing, then the function -will attempt to infer it from the geo values present; if this fails, then -it will be set to "custom".} - -\item{time_type}{The type for the time values. If missing, then the function -will attempt to infer it from the time values present; if this fails, then -it will be set to "custom".} - -\item{issue}{Issue to use for this data. If missing, then the function will -attempt to infer it from the passed object \code{x}; if this fails, then the -current day-time will be used.} - -\item{additional_metadata}{List of additional metadata to attach to the -\code{epi_tibble} object. All objects will have \code{time_type}, \code{geo_type}, and -\code{issue} fields; named entries from the passed list or will be included as -well.} -} -\value{ -An \code{epi_tibble} object. -} -\description{ -Converts a data frame or tibble into a format that is consistent with the -\code{epi_tibble} class, ensuring that it has a certain minimal set of columns, -and that it has certain minimal metadata. -} -\details{ -An \code{epi_tibble} is a tibble with (at least) the following columns: -\itemize{ -\item \code{geo_value}: the geographic value associated with each measurement. -\item \code{time_value}: the time value associated with each measurement. -} - -Other columns can be considered as measured variables, which we also broadly -refer to as signal variables. An \code{epi_tibble} object also has metadata with -(at least) the following fields: -\itemize{ -\item \code{geo_type}: the type for the geo values. -\item \code{time_type}: the type for the time values. -\item \code{issue}: the time value at which the given data set was issued. -} - -The first two fields above, \code{geo_type} and \code{time_type}, can usually be -inferred from the \code{geo_value} and \code{time_value} columns, respectively. The -last field above, \code{issue}, is the most unique to the \code{epi_tibble} format. -In a typical case, this represents the maximum of the issues of individual -signal values measured in the data set; hence we would also say that the -data set is comprised of all signal values observed "as of" the given issue -in the metadata. 
- -Metadata for an \code{epi_tibble} object \code{x} can be accessed (and altered) via -\code{attributes(x)$metadata}. More information on geo types, time types, and -issues is given below. -} -\section{Methods (by class)}{ -\itemize{ -\item \code{epi_tibble}: Simply returns the \code{epi_tibble} object unchanged. - -\item \code{tibble}: The input tibble \code{x} must contain the columns -\code{geo_value} and \code{time_value}. All other columns will be preserved as is, -and treated as measured variables. If \code{issue} is missing, then the function -will look for \code{issue} as a column of \code{x}, or as a field in its metadata -(stored in its attributes), to infer the issue; if this fails, then the -current day-time will be used. - -\item \code{data.frame}: The input data frame \code{x} must contain the columns -\code{geo_value} and \code{time_value}. All other columns will be preserved as is, -and treated as measured variables. If \code{issue} is missing, then the function -will look for \code{issue} as a column of \code{x}, or as a field in its metadata -(stored in its attributes), to infer the issue; if this fails, then the -current day-time will be used. -}} - -\section{Geo types}{ - -The following geo types are supported in an \code{epi_tibble}. Their geo coding -(specification of geo values for each geo type) is also described below. -\itemize{ -\item \code{"county"}: each observation corresponds to a U.S. county; coded by 5-digit -FIPS code. -\item \code{"hrr"}: each observation corresponds to a U.S. hospital referral region -(designed to represent regional healthcare markets); there are 306 HRRs in -the U.S; coded by number (nonconsecutive, between 1 and 457). -\item \code{"state"}: each observation corresponds to a U.S. state; coded by 2-digit -postal abbreviation (lowercase); -note that Puerto Rico is "pr" and Washington D.C. is "dc". -\item \code{"hhs"}: each observation corresponds to a U.S. HHS region; coded by number -(consecutive, between 1 and 10). -\item \code{"nation"}: each observation corresponds to a country; coded by ISO 31661- -alpha-2 country codes (lowercase). -} - -The above geo types come with aggregation utilities in the package, \emph{todo: -refer to relevant functionality, vignette, and so on}. An unrecognizable -geo type is labeled as "custom". -} - -\section{Time types}{ - -The following time types are supported in an \code{epi_tibble}. Their time coding -(specification of time values for each time type) is also described below. -\itemize{ -\item \code{"day-time"}: each observation corresponds to a time on a given day (measured -to the second); coded as a \code{POSIXct} object, as in \code{as.POSIXct("2020-06-09 18:45:40")}. -\item \code{"day"}: each observation corresponds to a day; coded as a \code{Date} object, -as in \code{as.Date("2020-06-09")}. -\item \code{"week"}: each observation corresponds to a week; the alignment can be -arbitrary (as to whether a week starts on a Monday, Tuesday, etc.; the -U.S. CDC definition of an epidemiological week starts on a Sunday); coded -as a \code{Date} object, representing the start date of week. -} - -An unrecognisable time type is labeled as "custom". 
-} - -\section{Issues}{ - -todo -} - diff --git a/man/cor_lagged.Rd b/man/cor_lagged.Rd deleted file mode 100644 index 9fc4a163..00000000 --- a/man/cor_lagged.Rd +++ /dev/null @@ -1,59 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/correlation.R -\name{cor_lagged} -\alias{cor_lagged} -\title{Compute lagged correlations between variables in an \code{epi_tibble} object} -\usage{ -cor_lagged( - x, - var1, - var2, - dt1 = 0, - dt2 = 0, - by = geo_value, - use = "na.or.complete", - method = c("pearson", "kendall", "spearman") -) -} -\arguments{ -\item{x}{The \code{epi_tibble} object under consideration.} - -\item{var1, var2}{The variables in \code{x} to correlate.} - -\item{dt1, dt2}{Time shifts to consider for the two variables, respectively, -before computing correlations. Negative shifts translate into in a lag -value and positive shifts into a lead value; for example, if \code{dt = -1}, -then the new value on June 2 is the original value on June 1; if \code{dt = 1}, -then the new value on June 2 is the original value on June 3; if \code{dt = 0}, -then the values are left as is. Default is 0 for both \code{dt1} and \code{dt2}. Note -that the time shifts are always performed \emph{per geo value}; see details.} - -\item{by}{The variable(s) to group by, for the correlation computation. If -\code{geo_value}, the default, then correlations are computed for each geo -value, over all time; if \code{time_value}, then correlations are computed for -each time, over all geo values. A grouping can also be any specified using -number of columns of \code{x}; for example, we can use \code{by = c(geo_value, age_group)}, assuming \code{x} has a column \code{age_group}, in order to compute -correlations for each pair of geo value and age group. To omit a grouping -entirely, use \code{by = NULL}. Note that the grouping here is always applied -\emph{after} the time shifts; see details.} - -\item{use, method}{Arguments to pass to \code{cor()}, with "na.or.complete" the -default for \code{use} (different than \code{cor()}) and "pearson" the default for -\code{method} (same as \code{cor()}).} -} -\value{ -An tibble with the grouping columns first (\code{geo_value}, \code{time_value}, -or possibly others), and then a column \code{cor}, which gives the correlation. -} -\description{ -Computes lagged correlations between variables in an \code{epi_tibble} object, -allowing for grouping by geo value, time value, or any other variables. See -the \href{https://cmu-delphi.github.io/epitools/articles/correlations.html}{correlations vignette} -for examples. -} -\details{ -Time shifts are always performed first, grouped by geo value (this -way they amount to shifting each individual time series). After this, the -geo grouping is removed, and the grouping specified in the \code{by} argument is -applied. Then, correlations are computed. 
-} diff --git a/man/detect_outliers.Rd b/man/detect_outliers.Rd index 07101c7c..3c624fe3 100644 --- a/man/detect_outliers.Rd +++ b/man/detect_outliers.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/outliers.R \name{detect_outliers} \alias{detect_outliers} -\title{Detect outliers in a variable in an \code{epi_tibble} object} +\title{Detect outliers in a variable in an \code{epi_df} object} \usage{ detect_outliers( x, @@ -13,7 +13,7 @@ detect_outliers( ) } \arguments{ -\item{x}{The \code{epi_tibble} object under consideration.} +\item{x}{The \code{epi_df} object under consideration.} \item{var}{The variable in \code{x} on which to run outlier detection.} @@ -42,14 +42,15 @@ that setting \code{new_col_name} equal to an existing column name will overwrite this column.} } \value{ -An \code{epi_tibble} object given by appending a new column to \code{x}, named +An \code{epi_df} object given by appending a new column to \code{x}, named according to the \code{new_col_name} argument, containing the outlier detection thresholds and replacement values from all detection methods. } \description{ -Applies one or more outlier detection methods to a variable in an -\code{epi_tibble} object, and optionally aggregates the results to create -consensus results. +Applies one or more outlier detection methods to a variable in an \code{epi_df} +object, and optionally aggregates the results to create consensus results. +See the \href{https://cmu-delphi.github.io/epitools/articles/outliers.html}{outliers vignette} for +examples. } \details{ Each outlier detection method, one per row of the passed \code{methods} diff --git a/man/detect_outliers_rm.Rd b/man/detect_outliers_rm.Rd index df8c3b3d..a10be46d 100644 --- a/man/detect_outliers_rm.Rd +++ b/man/detect_outliers_rm.Rd @@ -16,7 +16,7 @@ detect_outliers_rm( ) } \arguments{ -\item{x}{The \code{epi_tibble} object under consideration.} +\item{x}{The \code{epi_df} object under consideration.} \item{var}{The variable in \code{x} on which to run outlier detection.} diff --git a/man/detect_outliers_stl.Rd b/man/detect_outliers_stl.Rd index fc1eb718..c2befecb 100644 --- a/man/detect_outliers_stl.Rd +++ b/man/detect_outliers_stl.Rd @@ -19,7 +19,7 @@ detect_outliers_stl( ) } \arguments{ -\item{x}{The \code{epi_tibble} object under consideration.} +\item{x}{The \code{epi_df} object under consideration.} \item{var}{The variable in \code{x} on which to run outlier detection.} diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd index 1591cea5..93621a40 100644 --- a/man/epi_slide.Rd +++ b/man/epi_slide.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/slide.R \name{epi_slide} \alias{epi_slide} -\title{Slide a function over variables in an \code{epi_tibble} object} +\title{Slide a function over variables in an \code{epi_df} object} \usage{ epi_slide( x, @@ -18,7 +18,7 @@ epi_slide( ) } \arguments{ -\item{x}{The \code{epi_tibble} object under consideration.} +\item{x}{The \code{epi_df} object under consideration.} \item{slide_fun}{Function or formula to slide over variables in \code{x}. To "slide" means to apply a function or formula over a running window of \code{n} @@ -67,13 +67,12 @@ only be meaningful if \code{time_value} is of class \code{POSIXct}, that is, if via \code{slide_fun}.} } \value{ -An \code{epi_tibble} object given by appending a new column to \code{x}, named +An \code{epi_df} object given by appending a new column to \code{x}, named according to the \code{new_col_name} argument, containing the slide values. 
} \description{ -Slides a given function over variables in an \code{epi_tibble} object. See the -\href{https://cmu-delphi.github.io/epitools/articles/slide.html}{slide vignette} -for examples. +Slides a given function over variables in an \code{epi_df} object. See the \href{https://cmu-delphi.github.io/epitools/articles/slide.html}{slide vignette} for +examples. } \details{ To "slide" means to apply a function or formula over a running diff --git a/man/epitools.Rd b/man/epitools.Rd deleted file mode 100644 index 44cf6fab..00000000 --- a/man/epitools.Rd +++ /dev/null @@ -1,11 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/epitools.R -\docType{package} -\name{epitools} -\alias{epitools} -\title{epitools: Tools for basic signal processing in epidemiology} -\description{ -This package introduces a common data structure for epidemiological data sets -measured over space and time, and offers associated utilities to perform -basic signal processing tasks. -} diff --git a/man/estimate_deriv.Rd b/man/estimate_deriv.Rd index dacf3647..ef2a5eb1 100644 --- a/man/estimate_deriv.Rd +++ b/man/estimate_deriv.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/derivative.R \name{estimate_deriv} \alias{estimate_deriv} -\title{Estimate derivatives of a variable in an \code{epi_tibble} object} +\title{Estimate derivatives of a variable in an \code{epi_df} object} \usage{ estimate_deriv( x, @@ -21,7 +21,7 @@ estimate_deriv( ) } \arguments{ -\item{x}{The \code{epi_tibble} object under consideration.} +\item{x}{The \code{epi_df} object under consideration.} \item{var}{The variable in \code{x} whose derivatives are to be estimated.} @@ -78,13 +78,13 @@ is \code{TRUE}.} derivatives. See details below.} } \value{ -An \code{epi_tibble} object given by appending a new column to \code{x}, named +An \code{epi_df} object given by appending a new column to \code{x}, named according to the \code{new_col_name} argument, containing the derivative values. } \description{ -Estimates derivatives of a variable in an \code{epi_tibble} object, using a local -(in time) linear regression or alternative smoother. See the \href{https://cmu-delphi.github.io/epitools/articles/derivatives.html}{estimating derivatives vignette} -for examples. +Estimates derivatives of a variable in an \code{epi_df} object, using a local (in +time) linear regression or alternative smoothing method. See the \href{https://cmu-delphi.github.io/epitools/articles/derivative.html}{estimating derivatives vignette} for +examples. } \details{ Derivatives are estimated using: diff --git a/man/group_by.epi_tibble.Rd b/man/group_by.epi_tibble.Rd deleted file mode 100644 index be50410d..00000000 --- a/man/group_by.epi_tibble.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/epi_tibble.R -\name{group_by.epi_tibble} -\alias{group_by.epi_tibble} -\alias{ungroup.epi_tibble} -\title{Group or ungroup \code{epi_tibble} object} -\usage{ -\method{group_by}{epi_tibble}(x, ...) - -\method{ungroup}{epi_tibble}(x, ...) -} -\description{ -Groups or ungroups an \code{epi_tibble}, preserving class and attributes. 
-} diff --git a/man/pct_change.Rd b/man/pct_change.Rd index 9de92ac3..27863a7f 100644 --- a/man/pct_change.Rd +++ b/man/pct_change.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/pct_change.R \name{pct_change} \alias{pct_change} -\title{Compute percentage change of a variable in an \code{epi_tibble} object} +\title{Compute percentage change of a variable in an \code{epi_df} object} \usage{ pct_change( x, @@ -17,7 +17,7 @@ pct_change( ) } \arguments{ -\item{x}{The \code{epi_tibble} object under consideration.} +\item{x}{The \code{epi_df} object under consideration.} \item{var}{The variable in \code{x} whose percentage change values are to be computed.} @@ -55,13 +55,13 @@ metadata. Read the documentation for \code{\link[=epi_slide]{epi_slide()}} for m is \code{TRUE}.} } \value{ -An \code{epi_tibble} object given by appending a new column to \code{x}, named +An \code{epi_df} object given by appending a new column to \code{x}, named according to the \code{new_col_name} argument, containing the percentage change values. } \description{ -Computes the percentage change of a variable in an \code{epi_tibble} object. See -the \href{https://cmu-delphi.github.io/epitools/articles/pct-change.html}{percentage change vignette} for +Computes the percentage change of a variable in an \code{epi_df} object. See the +\href{https://cmu-delphi.github.io/epitools/articles/pct_change.html}{percentage change vignette} for examples. } \seealso{ diff --git a/man/print.epi_tibble.Rd b/man/print.epi_tibble.Rd deleted file mode 100644 index 297300bb..00000000 --- a/man/print.epi_tibble.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/epi_tibble.R -\name{print.epi_tibble} -\alias{print.epi_tibble} -\title{Print \code{epi_tibble} object} -\usage{ -\method{print}{epi_tibble}(x, ...) -} -\arguments{ -\item{x}{The \code{epi_tibble} object.} - -\item{...}{Additional arguments passed to \code{print.tibble()} to print the -data.} -} -\value{ -The \code{epi_tibble} object, unchanged. -} -\description{ -Prints a brief summary of the \code{epi_tibble} object, then prints the underlying -tibble. -} diff --git a/man/summary.epi_tibble.Rd b/man/summary.epi_tibble.Rd deleted file mode 100644 index d82a8523..00000000 --- a/man/summary.epi_tibble.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/epi_tibble.R -\name{summary.epi_tibble} -\alias{summary.epi_tibble} -\title{Summarize \code{epi_tibble} object} -\usage{ -\method{summary}{epi_tibble}(object, ...) -} -\arguments{ -\item{object}{The \code{epi_tibble} object.} - -\item{...}{Additional arguments, for compatibility with \code{summary()}. -Currently unused.} -} -\value{ -No return value; called only to print summary statistics. -} -\description{ -Prints a variety of summary statistics about the \code{epi_tibble} object, such as -the time range included and geographic coverage. -} diff --git a/vignettes/correlation.Rmd b/vignettes/correlation.Rmd index cf6e743e..25cbac53 100644 --- a/vignettes/correlation.Rmd +++ b/vignettes/correlation.Rmd @@ -7,8 +7,8 @@ vignette: > %\VignetteEncoding{UTF-8} --- -The `epitools` package provides some simple functionality for computing lagged -correlations between two signals, over space or time, via `cor_lagged()`. This +The `epiprocess` package provides some simple functionality for computing lagged +correlations between two signals, over space or time, via `epi_cor()`. 
This function is really just a convenience wrapper over some basic commands: it first performs any specified time shifts, grouped by geo value (this way, it shifts each of the individual time series); then, it removes the geo grouping and @@ -19,7 +19,7 @@ and death rates, smoothed using a 7-day trailing average. ```{r, message = FALSE} library(covidcast) -library(epitools) +library(epiprocess) library(dplyr) start_day <- "2020-03-01" @@ -43,7 +43,7 @@ y <- covidcast_signal(data_source = "jhu-csse", x <- x %>% full_join(y, by = c("geo_value", "time_value")) %>% - as.epi_tibble() %>% + as.epi_df() %>% group_by(geo_value) %>% epi_slide(slide_fun = ~ Mean(.x$case_rates), n = 7, new_col_name = "case_rates") %>% @@ -54,21 +54,21 @@ x <- x %>% ## Correlations grouped by time -The `cor_lagged()` function operates on an `epi_tibble` object, and it requires -further specification of the variables to correlate, in its next two arguments -(`var1` and `var2`). +The `epi_cor()` function operates on an `epi_df` object, and it requires further +specification of the variables to correlate, in its next two arguments (`var1` +and `var2`). In general, we can specify any grouping variable (or combination of variables) -in a call to `cor_lagged()`, via the `by` argument. This potentially leads to -many ways to compute correlations. There are always at least two ways to compute -correlations in an `epi_tibble`: grouping by time value, and by geo value. The +in a call to `epi_cor()`, via the `by` argument. This potentially leads to many +ways to compute correlations. There are always at least two ways to compute +correlations in an `epi_df`: grouping by time value, and by geo value. The former is obtained by setting `by = time_value`. ```{r, message = FALSE, warning = FALSE} library(ggplot2) theme_set(theme_bw()) -z1 <- cor_lagged(x, case_rates, death_rates, by = time_value) +z1 <- epi_cor(x, case_rates, death_rates, by = time_value) ggplot(z1, aes(x = time_value, y = cor)) + geom_line() + @@ -81,21 +81,21 @@ rates linearly associated, across the U.S. states?". We might be interested in broadening this question, instead asking: "on any given day, do higher case rates tend to associate with higher death rates?", removing the dependence on a linear relationship. The latter can be addressed using Spearman correlation, -accomplished by setting `method = "spearman"` in the call to `cor_lagged()`. +accomplished by setting `method = "spearman"` in the call to `epi_cor()`. Spearman correlation is highly robust and invariant to monotone transformations. ## Lagged correlations We might also be interested in how case rates associate with death rates in the -*future*. Using the `dt1` parameter in `cor_lagged()`, we can lag case rates -back any number of days we want, before calculating correlations. Below, we set -`dt1 = -10`. This means that `var1 = case_rates` will be lagged by 10 days, so -that case rates on June 1st will be correlated with death rates on June 11th. -(It might also help to think of it this way: death rates on a certain day will -be correlated with case rates at an offset of -10 days.) +*future*. Using the `dt1` parameter in `epi_cor()`, we can lag case rates back +any number of days we want, before calculating correlations. Below, we set `dt1 += -10`. This means that `var1 = case_rates` will be lagged by 10 days, so that +case rates on June 1st will be correlated with death rates on June 11th. 
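To make the shifting concrete, here is a rough `dplyr` sketch of what a call like `epi_cor(x, case_rates, death_rates, by = time_value, dt1 = -10)` amounts to. This is only an illustration of the idea (shift per geo value, then apply the `by` grouping and correlate), not the package's actual implementation.

```{r, eval = FALSE}
library(dplyr)

x %>%
  arrange(geo_value, time_value) %>%
  # Shift case rates back by 10 days, separately within each geo value
  group_by(geo_value) %>%
  mutate(case_rates = lag(case_rates, n = 10)) %>%
  ungroup() %>%
  # Then apply the `by` grouping and compute correlations
  group_by(time_value) %>%
  summarize(cor = cor(case_rates, death_rates, use = "na.or.complete"))
```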
(It +might also help to think of it this way: death rates on a certain day will be +correlated with case rates at an offset of -10 days.) ```{r, message = FALSE, warning = FALSE} -z2 <- cor_lagged(x, case_rates, death_rates, by = time_value, dt1 = -10) +z2 <- epi_cor(x, case_rates, death_rates, by = time_value, dt1 = -10) z <- rbind(z1 %>% mutate(lag = 0), z2 %>% mutate(lag = 10)) %>% @@ -118,8 +118,8 @@ geo_value`. We'll again look at correlations both for 0- and 10-day lagged case rates. ```{r, message = FALSE, warning = FALSE} -z1 <- cor_lagged(x, case_rates, death_rates, by = geo_value) -z2 <- cor_lagged(x, case_rates, death_rates, by = geo_value, dt1 = -10) +z1 <- epi_cor(x, case_rates, death_rates, by = geo_value) +z2 <- epi_cor(x, case_rates, death_rates, by = geo_value, dt1 = -10) z <- rbind(z1 %>% mutate(lag = 0), z2 %>% mutate(lag = 10)) %>% @@ -143,7 +143,7 @@ library(purrr) lags = 0:35 z <- map_dfr(lags, function(lag) { - cor_lagged(x, case_rates, death_rates, by = geo_value, dt1 = -lag) %>% + epi_cor(x, case_rates, death_rates, by = geo_value, dt1 = -lag) %>% mutate(lag = lag) }) diff --git a/vignettes/derivative.Rmd b/vignettes/derivative.Rmd index 3bb6d12f..c2ba9d05 100644 --- a/vignettes/derivative.Rmd +++ b/vignettes/derivative.Rmd @@ -14,7 +14,7 @@ per 100,000 people), smoothed using a 7-day trailing average. ```{r, message = FALSE} library(covidcast) -library(epitools) +library(epiprocess) library(dplyr) x <- covidcast_signal(data_source = "jhu-csse", @@ -23,7 +23,7 @@ x <- covidcast_signal(data_source = "jhu-csse", end_day = "2021-05-31", geo_type = "state", geo_values = "fl") %>% - as.epi_tibble() %>% + as.epi_df() %>% rename(case_rates = value) %>% select(geo_value, time_value, case_rates) %>% epi_slide(slide_fun = ~ Mean(.x$case_rates), n = 7, @@ -32,13 +32,13 @@ x <- covidcast_signal(data_source = "jhu-csse", ## Estimating derivatives via linear regression -The `estimate_deriv()` function operates on a `epi_tibble` object, and aside -from this, takes three main arguments: `var`, the variable whose derivatives are -to be estimated; `method`, the name of the method to use for derivative -estimation; and `n`, the length of the local window (here, in days) to use in -training the given method. Here we use `method = "lin"`, the default, which -estimates the derivative using the slope from a simple linear regression, and -`n = 14`, also the default. +The `estimate_deriv()` function operates on an `epi_df` object, and aside from +this, takes three main arguments: `var`, the variable whose derivatives are to +be estimated; `method`, the name of the method to use for derivative estimation; +and `n`, the length of the local window (here, in days) to use in training the +given method. Here we use `method = "lin"`, the default, which estimates the +derivative using the slope from a simple linear regression, and `n = 14`, also +the default.
```{r} x <- estimate_deriv(x, case_rates, method = "lin", n = 14) diff --git a/vignettes/epitools.Rmd b/vignettes/epiprocess.Rmd similarity index 67% rename from vignettes/epitools.Rmd rename to vignettes/epiprocess.Rmd index c1503781..2e5952af 100644 --- a/vignettes/epitools.Rmd +++ b/vignettes/epiprocess.Rmd @@ -1,8 +1,8 @@ --- -title: Get started with `epitools` +title: Get started with `epiprocess` output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{Get started with epitools} + %\VignetteIndexEntry{Get started with epiprocess} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- @@ -17,7 +17,7 @@ This package is not on CRAN yet, so it can be installed using the [`devtools`](https://cran.r-project.org/package=devtools) package: ```{r, eval = FALSE} -devtools::install_github("cmu-delphi/epitools", ref = "main") +devtools::install_github("cmu-delphi/epiprocess", ref = "main") ``` Building the vignettes, such as this getting started guide, takes a significant @@ -25,31 +25,31 @@ amount of time. They are not included in the package by default. If you want to include vignettes, then use this modified command: ```{r, eval = FALSE} -devtools::install_github("cmu-delphi/epitools", ref = "main", +devtools::install_github("cmu-delphi/epiprocess", ref = "main", build_vignettes = TRUE, dependencies = TRUE) ``` -## Getting data into `epi_tibble` format +## Getting data into `epi_df` format -We'll start by showing how to get data into `epi_tibble` format, which is just a +We'll start by showing how to get data into `epi_df` format, which is just a tibble with a bit of special structure, and is the format assumed by all of the -functions in the `epitools` package. An `epi_tibble` object has (at least) the -following columns: +functions in the `epiprocess` package. An `epi_df` object has (at least) the +following columns: * `geo_value`: the geographic value associated with each measurement. * `time_value`: the time value associated with each measurement. Other columns can be considered as measured variables, which we also broadly -refer to as signal variables. To learn more about the `epi_tibble` format, you -can read the documentation for `as.epi_tibble()`. +refer to as signal variables. To learn more about the `epi_df` format, you can +read the documentation for `as.epi_df()`. -A data frame or tibble that has `geo_value` and `time_value` columns can be -converted into an `epi_tibble` object, using the function `as.epi_tibble()`. As -an example, we'll look at daily cumulative COVID-19 cases for 4 states (CA, FL, -NY, and TX) in the U.S., over a year spanning mid 2020 to mid 2021, using the -[`covidcast`](https://cmu-delphi.github.io/covidcast/covidcastR/) -package to fetch this data from the [COVIDcast -API](https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html/). +A data frame or tibble that has `geo_value` and `time_value` columns can be +converted into an `epi_df` object, using the function `as.epi_df()`. As an +example, we'll look at daily cumulative COVID-19 cases for 4 states (CA, FL, NY, +and TX) in the U.S., over a year spanning mid 2020 to mid 2021, using the +[`covidcast`](https://cmu-delphi.github.io/covidcast/covidcastR/) package to +fetch this data from the [COVIDcast +API](https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html/). 
```{r, message = FALSE} library(covidcast) @@ -65,19 +65,19 @@ class(case_data) colnames(case_data) ``` -As we can see, a data frame returned `covidcast::covidcast_signal()` has the -columns required for an `epi_tibble` object (along with many others). A call to -`as.epi_tibble()`, with further specification of some relevant metadata, brings -the data frame into `epi_tibble` format. +As we can see, a data frame returned by `covidcast::covidcast_signal()` has the +columns required for an `epi_df` object (along with many others). A call to +`as.epi_df()`, with further specification of some relevant metadata, brings the +data frame into `epi_df` format. ```{r, message = FALSE} -library(epitools) +library(epiprocess) library(dplyr) -x <- as.epi_tibble(case_data, - geo_type = "state", - time_type = "day", - issue = max(case_data$issue)) %>% +x <- as.epi_df(case_data, + geo_type = "state", + time_type = "day", + issue = max(case_data$issue)) %>% rename("cases_cum" = value) %>% select(geo_value, time_value, cases_cum) @@ -89,7 +89,7 @@ attributes(x)$metadata ## Some info about required metadata -In general, an `epi_tibble` object must have following fields in its metadata: +In general, an `epi_df` object must have the following fields in its metadata: * `geo_type`: the type for the geo values. * `time_type`: the type for the time values. @@ -100,24 +100,24 @@ individual signal values measured in the data set; hence we would also say that the data set is comprised of all signal values observed "as of" the given issue in the metadata. -If any of the `geo_type`, `time_type`, and `issue` arguments are missing in a -call to `as.epi_tibble()`, then this function will try to infer them from the -passed object (The documentation for `as.epi_tibble()` gives more details.) +If any of the `geo_type`, `time_type`, and `issue` arguments are missing in a +call to `as.epi_df()`, then this function will try to infer them from the passed +object. (The documentation for `as.epi_df()` gives more details.) ```{r} -x <- as.epi_tibble(case_data) %>% +x <- as.epi_df(case_data) %>% rename("cases_cum" = value) %>% select(geo_value, time_value, cases_cum) attributes(x)$metadata ``` -## Working with `epi_tibble` objects downstream +## Working with `epi_df` objects downstream -Data in `epi_tibble` format should be easy to work with downstream; in the other -vignettes, we'll walk through some basic signal processing tasks using functions -provided in the `epitools` package; of course, we can also write custom code for -for other downstream uses, like plotting, which is pretty easy to do `ggplot2`. +Data in `epi_df` format should be easy to work with downstream; in the other +vignettes, we'll walk through some basic signal processing tasks using functions +provided in the `epiprocess` package; of course, we can also write custom code +for other downstream uses, like plotting, which is pretty easy to do with `ggplot2`. ```{r, message = FALSE, warning = FALSE} library(ggplot2) @@ -138,7 +138,7 @@ x <- outbreaks::sars_canada_2003 %>% mutate(geo_value = "ca") %>% rename(time_value = date) %>% select(geo_value, time_value, starts_with("cases")) %>% - as.epi_tibble(geo_type = "nation") + as.epi_df(geo_type = "nation") head(x) diff --git a/vignettes/issues.Rmd b/vignettes/issues.Rmd index 718ea1cb..93ac2f0e 100644 --- a/vignettes/issues.Rmd +++ b/vignettes/issues.Rmd @@ -1,8 +1,8 @@ --- -title: 6. Work with issue dates and archive objects +title: 7.
Work with issue dates and archive objects output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{6. Work with issue dates and archive objects} + %\VignetteIndexEntry{7. Work with issue dates and archive objects} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- @@ -17,7 +17,7 @@ Basic stuff: ``` x <- as_of(z, issue = "2020-06-15") ``` - * here the object `x` will be an `epi_tibble` object + * here the object `x` will be an `epi_df` object * the function `as_of()` by default returns the latest observation (defined by unique combo of `geo_value` and `time_value` columns being equal) whose issue doesn't surpass the specified one @@ -33,11 +33,11 @@ Slide stuff: - this allows us to do honest forecaster training. basically, replaces `epival` (we shouldn't need a separate package ...) - so `epi_slide()` checks what object it's being given * if an `epi_tibble`, it works as it does currently + * if an `epi_df`, it works as it does currently * if an `epi_archive`, then for each working time value, it uses `as_of()` to only return the data you would have had at that time (and whose time value is within `n` trailing time steps of the specified one) - demo this in the vignette with the hand-built AR forecaster. the `epipred` package can and still should be built, to contain more fancy forecasters, and potentially also scoring roles. but the functionality for *running* these will -all already be built into `epitools`, just as a generalization of `epi_slide()` \ No newline at end of file +already be built into `epiprocess`, just as a generalization of `epi_slide()` \ No newline at end of file diff --git a/vignettes/outliers.Rmd b/vignettes/outliers.Rmd index 41625ebd..3834e35f 100644 --- a/vignettes/outliers.Rmd +++ b/vignettes/outliers.Rmd @@ -8,14 +8,14 @@ vignette: > --- This vignette describes functionality for detecting and correcting outliers in -signals `detect_outliers()` and `correct_outliers()`. These functions are +signals, via `detect_outliers()` and `correct_outliers()`. These functions are designed to be modular and extendable, so that you can provide your own outlier -detection and correction functions and use them with `epi_tibble` objects. We'll +detection and correction functions and use them with `epi_df` objects. We'll work with state-level daily reported COVID-19 case counts in Florida and New Jersey.
```{r, message = FALSE, fig.width = 8, fig.height = 7} library(covidcast) -library(epitools) +library(epiprocess) library(dplyr) library(tidyr) library(ggplot2) @@ -29,7 +29,7 @@ x <- covidcast_signal(data_source = "jhu-csse", geo_values = c("fl", "nj"), as_of = "2021-10-28") %>% select(geo_value, time_value, cases = value) %>% - as.epi_tibble() + as.epi_df() ggplot(x, aes(x = time_value, y = cases)) + geom_line() + @@ -128,20 +128,17 @@ plot_outliers = function(x, var, outliers_col = outlier_info, names_pattern = "(.+)_(.+)") # If requested, filter to only combined method - if (combined_only) { - x_long <- x_long %>% filter(method == "combined") - } - + if (combined_only) x_long <- x_long %>% filter(method == "combined") + # Start of plot with observed data p <- ggplot() + geom_line(data = x, mapping = aes(x = time_value, y = !!var)) # If requested, add bands - if (bands) { + if (bands) p <- p + geom_ribbon(data = x_long, aes(x = time_value, ymin = lower, ymax = upper, color = method), fill = NA) - } # If requested, add points if (points) { @@ -152,9 +149,8 @@ plot_outliers = function(x, var, outliers_col = outlier_info, } # If requested, add faceting - if (!is.null(facet_vars)) { + if (!is.null(facet_vars)) p <- p + facet_wrap(facet_vars, nrow = nrow, ncol = ncol, scales = scales) - } p } diff --git a/vignettes/pct_change.Rmd b/vignettes/pct_change.Rmd index a5165a30..b53fb853 100644 --- a/vignettes/pct_change.Rmd +++ b/vignettes/pct_change.Rmd @@ -9,12 +9,12 @@ vignette: > A basic way of assessing growth in a signal is to look at its percentage change over two neighboring time windows. We investigate this in the current vignette, -using the `pct_change()` function in the `epitools` package. As before, we'll +using the `pct_change()` function in the `epiprocess` package. As before, we'll look at state-level daily reported COVID-19 cases. ```{r, message = FALSE} library(covidcast) -library(epitools) +library(epiprocess) library(dplyr) x <- covidcast_signal(data_source = "jhu-csse", @@ -23,21 +23,21 @@ x <- covidcast_signal(data_source = "jhu-csse", end_day = "2021-05-31", geo_type = "state", geo_values = "fl") %>% - as.epi_tibble() %>% + as.epi_df() %>% rename(cases = value) %>% select(geo_value, time_value, cases) ``` ## Percentage change -The `pct_change()` function operates on a `epi_tibble` object, and aside from -this, takes two main arguments: `var`, the variable whose percentage change -values are to be computed; and `n`, the length of the local window (here, in -days) to use to compute the percentage change values. For example, if `n = 10`, -then the percentage change on November 10 is computed as 100 * (B - A) / A, -where A is the sum of the signal in between November 6 and 10, and A is the sum -in between November 1 and 5. The default is `n = 14` (here, corresponding to the -percentage change between adjacent weeks). +The `pct_change()` function operates on an `epi_df` object, and aside from this, +takes two main arguments: `var`, the variable whose percentage change values are +to be computed; and `n`, the length of the local window (here, in days) to use +to compute the percentage change values. For example, if `n = 10`, then the +percentage change on November 10 is computed as 100 * (B - A) / A, where B is +the sum of the signal between November 6 and 10, and A is the sum between +November 1 and 5. The default is `n = 14` (here, corresponding to the percentage +change between adjacent weeks).
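As a quick arithmetic check of that formula, with made-up window sums (the numbers below are hypothetical, purely to illustrate):

```{r, eval = FALSE}
B <- 1200  # hypothetical sum of the signal over November 6 to 10
A <- 1000  # hypothetical sum of the signal over November 1 to 5
100 * (B - A) / A  # percentage change over the two windows: 20
```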
```{r} x <- pct_change(x, cases, n = 14) diff --git a/vignettes/slide.Rmd b/vignettes/slide.Rmd index d949a7f0..46b3c44b 100644 --- a/vignettes/slide.Rmd +++ b/vignettes/slide.Rmd @@ -7,31 +7,31 @@ vignette: > %\VignetteEncoding{UTF-8} --- -One of the most central tools in the `epitools` package is `epi_slide()`, which -is based on the family of functions provided by the +One of the most central tools in the `epiprocess` package is `epi_slide()`, +which is based on the family of functions provided by the [`slider`](https://cran.r-project.org/web/packages/slider/) package. In -`epitools`, to "slide" means to apply a computation---represented as a function -or formula---over a running window of `n` time steps. Several other functions -in the `epitools` package, such as `pct_change()` and `estimate_deriv()`, use -`epi_slide()` as their workhorse. Suitable groupings can always be achieved by -a preliminary call to `group_by()`. +`epiprocess`, to "slide" means to apply a computation---represented as a +function or formula---over a running window of `n` time steps. Several other +functions in the `epiprocess` package, such as `pct_change()` and +`estimate_deriv()`, use `epi_slide()` as their workhorse. Suitable groupings can +always be achieved by a preliminary call to `group_by()`. By default, the meaning of one time step is inferred from the `time_type` of the -`epi_tibble` object under consideration. If `time_type` is "day", then one time -step is one day. The time step can also be specified manually in the call to +`epi_df` object under consideration. If `time_type` is "day", then one time step +is one day. The time step can also be specified manually in the call to `epi_slide()`; you can read the documentation for more details. Furthermore, the -alignment of the running window used in `epi_slide()` can either be "trailing" +alignment of the running window used in `epi_slide()` can either be "trailing" or "centered"; the default is "trailing", and is what we use in this and most other vignettes, without specifying otherwise. -Similar to the getting started guide, we'll fetch daily reported COVID-19 cases +Similar to the getting started guide, we'll fetch daily reported COVID-19 cases for four U.S. states (note: new, not cumulative cases) using the -[`covidcast`](https://cmu-delphi.github.io/covidcast/covidcastR/) package, and -then convert this to `epi_tibble` format. +[`covidcast`](https://cmu-delphi.github.io/covidcast/covidcastR/) package, and +then convert this to `epi_df` format. ```{r, message = FALSE} library(covidcast) -library(epitools) +library(epiprocess) library(dplyr) x <- covidcast_signal(data_source = "jhu-csse", @@ -40,7 +40,7 @@ x <- covidcast_signal(data_source = "jhu-csse", end_day = "2021-05-31", geo_type = "state", geo_values = c("ca", "fl", "ny", "tx")) %>% - as.epi_tibble() %>% + as.epi_df() %>% rename(cases = value) %>% select(geo_value, time_value, cases) ``` @@ -59,14 +59,14 @@ x %>% ``` The formula specified via `slide_fun` has access to all columns present in the -original `epi_tibble` object (and must refer to them with the prefix `.x$`). -Here he function `Mean()` is just a simple wrapper around `mean()` that omits -`NA` values by default (provided by the `epitools` package). +original `epi_df` object (and must refer to them with the prefix `.x$`). Here +the function `Mean()` is just a simple wrapper around `mean()` that omits `NA` +values by default (provided by the `epiprocess` package).
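Any base R (or user-defined) function can be used inside the formula in the same way. As a rough sketch (assuming the same working `epi_df` and column names as above), a trailing 7-day sum of cases could be computed like this; the naming of the resulting column is discussed next.

```{r, eval = FALSE}
x %>%
  group_by(geo_value) %>%
  # The formula again pulls columns from the window via the `.x$` prefix
  epi_slide(slide_fun = ~ sum(.x$cases, na.rm = TRUE), n = 7) %>%
  ungroup() %>%
  head(10)
```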
-The function `epi_slide()` returns an `epi_tibble` object with a new column -appended that contains the results (from sliding), named `slide_value` as the -default. We can of course change this post hoc, or we can instead specify a new -name up front using the `new_col_name` argument: +The function `epi_slide()` returns an `epi_df` object with a new column appended +that contains the results (from sliding), named `slide_value` as the default. We +can of course change this post hoc, or we can instead specify a new name up +front using the `new_col_name` argument: ```{r} x <- x %>% @@ -111,20 +111,20 @@ head(x, 10) ## Building and running a local forecaster As a more complicated example, we create a forecaster based on a local (in time) -autoregression or AR model. AR models can be fit in numerous ways (using base R +autoregression or AR model. AR models can be fit in numerous ways (using base R functions and various packages), but here we define it "by hand" both because it -provides a more advanced example of sliding a function over an `epi_tibble` -object, and because it allows us to be a bit more flexible in defining a -*probabilistic* forecaster: one that outputs not just a point prediction, but -a notion of uncertainty around this. In particular, our forecaster will -output a point prediction along with an 90\% uncertainty band, represented by -a predictive quantiles at the 5\% and 95\% levels (lower and upper endpoints of -the uncertainty band). - -The function defined below, `prob_ar()`, is our probabilistic AR forecaster. The -`lags`argument indicates which lags to use in the model, and `ahead` indicates +provides a more advanced example of sliding a function over an `epi_df` object, +and because it allows us to be a bit more flexible in defining a *probabilistic* +forecaster: one that outputs not just a point prediction, but a notion of +uncertainty around this. In particular, our forecaster will output a point +prediction along with a 90\% uncertainty band, represented by predictive +quantiles at the 5\% and 95\% levels (lower and upper endpoints of the +uncertainty band). + +The function defined below, `prob_ar()`, is our probabilistic AR forecaster. The +`lags` argument indicates which lags to use in the model, and `ahead` indicates how far ahead in the future to make forecasts (both are encoded in terms of the -units of the `time_value` column; so, days, in the working `epi_tibble` being +units of the `time_value` column; so, days, in the working `epi_df` being considered in this vignette). ```{r} @@ -155,9 +155,9 @@ prob_ar <- function(v, lags = c(0, 7, 14), ahead = 7, min_train_window = 20, } ``` -Now we go ahead and slide this AR forecaster over the working `epi_tibble` of -COVID-19 cases. Note that we actually model the `cases_7dav` column, to operate -on the scale of smoothed COVID-19 cases. (This is clearly equivalent, up to a +Now we go ahead and slide this AR forecaster over the working `epi_df` of +COVID-19 cases. Note that we actually model the `cases_7dav` column, to operate +on the scale of smoothed COVID-19 cases. (This is clearly equivalent, up to a constant, to modeling weekly sums of COVID-19 cases.) ```{r}