From 927f0579db7500ad9e05f5000692b74ad10c096e Mon Sep 17 00:00:00 2001 From: Ryan Tibshirani Date: Fri, 25 Feb 2022 17:17:10 -0500 Subject: [PATCH] Edits to duplicate values checks - Edit slightly so that the code and error/warning messages in growth_rate() and detect_outlr() are a bit more unified - Remove the line in DESCRIPTION that suggests delphi.epidata. delphi.epidata is not ready for prime time yet and, in addition, I think it has still been causing errors for people trying to install this package --- DESCRIPTION | 5 +---- R/growth_rate.R | 8 +++++++- R/outliers.R | 4 +++- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9d3ce499..3a2ebad6 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -51,8 +51,5 @@ Imports: tidyr, tsibble Suggests: - testthat (>= 3.0.0), - delphi.epidata -Remotes: - github::cmu-delphi/delphi-epidata-r + testthat (>= 3.0.0), Config/testthat/edition: 3 diff --git a/R/growth_rate.R b/R/growth_rate.R index 23a4dddd..6f3d38f2 100644 --- a/R/growth_rate.R +++ b/R/growth_rate.R @@ -22,7 +22,9 @@ #' @param log_scale Should growth rates be estimated using the parametrization #' on the log scale? See details for an explanation. Default is `FALSE`. #' @param dup_rm Should we check and remove duplicates in `x` (and corresponding -#' elements of `y`) before the computation? Default is `FALSE`. +#' elements of `y`) before the computation? Some methods might handle +#' duplicate `x` values gracefully, whereas others might fail (either quietly +#' or loudly). Default is `FALSE`. #' @param na_rm Should missing values be removed before the computation? Default #' is `FALSE`. #' @param ... Additional arguments to pass to the method used to estimate the @@ -122,9 +124,13 @@ growth_rate = function(x = seq_along(y), y, x0 = x, # Remove duplicates if we need to if (dup_rm) { o = !duplicated(x) + if (any(!o)) { + Warn("`x` contains duplicate values. 
(If being run on a column in an `epi_df`, did you group by relevant key variables?)") + } x = x[o] y = y[o] } + # Remove NAs if we need to if (na_rm) { diff --git a/R/outliers.R b/R/outliers.R index 9bd370dc..4acc52ab 100644 --- a/R/outliers.R +++ b/R/outliers.R @@ -53,7 +53,9 @@ detect_outlr = function(x = seq_along(y), y, combiner = match.arg(combiner) # Validate that x contains all distinct values - if (max(table(x)) > 1) Abort("`x` must not contain duplicate values; did you group your `epi_df` by all relevant key variables?") + if (any(duplicated(x))) { + Abort("`x` cannot contain duplicate values. (If being run on a column in an `epi_df`, did you group by relevant key variables?)") + } # Run all outlier detection methods results = purrr::pmap_dfc(methods, function(method, args, abbr) {