From 927f0579db7500ad9e05f5000692b74ad10c096e Mon Sep 17 00:00:00 2001
From: Ryan Tibshirani <ryantibs@gmail.com>
Date: Fri, 25 Feb 2022 17:17:10 -0500
Subject: [PATCH] Edits to duplicate values checks

- Edit slightly so that the code and error/warning messages in
  growth_rate() and detect_outlr() are a bit more unified
- Remove the lines in DESCRIPTION that suggest delphi.epidata. This is
  not ready for prime time yet, and in addition, I think it has still
  been causing errors for people trying to install the package
---
 DESCRIPTION     | 5 +----
 R/growth_rate.R | 8 +++++++-
 R/outliers.R    | 4 +++-
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 9d3ce499..3a2ebad6 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -51,8 +51,5 @@ Imports:
   tidyr,
   tsibble
 Suggests:
-    testthat (>= 3.0.0),
-    delphi.epidata
-Remotes:
-    github::cmu-delphi/delphi-epidata-r
+  testthat (>= 3.0.0),
 Config/testthat/edition: 3
diff --git a/R/growth_rate.R b/R/growth_rate.R
index 23a4dddd..6f3d38f2 100644
--- a/R/growth_rate.R
+++ b/R/growth_rate.R
@@ -22,7 +22,9 @@
 #' @param log_scale Should growth rates be estimated using the parametrization
 #'   on the log scale? See details for an explanation. Default is `FALSE`.
 #' @param dup_rm Should we check and remove duplicates in `x` (and corresponding
-#'   elements of `y`) before the computation? Default is `FALSE`.
+#'   elements of `y`) before the computation? Some methods might handle
+#'   duplicate `x` values gracefully, whereas others might fail (either quietly
+#'   or loudly). Default is `FALSE`.
 #' @param na_rm Should missing values be removed before the computation? Default
 #'   is `FALSE`.
 #' @param ... Additional arguments to pass to the method used to estimate the
@@ -122,9 +124,13 @@ growth_rate = function(x = seq_along(y), y, x0 = x,
   # Remove duplicates if we need to
   if (dup_rm) {
     o = !duplicated(x)
+    if (any(!o)) {
+      Warn("`x` contains duplicate values. (If being run on a column in an `epi_df`, did you group by relevant key variables?)")
+    }
     x = x[o]
     y = y[o]
   }
+    
   
   # Remove NAs if we need to
   if (na_rm) {
diff --git a/R/outliers.R b/R/outliers.R
index 9bd370dc..4acc52ab 100644
--- a/R/outliers.R
+++ b/R/outliers.R
@@ -53,7 +53,9 @@ detect_outlr = function(x = seq_along(y), y,
   combiner = match.arg(combiner)
   
   # Validate that x contains all distinct values
-  if (max(table(x)) > 1) Abort("`x` must not contain duplicate values; did you group your `epi_df` by all relevant key variables?")
+  if (any(duplicated(x))) {
+    Abort("`x` cannot contain duplicate values. (If being run on a column in an `epi_df`, did you group by relevant key variables?)")
+  }
 
   # Run all outlier detection methods
   results = purrr::pmap_dfc(methods, function(method, args, abbr) {