Merge pull request #51 from cmu-delphi/look-at-dup

ryantibs · web-flow · commit d59314e20b55 · 2022-02-25T17:19:39.000-05:00
Edits to duplicate values checks
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -51,8 +51,5 @@ Imports:
   tidyr,
   tsibble
 Suggests:
-    testthat (>= 3.0.0),
-    delphi.epidata
-Remotes:
-    github::cmu-delphi/delphi-epidata-r
+  testthat (>= 3.0.0)
 Config/testthat/edition: 3
diff --git a/R/growth_rate.R b/R/growth_rate.R
@@ -22,7 +22,9 @@
 #' @param log_scale Should growth rates be estimated using the parametrization
 #'   on the log scale? See details for an explanation. Default is `FALSE`.
 #' @param dup_rm Should we check and remove duplicates in `x` (and corresponding
-#'   elements of `y`) before the computation? Default is `FALSE`.
+#'   elements of `y`) before the computation? A warning is issued if any are
+#'   found. Some methods might handle duplicate `x` values gracefully, whereas
+#'   others might fail (either quietly or loudly). Default is `FALSE`.
 #' @param na_rm Should missing values be removed before the computation? Default
 #'   is `FALSE`.
 #' @param ... Additional arguments to pass to the method used to estimate the
@@ -122,9 +124,13 @@ growth_rate = function(x = seq_along(y), y, x0 = x,
   # Remove duplicates if we need to
   if (dup_rm) {
     o = !duplicated(x)
+    if (any(!o)) {
+      Warn("`x` contains duplicate values. (If being run on a column in an `epi_df`, did you group by relevant key variables?)")
+    }
     x = x[o]
     y = y[o]
   }
+    
   
   # Remove NAs if we need to
   if (na_rm) {
diff --git a/R/outliers.R b/R/outliers.R
@@ -53,7 +53,9 @@ detect_outlr = function(x = seq_along(y), y,
   combiner = match.arg(combiner)
   
   # Validate that x contains all distinct values
-  if (max(table(x)) > 1) Abort("`x` must not contain duplicate values; did you group your `epi_df` by all relevant key variables?")
+  if (any(duplicated(x))) {
+    Abort("`x` cannot contain duplicate values. (If being run on a column in an `epi_df`, did you group by relevant key variables?)")
+  }
 
   # Run all outlier detection methods
   results = purrr::pmap_dfc(methods, function(method, args, abbr) {