Skip to content

minor bug fixes to outlier detection #50

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ Imports:
tidyr,
tsibble
Suggests:
testthat (>= 3.0.0),
delphi.epidata
Remotes:
github::cmu-delphi/delphi-epidata-r
Config/testthat/edition: 3
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,5 @@ importFrom(stats,cor)
importFrom(stats,median)
importFrom(tidyr,unnest)
importFrom(tidyselect,eval_select)
importFrom(tidyselect,starts_with)
importFrom(tsibble,as_tsibble)
24 changes: 15 additions & 9 deletions R/outliers.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,15 @@
#'
#' @export
detect_outlr = function(x = seq_along(y), y,
methods = tibble(method = "rm",
methods = tibble::tibble(method = "rm",
args = list(list()),
abbr = "rm"),
combiner = c("median", "mean", "none")) {
# Validate combiner
combiner = match.arg(combiner)

# Validate that x contains all distinct values
if (max(table(x)) > 1) Abort("`x` must not contain duplicate values; did you group your `epi_df` by all relevant key variables?")

# Run all outlier detection methods
results = purrr::pmap_dfc(methods, function(method, args, abbr) {
Expand Down Expand Up @@ -187,6 +190,7 @@ detect_outlr_rm = function(x = seq_along(y), y, n = 21,
#' description.
#'
#' @importFrom stats median
#' @importFrom tidyselect starts_with
#' @export
detect_outlr_stl = function(x = seq_along(y), y,
n_trend = 21,
Expand Down Expand Up @@ -216,11 +220,10 @@ detect_outlr_stl = function(x = seq_along(y), y,
fabletools::model(feasts::STL(stl_formula, robust = TRUE)) %>%
generics::components() %>%
tibble::as_tibble() %>%
dplyr::transmute(
trend = trend,
seasonal = season_week,
resid = remainder)

dplyr::select(trend:remainder) %>%
dplyr::rename_with(~ "seasonal", tidyselect::starts_with("season")) %>%
dplyr::rename(resid = remainder)

# Allocate the seasonal term from STL to either fitted or resid
if (!is.null(seasonal_period)) {
stl_components = stl_components %>%
Expand Down Expand Up @@ -263,15 +266,18 @@ detect_outlr_stl = function(x = seq_along(y), y,

# Common function for rolling IQR, using fitted and resid variables
roll_iqr = function(z, n, detection_multiplier, min_radius,
replacement_multiplier, min_lower) {
replacement_multiplier, min_lower) {
if (typeof(z$y) == "integer") as_type = as.integer
else as_type = as.numeric

epi_slide(z, roll_iqr = IQR(resid), n = n, align = "center") %>%
dplyr::mutate(
lower = pmax(min_lower,
fitted - pmax(min_radius, detection_multiplier * roll_iqr)),
upper = fitted + pmax(min_radius, detection_multiplier * roll_iqr),
replacement = dplyr::case_when(
(y < lower) ~ fitted - replacement_multiplier * roll_iqr,
(y > upper) ~ fitted + replacement_multiplier * roll_iqr,
(y < lower) ~ as_type(fitted - replacement_multiplier * roll_iqr),
(y > upper) ~ as_type(fitted + replacement_multiplier * roll_iqr),
TRUE ~ y)) %>%
dplyr::select(lower, upper, replacement) %>%
tibble::as_tibble()
Expand Down
2 changes: 1 addition & 1 deletion man/detect_outlr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions tests/testthat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
library(testthat)
library(epiprocess)

test_check("epiprocess")
7 changes: 7 additions & 0 deletions tests/testthat/test-outliers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
test_that("detect_outlr throws error with duplicate x", {
expect_error(detect_outlr(x = c(1, 2, 3, 3, 4), y = 1:5))
})

test_that("detect_outlr throws error with length(x) != length(y)", {
expect_error(detect_outlr(x = 1:3, y = 1:5))
})
3 changes: 2 additions & 1 deletion vignettes/outliers.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ detection_methods = bind_rows(
abbr = "rm"),
tibble(method = "stl",
args = list(list(detect_negatives = TRUE,
detection_multiplier = 2.5)),
detection_multiplier = 2.5,
seasonal_period = 7)),
abbr = "stl_seasonal"),
tibble(method = "stl",
args = list(list(detect_negatives = TRUE,
Expand Down