Skip to content

Commit 7ae9e87

Browse files
authored
Merge pull request #382 from cmu-delphi/djm/autoplot
Djm/autoplot
2 parents 44486f9 + c84d678 commit 7ae9e87

13 files changed

+453
-12
lines changed

DESCRIPTION

+5-2
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,15 @@ Description: This package introduces a common data structure for epidemiological
2626
License: MIT + file LICENSE
2727
Copyright: file inst/COPYRIGHTS
2828
Imports:
29+
checkmate,
2930
cli,
3031
data.table,
3132
dplyr (>= 1.0.0),
3233
fabletools,
3334
feasts,
3435
generics,
3536
genlasso,
37+
ggplot2,
3638
lifecycle (>= 1.0.1),
3739
lubridate,
3840
magrittr,
@@ -49,7 +51,6 @@ Imports:
4951
Suggests:
5052
covidcast,
5153
epidatr,
52-
ggplot2,
5354
knitr,
5455
outbreaks,
5556
rmarkdown,
@@ -66,12 +67,13 @@ Config/testthat/edition: 3
6667
Encoding: UTF-8
6768
LazyData: true
6869
Roxygen: list(markdown = TRUE)
69-
RoxygenNote: 7.2.3
70+
RoxygenNote: 7.3.1
7071
Depends:
7172
R (>= 2.10)
7273
URL: https://cmu-delphi.github.io/epiprocess/
7374
Collate:
7475
'archive.R'
76+
'autoplot.R'
7577
'correlation.R'
7678
'data.R'
7779
'epi_df.R'
@@ -80,6 +82,7 @@ Collate:
8082
'methods-epi_archive.R'
8183
'grouped_epi_archive.R'
8284
'growth_rate.R'
85+
'key_colnames.R'
8386
'methods-epi_df.R'
8487
'outliers.R'
8588
'reexports.R'

NAMESPACE

+12
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ S3method(as_epi_df,tbl_df)
88
S3method(as_epi_df,tbl_ts)
99
S3method(as_tibble,epi_df)
1010
S3method(as_tsibble,epi_df)
11+
S3method(autoplot,epi_df)
1112
S3method(dplyr_col_modify,col_modify_recorder_df)
1213
S3method(dplyr_col_modify,epi_df)
1314
S3method(dplyr_reconstruct,epi_df)
@@ -20,6 +21,10 @@ S3method(group_by,grouped_epi_archive)
2021
S3method(group_by_drop_default,grouped_epi_archive)
2122
S3method(group_modify,epi_df)
2223
S3method(groups,grouped_epi_archive)
24+
S3method(key_colnames,data.frame)
25+
S3method(key_colnames,default)
26+
S3method(key_colnames,epi_archive)
27+
S3method(key_colnames,epi_df)
2328
S3method(next_after,Date)
2429
S3method(next_after,integer)
2530
S3method(print,epi_df)
@@ -34,6 +39,7 @@ export(arrange)
3439
export(as_epi_archive)
3540
export(as_epi_df)
3641
export(as_tsibble)
42+
export(autoplot)
3743
export(detect_outlr)
3844
export(detect_outlr_rm)
3945
export(detect_outlr_stl)
@@ -51,6 +57,7 @@ export(growth_rate)
5157
export(is_epi_archive)
5258
export(is_epi_df)
5359
export(is_grouped_epi_archive)
60+
export(key_colnames)
5461
export(max_version_with_row_in)
5562
export(mutate)
5663
export(new_epi_df)
@@ -61,6 +68,10 @@ export(slice)
6168
export(ungroup)
6269
export(unnest)
6370
importFrom(R6,R6Class)
71+
importFrom(checkmate,anyInfinite)
72+
importFrom(checkmate,assert)
73+
importFrom(checkmate,assert_character)
74+
importFrom(checkmate,assert_int)
6475
importFrom(cli,cli_inform)
6576
importFrom(data.table,":=")
6677
importFrom(data.table,address)
@@ -89,6 +100,7 @@ importFrom(dplyr,rename)
89100
importFrom(dplyr,select)
90101
importFrom(dplyr,slice)
91102
importFrom(dplyr,ungroup)
103+
importFrom(ggplot2,autoplot)
92104
importFrom(lubridate,days)
93105
importFrom(lubridate,weeks)
94106
importFrom(magrittr,"%>%")

R/autoplot.R

+166
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
#' Automatically plot an epi_df
2+
#'
3+
#' @param object An `epi_df`
4+
#' @param ... <[`tidy-select`][dplyr_tidy_select]> One or more unquoted
5+
#' expressions separated by commas. Variable names can be used as if they
6+
#' were positions in the data frame, so expressions like `x:y` can
7+
#' be used to select a range of variables.
8+
#' @param .color_by Which variables should determine the color(s) used to plot
9+
#' lines. Options include:
10+
#' * `all_keys` - the default uses the interaction of any key variables
11+
#' including the `geo_value`
12+
#' * `geo_value` - `geo_value` only
13+
#' * `other_keys` - any available keys that are not `geo_value`
14+
#' * `.response` - the numeric variables (same as the y-axis)
15+
#' * `all` - uses the interaction of all keys and numeric variables
16+
#' * `none` - no coloring aesthetic is applied
17+
#' @param .facet_by Similar to `.color_by` except that the default is to display
18+
#' each numeric variable on a separate facet
19+
#' @param .base_color Lines will be shown with this color. For example, with a
20+
#' single numeric variable and faceting by `geo_value`, all locations would
21+
#' share the same color line.
22+
#' @param .max_facets Cut down of the number of facets displayed. Especially
23+
#' useful for testing when there are many `geo_value`'s or keys.
24+
#'
25+
#' @return A ggplot object
26+
#' @export
27+
#'
28+
#' @examples
29+
#' autoplot(jhu_csse_daily_subset, cases, death_rate_7d_av)
30+
#' autoplot(jhu_csse_daily_subset, case_rate_7d_av, .facet_by = "geo_value")
31+
#' autoplot(jhu_csse_daily_subset, case_rate_7d_av,
32+
#' .color_by = "none",
33+
#' .facet_by = "geo_value"
34+
#' )
35+
#' autoplot(jhu_csse_daily_subset, case_rate_7d_av, .color_by = "none",
36+
#' .base_color = "red", .facet_by = "geo_value")
37+
#'
38+
#' # .base_color specification won't have any effect due .color_by default
39+
#' autoplot(jhu_csse_daily_subset, case_rate_7d_av,
40+
#' .base_color = "red", .facet_by = "geo_value")
41+
autoplot.epi_df <- function(
42+
object, ...,
43+
.color_by = c("all_keys", "geo_value", "other_keys", ".response", "all", "none"),
44+
.facet_by = c(".response", "other_keys", "all_keys", "geo_value", "all", "none"),
45+
.base_color = "#3A448F",
46+
.max_facets = Inf) {
47+
.color_by <- match.arg(.color_by)
48+
.facet_by <- match.arg(.facet_by)
49+
50+
assert(anyInfinite(.max_facets), assert_int(.max_facets), combine = "or")
51+
assert_character(.base_color, len = 1)
52+
53+
key_cols <- key_colnames(object)
54+
non_key_cols <- setdiff(names(object), key_cols)
55+
geo_and_other_keys <- kill_time_value(key_cols)
56+
57+
# --- check for numeric variables
58+
allowed <- purrr::map_lgl(object[non_key_cols], is.numeric)
59+
allowed <- allowed[allowed]
60+
if (length(allowed) == 0 && rlang::dots_n(...) == 0L) {
61+
cli::cli_abort("No numeric variables were available to plot automatically.",
62+
class = "epiprocess__no_numeric_vars_available")
63+
}
64+
vars <- tidyselect::eval_select(rlang::expr(c(...)), object)
65+
if (rlang::is_empty(vars)) { # find them automatically if unspecified
66+
vars <- tidyselect::eval_select(names(allowed)[1], object)
67+
cli::cli_warn(
68+
"Plot variable was unspecified. Automatically selecting {.var {names(allowed)[1]}}.",
69+
class = "epiprocess__unspecified_plot_var"
70+
)
71+
} else { # if variables were specified, ensure that they are numeric
72+
ok <- names(vars) %in% names(allowed)
73+
if (!any(ok)) {
74+
cli::cli_abort(
75+
"None of the requested variables {.var {names(vars)}} are numeric.",
76+
class = "epiprocess__all_requested_vars_not_numeric"
77+
)
78+
} else if (!all(ok)) {
79+
cli::cli_warn(c(
80+
"Only the requested variables {.var {names(vars)[ok]}} are numeric.",
81+
i = "`autoplot()` cannot display {.var {names(vars)[!ok]}}."
82+
),
83+
class = "epiprocess__some_requested_vars_not_numeric")
84+
vars <- vars[ok]
85+
}
86+
}
87+
88+
# --- create a viable df to plot
89+
pos <- tidyselect::eval_select(
90+
rlang::expr(c("time_value", tidyselect::all_of(geo_and_other_keys), names(vars))), object
91+
)
92+
if (length(vars) > 1) {
93+
object <- tidyr::pivot_longer(
94+
object[pos], tidyselect::all_of(names(vars)),
95+
values_to = ".response",
96+
names_to = ".response_name"
97+
)
98+
} else {
99+
object <- dplyr::rename(object[pos], .response := !!names(vars))
100+
}
101+
all_keys <- rlang::syms(as.list(geo_and_other_keys))
102+
other_keys <- rlang::syms(as.list(setdiff(geo_and_other_keys, "geo_value")))
103+
all_avail <- rlang::syms(as.list(c(geo_and_other_keys, ".response_name")))
104+
105+
object <- object %>%
106+
dplyr::mutate(
107+
.colours = switch(.color_by,
108+
all_keys = interaction(!!!all_keys, sep = "/"),
109+
geo_value = geo_value,
110+
other_keys = interaction(!!!other_keys, sep = "/"),
111+
all = interaction(!!!all_avail, sep = "/"),
112+
NULL
113+
),
114+
.facets = switch(.facet_by,
115+
all_keys = interaction(!!!all_keys, sep = "/"),
116+
geo_value = as.factor(geo_value),
117+
other_keys = interaction(!!!other_keys, sep = "/"),
118+
all = interaction(!!!all_avail, sep = "/"),
119+
NULL
120+
)
121+
)
122+
123+
if (.max_facets < Inf && ".facets" %in% names(object)) {
124+
n_facets <- nlevels(object$.facets)
125+
if (n_facets > .max_facets) {
126+
top_n <- levels(as.factor(object$.facets))[seq_len(.max_facets)]
127+
object <- dplyr::filter(object, .facets %in% top_n) %>%
128+
dplyr::mutate(.facets = droplevels(.facets))
129+
if (".colours" %in% names(object)) {
130+
object <- dplyr::mutate(object, .colours = droplevels(.colours))
131+
}
132+
}
133+
}
134+
135+
p <- ggplot2::ggplot(object, ggplot2::aes(x = .data$time_value)) +
136+
ggplot2::theme_bw()
137+
138+
if (".colours" %in% names(object)) {
139+
p <- p + ggplot2::geom_line(
140+
ggplot2::aes(y = .data$.response, colour = .data$.colours),
141+
key_glyph = "timeseries"
142+
) +
143+
ggplot2::scale_colour_viridis_d(name = "")
144+
} else if (length(vars) > 1 && .color_by == ".response") {
145+
p <- p +
146+
ggplot2::geom_line(ggplot2::aes(
147+
y = .data$.response, colour = .data$.response_name
148+
)) +
149+
ggplot2::scale_colour_viridis_d(name = "")
150+
} else { # none
151+
p <- p +
152+
ggplot2::geom_line(ggplot2::aes(y = .data$.response), color = .base_color)
153+
}
154+
155+
if (".facets" %in% names(object)) {
156+
p <- p + ggplot2::facet_wrap(~.facets, scales = "free_y") +
157+
ggplot2::ylab(names(vars))
158+
if (.facet_by == "all") p <- p + ggplot2::ylab("")
159+
} else if ((length(vars) > 1 && .facet_by == ".response")) {
160+
p <- p + ggplot2::facet_wrap(~.response_name, scales = "free_y") +
161+
ggplot2::ylab("")
162+
} else {
163+
p <- p + ggplot2::ylab(names(vars))
164+
}
165+
p
166+
}

R/epiprocess.R

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#' measured over space and time, and offers associated utilities to perform
55
#' basic signal processing tasks.
66
#'
7-
#' @docType package
7+
#' @importFrom checkmate assert assert_character assert_int anyInfinite
88
#' @name epiprocess
9-
NULL
9+
"_PACKAGE"
1010
utils::globalVariables(c(".x", ".group_key", ".ref_time_value"))

R/key_colnames.R

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#' Grab any keys associated to an epi_df
2+
#'
3+
#' @param x a data.frame, tibble, or epi_df
4+
#' @param ... additional arguments passed on to methods
5+
#'
6+
#' @return If an `epi_df`, this returns all "keys". Otherwise `NULL`
7+
#' @keywords internal
8+
#' @export
9+
key_colnames <- function(x, ...) {
10+
UseMethod("key_colnames")
11+
}
12+
13+
#' @export
14+
key_colnames.default <- function(x, ...) {
15+
character(0L)
16+
}
17+
18+
#' @export
19+
key_colnames.data.frame <- function(x, other_keys = character(0L), ...) {
20+
assert_character(other_keys)
21+
nm <- c("time_value", "geo_value", other_keys)
22+
intersect(nm, colnames(x))
23+
}
24+
25+
#' @export
26+
key_colnames.epi_df <- function(x, ...) {
27+
other_keys <- attr(x, "metadata")$other_keys
28+
c("time_value", "geo_value", other_keys)
29+
}
30+
31+
#' @export
32+
key_colnames.epi_archive <- function(x, ...) {
33+
other_keys <- attr(x, "metadata")$other_keys
34+
c("time_value", "geo_value", other_keys)
35+
}
36+
37+
kill_time_value <- function(v) {
38+
assert_character(v)
39+
v[v != "time_value"]
40+
}

R/reexports.R

+7
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,10 @@ dplyr::slice
5555
#' @importFrom tidyr unnest
5656
#' @export
5757
tidyr::unnest
58+
59+
60+
# ggplot2 -----------------------------------------------------------------
61+
62+
#' @importFrom ggplot2 autoplot
63+
#' @export
64+
ggplot2::autoplot

R/utils.R

+7-7
Original file line numberDiff line numberDiff line change
@@ -478,13 +478,13 @@ quiet <- function(x) {
478478

479479
# Create an auto-named list
480480
enlist <- function(...) {
481-
x <- list(...)
482-
n <- as.character(sys.call())[-1]
483-
if (!is.null(n0 <- names(x))) {
484-
n[n0 != ""] <- n0[n0 != ""]
485-
}
486-
names(x) <- n
487-
return(x)
481+
# converted to thin wrapper around
482+
rlang::dots_list(
483+
...,
484+
.homonyms = "error",
485+
.named = TRUE,
486+
.check_assign = TRUE
487+
)
488488
}
489489

490490
# Variable assignment from a list. NOT USED. Something is broken, this doesn't

_pkgdown.yml

+5-1
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,13 @@ reference:
8080
- archive_cases_dv_subset
8181
- incidence_num_outlier_example
8282
- contains("jhu_csse")
83+
- title: Basic automatic plotting
84+
- contents:
85+
- autoplot.epi_df
8386
- title: internal
84-
contents:
87+
- contents:
8588
- epiprocess
8689
- max_version_with_row_in
8790
- next_after
8891
- guess_period
92+
- key_colnames

0 commit comments

Comments
 (0)