Skip to content

Commit 7e93071

Browse files
committed
Extend by to multi variables in grouped_cor()
- This was basically "for free" implementation-wise; just needed to update the documentation to reflect this - Rename to `grouped_cor()` from `sliced_cor()`; update all uses of the word "slice" in documentation and vignettes accordingly
1 parent 26f6342 commit 7e93071

File tree

8 files changed

+74
-127
lines changed

8 files changed

+74
-127
lines changed

NAMESPACE

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@ export(Start)
1616
export(Sum)
1717
export(as.epi_signal)
1818
export(estimate_deriv)
19+
export(grouped_cor)
1920
export(pct_change)
2021
export(quiet)
21-
export(sliced_cor)
2222
export(slide_by_geo)
2323
importFrom(dplyr,arrange)
2424
importFrom(dplyr,group_by)

R/cor.R

+15-25
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#' Compute correlations between variables in an `epi_signal` object
22
#'
33
#' Computes correlations between variables in an `epi_signal` object, allowing
4-
#' for slicing by geo location, or by time. See the [correlations
4+
#' for grouping by geo value, time value, or other variables. See the [correlations
55
#' vignette](https://cmu-delphi.github.io/epitools/articles/correlations.html)
66
#' for examples.
77
#'
@@ -13,25 +13,28 @@
1313
#' then the new value on June 2 is the original value on June 1; if `dt = 1`,
1414
#' then the new value on June 2 is the original value on June 3; if `dt = 0`,
1515
#' then the values are left as is. Default is 0 for both `dt1` and `dt2`.
16-
#' @param by If `geo_value`, then correlations are computed for each geo value,
17-
#' over all time (each correlation is measured between two time series at the
18-
#' same location). If `time_value`, then correlations are computed for each
19-
#' time, over all geo values (each correlation is measured between two
20-
#' vectors at one time). Default is `geo_value`.
16+
#' @param by The variable or variables to group by, before computing
17+
#' correlations. If `geo_value`, the default, then correlations are computed
18+
#' for each geo value, over all time; if `time_value`, then correlations are
19+
#' computed for each time, over all geo values. This can alternatively be
20+
#' specified using any number of columns of `x`; for example, we can use `by =
21+
#' c(geo_value, age_group)`, if `x` has a column `age_group` containing the
22+
#' age group associated with the measurements, to compute correlations for
23+
#' each pair of geo value and age group.
2124
#' @param use,method Arguments to pass to `cor()`, with "na.or.complete" the
2225
#' default for `use` (different than `cor()`) and "pearson" the default for
2326
#' `method` (same as `cor()`).
2427
#'
25-
#' @return An tibble with first column `geo_value` or `time_value` (depending on
26-
#' `by`), and second column `cor`, which gives the correlation.
28+
#' @return A tibble with the grouping columns first (`geo_value`, `time_value`,
29+
#' or possibly others), and then a column `cor`, which gives the correlation.
2730
#'
2831
#' @importFrom dplyr arrange group_by mutate summarize ungroup
2932
#' @importFrom stats cor
3033
#' @importFrom rlang .data enquo
3134
#' @export
32-
sliced_cor = function(x, var1, var2, dt1 = 0, dt2 = 0,
33-
by = geo_value, use = "na.or.complete",
34-
method = c("pearson", "kendall", "spearman")) {
35+
grouped_cor = function(x, var1, var2, dt1 = 0, dt2 = 0,
36+
by = geo_value, use = "na.or.complete",
37+
method = c("pearson", "kendall", "spearman")) {
3538
# Check we have an `epi_signal` object
3639
if (!inherits(x, "epi_signal")) abort("`x` be of class `epi_signal`.")
3740

@@ -41,23 +44,10 @@ sliced_cor = function(x, var1, var2, dt1 = 0, dt2 = 0,
4144
var1 = enquo(var1)
4245
var2 = enquo(var2)
4346

44-
# Which way to slice? Which method?
47+
# Which grouping? Which method?
4548
by = enquo(by)
4649
method = match.arg(method)
4750

48-
## # Join the two data frames together by pairs of geo_value and time_value
49-
## z = dplyr::full_join(x, y, by = c("geo_value", "time_value"))
50-
51-
## # Make sure that we have a complete record of dates for each geo_value
52-
## z_all = dplyr::group_by(z, .data$geo_value) %>%
53-
## dplyr::summarize(time_value = seq.Date(
54-
## as.Date(min(.data$time_value)),
55-
## as.Date(max(.data$time_value)),
56-
## by = "day")) %>%
57-
## dplyr::ungroup()
58-
59-
## z = dplyr::full_join(z, z_all, by = c("geo_value", "time_value"))
60-
6151
# Perform time shifts, then compute appropriate correlations and return
6252
return(x %>%
6353
group_by(.data$geo_value) %>%

docs/articles/correlations.html

+15-14
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/pkgdown.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,5 @@ articles:
88
pct-change: pct-change.html
99
sensorization: sensorization.html
1010
slide: slide.html
11-
last_built: 2021-10-25T15:13Z
11+
last_built: 2021-10-25T20:39Z
1212

0 commit comments

Comments
 (0)