1
1
# ' Compute correlations between variables in an `epi_signal` object
2
2
# '
3
3
# ' Computes correlations between variables in an `epi_signal` object, allowing
4
- # ' for slicing by geo location, or by time . See the [correlations
4
+ # ' for grouping by geo value, time value, or other variables. See the [correlations
5
5
# ' vignette](https://cmu-delphi.github.io/epitools/articles/correlations.html)
6
6
# ' for examples.
7
7
# '
13
13
# ' then the new value on June 2 is the original value on June 1; if `dt = 1`,
14
14
# ' then the new value on June 2 is the original value on June 3; if `dt = 0`,
15
15
# ' then the values are left as is. Default is 0 for both `dt1` and `dt2`.
16
- # ' @param by If `geo_value`, then correlations are computed for each geo value,
17
- # ' over all time (each correlation is measured between two time series at the
18
- # ' same location). If `time_value`, then correlations are computed for each
19
- # ' time, over all geo values (each correlation is measured between two
20
- # ' vectors at one time). Default is `geo_value`.
16
+ # ' @param by The variable or variables to group by, before computing
17
+ # ' correlations. If `geo_value`, the default, then correlations are computed
18
+ # ' for each geo value, over all time; if `time_value`, then correlations are
19
+ # ' computed for each time, over all geo values. This can alternatively be
20
+ # ' specified using any number of columns of `x`; for example, we can use `by =
21
+ # ' c(geo_value, age_group)`, if `x` has a column `age_group` containing the
22
+ # ' age group associated with the measurements, to compute correlations for
23
+ # ' each pair of geo value and age group.
21
24
# ' @param use,method Arguments to pass to `cor()`, with "na.or.complete" the
22
25
# ' default for `use` (different than `cor()`) and "pearson" the default for
23
26
# ' `method` (same as `cor()`).
24
27
# '
25
- # ' @return An tibble with first column `geo_value` or `time_value` (depending on
26
- # ' `by` ), and second column `cor`, which gives the correlation.
28
+ # ' @return A tibble with the grouping columns first (`geo_value`, `time_value`,
29
+ # ' or possibly others), and then a column `cor`, which gives the correlation.
27
30
# '
28
31
# ' @importFrom dplyr arrange group_by mutate summarize ungroup
29
32
# ' @importFrom stats cor
30
33
# ' @importFrom rlang .data enquo
31
34
# ' @export
32
- sliced_cor = function (x , var1 , var2 , dt1 = 0 , dt2 = 0 ,
33
- by = geo_value , use = " na.or.complete" ,
34
- method = c(" pearson" , " kendall" , " spearman" )) {
35
+ grouped_cor = function (x , var1 , var2 , dt1 = 0 , dt2 = 0 ,
36
+ by = geo_value , use = " na.or.complete" ,
37
+ method = c(" pearson" , " kendall" , " spearman" )) {
35
38
# Check we have an `epi_signal` object
36
39
if (! inherits(x , " epi_signal" )) abort(" `x` be of class `epi_signal`." )
37
40
@@ -41,23 +44,10 @@ sliced_cor = function(x, var1, var2, dt1 = 0, dt2 = 0,
41
44
var1 = enquo(var1 )
42
45
var2 = enquo(var2 )
43
46
44
- # Which way to slice ? Which method?
47
+ # Which grouping ? Which method?
45
48
by = enquo(by )
46
49
method = match.arg(method )
47
50
48
- # # # Join the two data frames together by pairs of geo_value and time_value
49
- # # z = dplyr::full_join(x, y, by = c("geo_value", "time_value"))
50
-
51
- # # # Make sure that we have a complete record of dates for each geo_value
52
- # # z_all = dplyr::group_by(z, .data$geo_value) %>%
53
- # # dplyr::summarize(time_value = seq.Date(
54
- # # as.Date(min(.data$time_value)),
55
- # # as.Date(max(.data$time_value)),
56
- # # by = "day")) %>%
57
- # # dplyr::ungroup()
58
-
59
- # # z = dplyr::full_join(z, z_all, by = c("geo_value", "time_value"))
60
-
61
51
# Perform time shifts, then compute appropriate correlations and return
62
52
return (x %> %
63
53
group_by(.data $ geo_value ) %> %
0 commit comments