@@ -3,42 +3,44 @@ n_days <- 40
3
3
removed_date <- 10
4
4
simple_dates <- seq(as.Date(" 2012-01-01" ), by = " day" , length.out = n_days )
5
5
simple_dates <- simple_dates [- removed_date ]
6
- rand_vals <- rnorm(n_days - 1 )
6
+ rand_vals <- rnorm(n_days - 1 )
7
7
8
8
# Two states, with 2 variables. a is linear, going up in one state and down in the other
9
9
# b is just random
10
10
# note that day 10 is missing
11
11
epi_data <- epiprocess :: as_epi_df(rbind(tibble(
12
12
geo_value = " al" ,
13
13
time_value = simple_dates ,
14
- a = 1 : (n_days - 1 ),
14
+ a = 1 : (n_days - 1 ),
15
15
b = rand_vals
16
16
), tibble(
17
17
geo_value = " ca" ,
18
18
time_value = simple_dates ,
19
- a = (n_days - 1 ): 1 ,
19
+ a = (n_days - 1 ): 1 ,
20
20
b = rand_vals + 10
21
21
)))
22
22
test_that(" rolling_mean generates correct mean" , {
23
23
rolled <- rolling_mean(epi_data )
24
24
rolled
25
25
expect_equal(names(rolled ), c(" geo_value" , " time_value" , " a" , " b" , " a_m7" , " b_m7" ))
26
26
# hand specified rolling mean with a rear window of 7, noting that mean(1:7) = 4
27
- linear_roll_mean <- c(seq(from = 1 , to = 4 , by = .5 ), seq(from = 4. 5 , to = 35.5 , by = 1 ))
28
- # day 10 is missing, so days 11-18 are thrown off
29
- lag_st <- 10
30
- unusual_days <- c(mean(c(( lag_st ) : ( lag_st - 6 ))), mean(c(( lag_st + 1 ) : ( lag_st + 1 - 6 ))), mean(c(( lag_st + 2 ) : ( lag_st + 2 - 6 ))), mean(c(( lag_st + 3 ) : ( lag_st + 3 - 6 ))), mean(c(( lag_st + 4 ) : ( lag_st + 4 - 6 ))), mean(c(( lag_st + 5 ): (lag_st + 5 - 6 ))), mean(c(( lag_st + 6 ) : ( lag_st + 6 - 6 ) )))
27
+ linear_roll_mean <- c(seq(from = 1 , by = .5 , length.out = 7 ), seq(from = 5 , to = 36 , by = 1 ))
28
+ # day 10 is missing, so the average days 11-16 are thrown off, only using 6 values instead of 7
29
+ gap_starts <- epi_data % > % filter( geo_value == " al " & time_value == as.Date( " 2012-01-11 " )) % > % pull( a )
30
+ unusual_days <- map_vec(seq( from = 0 , to = 5 ), \( d ) mean((( gap_starts + d ) - 0 ): (( gap_starts + d ) - 5 )))
31
31
# stitching the lag induced hiccup into the "normal" mean values
32
- expected_mean <- c(linear_roll_mean [1 : 9 ], unusual_days , linear_roll_mean [17 : (n_days - 1 )])
32
+ expected_mean <- c(linear_roll_mean [1 : 9 ], unusual_days , linear_roll_mean [16 : (n_days - 1 )])
33
+ expected_mean
33
34
34
35
expect_equal(rolled %> % filter(geo_value == " al" ) %> % pull(" a_m7" ), expected_mean )
36
+ # Doing the same for California
35
37
# same, but "ca" is reversed, noting mean(40:(40-7)) =36.5
36
- linear_reverse_roll_mean <- c(seq(from = 39 , to = 35 .5 , by = - 0.5 ), seq(from = 34.5 , to = 4.5 , by = - 1 ))
37
- lag_st <- 36
38
- # day 10 is missing, so days 11-18 are thrown off
39
- unusual_days <- c(mean(c(( lag_st ) : ( lag_st - 6 ))), mean(c(( lag_st - 1 ) : ( lag_st - 1 - 6 ))), mean(c(( lag_st - 2 ) : ( lag_st - 2 - 6 ))), mean(c(( lag_st - 3 ) : ( lag_st - 3 - 6 ))), mean(c(( lag_st - 4 ) : ( lag_st - 4 - 6 ))), mean(c(( lag_st - 5 ): (lag_st - 5 - 6 ))), mean(c(( lag_st - 6 ) : ( lag_st - 6 - 6 ) )))
38
+ linear_reverse_roll_mean <- c(seq(from = 39 , by = - 0 .5 , length.out = 7 ), seq(from = 35 , to = 4 , by = - 1 ))
39
+ # day 10 is missing, so days 11-16 are thrown off
40
+ gap_starts <- epi_data % > % filter( geo_value == " ca " & time_value == as.Date( " 2012-01-11 " )) % > % pull( a )
41
+ unusual_days <- map_vec(seq( from = 0 , to = 5 ), \( d ) mean((( gap_starts - d ) + 0 ): (( gap_starts - d ) + 5 )))
40
42
# stitching the lag induced hiccup into the "normal" mean values
41
- expected_mean <- c(linear_reverse_roll_mean [1 : 9 ], unusual_days , linear_reverse_roll_mean [17 : (n_days - 1 )])
43
+ expected_mean <- c(linear_reverse_roll_mean [1 : 9 ], unusual_days , linear_reverse_roll_mean [16 : (n_days - 1 )])
42
44
# actually testing
43
45
expect_equal(rolled %> % filter(geo_value == " ca" ) %> % pull(" a_m7" ), expected_mean )
44
46
@@ -47,16 +49,16 @@ test_that("rolling_mean generates correct mean", {
47
49
})
48
50
49
51
test_that(" rolling_sd generates correct standard deviation" , {
50
- rolled <- rolling_sd(epi_data ,keep_mean = TRUE )
52
+ rolled <- rolling_sd(epi_data , keep_mean = TRUE )
51
53
expect_equal(names(rolled ), c(" geo_value" , " time_value" , " a" , " b" , " a_m14" , " a_sd28" , " b_m14" , " b_sd28" ))
52
54
# hand specified rolling mean with a rear window of 7, noting that mean(1:14) = 7.5
53
55
linear_roll_mean <- c(seq(from = 1 , to = 7.5 , by = .5 ), seq(from = 8.5 , to = 16.5 , by = 1 ), seq(from = 17 , to = 32 , by = 1 ))
54
56
linear_roll_mean
55
57
expect_equal(rolled %> % filter(geo_value == " al" ) %> % pull(" a_m14" ), linear_roll_mean )
56
58
# and the standard deviation is
57
- linear_roll_mean <- append(linear_roll_mean , NA , after = removed_date - 1 )
59
+ linear_roll_mean <- append(linear_roll_mean , NA , after = removed_date - 1 )
58
60
linear_values <- 1 : 39
59
- linear_values <- append(linear_values , NA , after = removed_date - 1 )
61
+ linear_values <- append(linear_values , NA , after = removed_date - 1 )
60
62
linear_roll_sd <- sqrt(slider :: slide_dbl((linear_values - linear_roll_mean )^ 2 , \(x ) mean(x , na.rm = TRUE ), .before = 28 ))
61
63
# drop the extra date caused by the inclusion of the NAs
62
64
linear_roll_sd <- linear_roll_sd [- (removed_date )]
@@ -75,10 +77,10 @@ test_that("get_trainable_names pulls out mean and sd columns", {
75
77
# TODO example with NA's, example with missing days, only one column, keep_mean
76
78
77
79
test_that(" update_predictors keeps unmodified predictors" , {
78
- epi_data [" c" ] = NaN
79
- epi_data [" d" ] = NaN
80
- epi_data [" b_m14" ] = NaN
81
- epi_data [" b_sd28" ] = NaN
80
+ epi_data [" c" ] <- NaN
81
+ epi_data [" d" ] <- NaN
82
+ epi_data [" b_m14" ] <- NaN
83
+ epi_data [" b_sd28" ] <- NaN
82
84
predictors <- c(" a" , " b" , " c" ) # everything but d
83
85
modified <- c(" b" , " c" ) # we want to exclude b but not its modified versions
84
86
expected_predictors <- c(" a" , " b_m14" , " b_sd28" )
0 commit comments