 #' @template indicator-template
 #' @template signal-template
 #'
-#' @importFrom dplyr %>% filter select group_by summarize across everything group_split ungroup
+#' @importFrom dplyr %>% filter group_by summarize across everything group_split ungroup
 #' @importFrom tidyr drop_na
-#' @importFrom rlang .data .env
 #'
 #' @export
 run_backfill <- function(df, params,
                          refd_col = "time_value", lag_col = "lag", issued_col = "issue_date",
                          signal_suffixes = c(""), indicator = "", signal = "") {
-  df <- filter(df, .data$lag < params$ref_lag + 30) # a rough filtration to save memory
+  df <- filter(df, lag < params$ref_lag + 30) # a rough filtration to save memory
 
   geo_levels <- params$geo_levels
   if ("state" %in% geo_levels) {
@@ -34,16 +33,17 @@ run_backfill <- function(df, params,
       # Aggregate counties up to state level
       agg_cols <- c("geo_value", issued_col, refd_col, lag_col)
       # Sum all non-agg columns. Summarized columns keep original names
+      df$geo_value <- df$state_id
+      df$state_id <- NULL
       df <- df %>%
-        select(-.data$geo_value, geo_value = .data$state_id) %>%
         group_by(across(agg_cols)) %>%
         summarize(across(everything(), sum)) %>%
         ungroup()
     }
     if (geo_level == "county") {
       # Keep only 200 most populous (within the US) counties
       top_200_geos <- get_populous_counties()
-      df <- filter(df, .data$geo_value %in% top_200_geos)
+      df <- filter(df, geo_value %in% top_200_geos)
     }
 
     test_data_list <- list()
@@ -58,7 +58,7 @@ run_backfill <- function(df, params,
     }
 
     msg_ts("Splitting data into geo groups")
-    group_dfs <- group_split(df, .data$geo_value)
+    group_dfs <- group_split(df, geo_value)
 
     # Build model for each location
     for (subdf in group_dfs) {
@@ -112,15 +112,15 @@ run_backfill <- function(df, params,
       )
     }
     combined_df <- add_params_for_dates(combined_df, refd_col, lag_col)
-    combined_df <- combined_df %>% filter(.data$lag < params$ref_lag)
+    combined_df <- combined_df %>% filter(lag < params$ref_lag)
 
     geo_train_data <- combined_df %>%
-      filter(.data$issue_date < params$training_end_date) %>%
-      filter(.data$target_date <= params$training_end_date) %>%
-      filter(.data$target_date > params$training_start_date) %>%
+      filter(issue_date < params$training_end_date) %>%
+      filter(target_date <= params$training_end_date) %>%
+      filter(target_date > params$training_start_date) %>%
       drop_na()
     geo_test_data <- combined_df %>%
-      filter(.data$issue_date %in% params$test_dates) %>%
+      filter(issue_date %in% params$test_dates) %>%
       drop_na()
 
     if (nrow(geo_test_data) == 0) {
@@ -135,8 +135,8 @@ run_backfill <- function(df, params,
     if (value_type == "fraction") {
       # Use beta prior approach to adjust fractions
       geo_prior_test_data = combined_df %>%
-        filter(.data$issue_date > min(params$test_dates) - 7) %>%
-        filter(.data$issue_date <= max(params$test_dates))
+        filter(issue_date > min(params$test_dates) - 7) %>%
+        filter(issue_date <= max(params$test_dates))
       updated_data <- frac_adj(geo_train_data, geo_test_data, geo_prior_test_data,
                                indicator = indicator, signal = signal,
                                geo_level = geo_level, signal_suffix = signal_suffix,
@@ -236,9 +236,8 @@ run_backfill <- function(df, params,
 #' @template lag_col-template
 #' @template issued_col-template
 #'
-#' @importFrom dplyr bind_rows mutate %>%
+#' @importFrom dplyr bind_rows %>%
 #' @importFrom parallel detectCores
-#' @importFrom rlang .data :=
 #' @importFrom stringr str_interp
 #'
 #' @export
@@ -251,7 +250,7 @@ main <- function(params,
 
   indicators_subset <- INDICATORS_AND_SIGNALS
   if (params$indicators != "all") {
-    indicators_subset <- filter(indicators_subset, .data$indicator == params$indicators)
+    indicators_subset <- filter(indicators_subset, indicator == params$indicators)
   }
   if (nrow(indicators_subset) == 0) {
     stop("no indicators to process")
@@ -307,14 +306,12 @@ main <- function(params,
   input_data <- lapply(
     files_list,
     function(file) {
-      read_data(file) %>%
-        fips_to_geovalue() %>%
-        mutate(
-          # Use `glue` syntax to construct a new field by variable,
-          # from https://stackoverflow.com/a/26003971/14401472
-          "{refd_col}" := as.Date(.data[[refd_col]], "%Y-%m-%d"),
-          "{issued_col}" := as.Date(.data[[issued_col]], "%Y-%m-%d")
-        )
+      df <- read_data(file) %>%
+        fips_to_geovalue()
+      df[[refd_col]] <- as.Date(df[[refd_col]], "%Y-%m-%d")
+      df[[issued_col]] <- as.Date(df[[issued_col]], "%Y-%m-%d")
+
+      return(df)
     }
   ) %>%
     bind_rows()
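For reference, a minimal, self-contained sketch of the two idioms the updated code relies on: dplyr's data masking lets `filter()` and `group_split()` take bare column names directly, and a column whose name is stored in a variable can be converted with plain `[[` assignment rather than `mutate()` with glue-style `:=`. The data frame, values, and variable names below are invented for illustration only.

```r
library(dplyr)

# Toy input standing in for one indicator file; all values are made up.
df <- data.frame(
  geo_value  = c("pa", "pa", "ny"),
  time_value = c("2021-01-01", "2021-01-02", "2021-01-01"),
  issue_date = c("2021-01-05", "2021-01-06", "2021-01-05"),
  lag        = c(4, 4, 4)
)

refd_col <- "time_value"
issued_col <- "issue_date"

# Column names held in variables: convert by direct assignment, no tidy-eval.
df[[refd_col]] <- as.Date(df[[refd_col]], "%Y-%m-%d")
df[[issued_col]] <- as.Date(df[[issued_col]], "%Y-%m-%d")

# Data masking: bare column names resolve to columns of `df`.
recent <- filter(df, lag < 30)
by_geo <- group_split(recent, geo_value)
```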