Skip to content

Commit 2105797

Browse files
committed
Fix var shifting in advanced.Rmd: by geo group, handling time gaps
1 parent d59314e commit 2105797

File tree

1 file changed

+26
-15
lines changed

1 file changed

+26
-15
lines changed

vignettes/advanced.Rmd

+26-15
Original file line numberDiff line numberDiff line change
@@ -239,24 +239,35 @@ prob_arx <- function(x, y, geo_value, time_value, lags = c(0, 7, 14),
239239
lags = rep(lags, length.out = ncol(x))
240240
241241
# Build features and response for the AR model, and then fit it
242-
dat <- do.call(
243-
data.frame,
244-
unlist( # Below we loop through and build the lagged features
245-
purrr::map(1:ncol(x), function(i) {
246-
purrr::map(lags[[i]], function(lag) dplyr::lag(x[,i], n = lag))
247-
}),
248-
recursive = FALSE
249-
)
250-
)
251-
names(dat) <- paste0("x", 1:ncol(dat))
252-
dat$y <- dplyr::lead(y, n = ahead)
253-
obj <- lm(y ~ ., data = dat)
242+
dat =
243+
# Make each (covariate,lag) request into a row of a tibble...
244+
tibble::tibble(i = seq_len(ncol(x)), lag=lags) %>%
245+
tidyr::unchop(lag) %>%
246+
# ... assign names we know will won't conflict...
247+
dplyr::mutate(name = paste0("x",seq_len(nrow(.)))) %>%
248+
# ... turn each of these requests into a separate tibble with indexing columns...
249+
purrr::pmap(function(i, lag, name) {
250+
tibble::tibble(
251+
geo_value = geo_value,
252+
time_value = time_value + lag, # shifting variable back by `lag` = adding `lag` to time_values
253+
!!name := x[,i]
254+
)
255+
}) %>%
256+
# ... as well as one for the desired shift of y...
257+
c(list(tibble(
258+
geo_value = geo_value,
259+
time_value = time_value - ahead, # shifting variable forward by `ahead` = subtracting `lead` from time_values
260+
y = y
261+
))) %>%
262+
# ... and combine them together.
263+
purrr::reduce(full_join, by=c("geo_value","time_value"))
264+
dat1 = copy(dat)
265+
obj <- lm(y ~ ., data = select(dat, -geo_value, -time_value))
254266
255267
# Use LOCF to fill NAs in the latest feature values. Do it by geo value, and
256268
# use data.table functionality for this
257-
cols <- names(dat)
258-
dat <- cbind(dat, data.frame(geo_value, time_value))
259-
dat <- as.data.table(dat)
269+
cols <- setdiff(names(dat), c("geo_value","time_value"))
270+
setDT(dat)
260271
dat[, (cols) := nafill(.SD, type = "locf"), .SDcols = cols, by = "geo_value"]
261272
262273
# Make predictions

0 commit comments

Comments
 (0)