Skip to content

Commit 802d11b

Browse files
committed
Merge branch 'ndefries/backfill/speed2' into ndefries/backfill/speed-join-v-merge-order-matters
2 parents c1a9e10 + d8c61e6 commit 802d11b

File tree

4 files changed

+5
-10
lines changed

4 files changed

+5
-10
lines changed

backfill_corrections/delphiBackfillCorrection/R/model.R

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -208,22 +208,17 @@ get_model <- function(model_path, train_data, covariates, tau,
208208
" does not exist; training new model")
209209
}
210210
# Quantile regression
211-
## TODO: how does the speed compare using GLPK? Apparently it's faster on smaller
212-
# models.
213211
obj <- quantile_lasso(as.matrix(train_data[covariates]),
214212
train_data$log_value_target, tau = tau,
215213
lambda = lambda, standardize = FALSE, lp_solver = lp_solver)
216214

217215
# Save model to cache.
218216
create_dir_not_exist(dirname(model_path))
219-
## TODO: save() is fairly slow. Since we're not sharing the model files, can we
220-
# use saveRDS() instead?
221217
save(obj, file=model_path)
222218
} else {
223219
# Load model from cache invisibly. Object has the same name as the original
224220
# model object, `obj`.
225221
msg_ts("Loading from ", model_path)
226-
## TODO: readRDS()
227222
load(model_path)
228223
}
229224

backfill_corrections/delphiBackfillCorrection/R/preprocessing.R

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,7 @@ get_7dav <- function(pivot_df, refd_col) {
9393
# Keep time values at the front
9494
pivot_df[, refd_col],
9595
# Compute moving average of all non-refd columns
96-
RcppRoll::roll_mean(
96+
roll_mean(
9797
as.matrix(pivot_df[, names(pivot_df)[names(pivot_df) != refd_col]]),
9898
7L, align = "right", fill = NA
9999
)
@@ -158,7 +158,7 @@ get_weekofmonth <- function(date) {
158158
month <- month(date)
159159
day <- day(date)
160160
firstdayofmonth <- as.numeric(format(make_date(year, month, 1), format="%u"))
161-
n_days <- lubridate::days_in_month(date)
161+
n_days <- days_in_month(date)
162162
n_weeks <- (n_days + firstdayofmonth - 1) %/% 7 + 1
163163
extra_check <- as.integer(n_weeks > 5)
164164
return (max((day + firstdayofmonth - 1) %/% 7 - extra_check, 0) %% 4 + 1)

backfill_corrections/delphiBackfillCorrection/R/utils.R

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -205,7 +205,7 @@ training_days_check <- function(issue_date, training_days) {
205205
get_populous_counties <- function() {
206206
return(
207207
covidcast::county_census %>%
208-
dplyr::select(pop = POPESTIMATE2019, fips = FIPS) %>%
208+
select(pop = POPESTIMATE2019, fips = FIPS) %>%
209209
# Drop megacounties (states)
210210
filter(!endsWith(fips, "000")) %>%
211211
arrange(desc(pop)) %>%

backfill_corrections/delphiBackfillCorrection/unit-tests/testthat/test-preprocessing.R

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -50,15 +50,15 @@ test_that("testing NA filling for missing udpates", {
5050
test_that("testing the calculation of 7-day moving average", {
5151
df_new <- fill_rows(fake_df, refd_col, lag_col, min_refd, max_refd, ref_lag)
5252
df <- fill_missing_updates(df_new, value_col, refd_col, lag_col)
53-
df$issue_date <- df[[refd_col]] + df[[lag_col]]
53+
df$issue_date <- as.Date(df[[refd_col]]) + df[[lag_col]]
5454
pivot_df <- df[order(df$issue_date, decreasing=FALSE), ] %>%
5555
pivot_wider(id_cols=refd_col, names_from="issue_date",
5656
values_from="value_raw")
5757
pivot_df[is.na(pivot_df)] = 0
5858
backfill_df <- get_7dav(pivot_df, refd_col)
5959

6060

61-
output <- backfill_df[backfill_df[[refd_col]] == as.Date("2022-01-07"), "value_raw"]
61+
output <- backfill_df[backfill_df[[refd_col]] == "2022-01-07", "value_raw"]
6262
expected <- colSums(pivot_df[, -1]) / 7
6363
expect_true(all(output == expected))
6464
})

0 commit comments

Comments
 (0)