Skip to content

Commit cad3600

Browse files
authored
Merge pull request #1806 from cmu-delphi/ndefries/backfill/speed-join-v-merge-order-matters
[Backfill corrections] Use `dplyr` joins in `add_7dav` fn
2 parents b301361 + 802d11b commit cad3600

File tree

2 files changed

+14
-7
lines changed

2 files changed

+14
-7
lines changed

backfill_corrections/delphiBackfillCorrection/NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,9 +31,11 @@ importFrom(dplyr,bind_rows)
3131
importFrom(dplyr,desc)
3232
importFrom(dplyr,everything)
3333
importFrom(dplyr,filter)
34+
importFrom(dplyr,full_join)
3435
importFrom(dplyr,group_by)
3536
importFrom(dplyr,group_split)
3637
importFrom(dplyr,if_else)
38+
importFrom(dplyr,left_join)
3739
importFrom(dplyr,pull)
3840
importFrom(dplyr,select)
3941
importFrom(dplyr,starts_with)
@@ -48,6 +50,7 @@ importFrom(lubridate,month)
4850
importFrom(lubridate,year)
4951
importFrom(parallel,detectCores)
5052
importFrom(purrr,map_dfc)
53+
importFrom(purrr,reduce)
5154
importFrom(quantgen,quantile_lasso)
5255
importFrom(readr,write_csv)
5356
importFrom(stats,coef)

backfill_corrections/delphiBackfillCorrection/R/preprocessing.R

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -187,6 +187,8 @@ add_weekofmonth <- function(df, time_col, wm = WEEK_ISSUES) {
187187
#' @template lag_col-template
188188
#' @template ref_lag-template
189189
#'
190+
#' @importFrom dplyr full_join left_join
191+
#' @importFrom purrr reduce
190192
#' @importFrom tidyr pivot_wider drop_na
191193
#'
192194
#' @export
@@ -203,16 +205,21 @@ add_7davs_and_target <- function(df, value_col, refd_col, lag_col, ref_lag) {
203205
names(avg_df)[names(avg_df) == value_col] <- 'value_7dav'
204206
avg_df_prev7 <- add_shift(avg_df, 7, refd_col)
205207
names(avg_df_prev7)[names(avg_df_prev7) == 'value_7dav'] <- 'value_prev_7dav'
206-
207-
backfill_df <- Reduce(function(x, y) merge(x, y, all=TRUE),
208-
list(df, avg_df, avg_df_prev7))
208+
209+
backfill_df <- reduce(
210+
list(df, avg_df, avg_df_prev7),
211+
full_join, by=c(refd_col, "issue_date")
212+
)
209213

210214
# Add target
211215
target_df <- df[df$lag==ref_lag, c(refd_col, value_col, "issue_date")]
212216
names(target_df)[names(target_df) == value_col] <- 'value_target'
213217
names(target_df)[names(target_df) == 'issue_date'] <- 'target_date'
214218

215-
backfill_df <- merge(backfill_df, target_df, by=refd_col, all.x=TRUE)
219+
backfill_df <- left_join(backfill_df, target_df, by=c(refd_col))
220+
221+
# Remove invalid rows
222+
backfill_df <- drop_na(backfill_df, c(lag_col))
216223

217224
# Add log values
218225
backfill_df$log_value_raw = log(backfill_df$value_raw + 1)
@@ -221,9 +228,6 @@ add_7davs_and_target <- function(df, value_col, refd_col, lag_col, ref_lag) {
221228
backfill_df$log_value_prev_7dav = log(backfill_df$value_prev_7dav + 1)
222229
backfill_df$log_7dav_slope = backfill_df$log_value_7dav - backfill_df$log_value_prev_7dav
223230

224-
# Remove invalid rows
225-
backfill_df <- drop_na(backfill_df, c(lag_col))
226-
227231
return (as.data.frame(backfill_df))
228232
}
229233

0 commit comments

Comments
 (0)