Skip to content

[Backfill corrections] Convert dates-as-strings to Date types #1764

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions backfill_corrections/delphiBackfillCorrection/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ importFrom(lubridate,year)
importFrom(parallel,detectCores)
importFrom(quantgen,quantile_lasso)
importFrom(readr,write_csv)
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(rlang,.env)
importFrom(stats,coef)
Expand Down
27 changes: 21 additions & 6 deletions backfill_corrections/delphiBackfillCorrection/R/main.R
Original file line number Diff line number Diff line change
Expand Up @@ -226,13 +226,17 @@ run_backfill <- function(df, params,
#' Perform backfill correction on all desired signals and geo levels
#'
#' @template params-template
#' @template refd_col-template
#' @template lag_col-template
#' @template issued_col-template
#'
#' @importFrom dplyr bind_rows mutate
#' @importFrom dplyr bind_rows mutate %>%
#' @importFrom parallel detectCores
#' @importFrom rlang .data
#' @importFrom rlang .data :=
#'
#' @export
main <- function(params) {
main <- function(params,
refd_col = "time_value", lag_col = "lag", issued_col = "issue_date") {
if (!params$train_models && !params$make_predictions) {
msg_ts("both model training and prediction generation are turned off; exiting")
return(NULL)
Expand Down Expand Up @@ -287,7 +291,16 @@ main <- function(params) {
msg_ts("Reading in and combining associated files")
input_data <- lapply(
files_list,
function(file) {read_data(file) %>% fips_to_geovalue()}
function(file) {
read_data(file) %>%
fips_to_geovalue() %>%
mutate(
# Use `glue` syntax to construct a new field by variable,
# from https://stackoverflow.com/a/26003971/14401472
"{refd_col}" := as.Date(.data[[refd_col]], "%Y-%m-%d"),
"{issued_col}" := as.Date(.data[[issued_col]], "%Y-%m-%d")
)
}
) %>%
bind_rows()

Expand All @@ -304,16 +317,18 @@ main <- function(params) {
msg_ts(str_interp("for ${value_type}"))
result <- validity_checks(
input_data, value_type,
params$num_col, params$denom_col, input_group$name_suffix
params$num_col, params$denom_col, input_group$name_suffix,
refd_col = refd_col, lag_col = lag_col, issued_col = issued_col
)
input_data <- result[["df"]]
}

# Check available training days
training_days_check(input_data$issue_date, params$training_days)
training_days_check(input_data[[issued_col]], params$training_days)

# Perform backfill corrections and save result
run_backfill(input_data, params,
refd_col = refd_col, lag_col = lag_col, issued_col = issued_col,
indicator = input_group$indicator, signal = input_group$signal,
signal_suffixes = input_group$name_suffix)
}
Expand Down
17 changes: 11 additions & 6 deletions backfill_corrections/delphiBackfillCorrection/R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,14 +119,15 @@ create_dir_not_exist <- function(path)
#' @template num_col-template
#' @template denom_col-template
#' @template signal_suffixes-template
#' @template refd_col-template
#' @template lag_col-template
#' @template issued_col-template
#'
#' @return list of input dataframe augmented with lag column, if it
#' didn't already exist, and character vector of one or two value
#' column names, depending on requested `value_type`
validity_checks <- function(df, value_type, num_col, denom_col, signal_suffixes,
lag_col = "lag", issued_col = "issue_date") {
refd_col = "time_value", lag_col = "lag", issued_col = "issue_date") {
if (!missing(signal_suffixes) && !is.na(signal_suffixes) && !all(signal_suffixes == "") && !all(is.na(signal_suffixes))) {
num_col <- paste(num_col, signal_suffixes, sep = "_")
denom_col <- paste(num_col, signal_suffixes, sep = "_")
Expand All @@ -144,18 +145,22 @@ validity_checks <- function(df, value_type, num_col, denom_col, signal_suffixes,
}

# time_value must exist in the dataset
if ( !"time_value" %in% colnames(df) ) {
stop("No 'time_value' column detected for the reference date!")
if ( !(refd_col %in% colnames(df)) ) {
stop("No reference date column detected for the reference date!")
}

if (!(inherits(df[[refd_col]], "Date"))) {
stop("Reference date column must be of `Date` type")
}

# issue_date and lag should exist in the dataset
if ( !(lag_col %in% colnames(df)) || !(issued_col %in% colnames(df)) ) {
stop("`issue_date` and `lag` fields must exist in the input data")
stop("Issue date and lag fields must exist in the input data")
}

if ( any(is.na(df[[lag_col]])) || any(is.na(df[[issued_col]])) ||
any(is.na(df$time_value)) ) {
stop("`issue_date`, `lag`, or `time_value` contain missing values")
any(is.na(df[[refd_col]])) ) {
stop("Issue date, lag, or reference date fields contain missing values")
}

return(list(df = df, value_cols = value_cols))
Expand Down
16 changes: 15 additions & 1 deletion backfill_corrections/delphiBackfillCorrection/man/main.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.