Skip to content

Commit 523b1a6

Browse files
committed
call validation once to reduce calls to "duplicate"
1 parent 52f747a commit 523b1a6

File tree

4 files changed

+21
-26
lines changed

4 files changed

+21
-26
lines changed

backfill_corrections/delphiBackfillCorrection/R/main.R

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -324,15 +324,12 @@ main <- function(params,
324324

325325
# Check data type and required columns
326326
msg_ts("Validating input data")
327-
for (value_type in params$value_types) {
328-
msg_ts("for ", value_type)
329-
result <- validity_checks(
330-
input_data, value_type,
331-
params$num_col, params$denom_col, input_group$name_suffix,
332-
refd_col = refd_col, lag_col = lag_col, issued_col = issued_col
333-
)
334-
input_data <- result[["df"]]
335-
}
327+
# Validate while date fields are still stored as strings, for speed.
328+
input_data <- validity_checks(
329+
input_data, params$value_types,
330+
params$num_col, params$denom_col, input_group$name_suffix,
331+
refd_col = refd_col, lag_col = lag_col, issued_col = issued_col
332+
)
336333

337334
input_data[[refd_col]] <- as.Date(input_data[[refd_col]], "%Y-%m-%d")
338335
input_data[[issued_col]] <- as.Date(input_data[[issued_col]], "%Y-%m-%d")

backfill_corrections/delphiBackfillCorrection/R/utils.R

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,8 @@ create_dir_not_exist <- function(path)
122122
#' Check input data for validity
123123
#'
124124
#' @template df-template
125-
#' @template value_type-template
125+
#' @param value_types character vector of all signal types to process. Either
126+
#' or both of "count" and "fraction".
126127
#' @template num_col-template
127128
#' @template denom_col-template
128129
#' @template signal_suffixes-template
@@ -133,18 +134,20 @@ create_dir_not_exist <- function(path)
133134
#' @return input dataframe augmented with lag column, if it
134135
#' didn't already exist. Only the dataframe is returned,
135136
#' regardless of the requested `value_types`
136-
validity_checks <- function(df, value_type, num_col, denom_col, signal_suffixes,
137+
validity_checks <- function(df, value_types, num_col, denom_col, signal_suffixes,
137138
refd_col = "time_value", lag_col = "lag", issued_col = "issue_date") {
138139
if (!missing(signal_suffixes) && !is.na(signal_suffixes) && !all(signal_suffixes == "") && !all(is.na(signal_suffixes))) {
139140
num_col <- paste(num_col, signal_suffixes, sep = "_")
140141
denom_col <- paste(denom_col, signal_suffixes, sep = "_")
141142
}
142143

143144
# Check data type and required columns
144-
if (value_type == "count") {
145-
if ( all(num_col %in% colnames(df)) ) { value_cols=c(num_col) }
146-
else { stop("No valid column name detected for the count values!") }
147-
} else if (value_type == "fraction") {
145+
if ("count" %in% value_types) {
146+
if ( !all(num_col %in% colnames(df)) ) {
147+
stop("No valid column name detected for the count values!")
148+
}
149+
}
150+
if ("fraction" %in% value_types) {
148151
value_cols = c(num_col, denom_col)
149152
if ( !all(value_cols %in% colnames(df)) ) {
150153
stop("No valid column name detected for the fraction values!")
@@ -157,7 +160,7 @@ validity_checks <- function(df, value_type, num_col, denom_col, signal_suffixes,
157160
}
158161

159162
# issue_date and lag should exist in the dataset
160-
if ( !(lag_col %in% colnames(df)) || !(issued_col %in% colnames(df)) ) {
163+
if ( !all(c(lag_col, issued_col) %in% colnames(df)) ) {
161164
stop("Issue date and lag fields must exist in the input data")
162165
}
163166

@@ -178,7 +181,7 @@ validity_checks <- function(df, value_type, num_col, denom_col, signal_suffixes,
178181
" least one reference date-issue date-location combination")
179182
}
180183

181-
return(list(df = df, value_cols = value_cols))
184+
return(df)
182185
}
183186

184187
#' Check available training days

backfill_corrections/delphiBackfillCorrection/man/validity_checks.Rd

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

backfill_corrections/delphiBackfillCorrection/unit-tests/testthat/test-utils.R

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ test_that("validity_checks alerts appropriately", {
158158
geo_value = rep("01001", 3)
159159

160160
check_wrapper <- function(df, value_type, signal_suffixes = "") {
161-
validity_checks(df, value_type = value_type, num_col = "num",
161+
validity_checks(df, value_types = value_type, num_col = "num",
162162
denom_col = "den", signal_suffixes = signal_suffixes)
163163
}
164164

@@ -179,8 +179,6 @@ test_that("validity_checks alerts appropriately", {
179179

180180
expect_error(check_wrapper(data.frame(num, den), "count"),
181181
"No reference date column detected for the reference date!")
182-
expect_error(check_wrapper(data.frame(num, den, time_value = as.character(time_value)), "count"),
183-
"Reference date column must be of `Date` type")
184182

185183

186184
issued_lag_error <- "Issue date and lag fields must exist in the input data"
@@ -205,10 +203,6 @@ test_that("validity_checks alerts appropriately", {
205203
expect_error(check_wrapper(bind_rows(df, new_row), "count"), missing_val_error)
206204

207205

208-
expect_error(check_wrapper(data.frame(num, den, time_value, lag, issue_date = as.character(issue_date)), "count"),
209-
"Issue date column must be of `Date` type")
210-
211-
212206
df <- data.frame(num, den, time_value, issue_date, lag, geo_value, state_id)
213207
expect_warning(check_wrapper(df[rep(1, 3), ], "count"),
214208
"Data contains duplicate rows, dropping")

0 commit comments

Comments
 (0)