Skip to content

Commit 345b424

Browse files
authored
Merge pull request #1761 from cmu-delphi/ndefries/backfill/rename-fips
[Backfill corrections] Make sure pipeline has `geo_value` field to use
2 parents a0c9e3d + 75ca624 commit 345b424

File tree

7 files changed

+47
-12
lines changed

7 files changed

+47
-12
lines changed

backfill_corrections/delphiBackfillCorrection/NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ importFrom(dplyr,group_split)
3434
importFrom(dplyr,if_else)
3535
importFrom(dplyr,mutate)
3636
importFrom(dplyr,pull)
37+
importFrom(dplyr,rename)
3738
importFrom(dplyr,select)
3839
importFrom(dplyr,summarize)
3940
importFrom(dplyr,ungroup)

backfill_corrections/delphiBackfillCorrection/R/io.R

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,31 @@
11
#' Read a parquet file into a dataframe
22
#'
3-
#' @template input_dir-template
3+
#' @template input_file-template
44
#'
55
#' @importFrom arrow read_parquet
66
#'
77
#' @export
8-
read_data <- function(input_dir) {
9-
df <- read_parquet(input_dir, as_data_frame = TRUE)
8+
read_data <- function(input_file) {
  # `as_data_frame = TRUE` requests a data frame as the result; it is returned
  # directly as the value of the function.
  read_parquet(input_file, as_data_frame = TRUE)
}
1212

13+
#' Make sure data contains a `geo_value` field
#'
#' If `df` already has a `geo_value` column it is returned unchanged. Otherwise
#' the `fips` column is renamed to `geo_value`. An error is raised when neither
#' column is present.
#'
#' @template df-template
#'
#' @return `df`, with a `geo_value` column.
#'
#' @importFrom dplyr rename %>%
#' @importFrom rlang .data
fips_to_geovalue <- function(df) {
  # Nothing to do when the expected column is already there.
  if ("geo_value" %in% colnames(df)) {
    return(df)
  }

  if (!("fips" %in% colnames(df))) {
    stop("Either `fips` or `geo_value` field must be available")
  }

  # Name the source column with a string: using the `.data` pronoun inside a
  # selection context such as `rename()` is deprecated by tidyselect.
  rename(df, geo_value = "fips")
}
28+
1329
#' Export the result to customized directory
1430
#'
1531
#' @param test_data test data containing prediction results

backfill_corrections/delphiBackfillCorrection/R/main.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ main <- function(params) {
287287
msg_ts("Reading in and combining associated files")
288288
input_data <- lapply(
289289
files_list,
290-
function(file) {read_data(file)}
290+
function(file) {read_data(file) %>% fips_to_geovalue()}
291291
) %>%
292292
bind_rows()
293293

backfill_corrections/delphiBackfillCorrection/R/utils.R

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -148,12 +148,14 @@ validity_checks <- function(df, value_type, num_col, denom_col, signal_suffixes,
148148
stop("No 'time_value' column detected for the reference date!")
149149
}
150150

151-
# issue_date or lag should exist in the dataset
152-
if ( !lag_col %in% colnames(df) ) {
153-
if ( issued_col %in% colnames(df) ) {
154-
df$lag = as.integer(df$issue_date - df$time_value)
155-
}
156-
else {stop("No issue_date or lag exists!")}
151+
# issue_date and lag should exist in the dataset
152+
if ( !(lag_col %in% colnames(df)) || !(issued_col %in% colnames(df)) ) {
153+
stop("`issue_date` and `lag` fields must exist in the input data")
154+
}
155+
156+
if ( any(is.na(df[[lag_col]])) || any(is.na(df[[issued_col]])) ||
157+
any(is.na(df$time_value)) ) {
158+
stop("`issue_date`, `lag`, or `time_value` contain missing values")
157159
}
158160

159161
return(list(df = df, value_cols = value_cols))
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
#' @param input_file path to input data file in parquet format

backfill_corrections/delphiBackfillCorrection/man/fips_to_geovalue.Rd

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

backfill_corrections/delphiBackfillCorrection/man/read_data.Rd

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)