Skip to content

Commit 1f359bc

Browse files
authored
Merge pull request #1249 from cmu-delphi/support-experimental-wave-12
Support experimental survey Wave 12
2 parents b66193c + b56a961 commit 1f359bc

File tree

5 files changed

+73
-12
lines changed

5 files changed

+73
-12
lines changed

facebook/delphiFacebook/R/responses.R

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,9 @@ load_response_one <- function(input_filename, params, contingency_run) {
130130
Q80 = col_integer(),
131131
I5 = col_character(),
132132
I7 = col_character(),
133+
V1alt = col_character(),
134+
V15c = col_character(),
135+
P6 = col_character(),
133136
E2_1 = col_integer(),
134137
E2_2 = col_integer()
135138
),
@@ -162,6 +165,8 @@ load_response_one <- function(input_filename, params, contingency_run) {
162165
assert(length(wave) == 1, "can only code one wave at a time")
163166

164167
input_data <- module_assignment(input_data, wave)
168+
input_data <- experimental_arm_assignment(input_data, wave)
169+
165170
input_data <- bodge_v4_translation(input_data, wave)
166171
input_data <- bodge_C6_C8(input_data, wave)
167172
input_data <- bodge_B13(input_data, wave)
@@ -364,7 +369,8 @@ filter_data_for_aggregation <- function(df, params, lead_days = 12L)
364369
dplyr::between(.data$hh_number_sick, 0L, 30L),
365370
dplyr::between(.data$hh_number_total, 1L, 30L),
366371
.data$hh_number_sick <= .data$hh_number_total,
367-
.data$day >= (as.Date(params$start_date) - lead_days)
372+
.data$day >= (as.Date(params$start_date) - lead_days),
373+
.data$wave != 12.5 # Ignore experimental Wave 12 data
368374
)
369375

370376
msg_plain(paste0("Finished filtering data for aggregations"))
@@ -503,6 +509,28 @@ module_assignment <- function(input_data, wave) {
503509
return(input_data)
504510
}
505511

512+
#' Label arms of experimental Wave 12.
513+
#'
514+
#' @param input_data data frame of responses, before subsetting to select
515+
#' variables
516+
#' @param wave integer indicating survey version
517+
#'
518+
#' @return data frame with new `module` column
519+
#' @importFrom dplyr case_when
520+
experimental_arm_assignment <- function(input_data, wave) {
521+
if (wave == 12.5) {
522+
assert( "random_number_exp" %in% names(input_data) )
523+
input_data$w12_treatment <- case_when(
524+
input_data$random_number_exp >= 0.6666 ~ 1, # demographics placed after symptom items
525+
input_data$random_number_exp >= 0.3333 ~ 2, # demographics placed after vaccine items
526+
input_data$random_number_exp < 0.3333 ~ 3, # alternative wording to V1
527+
TRUE ~ NA_real_
528+
)
529+
}
530+
531+
return(input_data)
532+
}
533+
506534
#' Create dataset for sharing with research partners
507535
#'
508536
#' Different survey waves may have different sets of questions. Here we report
@@ -511,11 +539,13 @@ module_assignment <- function(input_data, wave) {
511539
#'
512540
#' @param input_data data frame of responses
513541
#' @param county_crosswalk crosswalk mapping ZIP5 to counties
542+
#' @param params list containing `produce_individual_raceeth`, indicating
543+
#' whether or not to issue microdata with race-ethnicity field
514544
#' @importFrom stringi stri_trim stri_replace_all
515545
#' @importFrom dplyr left_join group_by filter ungroup select rename
516546
#'
517547
#' @export
518-
create_complete_responses <- function(input_data, county_crosswalk)
548+
create_complete_responses <- function(input_data, county_crosswalk, params)
519549
{
520550
cols_to_report <- c(
521551
"start_dt", "end_dt", "date",
@@ -541,9 +571,10 @@ create_complete_responses <- function(input_data, county_crosswalk)
541571
"B10c", "B13", "C18a", "C18b", "C7a", "D12", "E4",
542572
"G1", "G2", "G3", "H1", "H2", "H3", "I1", "I2", "I3", "I4", "I5",
543573
"I6_1", "I6_2", "I6_3", "I6_4", "I6_5", "I6_6", "I6_7", "I6_8",
544-
"I7", "K1", "K2", "V11a", "V12a", "V15a", "V15b", "V16", "V3a", "module", # added in Wave 11
574+
"I7", "K1", "K2", "V11a", "V12a", "V15a", "V15b", "V16", "V3a", # added in Wave 11
575+
"V1alt", "B13a", "V15c", "P1", "P2", "P3", "P4", "P5", "P6", # added in experimental Wave 12
545576

546-
"raceethnicity", "token", "wave", "UserLanguage",
577+
"raceethnicity", "token", "wave", "w12_treatment", "module", "UserLanguage",
547578
"zip5" # temporarily; we'll filter by this column later and then drop it before writing
548579
)
549580

@@ -617,7 +648,10 @@ surveyID_to_wave <- Vectorize(function(surveyID) {
617648
"SV_ddjHkcYrrLWgM2V" = 7,
618649
"SV_ewAVaX7Wz3l0UqG" = 8,
619650
"SV_6PADB8DyF9SIyXk" = 10,
620-
"SV_4VEaeffqQtDo33M" = 11)
651+
"SV_4VEaeffqQtDo33M" = 11,
652+
"SV_3TL0r243mLkDzCK" = 12.5, # experimental version of Wave 12
653+
"TBD finalized version" = 12 # finalized version of Wave 12
654+
)
621655

622656
if ( any(names(waves) == surveyID) ) {
623657
return(waves[[surveyID]])
@@ -667,9 +701,11 @@ filter_complete_responses <- function(data_full, params)
667701
data_full <- select(data_full, -.data$zip5)
668702

669703
# 9 includes StartDatetime, EndDatetime, Date, token, wave, geo_id,
670-
# UserLanguage + two questions (ignore raceethnicity field which may or may
671-
# not exist, depending on params)
672-
valid_row_filter <- rowSums( !is.na(data_full[, names(data_full) != "raceethnicity"]) ) >= 9
704+
# UserLanguage + two questions (ignore raceethnicity, module, and
705+
# w12_assignment fields which may or may not exist, depending on params and
706+
# survey version)
707+
ignore_cols <- c("raceethnicity", "w12_assignment", "module")
708+
valid_row_filter <- rowSums( !is.na(data_full[, !(names(data_full) %in% ignore_cols)]) ) >= 9
673709
data_full <- data_full[valid_row_filter, ]
674710

675711
return(data_full)

facebook/delphiFacebook/R/run.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ run_facebook <- function(params)
3131
msg_df("response data to aggregate", data_agg)
3232

3333
# create "complete" data that will be shared with research partners
34-
data_full <- create_complete_responses(input_data, cw_list$county)
34+
data_full <- create_complete_responses(input_data, cw_list$county, params)
3535
data_full <- filter_complete_responses(data_full, params)
3636
data_full <- join_weights(data_full, params, weights = "full")
3737
msg_df("full data to share with research partners", data_full)

facebook/delphiFacebook/man/create_complete_responses.Rd

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

facebook/delphiFacebook/man/experimental_arm_assignment.Rd

Lines changed: 20 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

facebook/delphiFacebook/unit-tests/testthat/test-responses.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,15 +124,17 @@ test_that("filter_data_for_aggregation works correctly", {
124124
hh_number_sick = c(0, NA, 4, -5, 55, 5, 5, 5, 3, 3, 0, 30, 1),
125125
hh_number_total = c(1, 4, NA, 5, 5, -5, 100, 5, 5, 1, 1, 30, 1),
126126
day = c("2021-01-01", "2021-01-01", "2021-01-02", "2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01", "2020-01-01"),
127-
date = c("2021-01-01", "2021-01-01", "2021-01-02", "2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01", "2020-01-01")
127+
date = c("2021-01-01", "2021-01-01", "2021-01-02", "2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01", "2020-01-01"),
128+
wave = 12
128129
)
129130

130131
expected <- tibble(
131132
zip5 = c("10001", "10001", "10001", "10001"),
132133
hh_number_sick = c(5, 3, 0, 30),
133134
hh_number_total = c(5, 5, 1, 30),
134135
day = c("2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01"),
135-
date = c("2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01")
136+
date = c("2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01"),
137+
wave = 12
136138
)
137139

138140
expect_equal(filter_data_for_aggregation(input, params),

0 commit comments

Comments
 (0)