@@ -130,6 +130,9 @@ load_response_one <- function(input_filename, params, contingency_run) {
130
130
Q80 = col_integer(),
131
131
I5 = col_character(),
132
132
I7 = col_character(),
133
+ V1alt = col_character(),
134
+ V15c = col_character(),
135
+ P6 = col_character(),
133
136
E2_1 = col_integer(),
134
137
E2_2 = col_integer()
135
138
),
@@ -162,6 +165,8 @@ load_response_one <- function(input_filename, params, contingency_run) {
162
165
assert(length(wave ) == 1 , " can only code one wave at a time" )
163
166
164
167
input_data <- module_assignment(input_data , wave )
168
+ input_data <- experimental_arm_assignment(input_data , wave )
169
+
165
170
input_data <- bodge_v4_translation(input_data , wave )
166
171
input_data <- bodge_C6_C8(input_data , wave )
167
172
input_data <- bodge_B13(input_data , wave )
@@ -364,7 +369,8 @@ filter_data_for_aggregation <- function(df, params, lead_days = 12L)
364
369
dplyr :: between(.data $ hh_number_sick , 0L , 30L ),
365
370
dplyr :: between(.data $ hh_number_total , 1L , 30L ),
366
371
.data $ hh_number_sick < = .data $ hh_number_total ,
367
- .data $ day > = (as.Date(params $ start_date ) - lead_days )
372
+ .data $ day > = (as.Date(params $ start_date ) - lead_days ),
373
+ .data $ wave != 12.5 # Ignore experimental Wave 12 data
368
374
)
369
375
370
376
msg_plain(paste0(" Finished filtering data for aggregations" ))
@@ -503,6 +509,28 @@ module_assignment <- function(input_data, wave) {
503
509
return (input_data )
504
510
}
505
511
512
#' Label arms of experimental Wave 12.
#'
#' For the experimental version of Wave 12 (coded as wave 12.5), each response
#' is assigned to one of three arms by thresholding its `random_number_exp`
#' value. For any other wave the data is returned unchanged and no column is
#' added.
#'
#' @param input_data data frame of responses, before subsetting to select
#'   variables
#' @param wave numeric indicating survey version; the experimental version of
#'   Wave 12 is coded as 12.5
#'
#' @return data frame with new `w12_treatment` column (1, 2, 3, or NA when no
#'   threshold condition holds, e.g. `random_number_exp` is missing) if `wave`
#'   is 12.5; otherwise `input_data` unchanged
#' @importFrom dplyr case_when
experimental_arm_assignment <- function(input_data, wave) {
  if (wave == 12.5) {
    # Fail with an explicit message rather than a bare assertion error when the
    # randomization field is absent from the survey export.
    assert(
      "random_number_exp" %in% names(input_data),
      "random_number_exp is required to assign Wave 12.5 experimental arms"
    )

    # Conditions are evaluated in order, so the second branch only sees values
    # below 0.6666.
    input_data$w12_treatment <- case_when(
      input_data$random_number_exp >= 0.6666 ~ 1, # demographics placed after symptom items
      input_data$random_number_exp >= 0.3333 ~ 2, # demographics placed after vaccine items
      input_data$random_number_exp < 0.3333 ~ 3, # alternative wording to V1
      TRUE ~ NA_real_
    )
  }

  return(input_data)
}
533
+
506
534
# ' Create dataset for sharing with research partners
507
535
# '
508
536
# ' Different survey waves may have different sets of questions. Here we report
@@ -511,11 +539,13 @@ module_assignment <- function(input_data, wave) {
511
539
# '
512
540
# ' @param input_data data frame of responses
513
541
# ' @param county_crosswalk crosswalk mapping ZIP5 to counties
542
+ # ' @param params list containing `produce_individual_raceeth`, indicating
543
+ # ' whether or not to issue microdata with race-ethnicity field
514
544
# ' @importFrom stringi stri_trim stri_replace_all
515
545
# ' @importFrom dplyr left_join group_by filter ungroup select rename
516
546
# '
517
547
# ' @export
518
- create_complete_responses <- function (input_data , county_crosswalk )
548
+ create_complete_responses <- function (input_data , county_crosswalk , params )
519
549
{
520
550
cols_to_report <- c(
521
551
" start_dt" , " end_dt" , " date" ,
@@ -541,9 +571,10 @@ create_complete_responses <- function(input_data, county_crosswalk)
541
571
" B10c" , " B13" , " C18a" , " C18b" , " C7a" , " D12" , " E4" ,
542
572
" G1" , " G2" , " G3" , " H1" , " H2" , " H3" , " I1" , " I2" , " I3" , " I4" , " I5" ,
543
573
" I6_1" , " I6_2" , " I6_3" , " I6_4" , " I6_5" , " I6_6" , " I6_7" , " I6_8" ,
544
- " I7" , " K1" , " K2" , " V11a" , " V12a" , " V15a" , " V15b" , " V16" , " V3a" , " module" , # added in Wave 11
574
+ " I7" , " K1" , " K2" , " V11a" , " V12a" , " V15a" , " V15b" , " V16" , " V3a" , # added in Wave 11
575
+ " V1alt" , " B13a" , " V15c" , " P1" , " P2" , " P3" , " P4" , " P5" , " P6" , # added in experimental Wave 12
545
576
546
- " raceethnicity" , " token" , " wave" , " UserLanguage" ,
577
+ " raceethnicity" , " token" , " wave" , " w12_treatment " , " module " , " UserLanguage" ,
547
578
" zip5" # temporarily; we'll filter by this column later and then drop it before writing
548
579
)
549
580
@@ -617,7 +648,10 @@ surveyID_to_wave <- Vectorize(function(surveyID) {
617
648
" SV_ddjHkcYrrLWgM2V" = 7 ,
618
649
" SV_ewAVaX7Wz3l0UqG" = 8 ,
619
650
" SV_6PADB8DyF9SIyXk" = 10 ,
620
- " SV_4VEaeffqQtDo33M" = 11 )
651
+ " SV_4VEaeffqQtDo33M" = 11 ,
652
+ " SV_3TL0r243mLkDzCK" = 12.5 , # experimental version of Wave 12
653
+ " TBD finalized version" = 12 # finalized version of Wave 12
654
+ )
621
655
622
656
if ( any(names(waves ) == surveyID ) ) {
623
657
return (waves [[surveyID ]])
@@ -667,9 +701,11 @@ filter_complete_responses <- function(data_full, params)
667
701
data_full <- select(data_full , - .data $ zip5 )
668
702
669
703
# 9 includes StartDatetime, EndDatetime, Date, token, wave, geo_id,
670
- # UserLanguage + two questions (ignore raceethnicity field which may or may
671
- # not exist, depending on params)
672
- valid_row_filter <- rowSums( ! is.na(data_full [, names(data_full ) != " raceethnicity" ]) ) > = 9
704
+ # UserLanguage + two questions (ignore raceethnicity, module, and
705
+ # w12_assignment fields which may or may not exist, depending on params and
706
+ # survey version)
707
+ ignore_cols <- c(" raceethnicity" , " w12_assignment" , " module" )
708
+ valid_row_filter <- rowSums( ! is.na(data_full [, ! (names(data_full ) %in% ignore_cols )]) ) > = 9
673
709
data_full <- data_full [valid_row_filter , ]
674
710
675
711
return (data_full )
0 commit comments