Skip to content

Commit 7bf632a

Browse files
authored
Merge pull request #1557 from cmu-delphi/ndefries/output-qsf-matrix-items
[CITS QSF utils] Add new tool to print matrix item names
2 parents 18a2ce8 + 96e8d2f commit 7bf632a

File tree

3 files changed

+197
-84
lines changed

3 files changed

+197
-84
lines changed

facebook/qsf-tools/generate-codebook.R

Lines changed: 37 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
##
55
## Usage:
66
##
7-
## Rscript generate-codebook.R path/to/qsf path/to/codebook
7+
## Rscript generate-codebook.R [UMD/CMU] path/to/qsf path/to/codebook
88

99
suppressPackageStartupMessages({
1010
library(tidyverse)
@@ -17,81 +17,32 @@ suppressPackageStartupMessages({
1717

1818

1919
process_qsf <- function(path_to_qsf,
20+
survey_version=c("CMU", "UMD"),
2021
path_to_shortname_map="./static/item_shortquestion_map.csv",
2122
path_to_replacement_map="./static/item_replacement_map.csv") {
23+
survey_version <- match.arg(survey_version)
2224
q <- read_json(path_to_qsf)
2325
wave <- get_wave(path_to_qsf)
2426

25-
# get the survey elements with flow logic (should be one per block randomization branch)
26-
ii_flow <- q$SurveyElements %>%
27-
map_chr("Element") %>%
28-
{. == "FL"} %>%
29-
which()
30-
ii_block_randomizer <- q$SurveyElements[ii_flow] %>%
31-
map(~ .x$Payload$Flow) %>%
32-
map(~ map(.x,~ .x$Type == "BlockRandomizer")) %>%
33-
unlist() %>%
34-
which()
35-
random_block_ids <- q$SurveyElements[ii_flow] %>%
36-
map(~ .x$Payload$Flow) %>%
37-
map(~ .x[ii_block_randomizer]) %>%
38-
map(~ map(.x,~ .x$Flow)) %>%
39-
map(~ map(.x,~ map(.x,~ .x$ID))) %>%
40-
unlist()
41-
42-
block_id_item_map <- get_block_item_map(q)
43-
block_id_item_map <- block_id_item_map %>% filter(BlockID %in% random_block_ids) %>%
44-
select(-BlockID)
45-
46-
# get the survey elements that are questions:
47-
ii_questions <- q$SurveyElements %>%
48-
map_chr("Element") %>%
49-
{. == "SQ"} %>%
50-
which()
51-
52-
# get the questions that were shown to respondents
53-
shown_items <- get_shown_items(q)
54-
ii_shown <- q$SurveyElements[ii_questions] %>%
55-
map_chr(~ .x$Payload$QuestionID) %>%
56-
{. %in% shown_items} %>%
57-
which()
27+
displayed_questions <- subset_qsf_to_displayed(q)
5828

59-
# subset qsf to valid elements
60-
displayed_questions <- q$SurveyElements[ii_questions][ii_shown]
61-
62-
# Qualtrics auto-assigned question IDs
29+
# get Qualtrics auto-assigned question IDs
6330
qids <- displayed_questions %>%
6431
map_chr(~ .x$Payload$QuestionID)
6532

66-
# the items are how we will match these to the survey data:
67-
items <- displayed_questions %>%
68-
map_chr(~ .x$Payload$DataExportTag)
69-
70-
# B13 was originally named incorrectly. Rename manually as needed
71-
items[items == "B13 "] <- "B13"
72-
# V2a in Wave 13 was originally named incorrectly. Rename manually as needed
73-
if (wave == 13) {
74-
items[items == "V2a"] <- "V2d"
75-
}
33+
# get item names
34+
item_names <- displayed_questions %>%
35+
map_chr(~ .x$Payload$DataExportTag) %>%
36+
patch_item_names(survey_version, wave)
7637

77-
# get the text of the question:
38+
# get question text:
7839
questions <- displayed_questions %>%
7940
map_chr(~ .x$Payload$QuestionText)
8041

81-
# get the type of question:
82-
type_map <- c(MC = "Multiple choice", TE = "Text", Matrix = "Matrix")
83-
qtype <- displayed_questions %>%
84-
map_chr(~ .x$Payload$QuestionType) %>%
85-
{type_map[.]}
86-
87-
ii_multiselect <- displayed_questions %>%
88-
map_chr(~ .x$Payload$Selector) %>%
89-
{. == "MAVR"} %>%
90-
which()
91-
qtype[ii_multiselect] <- "Multiselect"
92-
qtype[items == "A5"] <- "Matrix" # this will be treated like C10
93-
94-
# get the choices (for MC and Matrix):
42+
# get question types
43+
qtype <- get_question_formats(displayed_questions, item_names, survey_version)
44+
45+
# get choices for multiple choice (MC) and Matrix items:
9546
choices <- displayed_questions %>%
9647
map(~ .x$Payload$Choices) %>%
9748
map(~ map(.x, "Display"))
@@ -122,8 +73,8 @@ process_qsf <- function(path_to_qsf,
12273
map(which) %>%
12374
map(names) %>%
12475
unlist()
125-
other_text_items <- paste(items[ii_other_text_option], text_elem, "TEXT", sep="_") %>%
126-
setNames(items[ii_other_text_option])
76+
other_text_items <- paste(item_names[ii_other_text_option], text_elem, "TEXT", sep="_") %>%
77+
setNames(item_names[ii_other_text_option])
12778

12879
# some questions port the choices from other questions
12980
ii_carryforward <- displayed_questions %>%
@@ -189,7 +140,7 @@ process_qsf <- function(path_to_qsf,
189140
paste(c(qid, selectable_text, option_code), collapse="")
190141
}, .x)) %>%
191142
# Replace QID with question number (A2, etc)
192-
map(~ gsubfn("(QID[0-9]+)", function(qid) {items[qids == qid]}, .x)) %>%
143+
map(~ gsubfn("(QID[0-9]+)", function(qid) {item_names[qids == qid]}, .x)) %>%
193144
# Collapse logic into a single string.
194145
map(~ paste(.x, collapse=" "))
195146

@@ -220,7 +171,7 @@ process_qsf <- function(path_to_qsf,
220171
unlist()
221172

222173
# format all qsf content lists into a single tibble
223-
qdf <- tibble(variable = items,
174+
qdf <- tibble(variable = item_names,
224175
question = questions,
225176
type = qtype,
226177
choices = choices,
@@ -229,8 +180,9 @@ process_qsf <- function(path_to_qsf,
229180
response_option_randomization = response_option_randomization)
230181

231182
# Add on module randomization
183+
block_id_item_map <- map_qids_to_module(q)
232184
block_id_item_map <- block_id_item_map %>%
233-
left_join(data.frame(qid=qids, item=items), by=c("Questions"="qid"))
185+
left_join(data.frame(qid=qids, item=item_names), by=c("Questions"="qid"))
234186
qdf <- qdf %>% left_join(block_id_item_map, by=c(variable="item")) %>%
235187
rename(group_of_respondents_item_was_shown_to = BlockName)
236188

@@ -289,16 +241,21 @@ process_qsf <- function(path_to_qsf,
289241
select(new) %>%
290242
unnest(new)
291243

292-
# A5 and C10 are special cases b/c of they are matrix of text entry questions:
293-
# also C10 needs an extra _1.
294-
matrix_items <- matrix_items %>%
295-
mutate(variable = if_else(str_starts(variable, "C10"), paste0(variable, "_1"), variable),
296-
type = if_else(str_starts(variable, "A5|C10"), "Text", type),
297-
choices = if_else(str_starts(variable, "A5|C10"), list(list()), choices))
244+
# Custom matrix formatting
245+
if (survey_version == "CMU") {
246+
# A5 and C10 are special cases b/c they are matrices of text entry questions:
247+
# also C10 needs an extra _1.
248+
matrix_items <- matrix_items %>%
249+
mutate(variable = if_else(str_starts(variable, "C10"), paste0(variable, "_1"), variable),
250+
type = if_else(str_starts(variable, "A5|C10"), "Text", type),
251+
choices = if_else(str_starts(variable, "A5|C10"), list(list()), choices))
252+
} else if (survey_version == "UMD") {
253+
# pass
254+
}
298255

299256
qdf <- bind_rows(nonmatrix_items, matrix_items)
300257

301-
# indicate which items have replaced old items.
258+
# indicate which new items have replaced old items.
302259
replaces_map <- read_csv(path_to_replacement_map,
303260
col_types = cols(new_item = col_character(),
304261
old_item = col_character()
@@ -465,11 +422,12 @@ add_qsf_to_codebook <- function(path_to_qsf, path_to_codebook) {
465422

466423
args <- commandArgs(TRUE)
467424

468-
if (length(args) != 2) {
469-
stop("Usage: Rscript generate-codebook.R path/to/qsf path/to/codebook")
425+
if (length(args) != 3) {
426+
stop("Usage: Rscript generate-codebook.R [UMD/CMU] path/to/qsf path/to/codebook")
470427
}
471428

472-
path_to_qsf <- args[1]
473-
path_to_codebook <- args[2]
429+
survey_version <- args[1]
430+
path_to_qsf <- args[2]
431+
path_to_codebook <- args[3]
474432

475433
invisible(add_qsf_to_codebook(path_to_qsf, path_to_codebook))
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#!/usr/bin/env Rscript
2+
3+
## Print a list of survey questions that we handle as matrices.
4+
##
5+
## Usage:
6+
##
7+
## Rscript list-matrix-items.R [UMD/CMU] path/to/qsf
8+
9+
suppressPackageStartupMessages({
10+
library(jsonlite)
11+
library(tidyverse)
12+
source("qsf-utils.R")
13+
})
14+
15+
16+
print_matrix_items <- function(path_to_qsf, survey_version=c("CMU", "UMD")) {
17+
survey_version <- match.arg(survey_version)
18+
q <- read_json(path_to_qsf)
19+
wave <- get_wave(path_to_qsf)
20+
21+
displayed_questions <- subset_qsf_to_displayed(q)
22+
23+
# Get survey item names
24+
item_names <- displayed_questions %>%
25+
map_chr(~ .x$Payload$DataExportTag) %>%
26+
patch_item_names(survey_version, wave)
27+
28+
# Get survey item formats
29+
qtype <- get_question_formats(displayed_questions, item_names, survey_version)
30+
31+
qdf <- tibble(variable = item_names,
32+
type = qtype)
33+
34+
matrix_items <- qdf %>% filter(type == "Matrix") %>% pull(variable)
35+
message("Wave ", wave, " has ", length(matrix_items), " matrix items: ", paste(matrix_items, collapse=", "))
36+
37+
return(NULL)
38+
}
39+
40+
args <- commandArgs(TRUE)
41+
42+
if (length(args) != 2) {
43+
stop("Usage: Rscript list-matrix-items.R [UMD/CMU] path/to/qsf")
44+
}
45+
46+
survey_version <- args[1]
47+
path_to_qsf <- args[2]
48+
49+
invisible(print_matrix_items(path_to_qsf, survey_version))

facebook/qsf-tools/qsf-utils.R

Lines changed: 111 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77
#'
88
#' @return list of Qualtrics Question IDs (QIDs) of items shown to respondents
99
get_shown_items <- function(qsf) {
10-
block_out <- Filter(function(elem) { elem[["Element"]] == "BL" }, qsf$SurveyElements)[[1]]$Payload
10+
all_blocks <- Filter(function(elem) { elem[["Element"]] == "BL" }, qsf$SurveyElements)[[1]]$Payload
1111

1212
shown_items <- list()
13-
for (block in block_out) {
13+
for (block in all_blocks) {
1414
if (block$Type == "Trash") {
1515
next
1616
}
@@ -27,10 +27,10 @@ get_shown_items <- function(qsf) {
2727

2828

2929
get_block_item_map <- function(qsf) {
30-
block_out <- Filter(function(elem) { elem[["Element"]] == "BL" }, qsf$SurveyElements)[[1]]$Payload
30+
all_blocks <- Filter(function(elem) { elem[["Element"]] == "BL" }, qsf$SurveyElements)[[1]]$Payload
3131

3232
items <- list()
33-
for (block in block_out) {
33+
for (block in all_blocks) {
3434
if (block$Type == "Trash") {
3535
next
3636
}
@@ -56,7 +56,7 @@ get_block_item_map <- function(qsf) {
5656
get_wave <- function(path_to_qsf) {
5757
qsf_name_pattern <- "(.*Wave_)([0-9]*([.][0-9])?)([.]qsf)$"
5858
if (!grepl(qsf_name_pattern, path_to_qsf)) {
59-
stop("qsf filename should be of the format 'Survey_of_COVID-Like_Illness_-_Wave_XX.qsf'")
59+
stop("qsf filename should be of the format '<survey prefix>Wave_XX.qsf' where 'XX' is an integer or float")
6060
}
6161

6262
wave <- as.numeric(
@@ -65,3 +65,109 @@ get_wave <- function(path_to_qsf) {
6565

6666
return(wave)
6767
}
68+
69+
#' Create mapping of QIDs to module name
70+
#'
71+
#' @param qsf contents of QSF file in JSON format
72+
#'
73+
#' @return dataframe with `BlockName` (module name) and `Questions` (QIDs) columns
74+
map_qids_to_module <- function(qsf) {
75+
# get the survey elements with flow logic (should be one per block randomization branch)
76+
ii_flow <- qsf$SurveyElements %>%
77+
map_chr("Element") %>%
78+
{. == "FL"} %>%
79+
which()
80+
ii_block_randomizer <- qsf$SurveyElements[ii_flow] %>%
81+
map(~ .x$Payload$Flow) %>%
82+
map(~ map(.x,~ .x$Type == "BlockRandomizer")) %>%
83+
unlist() %>%
84+
which()
85+
random_block_ids <- qsf$SurveyElements[ii_flow] %>%
86+
map(~ .x$Payload$Flow) %>%
87+
map(~ .x[ii_block_randomizer]) %>%
88+
map(~ map(.x,~ .x$Flow)) %>%
89+
map(~ map(.x,~ map(.x,~ .x$ID))) %>%
90+
unlist()
91+
92+
block_id_item_map <- get_block_item_map(qsf)
93+
block_id_item_map <- block_id_item_map %>% filter(BlockID %in% random_block_ids) %>%
94+
select(-BlockID)
95+
96+
return(block_id_item_map)
97+
}
98+
99+
#' Get only questions that were shown to respondents, using definition in `get_shown_items`
100+
#'
101+
#' @param qsf contents of QSF file in JSON format
102+
#'
103+
#' @return QSF subsetted to only displayed questions
104+
subset_qsf_to_displayed <- function(qsf) {
105+
# get the survey elements that are questions:
106+
ii_questions <- qsf$SurveyElements %>%
107+
map_chr("Element") %>%
108+
{. == "SQ"} %>%
109+
which()
110+
111+
# get the questions that were shown to respondents
112+
shown_items <- get_shown_items(qsf)
113+
ii_shown <- qsf$SurveyElements[ii_questions] %>%
114+
map_chr(~ .x$Payload$QuestionID) %>%
115+
{. %in% shown_items} %>%
116+
which()
117+
118+
# subset qsf to valid elements
119+
displayed_questions <- qsf$SurveyElements[ii_questions][ii_shown]
120+
121+
return(displayed_questions)
122+
}
123+
124+
#' Replace erroneous question names
125+
#'
126+
#' @param item_names character vector of survey question names
127+
#' @param survey_version either "UMD" or "CMU"
128+
#' @param wave integer or float survey version
129+
#'
130+
#' @return character vector of repaired survey question names
131+
patch_item_names <- function(item_names, survey_version, wave) {
132+
if (survey_version == "CMU") {
133+
# B13 was originally named incorrectly.
134+
item_names[item_names == "B13 "] <- "B13"
135+
# V2a in Wave 13 was originally named incorrectly.
136+
if (wave == 13) {
137+
item_names[item_names == "V2a"] <- "V2d"
138+
}
139+
} else if (survey_version == "UMD") {
140+
# pass
141+
}
142+
143+
return(item_names)
144+
}
145+
146+
#' Fetch and customize question format types.
147+
#'
148+
#' @param qsf contents of QSF file in JSON format
149+
#' @param item_names character vector of survey question names
150+
#' @param survey_version either "UMD" or "CMU"
151+
#'
152+
#' @return character vector of repaired survey question names
153+
get_question_formats <- function(qsf, item_names, survey_version){
154+
type_map <- c(MC = "Multiple choice", TE = "Text", Matrix = "Matrix")
155+
156+
qtype <- qsf %>%
157+
map_chr(~ .x$Payload$QuestionType) %>%
158+
{type_map[.]}
159+
160+
ii_multiselect <- qsf %>%
161+
map_chr(~ .x$Payload$Selector) %>%
162+
{. == "MAVR"} %>%
163+
which()
164+
qtype[ii_multiselect] <- "Multiselect"
165+
166+
if (survey_version == "CMU") {
167+
qtype[item_names == "A5"] <- "Matrix" # this will be treated like C10
168+
} else if (survey_version == "UMD") {
169+
# pass
170+
}
171+
172+
return(qtype)
173+
}

0 commit comments

Comments
 (0)