Skip to content

Commit 6adb7d3

Browse files
committed
add new tool to print matrix items + refactoring
1 parent 44b0475 commit 6adb7d3

File tree

3 files changed

+195
-83
lines changed

3 files changed

+195
-83
lines changed

facebook/qsf-tools/generate-codebook.R

Lines changed: 36 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
##
55
## Usage:
66
##
7-
## Rscript generate-codebook.R path/to/qsf path/to/codebook
7+
## Rscript generate-codebook.R [UMD/CMU] path/to/qsf path/to/codebook
88

99
suppressPackageStartupMessages({
1010
library(tidyverse)
@@ -17,81 +17,33 @@ suppressPackageStartupMessages({
1717

1818

1919
process_qsf <- function(path_to_qsf,
20+
survey_version=c("CMU", "UMD"),
2021
path_to_shortname_map="./static/item_shortquestion_map.csv",
2122
path_to_replacement_map="./static/item_replacement_map.csv") {
23+
survey_version <- match.arg(survey_version)
2224
q <- read_json(path_to_qsf)
2325
wave <- get_wave(path_to_qsf)
2426

25-
# get the survey elements with flow logic (should be one per block randomization branch)
26-
ii_flow <- q$SurveyElements %>%
27-
map_chr("Element") %>%
28-
{. == "FL"} %>%
29-
which()
30-
ii_block_randomizer <- q$SurveyElements[ii_flow] %>%
31-
map(~ .x$Payload$Flow) %>%
32-
map(~ map(.x,~ .x$Type == "BlockRandomizer")) %>%
33-
unlist() %>%
34-
which()
35-
random_block_ids <- q$SurveyElements[ii_flow] %>%
36-
map(~ .x$Payload$Flow) %>%
37-
map(~ .x[ii_block_randomizer]) %>%
38-
map(~ map(.x,~ .x$Flow)) %>%
39-
map(~ map(.x,~ map(.x,~ .x$ID))) %>%
40-
unlist()
41-
42-
block_id_item_map <- get_block_item_map(q)
43-
block_id_item_map <- block_id_item_map %>% filter(BlockID %in% random_block_ids) %>%
44-
select(-BlockID)
45-
46-
# get the survey elements that are questions:
47-
ii_questions <- q$SurveyElements %>%
48-
map_chr("Element") %>%
49-
{. == "SQ"} %>%
50-
which()
51-
52-
# get the questions that were shown to respondents
53-
shown_items <- get_shown_items(q)
54-
ii_shown <- q$SurveyElements[ii_questions] %>%
55-
map_chr(~ .x$Payload$QuestionID) %>%
56-
{. %in% shown_items} %>%
57-
which()
27+
block_id_item_map <- map_qids_to_module(q)
28+
displayed_questions <- subset_qsf_to_displayed(q)
5829

59-
# subset qsf to valid elements
60-
displayed_questions <- q$SurveyElements[ii_questions][ii_shown]
61-
62-
# Qualtrics auto-assigned question IDs
30+
# get Qualtrics auto-assigned question IDs
6331
qids <- displayed_questions %>%
6432
map_chr(~ .x$Payload$QuestionID)
6533

66-
# the items are how we will match these to the survey data:
67-
items <- displayed_questions %>%
68-
map_chr(~ .x$Payload$DataExportTag)
69-
70-
# B13 was originally named incorrectly. Rename manually as needed
71-
items[items == "B13 "] <- "B13"
72-
# V2a in Wave 13 was originally named incorrectly. Rename manually as needed
73-
if (wave == 13) {
74-
items[items == "V2a"] <- "V2d"
75-
}
34+
# get item names
35+
item_names <- displayed_questions %>%
36+
map_chr(~ .x$Payload$DataExportTag) %>%
37+
patch_item_names(survey_version, wave)
7638

77-
# get the text of the question:
39+
# get question text:
7840
questions <- displayed_questions %>%
7941
map_chr(~ .x$Payload$QuestionText)
8042

81-
# get the type of question:
82-
type_map <- c(MC = "Multiple choice", TE = "Text", Matrix = "Matrix")
83-
qtype <- displayed_questions %>%
84-
map_chr(~ .x$Payload$QuestionType) %>%
85-
{type_map[.]}
86-
87-
ii_multiselect <- displayed_questions %>%
88-
map_chr(~ .x$Payload$Selector) %>%
89-
{. == "MAVR"} %>%
90-
which()
91-
qtype[ii_multiselect] <- "Multiselect"
92-
qtype[items == "A5"] <- "Matrix" # this will be treated like C10
93-
94-
# get the choices (for MC and Matrix):
43+
# get question types
44+
qtype <- get_question_formats(displayed_questions, item_names, survey_version)
45+
46+
# get choices for multiple choice (MC) and Matrix items:
9547
choices <- displayed_questions %>%
9648
map(~ .x$Payload$Choices) %>%
9749
map(~ map(.x, "Display"))
@@ -122,8 +74,8 @@ process_qsf <- function(path_to_qsf,
12274
map(which) %>%
12375
map(names) %>%
12476
unlist()
125-
other_text_items <- paste(items[ii_other_text_option], text_elem, "TEXT", sep="_") %>%
126-
setNames(items[ii_other_text_option])
77+
other_text_items <- paste(item_names[ii_other_text_option], text_elem, "TEXT", sep="_") %>%
78+
setNames(item_names[ii_other_text_option])
12779

12880
# some questions port the choices from other questions
12981
ii_carryforward <- displayed_questions %>%
@@ -189,7 +141,7 @@ process_qsf <- function(path_to_qsf,
189141
paste(c(qid, selectable_text, option_code), collapse="")
190142
}, .x)) %>%
191143
# Replace QID with question number (A2, etc)
192-
map(~ gsubfn("(QID[0-9]+)", function(qid) {items[qids == qid]}, .x)) %>%
144+
map(~ gsubfn("(QID[0-9]+)", function(qid) {item_names[qids == qid]}, .x)) %>%
193145
# Collapse logic into a single string.
194146
map(~ paste(.x, collapse=" "))
195147

@@ -203,7 +155,7 @@ process_qsf <- function(path_to_qsf,
203155
unlist()
204156

205157
# format all qsf content lists into a single tibble
206-
qdf <- tibble(variable = items,
158+
qdf <- tibble(variable = item_names,
207159
question = questions,
208160
type = qtype,
209161
choices = choices,
@@ -213,7 +165,7 @@ process_qsf <- function(path_to_qsf,
213165

214166
# Add on module randomization
215167
block_id_item_map <- block_id_item_map %>%
216-
left_join(data.frame(qid=qids, item=items), by=c("Questions"="qid"))
168+
left_join(data.frame(qid=qids, item=item_names), by=c("Questions"="qid"))
217169
qdf <- qdf %>% left_join(block_id_item_map, by=c(variable="item")) %>%
218170
rename(group_of_respondents_item_was_shown_to = BlockName)
219171

@@ -272,12 +224,17 @@ process_qsf <- function(path_to_qsf,
272224
select(new) %>%
273225
unnest(new)
274226

275-
# A5 and C10 are special cases b/c of they are matrix of text entry questions:
276-
# also C10 needs an extra _1.
277-
matrix_items <- matrix_items %>%
278-
mutate(variable = if_else(str_starts(variable, "C10"), paste0(variable, "_1"), variable),
279-
type = if_else(str_starts(variable, "A5|C10"), "Text", type),
280-
choices = if_else(str_starts(variable, "A5|C10"), list(list()), choices))
227+
# Custom matrix formatting
228+
if (survey_version == "CMU") {
229+
# A5 and C10 are special cases b/c they are matrices of text entry questions:
230+
# also C10 needs an extra _1.
231+
matrix_items <- matrix_items %>%
232+
mutate(variable = if_else(str_starts(variable, "C10"), paste0(variable, "_1"), variable),
233+
type = if_else(str_starts(variable, "A5|C10"), "Text", type),
234+
choices = if_else(str_starts(variable, "A5|C10"), list(list()), choices))
235+
} else if (survey_version == "UMD") {
236+
# pass
237+
}
281238

282239
qdf <- bind_rows(nonmatrix_items, matrix_items)
283240

@@ -448,11 +405,12 @@ add_qsf_to_codebook <- function(path_to_qsf, path_to_codebook) {
448405

449406
args <- commandArgs(TRUE)
450407

451-
if (length(args) != 2) {
452-
stop("Usage: Rscript generate-codebook.R path/to/qsf path/to/codebook")
408+
if (length(args) != 3) {
409+
stop("Usage: Rscript generate-codebook.R [UMD/CMU] path/to/qsf path/to/codebook")
453410
}
454411

455-
path_to_qsf <- args[1]
456-
path_to_codebook <- args[2]
412+
survey_version <- args[1]
413+
path_to_qsf <- args[2]
414+
path_to_codebook <- args[3]
457415

458416
invisible(add_qsf_to_codebook(path_to_qsf, path_to_codebook))
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#!/usr/bin/env Rscript
2+
3+
## Print a list of survey questions that we handle as matrices.
4+
##
5+
## Usage:
6+
##
7+
## Rscript list-matrix-items.R [UMD/CMU] path/to/qsf
8+
9+
suppressPackageStartupMessages({
10+
library(jsonlite)
11+
library(tidyverse)
12+
source("qsf-utils.R")
13+
})
14+
15+
16+
print_matrix_items <- function(path_to_qsf, survey_version=c("CMU", "UMD")) {
17+
survey_version <- match.arg(survey_version)
18+
q <- read_json(path_to_qsf)
19+
wave <- get_wave(path_to_qsf)
20+
21+
displayed_questions <- subset_qsf_to_displayed(q)
22+
23+
# Get survey item names
24+
item_names <- displayed_questions %>%
25+
map_chr(~ .x$Payload$DataExportTag) %>%
26+
patch_item_names(item_names, survey_version, wave)
27+
28+
# Get survey item formats
29+
qtype <- get_question_formats(displayed_questions, item_names, survey_version)
30+
31+
qdf <- tibble(variable = item_names,
32+
type = qtype)
33+
34+
print(qdf %>% filter(type == "Matrix") %>% pull(variable))
35+
36+
return(NULL)
37+
}
38+
39+
args <- commandArgs(TRUE)
40+
41+
if (length(args) != 2) {
42+
stop("Usage: Rscript list-matrix-items.R [UMD/CMU] path/to/qsf")
43+
}
44+
45+
survey_version <- args[1]
46+
path_to_qsf <- args[2]
47+
48+
invisible(print_matrix_items(path_to_qsf, survey_version))

facebook/qsf-tools/qsf-utils.R

Lines changed: 111 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77
#'
88
#' @return list of Qualtrics Question IDs (QIDs) of items shown to respondents
99
get_shown_items <- function(qsf) {
10-
block_out <- Filter(function(elem) { elem[["Element"]] == "BL" }, qsf$SurveyElements)[[1]]$Payload
10+
all_blocks <- Filter(function(elem) { elem[["Element"]] == "BL" }, qsf$SurveyElements)[[1]]$Payload
1111

1212
shown_items <- list()
13-
for (block in block_out) {
13+
for (block in all_blocks) {
1414
if (block$Type == "Trash") {
1515
next
1616
}
@@ -27,10 +27,10 @@ get_shown_items <- function(qsf) {
2727

2828

2929
get_block_item_map <- function(qsf) {
30-
block_out <- Filter(function(elem) { elem[["Element"]] == "BL" }, qsf$SurveyElements)[[1]]$Payload
30+
all_blocks <- Filter(function(elem) { elem[["Element"]] == "BL" }, qsf$SurveyElements)[[1]]$Payload
3131

3232
items <- list()
33-
for (block in block_out) {
33+
for (block in all_blocks) {
3434
if (block$Type == "Trash") {
3535
next
3636
}
@@ -56,7 +56,7 @@ get_block_item_map <- function(qsf) {
5656
get_wave <- function(path_to_qsf) {
5757
qsf_name_pattern <- "(.*Wave_)([0-9]*([.][0-9])?)([.]qsf)$"
5858
if (!grepl(qsf_name_pattern, path_to_qsf)) {
59-
stop("qsf filename should be of the format 'Survey_of_COVID-Like_Illness_-_Wave_XX.qsf'")
59+
stop("qsf filename should be of the format '<survey prefix>Wave_XX.qsf' where 'XX' is an integer or float")
6060
}
6161

6262
wave <- as.numeric(
@@ -65,3 +65,109 @@ get_wave <- function(path_to_qsf) {
6565

6666
return(wave)
6767
}
68+
69+
#' Create mapping of QIDs to module name
70+
#'
71+
#' @param qsf contents of QSF file in JSON format
72+
#'
73+
#' @return dataframe with `BlockName` (module name) and `Questions` (QIDs) columns
74+
map_qids_to_module <- function(qsf) {
75+
# get the survey elements with flow logic (should be one per block randomization branch)
76+
ii_flow <- qsf$SurveyElements %>%
77+
map_chr("Element") %>%
78+
{. == "FL"} %>%
79+
which()
80+
ii_block_randomizer <- qsf$SurveyElements[ii_flow] %>%
81+
map(~ .x$Payload$Flow) %>%
82+
map(~ map(.x,~ .x$Type == "BlockRandomizer")) %>%
83+
unlist() %>%
84+
which()
85+
random_block_ids <- qsf$SurveyElements[ii_flow] %>%
86+
map(~ .x$Payload$Flow) %>%
87+
map(~ .x[ii_block_randomizer]) %>%
88+
map(~ map(.x,~ .x$Flow)) %>%
89+
map(~ map(.x,~ map(.x,~ .x$ID))) %>%
90+
unlist()
91+
92+
block_id_item_map <- get_block_item_map(qsf)
93+
block_id_item_map <- block_id_item_map %>% filter(BlockID %in% random_block_ids) %>%
94+
select(-BlockID)
95+
96+
return(block_id_item_map)
97+
}
98+
99+
#' Get only questions that were shown to respondents, using definition in `get_shown_items`
100+
#'
101+
#' @param qsf contents of QSF file in JSON format
102+
#'
103+
#' @return QSF subsetted to only displayed questions
104+
subset_qsf_to_displayed <- function(qsf) {
105+
# get the survey elements that are questions:
106+
ii_questions <- qsf$SurveyElements %>%
107+
map_chr("Element") %>%
108+
{. == "SQ"} %>%
109+
which()
110+
111+
# get the questions that were shown to respondents
112+
shown_items <- get_shown_items(qsf)
113+
ii_shown <- qsf$SurveyElements[ii_questions] %>%
114+
map_chr(~ .x$Payload$QuestionID) %>%
115+
{. %in% shown_items} %>%
116+
which()
117+
118+
# subset qsf to valid elements
119+
displayed_questions <- qsf$SurveyElements[ii_questions][ii_shown]
120+
121+
return(displayed_questions)
122+
}
123+
124+
#' Replace erroneous question names
125+
#'
126+
#' @param item_names character vector of survey question names
127+
#' @param survey_version either "UMD" or "CMU"
128+
#' @param wave integer or float survey version
129+
#'
130+
#' @return character vector of repaired survey question names
131+
patch_item_names <- function(item_names, survey_version, wave) {
132+
if (survey_version == "CMU") {
133+
# B13 was originally named incorrectly.
134+
item_names[item_names == "B13 "] <- "B13"
135+
# V2a in Wave 13 was originally named incorrectly.
136+
if (wave == 13) {
137+
item_names[item_names == "V2a"] <- "V2d"
138+
}
139+
} else if (survey_version == "UMD") {
140+
# pass
141+
}
142+
143+
return(item_names)
144+
}
145+
146+
#' Fetch and customize question format types.
147+
#'
148+
#' @param qsf contents of QSF file in JSON format
149+
#' @param item_names character vector of survey question names
150+
#' @param survey_version either "UMD" or "CMU"
151+
#'
152+
#' @return character vector of repaired survey question names
153+
get_question_formats <- function(qsf, item_names, survey_version){
154+
type_map <- c(MC = "Multiple choice", TE = "Text", Matrix = "Matrix")
155+
156+
qtype <- qsf %>%
157+
map_chr(~ .x$Payload$QuestionType) %>%
158+
{type_map[.]}
159+
160+
ii_multiselect <- qsf %>%
161+
map_chr(~ .x$Payload$Selector) %>%
162+
{. == "MAVR"} %>%
163+
which()
164+
qtype[ii_multiselect] <- "Multiselect"
165+
166+
if (survey_version == "CMU") {
167+
qtype[item_names == "A5"] <- "Matrix" # this will be treated like C10
168+
} else if (survey_version == "UMD") {
169+
# pass
170+
}
171+
172+
return(qtype)
173+
}

0 commit comments

Comments
 (0)