Skip to content

[CITS QSF utils] Add new tool to print matrix item names #1557

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 4, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 37 additions & 79 deletions facebook/qsf-tools/generate-codebook.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
##
## Usage:
##
## Rscript generate-codebook.R path/to/qsf path/to/codebook
## Rscript generate-codebook.R [UMD/CMU] path/to/qsf path/to/codebook

suppressPackageStartupMessages({
library(tidyverse)
Expand All @@ -17,81 +17,32 @@ suppressPackageStartupMessages({


process_qsf <- function(path_to_qsf,
survey_version=c("CMU", "UMD"),
path_to_shortname_map="./static/item_shortquestion_map.csv",
path_to_replacement_map="./static/item_replacement_map.csv") {
survey_version <- match.arg(survey_version)
q <- read_json(path_to_qsf)
wave <- get_wave(path_to_qsf)

# get the survey elements with flow logic (should be one per block randomization branch)
ii_flow <- q$SurveyElements %>%
map_chr("Element") %>%
{. == "FL"} %>%
which()
ii_block_randomizer <- q$SurveyElements[ii_flow] %>%
map(~ .x$Payload$Flow) %>%
map(~ map(.x,~ .x$Type == "BlockRandomizer")) %>%
unlist() %>%
which()
random_block_ids <- q$SurveyElements[ii_flow] %>%
map(~ .x$Payload$Flow) %>%
map(~ .x[ii_block_randomizer]) %>%
map(~ map(.x,~ .x$Flow)) %>%
map(~ map(.x,~ map(.x,~ .x$ID))) %>%
unlist()

block_id_item_map <- get_block_item_map(q)
block_id_item_map <- block_id_item_map %>% filter(BlockID %in% random_block_ids) %>%
select(-BlockID)

# get the survey elements that are questions:
ii_questions <- q$SurveyElements %>%
map_chr("Element") %>%
{. == "SQ"} %>%
which()

# get the questions that were shown to respondents
shown_items <- get_shown_items(q)
ii_shown <- q$SurveyElements[ii_questions] %>%
map_chr(~ .x$Payload$QuestionID) %>%
{. %in% shown_items} %>%
which()
displayed_questions <- subset_qsf_to_displayed(q)

# subset qsf to valid elements
displayed_questions <- q$SurveyElements[ii_questions][ii_shown]

# Qualtrics auto-assigned question IDs
# get Qualtrics auto-assigned question IDs
qids <- displayed_questions %>%
map_chr(~ .x$Payload$QuestionID)

# the items are how we will match these to the survey data:
items <- displayed_questions %>%
map_chr(~ .x$Payload$DataExportTag)

# B13 was originally named incorrectly. Rename manually as needed
items[items == "B13 "] <- "B13"
# V2a in Wave 13 was originally named incorrectly. Rename manually as needed
if (wave == 13) {
items[items == "V2a"] <- "V2d"
}
# get item names
item_names <- displayed_questions %>%
map_chr(~ .x$Payload$DataExportTag) %>%
patch_item_names(survey_version, wave)

# get the text of the question:
# get question text:
questions <- displayed_questions %>%
map_chr(~ .x$Payload$QuestionText)

# get the type of question:
type_map <- c(MC = "Multiple choice", TE = "Text", Matrix = "Matrix")
qtype <- displayed_questions %>%
map_chr(~ .x$Payload$QuestionType) %>%
{type_map[.]}

ii_multiselect <- displayed_questions %>%
map_chr(~ .x$Payload$Selector) %>%
{. == "MAVR"} %>%
which()
qtype[ii_multiselect] <- "Multiselect"
qtype[items == "A5"] <- "Matrix" # this will be treated like C10

# get the choices (for MC and Matrix):
# get question types
qtype <- get_question_formats(displayed_questions, item_names, survey_version)

# get choices for multiple choice (MC) and Matrix items:
choices <- displayed_questions %>%
map(~ .x$Payload$Choices) %>%
map(~ map(.x, "Display"))
Expand Down Expand Up @@ -122,8 +73,8 @@ process_qsf <- function(path_to_qsf,
map(which) %>%
map(names) %>%
unlist()
other_text_items <- paste(items[ii_other_text_option], text_elem, "TEXT", sep="_") %>%
setNames(items[ii_other_text_option])
other_text_items <- paste(item_names[ii_other_text_option], text_elem, "TEXT", sep="_") %>%
setNames(item_names[ii_other_text_option])

# some questions port the choices from other questions
ii_carryforward <- displayed_questions %>%
Expand Down Expand Up @@ -189,7 +140,7 @@ process_qsf <- function(path_to_qsf,
paste(c(qid, selectable_text, option_code), collapse="")
}, .x)) %>%
# Replace QID with question number (A2, etc)
map(~ gsubfn("(QID[0-9]+)", function(qid) {items[qids == qid]}, .x)) %>%
map(~ gsubfn("(QID[0-9]+)", function(qid) {item_names[qids == qid]}, .x)) %>%
# Collapse logic into a single string.
map(~ paste(.x, collapse=" "))

Expand All @@ -203,7 +154,7 @@ process_qsf <- function(path_to_qsf,
unlist()

# format all qsf content lists into a single tibble
qdf <- tibble(variable = items,
qdf <- tibble(variable = item_names,
question = questions,
type = qtype,
choices = choices,
Expand All @@ -212,8 +163,9 @@ process_qsf <- function(path_to_qsf,
response_option_randomization = response_option_randomization)

# Add on module randomization
block_id_item_map <- map_qids_to_module(q)
block_id_item_map <- block_id_item_map %>%
left_join(data.frame(qid=qids, item=items), by=c("Questions"="qid"))
left_join(data.frame(qid=qids, item=item_names), by=c("Questions"="qid"))
qdf <- qdf %>% left_join(block_id_item_map, by=c(variable="item")) %>%
rename(group_of_respondents_item_was_shown_to = BlockName)

Expand Down Expand Up @@ -272,16 +224,21 @@ process_qsf <- function(path_to_qsf,
select(new) %>%
unnest(new)

# A5 and C10 are special cases b/c of they are matrix of text entry questions:
# also C10 needs an extra _1.
matrix_items <- matrix_items %>%
mutate(variable = if_else(str_starts(variable, "C10"), paste0(variable, "_1"), variable),
type = if_else(str_starts(variable, "A5|C10"), "Text", type),
choices = if_else(str_starts(variable, "A5|C10"), list(list()), choices))
# Custom matrix formatting
if (survey_version == "CMU") {
# A5 and C10 are special cases b/c they are matrices of text entry questions:
# also C10 needs an extra _1.
matrix_items <- matrix_items %>%
mutate(variable = if_else(str_starts(variable, "C10"), paste0(variable, "_1"), variable),
type = if_else(str_starts(variable, "A5|C10"), "Text", type),
choices = if_else(str_starts(variable, "A5|C10"), list(list()), choices))
} else if (survey_version == "UMD") {
# pass
}

qdf <- bind_rows(nonmatrix_items, matrix_items)

# indicate which items have replaced old items.
# indicate which new items have replaced old items.
replaces_map <- read_csv(path_to_replacement_map,
col_types = cols(new_item = col_character(),
old_item = col_character()
Expand Down Expand Up @@ -448,11 +405,12 @@ add_qsf_to_codebook <- function(path_to_qsf, path_to_codebook) {

args <- commandArgs(TRUE)

if (length(args) != 2) {
stop("Usage: Rscript generate-codebook.R path/to/qsf path/to/codebook")
if (length(args) != 3) {
stop("Usage: Rscript generate-codebook.R [UMD/CMU] path/to/qsf path/to/codebook")
}

path_to_qsf <- args[1]
path_to_codebook <- args[2]
survey_version <- args[1]
path_to_qsf <- args[2]
path_to_codebook <- args[3]

invisible(add_qsf_to_codebook(path_to_qsf, path_to_codebook))
49 changes: 49 additions & 0 deletions facebook/qsf-tools/list-matrix-items.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/usr/bin/env Rscript

## Print a list of survey questions that we handle as matrices.
##
## Usage:
##
## Rscript list-matrix-items.R [UMD/CMU] path/to/qsf

suppressPackageStartupMessages({
library(jsonlite)
library(tidyverse)
source("qsf-utils.R")
})


print_matrix_items <- function(path_to_qsf, survey_version=c("CMU", "UMD")) {
survey_version <- match.arg(survey_version)
q <- read_json(path_to_qsf)
wave <- get_wave(path_to_qsf)

displayed_questions <- subset_qsf_to_displayed(q)

# Get survey item names
item_names <- displayed_questions %>%
map_chr(~ .x$Payload$DataExportTag) %>%
patch_item_names(survey_version, wave)

# Get survey item formats
qtype <- get_question_formats(displayed_questions, item_names, survey_version)

qdf <- tibble(variable = item_names,
type = qtype)

matrix_items <- qdf %>% filter(type == "Matrix") %>% pull(variable)
message("Wave ", wave, " has ", length(matrix_items), " matrix items: ", paste(matrix_items, collapse=", "))

return(NULL)
}

args <- commandArgs(TRUE)

if (length(args) != 2) {
stop("Usage: Rscript list-matrix-items.R [UMD/CMU] path/to/qsf")
}

survey_version <- args[1]
path_to_qsf <- args[2]

invisible(print_matrix_items(path_to_qsf, survey_version))
116 changes: 111 additions & 5 deletions facebook/qsf-tools/qsf-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
#'
#' @return list of Qualtrics Question IDs (QIDs) of items shown to respondents
get_shown_items <- function(qsf) {
block_out <- Filter(function(elem) { elem[["Element"]] == "BL" }, qsf$SurveyElements)[[1]]$Payload
all_blocks <- Filter(function(elem) { elem[["Element"]] == "BL" }, qsf$SurveyElements)[[1]]$Payload

shown_items <- list()
for (block in block_out) {
for (block in all_blocks) {
if (block$Type == "Trash") {
next
}
Expand All @@ -27,10 +27,10 @@ get_shown_items <- function(qsf) {


get_block_item_map <- function(qsf) {
block_out <- Filter(function(elem) { elem[["Element"]] == "BL" }, qsf$SurveyElements)[[1]]$Payload
all_blocks <- Filter(function(elem) { elem[["Element"]] == "BL" }, qsf$SurveyElements)[[1]]$Payload

items <- list()
for (block in block_out) {
for (block in all_blocks) {
if (block$Type == "Trash") {
next
}
Expand All @@ -56,7 +56,7 @@ get_block_item_map <- function(qsf) {
get_wave <- function(path_to_qsf) {
qsf_name_pattern <- "(.*Wave_)([0-9]*([.][0-9])?)([.]qsf)$"
if (!grepl(qsf_name_pattern, path_to_qsf)) {
stop("qsf filename should be of the format 'Survey_of_COVID-Like_Illness_-_Wave_XX.qsf'")
stop("qsf filename should be of the format '<survey prefix>Wave_XX.qsf' where 'XX' is an integer or float")
}

wave <- as.numeric(
Expand All @@ -65,3 +65,109 @@ get_wave <- function(path_to_qsf) {

return(wave)
}

#' Create mapping of QIDs to module name
#'
#' @param qsf contents of QSF file in JSON format
#'
#' @return dataframe with `BlockName` (module name) and `Questions` (QIDs) columns
map_qids_to_module <- function(qsf) {
# get the survey elements with flow logic (should be one per block randomization branch)
ii_flow <- qsf$SurveyElements %>%
map_chr("Element") %>%
{. == "FL"} %>%
which()
ii_block_randomizer <- qsf$SurveyElements[ii_flow] %>%
map(~ .x$Payload$Flow) %>%
map(~ map(.x,~ .x$Type == "BlockRandomizer")) %>%
unlist() %>%
which()
random_block_ids <- qsf$SurveyElements[ii_flow] %>%
map(~ .x$Payload$Flow) %>%
map(~ .x[ii_block_randomizer]) %>%
map(~ map(.x,~ .x$Flow)) %>%
map(~ map(.x,~ map(.x,~ .x$ID))) %>%
unlist()

block_id_item_map <- get_block_item_map(qsf)
block_id_item_map <- block_id_item_map %>% filter(BlockID %in% random_block_ids) %>%
select(-BlockID)

return(block_id_item_map)
}

#' Get only questions that were shown to respondents, using definition in `get_shown_items`
#'
#' @param qsf contents of QSF file in JSON format
#'
#' @return QSF subsetted to only displayed questions
subset_qsf_to_displayed <- function(qsf) {
# get the survey elements that are questions:
ii_questions <- qsf$SurveyElements %>%
map_chr("Element") %>%
{. == "SQ"} %>%
which()

# get the questions that were shown to respondents
shown_items <- get_shown_items(qsf)
ii_shown <- qsf$SurveyElements[ii_questions] %>%
map_chr(~ .x$Payload$QuestionID) %>%
{. %in% shown_items} %>%
which()

# subset qsf to valid elements
displayed_questions <- qsf$SurveyElements[ii_questions][ii_shown]

return(displayed_questions)
}

#' Replace erroneous question names
#'
#' @param item_names character vector of survey question names
#' @param survey_version either "UMD" or "CMU"
#' @param wave integer or float survey version
#'
#' @return character vector of repaired survey question names
patch_item_names <- function(item_names, survey_version, wave) {
if (survey_version == "CMU") {
# B13 was originally named incorrectly.
item_names[item_names == "B13 "] <- "B13"
# V2a in Wave 13 was originally named incorrectly.
if (wave == 13) {
item_names[item_names == "V2a"] <- "V2d"
}
} else if (survey_version == "UMD") {
# pass
}

return(item_names)
}

#' Fetch and customize question format types.
#'
#' @param qsf contents of QSF file in JSON format
#' @param item_names character vector of survey question names
#' @param survey_version either "UMD" or "CMU"
#'
#' @return character vector of repaired survey question names
get_question_formats <- function(qsf, item_names, survey_version){
type_map <- c(MC = "Multiple choice", TE = "Text", Matrix = "Matrix")

qtype <- qsf %>%
map_chr(~ .x$Payload$QuestionType) %>%
{type_map[.]}

ii_multiselect <- qsf %>%
map_chr(~ .x$Payload$Selector) %>%
{. == "MAVR"} %>%
which()
qtype[ii_multiselect] <- "Multiselect"

if (survey_version == "CMU") {
qtype[item_names == "A5"] <- "Matrix" # this will be treated like C10
} else if (survey_version == "UMD") {
# pass
}

return(qtype)
}