Skip to content

Commit 18a2ce8

Browse files
authored
Merge pull request #1564 from cmu-delphi/ndefries/qsf-differ-export-matrix-subqs
[CTIS qsf tools] For changed matrix subquestions, the differ lists the particular items that changed
2 parents e5196eb + 651f9c5 commit 18a2ce8

File tree

1 file changed

+87
-20
lines changed

1 file changed

+87
-20
lines changed

facebook/qsf-tools/qsf-differ.R

Lines changed: 87 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ suppressPackageStartupMessages({
1515
library(jsonlite)
1616
library(stringr)
1717
library(dplyr)
18+
library(purrr)
1819
library(readr)
1920
source("qsf-utils.R")
2021
})
@@ -57,34 +58,74 @@ get_qsf_file <- function(path,
5758
) {
5859
# Read file as json.
5960
qsf <- read_json(path)
60-
6161
## Block
6262
shown_items <- get_shown_items(qsf)
6363

6464
## Questions
6565
questions <- Filter(function(elem) { elem[["Element"]] == "SQ" }, qsf$SurveyElements)
6666

67+
qids <- questions %>%
68+
map_chr(~ .x$Payload$QuestionID)
69+
6770
qid_item_map <- list()
6871
questions_out <- list()
69-
for (question in questions) {
70-
question <- question$Payload
72+
for (question_raw in questions) {
73+
question_raw <- question_raw$Payload
7174

7275
# Skip items not shown to respondents.
73-
if ( !(question$QuestionID %in% shown_items) ) {
76+
if ( !(question_raw$QuestionID %in% shown_items) ) {
7477
next
7578
}
7679

77-
if (!identical(keep_items, c("all"))) {
78-
question <- question[names(question) %in% c("QuestionID", keep_items)]
80+
question <- question_raw[names(question_raw) %in% c("QuestionID", keep_items)]
81+
82+
recode_values <- question_raw$RecodeValues # If doesn't exist, will be NULL
83+
carryforward_choices <- question_raw$DynamicChoices$Locator # If doesn't exist, will be NULL
84+
85+
if (!is.null(carryforward_choices)) {
86+
# Get choices that are programmed specifically for this question
87+
old_choices <- question$Choices
88+
89+
# Get carried-forward choices
90+
carryforward_choices_qid <- carryforward_choices %>%
91+
str_split(., "/") %>%
92+
map(~ .x[3]) %>% unlist()
93+
carryforward_question <- questions[qids == carryforward_choices_qid][[1]]$Payload
94+
carryforward_choices <- carryforward_question$Choices
95+
96+
# By default, carried forward choices are coded during the carry as
97+
# "x<original code>". They are then recoded as pure numeric codes using
98+
# the `RecodeValues` field. Some carried forward choices do not have
99+
# `RecodeValues` defined and so in that case we don't want to prepend the
100+
# codes with "x".
101+
if (!is.null(recode_values)) {
102+
names(carryforward_choices) <- paste0("x", names(carryforward_choices))
103+
}
104+
# Combine new choices and carried-forward choices
105+
question$Choices <- c(old_choices, carryforward_choices)
79106
}
80107

81-
# Rearrange Answers/Choices elements to be consistent between matrix and
82-
# other items.
83108
if ("QuestionType" %in% names(question)) {
84109
if (question$QuestionType == "Matrix") {
110+
# Rearrange Answers/Choices elements to be consistent between matrix and
111+
# other items.
85112
question$Subquestions <- question$Choices
86113
question$Choices <- question$Answers
87114
question$Answers <- NULL
115+
116+
# Recode subquestion names to match exported data.
117+
# FALSE if not set, otherwise a list
118+
matrix_subquestion_field_names <- question_raw$ChoiceDataExportTags
119+
if (!inherits(matrix_subquestion_field_names, "list")) {
120+
# When subquestion field names are not set, generate incrementing names
121+
names(question$Subquestions) <- paste(
122+
question$DataExportTag,
123+
1:length(question$Subquestions),
124+
sep = "_"
125+
)
126+
} else {
127+
names(question$Subquestions) <- matrix_subquestion_field_names[names(question$Subquestions)] %>% unlist()
128+
}
88129
}
89130
}
90131

@@ -136,8 +177,13 @@ diff_surveys <- function(old_qsf, new_qsf) {
136177
old_questions <- old_qsf$questions
137178
new_questions <- new_qsf$questions
138179

139-
added <- setdiff(new_shown_items, old_shown_items)
140-
removed <- setdiff(old_shown_items, new_shown_items)
180+
added_qs <- setdiff(new_shown_items, old_shown_items)
181+
added <- rep(NA, length(added_qs))
182+
names(added) <- added_qs
183+
184+
removed_qs <- setdiff(old_shown_items, new_shown_items)
185+
removed <- rep(NA, length(removed_qs))
186+
names(removed) <- removed_qs
141187

142188
added_df <- create_diff_df(added, "Added", NULL, new_questions)
143189
removed_df <- create_diff_df(removed, "Removed", old_questions, NULL)
@@ -171,13 +217,33 @@ diff_question <- function(names, change_type=c("Choices", "QuestionText",
171217
old_qsf, new_qsf) {
172218
change_type <- match.arg(change_type)
173219

174-
changed <- c()
220+
changed <- list()
175221
for (question in names) {
176222
if ( !identical(old_qsf[[question]][[change_type]], new_qsf[[question]][[change_type]]) ) {
177-
changed <- append(changed, question)
223+
changed_subquestions <- c()
224+
if (change_type == "Subquestions") {
225+
subquestion_codes <- unique(
226+
c(
227+
names(old_qsf[[question]][[change_type]]),
228+
names(new_qsf[[question]][[change_type]])
229+
)
230+
)
231+
232+
for (code in subquestion_codes) {
233+
if ( !identical(old_qsf[[question]][[change_type]][[code]], new_qsf[[question]][[change_type]][[code]]) ) {
234+
changed_subquestions <- append(changed_subquestions, code)
235+
}
236+
}
237+
changed_subquestions <- paste(changed_subquestions, collapse=", ")
238+
}
239+
240+
if (length(changed_subquestions) == 0) {
241+
changed_subquestions <- NA
242+
}
243+
changed[[question]] <- changed_subquestions
178244
}
179245
}
180-
out <- create_diff_df(changed, change_type, old_qsf, new_qsf)
246+
out <- create_diff_df(unlist(changed), change_type, old_qsf, new_qsf)
181247

182248
return(out)
183249
}
@@ -208,25 +274,26 @@ create_diff_df <- function(questions, change_type=c("Added", "Removed",
208274
Choices = "Answer choices changed",
209275
Subquestions = "Matrix subquestions changed"
210276
)
211-
questions <- sort(questions)
212277

213-
if (!is.null(old_qsf, new_qsf {
214-
old_qids <- sapply(questions, function(question) { old_qsf, new_qsfquestion]]$QuestionID })
278+
if (!is.null(old_qsf)) {
279+
old_qids <- sapply(names(questions), function(question) { old_qsf[[question]]$QuestionID })
215280
} else {
216281
old_qids <- NA
217282
}
218283
if (!is.null(new_qsf)) {
219-
new_qids <- sapply(questions, function(question) { new_qsf[[question]]$QuestionID })
284+
new_qids <- sapply(names(questions), function(question) { new_qsf[[question]]$QuestionID })
220285
} else {
221286
new_qids <- NA
222287
}
223288

224289
out <- data.frame(
225290
change_type=change_descriptions[[change_type]],
226-
item=questions,
291+
item=names(questions),
227292
old_qid=old_qids,
228-
new_qid=new_qids
229-
)
293+
new_qid=new_qids,
294+
impacted_subquestions=questions
295+
) %>%
296+
arrange(item)
230297
}
231298

232299
return(out)

0 commit comments

Comments
 (0)