@@ -31,10 +31,8 @@ generate_changelog <- function(path_to_codebook,
31
31
# waves, plus a description of what changed and why.
32
32
qsf_diff <- get_diff(path_to_diff )
33
33
34
- if (! (" notes" %in% names(qsf_diff ))) {
35
- if (is.null(path_to_old_changelog )) {
36
- stop(" rationales must be provided either in the diff or via an old version of the changelog" )
37
- }
34
+ if (! (" notes" %in% names(qsf_diff )) && is.null(path_to_old_changelog )) {
35
+ warning(" rationales must be provided either in the diff or via an old version of the changelog" )
38
36
qsf_diff $ notes <- NA_character_
39
37
}
40
38
@@ -60,14 +58,14 @@ generate_changelog <- function(path_to_codebook,
60
58
changelog <- add_rationales_from_old_changelog(changelog , path_to_old_changelog )
61
59
check_missing_rationales(changelog )
62
60
63
- write_excel_csv(changelog , path_to_changelog , quote = " needed" )
61
+ write_excel_csv(changelog % > % rename( new_version = new_wave , old_version = old_wave ) , path_to_changelog , quote = " needed" )
64
62
}
65
63
66
64
# Read codebook from path. Drop fields we don't use in the changelog.
67
65
get_codebook <- function (path_to_codebook ) {
68
66
codebook <- read_csv(path_to_codebook , col_types = cols(
69
67
.default = col_character(),
70
- wave = col_double()
68
+ version = col_double()
71
69
)) %> %
72
70
rename(question_text = question , matrix_subquestion_text = matrix_subquestion ) %> %
73
71
select(
@@ -77,7 +75,7 @@ get_codebook <- function(path_to_codebook) {
77
75
78
76
return (codebook )
79
77
}
80
-
78
+
81
79
# Try to load `path_to_diff`. Check if it is a single CSV or a directory
82
80
# containing a set of CSVs.
83
81
get_diff <- function (path_to_diff ) {
@@ -86,11 +84,40 @@ get_diff <- function(path_to_diff) {
86
84
csvs <- list.files(path_to_diff , pattern = " *.csv$" , full.names = TRUE )
87
85
qsf_diff <- list ()
88
86
for (csv in csvs ) {
89
- qsf_diff [[ csv ]] <- read_csv(csv , col_types = cols(
87
+ curr_diff <- read_csv(csv , col_types = cols(
90
88
.default = col_character(),
91
89
new_wave = col_double(),
92
90
old_wave = col_double()
93
91
))
92
+
93
+ old_wave_vars <- curr_diff %> %
94
+ filter(change_type != " Item added" ) %> %
95
+ distinct(item ) %> %
96
+ pull()
97
+ new_wave_vars <- curr_diff %> %
98
+ filter(change_type != " Item removed" ) %> %
99
+ distinct(item ) %> %
100
+ pull()
101
+ if (all(c(" C0_matrix" , " C0_likert" ) %in% old_wave_vars ) || all(c(" C0_matrix" , " C0_likert" ) %in% new_wave_vars )) {
102
+ stop(" Only one of 'C0_matrix' and 'C0_likert' can be present at once in file " , csv )
103
+ }
104
+ if (all(c(" B13_profile" , " B13_likert" ) %in% old_wave_vars ) || all(c(" B13_profile" , " B13_likert" ) %in% new_wave_vars )) {
105
+ stop(" Only one of 'B13_profile' and 'B13_likert' can be present at once in file " , csv )
106
+ }
107
+ if (all(c(" B14_profile" , " B14_likert" ) %in% old_wave_vars ) || all(c(" B14_profile" , " B14_likert" ) %in% new_wave_vars )) {
108
+ stop(" Only one of 'B14_profile' and 'B14_likert' can be present at once in file " , csv )
109
+ }
110
+ if (all(c(" B12a_profile" , " B12a_likert" ) %in% old_wave_vars ) || all(c(" B12a_profile" , " B12a_likert" ) %in% new_wave_vars )) {
111
+ stop(" Only one of 'B12a_profile' and 'B12a_likert' can be present at once in file " , csv )
112
+ }
113
+ if (all(c(" B12b_profile" , " B12b_likert" ) %in% old_wave_vars ) || all(c(" B12b_profile" , " B12b_likert" ) %in% new_wave_vars )) {
114
+ stop(" Only one of 'B12b_profile' and 'B12b_likert' can be present at once in file " , csv )
115
+ }
116
+ if (all(c(" B1b_matrix" , " B1b_likert" ) %in% old_wave_vars ) || all(c(" B1b_matrix" , " B1b_likert" ) %in% new_wave_vars )) {
117
+ stop(" Only one of 'B1b_matrix' and 'B1b_likert' can be present at once in file " , csv )
118
+ }
119
+
120
+ qsf_diff [[csv ]] <- curr_diff
94
121
}
95
122
qsf_diff <- purrr :: reduce(qsf_diff , rbind ) %> %
96
123
rename(variable_name = item ) %> %
@@ -169,23 +196,72 @@ prepare_matrix_base_questions_for_join <- function(qsf_diff, codebook) {
169
196
qsf_diff %> % distinct(variable_name ) %> % pull(),
170
197
codebook %> % distinct(variable ) %> % pull()
171
198
)
199
+
172
200
# Add an underscore to the unmatched variable names to create a regex pattern
173
201
matrix_prefixes <- paste0(vars_not_in_codebook , " _" )
174
202
names(matrix_prefixes ) <- vars_not_in_codebook
203
+
204
+ # A subset of UMD variables need manual mapping.
205
+ if (" C0_matrix" %in% names(matrix_prefixes )) {
206
+ matrix_prefixes [" C0_matrix" ] <- " C0_"
207
+ }
208
+ if (" C0_likert" %in% names(matrix_prefixes )) {
209
+ matrix_prefixes [" C0_likert" ] <- " C0_"
210
+ }
211
+
212
+ if (" B13_profile" %in% names(matrix_prefixes )) {
213
+ matrix_prefixes [" B13_profile" ] <- " B13_"
214
+ }
215
+ if (" B13_likert" %in% names(matrix_prefixes )) {
216
+ matrix_prefixes [" B13_likert" ] <- " B13_"
217
+ }
218
+
219
+ if (" B14_profile" %in% names(matrix_prefixes )) {
220
+ matrix_prefixes [" B14_profile" ] <- " B14_"
221
+ }
222
+ if (" B14_likert" %in% names(matrix_prefixes )) {
223
+ matrix_prefixes [" B14_likert" ] <- " B14_"
224
+ }
225
+
226
+ if (" B12a_profile" %in% names(matrix_prefixes )) {
227
+ matrix_prefixes [" B12a_profile" ] <- " B12a_"
228
+ }
229
+ if (" B12a_likert" %in% names(matrix_prefixes )) {
230
+ matrix_prefixes [" B12a_likert" ] <- " B12a_"
231
+ }
232
+
233
+ if (" B12b_profile" %in% names(matrix_prefixes )) {
234
+ matrix_prefixes [" B12b_profile" ] <- " B12b_"
235
+ }
236
+ if (" B12b_likert" %in% names(matrix_prefixes )) {
237
+ matrix_prefixes [" B12b_likert" ] <- " B12b_"
238
+ }
239
+
240
+ if (" B1b_matrix" %in% names(matrix_prefixes )) {
241
+ matrix_prefixes [" B1b_matrix" ] <- " B1b_"
242
+ }
243
+ if (" B1b_likert" %in% names(matrix_prefixes )) {
244
+ matrix_prefixes [" B1b_likert" ] <- " B1b_"
245
+ }
175
246
176
247
# First matrix item match by wave and matrix base question.
177
248
map_matrix_prefix_to_first_match <- codebook %> %
178
249
mutate(
179
250
join_variable = case_when(
180
251
# Create the basename for matrix items.
181
- grepl(" _" , variable ) ~ strsplit(variable , " _" ) %> % purrr :: map_chr(~ .x [1 ]) %> % paste0(" _" ),
252
+ ! is.na(matrix_subquestion_text ) ~ strsplit(variable , " _" ) %> %
253
+ # Get all but last underscore-delimited chunk
254
+ purrr :: map(~ .x [1 : (length(.x ) - 1 )]) %> %
255
+ # Combine all but the last chunk with underscores.
256
+ purrr :: map(~ paste0(.x , collapse = " _" ) %> % paste0(" _" )) %> %
257
+ unlist(),
182
258
TRUE ~ variable
183
259
)
184
260
) %> %
185
261
filter(join_variable %in% matrix_prefixes ) %> %
186
- group_by(wave , join_variable ) %> %
262
+ group_by(version , join_variable ) %> %
187
263
slice_head() %> %
188
- select(wave , variable , join_variable )
264
+ select(version , variable , join_variable )
189
265
190
266
# Add the regex patterns onto the diff.
191
267
qsf_diff <- qsf_diff %> %
@@ -199,13 +275,13 @@ prepare_matrix_base_questions_for_join <- function(qsf_diff, codebook) {
199
275
map_matrix_prefix_to_first_match %> % rename_with(function (column_names ) {
200
276
paste(" new" , column_names , sep = " _" )
201
277
}),
202
- by = c(" new_wave" = " new_wave " , " join_variable" = " new_join_variable" )
278
+ by = c(" new_wave" = " new_version " , " join_variable" = " new_join_variable" )
203
279
) %> %
204
280
left_join(
205
281
map_matrix_prefix_to_first_match %> % rename_with(function (column_names ) {
206
282
paste(" old" , column_names , sep = " _" )
207
283
}),
208
- by = c(" old_wave" = " old_wave " , " join_variable" = " old_join_variable" )
284
+ by = c(" old_wave" = " old_version " , " join_variable" = " old_join_variable" )
209
285
) %> %
210
286
rename(
211
287
join_variable_new_wave = new_variable ,
@@ -216,7 +292,7 @@ prepare_matrix_base_questions_for_join <- function(qsf_diff, codebook) {
216
292
join_variable_old_wave = coalesce(join_variable_old_wave , variable_name )
217
293
) %> %
218
294
select(- join_variable )
219
-
295
+
220
296
return (list (" diff" = qsf_diff , " vars_not_in_codebook" = vars_not_in_codebook ))
221
297
}
222
298
@@ -229,14 +305,14 @@ make_changelog_from_codebook_and_diff <- function(qsf_diff, codebook, vars_not_i
229
305
codebook %> % rename_with(function (column_names ) {
230
306
paste(" new" , column_names , sep = " _" )
231
307
}),
232
- by = c(" new_wave" = " new_wave " , " join_variable_new_wave" = " new_variable" )
308
+ by = c(" new_wave" = " new_version " , " join_variable_new_wave" = " new_variable" )
233
309
) %> %
234
310
# Add info about previous version of question
235
311
left_join(
236
312
codebook %> % rename_with(function (column_names ) {
237
313
paste(" old" , column_names , sep = " _" )
238
314
}),
239
- by = c(" old_wave" = " old_wave " , " join_variable_old_wave" = " old_variable" )
315
+ by = c(" old_wave" = " old_version " , " join_variable_old_wave" = " old_variable" )
240
316
) %> %
241
317
select(
242
318
new_wave ,
@@ -265,6 +341,42 @@ make_changelog_from_codebook_and_diff <- function(qsf_diff, codebook, vars_not_i
265
341
variable_name %in% vars_not_in_codebook ~ NA_character_ ,
266
342
TRUE ~ old_matrix_subquestion_text
267
343
)
344
+ ) %> %
345
+ # When an item was added, all `old_` fields should be empty; when an item
346
+ # was removed, all `new_` fields should be empty.
347
+ mutate(
348
+ old_question_text = case_when(
349
+ change_type == " Item added" ~ NA_character_ ,
350
+ TRUE ~ old_question_text
351
+ ),
352
+ old_matrix_subquestion_text = case_when(
353
+ change_type == " Item added" ~ NA_character_ ,
354
+ TRUE ~ old_matrix_subquestion_text
355
+ ),
356
+ old_response_options = case_when(
357
+ change_type == " Item added" ~ NA_character_ ,
358
+ TRUE ~ old_response_options
359
+ ),
360
+ old_display_logic = case_when(
361
+ change_type == " Item added" ~ NA_character_ ,
362
+ TRUE ~ old_display_logic
363
+ ),
364
+ new_question_text = case_when(
365
+ change_type == " Item removed" ~ NA_character_ ,
366
+ TRUE ~ new_question_text
367
+ ),
368
+ new_matrix_subquestion_text = case_when(
369
+ change_type == " Item removed" ~ NA_character_ ,
370
+ TRUE ~ new_matrix_subquestion_text
371
+ ),
372
+ new_response_options = case_when(
373
+ change_type == " Item removed" ~ NA_character_ ,
374
+ TRUE ~ new_response_options
375
+ ),
376
+ new_display_logic = case_when(
377
+ change_type == " Item removed" ~ NA_character_ ,
378
+ TRUE ~ new_display_logic
379
+ )
268
380
)
269
381
270
382
return (changelog )
0 commit comments