@@ -128,15 +128,18 @@ combine_tables <- function(seen_file, input_dir, input_files, output_file) {
128
128
129
129
if (file.exists(output_file )) {
130
130
output_names <- names(read_csv(output_file , n_max = 0L ))
131
- assert(identical(output_names , names(input_df )),
132
- paste0(" Column names and/or order differ between new and old input for " , output_file ))
131
+ identical_names <- identical(output_names , names(input_df ))
132
+ } else {
133
+ identical_names <- TRUE
133
134
}
134
135
135
136
seen_files <- load_seen_file(seen_file )
136
137
any_prev_seen <- any(input_files %in% seen_files )
137
138
138
- # If no input files have been seen before, we don't need to deduplicate.
139
- if (any_prev_seen ) {
139
+ # If input files have been seen before, we need to deduplicate. If there is a
140
+ # mismatch between input and output column names/order, we need to explicitly
141
+ # merge input and output data to make sure columns match up correctly.
142
+ if (any_prev_seen || ! identical_names ) {
140
143
assert(file.exists(output_file ),
141
144
paste0(" The output file " , output_file , " does not exist, but non-zero" ,
142
145
" files using the same grouping variables have been seen before." ))
@@ -178,7 +181,7 @@ combine_tables <- function(seen_file, input_dir, input_files, output_file) {
178
181
return (list (
179
182
output_df = output_df ,
180
183
newly_seen_files = newly_seen ,
181
- any_prev_seen = any_prev_seen ))
184
+ can_overwrite = any_prev_seen || ! identical_names ))
182
185
}
183
186
184
187
# ' Save a combined dataframe and list of seen files to disk.
@@ -188,19 +191,20 @@ combine_tables <- function(seen_file, input_dir, input_files, output_file) {
188
191
# '
189
192
# ' @param combined_output Named list output from `combine_tables`. Contains an
190
193
# ' `output_df` dataframe, a list of newly seen files, and a flag indicating
191
- # ' whether any input filenames have been seen before.
194
+ # ' whether we need to overwrite the existing output file or we can append to
195
+ # ' it.
192
196
# ' @param seen_file Path to file listing filenames that have been previously
193
197
# ' loaded into an output file.
194
198
# ' @param output_file Path to corresponding output file.
195
199
write_rollup <- function (combined_output , seen_file , output_file ) {
196
200
output_df <- combined_output [[" output_df" ]]
197
201
newly_seen_files <- combined_output [[" newly_seen_files" ]]
198
- any_prev_seen_files <- combined_output [[" any_prev_seen " ]]
202
+ can_overwrite <- combined_output [[" can_overwrite " ]]
199
203
200
204
# Overwrite any existing output file if some input files have been seen before
201
205
# or if input/output column names or order differ. Otherwise, append to the
202
206
# output file. File is created if it doesn't already exist.
203
- if (any_prev_seen_files ) {
207
+ if (can_overwrite ) {
204
208
# Automatically uses gzip compression based on output file name. Overwrites
205
209
# existing file of the same name.
206
210
write_csv(output_df , output_file )
@@ -225,4 +229,7 @@ if (length(args) < 2) {
225
229
input_path <- args [1 ]
226
230
output_path <- args [2 ]
227
231
232
+ input_path <- " ~/Downloads/0418_tables/"
233
+ output_path <- " ~/Downloads/rollup_test_FB_press_conf"
234
+
228
235
invisible (run_rollup(input_path , output_path ))
0 commit comments