library(readr)
library(dplyr)
library(purrr)

# Receiving directories of the two runs being compared ----
NEW_RECEIVING <- "receiving_2021_commitc1a9e102a731c977bc7b7dea4822177faeac0dc4"
OLD_RECEIVING <- "receiving_2021_commit1b71f21bbade4c9d286af2d79a917b772df5d132"
SUB_DIR <- "changehc_covid"

#' Compare one CSV between the new and old receiving directories
#'
#' Reads `fname` from `NEW_RECEIVING/SUB_DIR` and `OLD_RECEIVING/SUB_DIR`,
#' prints whether the two tables are identical / all.equal, lists the columns
#' of the new table that are not all.equal to the old one, and for each such
#' column prints the rows that differ.
#'
#' @param fname File name (relative to the `SUB_DIR` of each receiving
#'   directory) of the CSV to compare.
#' @param row_equality Which notion of row equality selects the rows that are
#'   printed: `"identical"` (exact `!=`, with an `NA` on only one side counted
#'   as a difference) or `"approximate"` (`all.equal()` applied element by
#'   element).
#' @return Invisibly, a character vector with the names of the mismatching
#'   columns. The function is used for its printed output.
examine_differences <- function(fname,
                                row_equality = c("identical", "approximate")) {
  row_equality <- match.arg(row_equality)

  newfile <- file.path(NEW_RECEIVING, SUB_DIR, fname)
  oldfile <- file.path(OLD_RECEIVING, SUB_DIR, fname)
  new <- read_csv(newfile)
  old <- read_csv(oldfile)

  # Whole-table comparisons: exact first, then tolerance-based.
  print(identical(new, old))
  print(all.equal(new, old))

  # Collect the columns of `new` that are not all.equal to `old`.
  mismatch_cols <- character(0)
  for (col in names(new)) {
    # all.equal() returns TRUE or a character vector describing the
    # differences, so the result must be tested with isTRUE(); comparing it
    # against TRUE directly breaks when several differences are reported.
    col_check <- all.equal(new[[col]], old[[col]])
    if (!isTRUE(col_check)) {
      print(col)
      print(col_check)
      mismatch_cols <- c(mismatch_cols, col)
    }
  }
  print(mismatch_cols)

  for (col in mismatch_cols) {
    new_vals <- new[[col]]
    old_vals <- old[[col]]

    print(col)

    # Row-by-row comparison only makes sense when both columns have the same
    # number of entries (the column may be absent from `old`, or the files may
    # have different row counts).
    if (length(new_vals) != length(old_vals)) {
      print(paste0("cannot compare rows: ", length(new_vals),
                   " rows in new vs ", length(old_vals), " rows in old"))
      print("")
      next
    }

    # Get indices for rows that aren't identical. `!=` yields NA when either
    # side is NA, so rows where exactly one side is missing are detected
    # separately instead of being silently dropped by which().
    differs <- new_vals != old_vals
    na_mismatch <- is.na(new_vals) != is.na(old_vals)
    ii_exact <- which(na_mismatch | (!is.na(differs) & differs))

    # Get indices for rows that aren't approximately equal
    ii_approx <- which(map2_lgl(
      new_vals,
      old_vals,
      function(new_val, old_val) !isTRUE(all.equal(new_val, old_val))
    ))

    print(paste0(length(ii_approx), " approximately unequal rows vs ",
                 length(ii_exact), " non-identical rows vs ",
                 length(new_vals), " total rows"))

    ii <- switch(
      row_equality,
      "identical" = ii_exact,
      "approximate" = ii_approx
    )

    # Kept from the original workflow: drops into the debugger so the selected
    # rows can be inspected before they are printed.
    browser()

    print(new[ii, ])
    print(old[ii, ])

    print(head(new_vals[ii]))
    print(head(old_vals[ii]))

    if (row_equality == "identical") {
      # Show the values at high precision so tiny differences are visible.
      print(head(format(new_vals[ii], digits = 22)))
      print(head(format(old_vals[ii], digits = 22)))
    }

    print("")
  }

  invisible(mismatch_cols)
}

fname <- "coefs_20210221_20210122_changehc_covid_state_lambda0.1_count.csv.gz"
examine_differences(fname, "approximate")

fname <- "prediction_20210221_20210122_changehc_covid_state_lambda0.1_count.csv.gz"
examine_differences(fname, "approximate")