test: modify pipeline and refactor ab comparison script

dshemetov · dshemetov · commit c73af47046c7 · 2024-03-25T16:03:14.000-07:00
diff --git a/scripts/covid_hosp_explore.R b/scripts/covid_hosp_explore.R
@@ -6,32 +6,41 @@ source("scripts/targets-common.R")
 # Add custom parameter combinations in the list below.
 make_unique_grids <- function() {
   list(
+    # tidyr::expand_grid(
+    #   forecaster = "scaled_pop",
+    #   trainer = c("linreg", "quantreg"),
+    #   ahead = c(1:7, 14, 21, 28),
+    #   pop_scaling = TRUE
+    # ),
     tidyr::expand_grid(
       forecaster = "scaled_pop",
-      trainer = c("linreg", "quantreg"),
+      trainer = c("linreg"),
       ahead = c(1:7, 14, 21, 28),
-      pop_scaling = TRUE
-    ),
-    tidyr::expand_grid(
-      forecaster = "scaled_pop",
-      trainer = c("linreg", "quantreg"),
-      ahead = c(1:7, 14, 21, 28),
-      lags = list(c(0, 3, 5, 7, 14), c(0, 7, 14)),
-      pop_scaling = TRUE
-    ),
-    tidyr::expand_grid(
-      forecaster = "smoothed_scaled",
-      trainer = c("quantreg"),
-      ahead = c(1:7, 14, 21, 28),
-      #
-      lags = list(
-        #        smoothed,      sd,          smoothed,   sd
-        list(c(0, 3, 5, 7, 14), c(0)),
-        list(c(0, 7, 14, 21, 28), c(0)),
-        list(c(0, 2, 4, 7, 14, 21, 28), c(0))
-      ),
+      lags = list(c(0, 3, 5, 7, 14)),
       pop_scaling = TRUE
     )
+    # Only the expand_grid() call above is active; the rest are commented out.
+    # tidyr::expand_grid(
+    #   forecaster = "scaled_pop",
+    #   trainer = c("linreg", "quantreg"),
+    #   ahead = c(1:7, 14, 21, 28),
+    #   lags = list(c(0, 3, 5, 7, 14), c(0, 7, 14)),
+    #   pop_scaling = TRUE
+    # )
+    # ,
+    # tidyr::expand_grid(
+    #   forecaster = "smoothed_scaled",
+    #   trainer = c("quantreg"),
+    #   ahead = c(1:7, 14, 21, 28),
+    #   #
+    #   lags = list(
+    #     #        smoothed,      sd,          smoothed,   sd
+    #     list(c(0, 3, 5, 7, 14), c(0)),
+    #     list(c(0, 7, 14, 21, 28), c(0)),
+    #     list(c(0, 2, 4, 7, 14, 21, 28), c(0))
+    #   ),
+    #   pop_scaling = TRUE
+    # )
   )
 }
 #
@@ -50,24 +59,25 @@ make_unique_ensemble_grid <- function() {
         lags = c(0L, 3L, 5L, 7L, 14L)
       ),
       list(forecaster = "flatline_fc")
-    ),
-    # median forecaster
-    "ensemble_average",
-    list(average_type = "median"),
-    list(
-      list(
-        forecaster = "scaled_pop",
-        trainer = "linreg",
-        pop_scaling = TRUE,
-        lags = c(0, 3, 5, 7, 14)
-      ),
-      list(
-        forecaster = "scaled_pop",
-        trainer = "linreg",
-        pop_scaling = FALSE,
-        lags = c(0, 3, 5, 7, 14)
-      )
-    ),
+    )
+    # ,
+    # # median forecaster
+    # "ensemble_average",
+    # list(average_type = "median"),
+    # list(
+    #   list(
+    #     forecaster = "scaled_pop",
+    #     trainer = "linreg",
+    #     pop_scaling = TRUE,
+    #     lags = c(0, 3, 5, 7, 14)
+    #   ),
+    #   list(
+    #     forecaster = "scaled_pop",
+    #     trainer = "linreg",
+    #     pop_scaling = FALSE,
+    #     lags = c(0, 3, 5, 7, 14)
+    #   )
+    # ),
   )
 }
 
diff --git a/scripts/one_offs/r6_refactor.R b/scripts/one_offs/r6_refactor.R
@@ -0,0 +1,54 @@
+# R6 refactor comparison script.
+#
+# This script compares the pipeline's cached objects from before and after the
+# R6 refactor to ensure that the refactor did not change the forecast output.
+# This script assumes that you:
+#
+# 1. Ran the covid_hosp_explore pipeline (or downloaded the objects using `make
+#    download`)
+# 2. Copied the cache objects to a new directory (e.g. `covid_hosp_explore
+#    copy`)
+# 3. Installed the new epiprocess branch
+#    `renv::install("cmu-delphi/epiprocess@ds/r6-clean")`
+# 4. Ran the covid_hosp_explore pipeline again (should take about 3.5 hours)
+#
+# Once that is done, you should be able to run the script below and find no
+# differences in the forecasts.
+
+library(dplyr)
+library(magrittr)
+library(purrr)
+library(qs)
+
+df <- targets::tar_manifest()
+
+# Both runs are already cached on disk; list the objects named in the manifest.
+old_forecasts <- list.files("covid_hosp_explore copy/objects", full.names = TRUE) %>%
+  keep(~ basename(.) %in% df$name) %>%
+  sort()
+new_forecasts <- list.files("covid_hosp_explore/objects", full.names = TRUE) %>%
+  keep(~ basename(.) %in% df$name) %>%
+  sort()
+
+# identical() checks length and names together, with no `==` recycling.
+assertthat::assert_that(
+  identical(basename(old_forecasts), basename(new_forecasts))
+)
+
+# all.equal() gives TRUE or a character vector with one entry per difference;
+# collapse it to one string because map2_chr() needs length-1 results.
+tib <- tibble::tibble(
+  old_forecasts = old_forecasts,
+  new_forecasts = new_forecasts,
+  compare = purrr::map2_chr(old_forecasts, new_forecasts, function(x, y) {
+    paste(all.equal(qs::qread(x), qs::qread(y)), collapse = "; ")
+  })
+)
+tib %>%
+  filter(compare != "TRUE") %>%
+  slice_head(n = 5) %>%
+  c()
+
+# NOTE(review): x and y are unused below; presumably for interactive inspection.
+x <- qread("covid_hosp_explore copy/objects/joined_archive_data_2022")
+y <- qread("covid_hosp_explore/objects/joined_archive_data_2022")
diff --git a/scripts/run.R b/scripts/run.R
@@ -26,9 +26,6 @@
 #   # Save to disk
 #   saveRDS(scorecards, "exploration-scorecards-2023-10-04.RDS")
 
-
-
-
 tar_project <- Sys.getenv("TAR_PROJECT", "covid_hosp_explore")
 external_scores_path <- Sys.getenv("EXTERNAL_SCORES_PATH", "")
 debug_mode <- as.logical(Sys.getenv("DEBUG_MODE", TRUE))