Merge pull request #40 from cmu-delphi/ndefries/baseline-display-and-scaling

nmdefries · web-flow · commit 98c13702f4e8 · 2023-10-18T10:26:19.000-04:00
Support baseline score scaling and baseline forecaster selection
diff --git a/R/small_utils.R b/R/small_utils.R
@@ -27,9 +27,9 @@ add_id <- function(df, n_adj = 2) {
     mutate(id = hash_animal(id, n_adj = n_adj)$words) %>%
     mutate(id = paste(id[1:n_adj], sep="", collapse = " "))
   df %<>%
-    mutate(id = stringified) %>%
+    mutate(parent_id = stringified$id) %>%
     rowwise() %>%
-    mutate(id = paste(id, ahead, collapse = " ")) %>%
+    mutate(id = paste(parent_id, ahead, collapse = " ")) %>%
     ungroup()
   return(df)
 }
diff --git a/app.R b/app.R
@@ -67,9 +67,19 @@ shinyApp(
             choices = forecaster_options,
             multiple = TRUE
           ),
+          selectInput("baseline",
+            "Baseline forecaster:",
+            choices = forecaster_options,
+            multiple = FALSE
+          ),
+          checkboxInput(
+            "scale_by_baseline",
+            "Scale by baseline forecaster",
+            value = FALSE,
+          ),
           radioButtons(
             "selected_metric",
-            "Metric:",
+            "Error metric:",
             c(
               "Mean WIS" = "wis",
               # "Mean WIS per 100k" = "wis_per_100k",
@@ -88,12 +98,10 @@ shinyApp(
             choices = c("forecaster", "ahead", "geo_value"),
             multiple = TRUE
           ),
-          radioButtons("facets_share_scale",
-            "Share y scale between subplots:",
-            c(
-              "Yes" = "fixed",
-              "No" = "free_y"
-            )
+          checkboxInput(
+            "facets_share_scale",
+            "Share y scale between subplots",
+            value = TRUE,
           ),
           sliderInput("selected_forecast_date_range",
             "Forecast date range:",
@@ -125,9 +133,10 @@ shinyApp(
   },
   server = function(input, output, session) {
     filtered_scorecards_reactive <- reactive({
-      if (length(input$selected_forecasters) == 0) { return(data.frame()) }
+      agg_forecasters <- unique(c(input$selected_forecasters, input$baseline))
+      if (length(agg_forecasters) == 0) { return(data.frame()) }
 
-      processed_evaluations_internal <- lapply(input$selected_forecasters, function(forecaster) {
+      processed_evaluations_internal <- lapply(agg_forecasters, function(forecaster) {
           load_forecast_data(forecaster) %>>%
           filter(
             .data$forecast_date %>>% between(.env$input$selected_forecast_date_range[[1L]], .env$input$selected_forecast_date_range[[2L]]),
@@ -141,8 +150,35 @@ shinyApp(
       input_df <- filtered_scorecards_reactive()
       if (nrow(input_df) == 0) { return() }
 
+      # Normalize by baseline scores. This is not relevant for coverage, which is compared
+      # to the nominal confidence level.
+      if (input$scale_by_baseline && input$selected_metric != "ic80") {
+        # These merge keys are overkill; the join should be fully specified by
+        # c("forecast_date", "target_end_date", "geo_value").
+        merge_keys <- c("forecast_date", "target_end_date", "ahead", "issue", "geo_value")
+        # Load selected baseline
+        baseline_scores <- load_forecast_data(input$baseline)[, c(merge_keys, input$selected_metric)]
+
+        baseline_scores$score_baseline <- baseline_scores[[input$selected_metric]]
+        baseline_scores[[input$selected_metric]] <- NULL
+
+        # Add on reference scores from baseline forecaster.
+        # Note that this drops any scores where there isn't a corresponding
+        # baseline value. If a forecaster and a baseline cover
+        # non-overlapping dates or use different aheads, the forecaster will
+        # not be shown.
+        input_df <- inner_join(
+          input_df, baseline_scores,
+          by = merge_keys, suffix = c("", "")
+        )
+        # Scale score by baseline forecaster
+        input_df[[input$selected_metric]] <- input_df[[input$selected_metric]] / input_df$score_baseline
+      }
+
+
       x_tick_angle <- list(tickangle = -30)
       facet_x_tick_angles <- setNames(rep(list(x_tick_angle), 10), paste0("xaxis", 1:10))
+      scale_type <- if (isTRUE(input$facets_share_scale)) "fixed" else "free_y" # checkbox -> ggplot2 facet `scales` value
 
       input_df %>>%
         # Aggregate scores over all geos
@@ -180,9 +216,9 @@ shinyApp(
         `+`(if (length(input$facet_vars) == 0L) {
           theme()
         } else if (length(input$facet_vars) == 1L) {
-          facet_wrap(input$facet_vars, scales = input$facets_share_scale)
+          facet_wrap(input$facet_vars, scales = scale_type)
         } else {
-          facet_grid(as.formula(paste0(input$facet_vars[[1L]], " ~ ", paste(collapse = " + ", input$facet_vars[-1L]))), scales = input$facets_share_scale)
+          facet_grid(as.formula(paste0(input$facet_vars[[1L]], " ~ ", paste(collapse = " + ", input$facet_vars[-1L]))), scales = scale_type)
         }) %>>%
         ggplotly() %>>%
         {inject(layout(., hovermode = "x unified", legend = list(orientation = "h", title = list(text = "forecaster")), xaxis = x_tick_angle, !!!facet_x_tick_angles))}
diff --git a/covid_hosp_explore.R b/covid_hosp_explore.R
@@ -35,13 +35,13 @@ tar_option_set(
 source("covid_hosp_explore/forecaster_instantiation.R")
 source("covid_hosp_explore/data_targets.R")
 
-forecasts_and_scores <- tar_map(
+forecasts_and_scores_by_ahead <- tar_map(
   values = forecaster_param_grids,
   names = id,
   unlist = FALSE,
-  tar_target(
-    name = forecast,
-    command = {
+  tar_target_raw(
+    name = ONE_AHEAD_FORECAST_NAME,
+    command = expression(
       forecaster_pred(
         data = joined_archive_data_2022,
         outcome = "hhs",
@@ -52,20 +52,39 @@ forecasts_and_scores <- tar_map(
         forecaster_args = params,
         forecaster_args_names = param_names
       )
-    }
+    )
   ),
-  tar_target(
-    name = score,
-    command = {
+  tar_target_raw(
+    name = ONE_AHEAD_SCORE_NAME,
+    command = expression(
       run_evaluation_measure(
-        data = forecast,
+        data = forecast_by_ahead,
         evaluation_data = hhs_evaluation_data,
         measure = list(
           wis = weighted_interval_score,
           ae = absolute_error,
           ic80 = interval_coverage(0.8)
         )
       )
+    )
+  )
+)
+
+forecasts_and_scores <- tar_map(
+  values = forecaster_parent_id_map,
+  names = parent_id,
+  tar_target(
+    name = forecast,
+    command = {
+      bind_rows(forecast_component_ids) %>%
+        mutate(parent_forecaster = parent_id)
+    }
+  ),
+  tar_target(
+    name = score,
+    command = {
+      bind_rows(score_component_ids) %>%
+        mutate(parent_forecaster = parent_id)
     }
   )
 )
@@ -89,8 +108,8 @@ ensemble_forecast <- tar_map(
     name = ensemble_forecast,
     # TODO: Needs a lookup table to select the right forecasters
     list(
-      forecasts_and_scores[["forecast"]][[1]],
-      forecasts_and_scores[["forecast"]][[2]]
+      forecasts_and_scores_by_ahead[["forecast_by_ahead"]][[1]],
+      forecasts_and_scores_by_ahead[["forecast_by_ahead"]][[2]]
     ),
     command = {
       bind_rows(!!!.x, .id = "forecaster") %>%
@@ -124,6 +143,7 @@ ensemble_forecast <- tar_map(
 list(
   data,
   forecasters,
+  forecasts_and_scores_by_ahead,
   forecasts_and_scores,
   ensembles,
   ensemble_forecast
diff --git a/covid_hosp_explore/forecaster_instantiation.R b/covid_hosp_explore/forecaster_instantiation.R
@@ -20,9 +20,19 @@ grids <- list(
 # bind them together and give static ids; if you add a new field to a given
 # expand_grid, everything will get a new id, so it's better to add a new
 # expand_grid instead
-param_grid <- bind_rows(map(grids, add_id)) %>% relocate(id, .after = last_col())
+param_grid <- bind_rows(map(grids, add_id)) %>%
+  relocate(parent_id, id, .after = last_col())
 
-forecaster_param_grids <- make_target_param_grid(param_grid)
+ONE_AHEAD_FORECAST_NAME <- "forecast_by_ahead"
+ONE_AHEAD_SCORE_NAME <- "score_by_ahead"
+forecaster_parent_id_map <- param_grid %>%
+  group_by(parent_id) %>%
+  summarize(
+    forecast_component_ids = list(syms(paste0(ONE_AHEAD_FORECAST_NAME, "_", gsub(" ", ".", id, fixed = TRUE)))),
+    score_component_ids = list(syms(paste0(ONE_AHEAD_SCORE_NAME, "_", gsub(" ", ".", id, fixed = TRUE))))
+  )
+
+forecaster_param_grids <- make_target_param_grid(select(param_grid, -parent_id))
 
 # not actually used downstream, this is for lookup during plotting and human evaluation
 forecasters <- list(
diff --git a/run.R b/run.R
@@ -53,7 +53,7 @@ tar_make()
 
 # Prevent functions defined in /R dir from being loaded unnecessarily
 options(shiny.autoload.r=FALSE)
-forecaster_options <- tar_read(forecasters)[["id"]]
+forecaster_options <- unique(tar_read(forecasters)[["parent_id"]])
 # Map forecaster names to score files
 forecaster_options <- setNames(
   paste0("score_", gsub(" ", ".", forecaster_options)),