Skip to content

Commit 8490952

Browse files
committed
wip: production pipeline
1 parent 368f8af commit 8490952

File tree

2 files changed

+62
-44
lines changed

2 files changed

+62
-44
lines changed

R/plotting.R

Lines changed: 62 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,18 @@
11
library(dplyr)
2-
library(evalcast)
32
library(ggplot2)
43
library(magrittr)
54
library(tidyr)
65

76

8-
get_truth_data <- function(exclude_geos, ...) {
9-
download_args <- list(...)
10-
truth_data <- do.call(download_signal, download_args)
11-
truth_data %<>% filter(!(.data$geo_value %in% exclude_geos))
12-
truth_data <- truth_data %>%
13-
dplyr::select(.data$geo_value, .data$time_value, .data$value) %>%
14-
dplyr::rename(target_end_date = .data$time_value)
15-
return(truth_data %>% tibble())
16-
}
17-
187
get_quantiles_df <- function(predictions_cards, intervals = c(.5, .9), ...) {
198
predictions_cards <- predictions_cards %>%
209
dplyr::select(
21-
.data$geo_value, .data$quantile,
22-
.data$value, .data$forecaster, .data$forecast_date,
23-
.data$target_end_date
10+
geo_value,
11+
quantile,
12+
value,
13+
forecaster,
14+
forecast_date,
15+
target_end_date
2416
)
2517

2618
lower_bounds <- predictions_cards %>%
@@ -103,15 +95,36 @@ plot_points <- function(g, points_df) {
10395
return(g)
10496
}
10597

106-
plot_state_forecasters <- function(predictions_cards, exclude_geos = c(), start_day = NULL, ncol = 5, offline_signal_dir = NULL) {
98+
plot_state_forecasters <- function(predictions_cards, exclude_geos = c(), start_day = NULL, ncol = 5) {
10799
if (nrow(predictions_cards) == 0) {
108100
return(NULL)
109101
}
110102

111-
td1 <- get_truth_data(exclude_geos = exclude_geos, data_source = "hhs", signal = "confirmed_admissions_covid_1d", start_day = start_day, geo_type = "state", offline_signal_dir = offline_signal_dir)
112-
td1$data_source <- "hhs"
113-
td2 <- get_truth_data(exclude_geos = exclude_geos, data_source = "jhu-csse", signal = "confirmed_7dav_incidence_num", start_day = start_day, geo_type = "state", offline_signal_dir = offline_signal_dir)
114-
td2$data_source <- "jhu"
103+
td1 <- epidatr::pub_covidcast(
104+
source = "hhs",
105+
signals = "confirmed_admissions_covid_1d",
106+
geo_type = "state",
107+
time_type = "day",
108+
geo_values = "*",
109+
time_values = epidatr::epirange(start_day, Sys.Date())
110+
) %>%
111+
filter(!(.data$geo_value %in% exclude_geos)) %>%
112+
dplyr::select(.data$geo_value, .data$time_value, .data$value) %>%
113+
dplyr::rename(target_end_date = .data$time_value) %>%
114+
mutate(data_source = "hhs")
115+
td2 <- epidatr::pub_covidcast(
116+
source = "jhu-csse",
117+
signals = "confirmed_7dav_incidence_num",
118+
geo_type = "state",
119+
time_type = "day",
120+
geo_values = "*",
121+
time_values = epidatr::epirange(start_day, Sys.Date())
122+
) %>%
123+
filter(!(.data$geo_value %in% exclude_geos)) %>%
124+
dplyr::select(.data$geo_value, .data$time_value, .data$value) %>%
125+
dplyr::rename(target_end_date = .data$time_value) %>%
126+
mutate(data_source = "jhu")
127+
115128
td1.max <- td1 %>%
116129
group_by(geo_value) %>%
117130
summarize(max_value = max(value))
@@ -129,14 +142,8 @@ plot_state_forecasters <- function(predictions_cards, exclude_geos = c(), start_
129142

130143
# Setup plot
131144
g <- ggplot(td1, mapping = aes(x = .data$target_end_date, color = .data$forecaster, fill = .data$forecaster))
132-
133-
points_df <- get_points_df(predictions_cards)
134-
g <- plot_points(g, points_df)
135-
136-
quantiles_df <- get_quantiles_df(predictions_cards)
137-
g <- plot_quantiles(g, quantiles_df)
138-
139-
# Plot truth data by geo
145+
g <- plot_points(g, get_points_df(predictions_cards))
146+
g <- plot_quantiles(g, get_quantiles_df(predictions_cards))
140147
g <- g +
141148
geom_line(mapping = aes(y = .data$value)) +
142149
geom_line(data = td2, mapping = aes(x = .data$target_end_date, y = .data$scaled_value)) +
@@ -146,34 +153,47 @@ plot_state_forecasters <- function(predictions_cards, exclude_geos = c(), start_
146153
return(g)
147154
}
148155

149-
plot_nation_forecasters <- function(predictions_cards, exclude_geos = c(), start_day = NULL, ncol = 5, offline_signal_dir = NULL) {
156+
plot_nation_forecasters <- function(predictions_cards, exclude_geos = c(), start_day = NULL, ncol = 5) {
150157
if (nrow(predictions_cards) == 0) {
151158
return(NULL)
152159
}
153160

154-
td1 <- get_truth_data(exclude_geos = exclude_geos, data_source = "hhs", signal = "confirmed_admissions_covid_1d", start_day = start_day, geo_type = "nation", offline_signal_dir = offline_signal_dir)
155-
td1$data_source <- "hhs"
156-
td2 <- get_truth_data(exclude_geos = exclude_geos, data_source = "jhu-csse", signal = "confirmed_7dav_incidence_num", start_day = start_day, geo_type = "nation", offline_signal_dir = offline_signal_dir)
157-
td2$data_source <- "jhu"
161+
td1 <- epidatr::pub_covidcast(
162+
source = "hhs",
163+
signals = "confirmed_admissions_covid_1d",
164+
geo_type = "nation",
165+
time_type = "day",
166+
geo_values = "*",
167+
time_values = epidatr::epirange(start_day, Sys.Date())
168+
) %>%
169+
filter(!(.data$geo_value %in% exclude_geos)) %>%
170+
dplyr::select(.data$time_value, .data$value) %>%
171+
dplyr::rename(target_end_date = .data$time_value) %>%
172+
mutate(data_source = "hhs")
173+
td2 <- epidatr::pub_covidcast(
174+
source = "jhu-csse",
175+
signals = "confirmed_7dav_incidence_num",
176+
geo_type = "nation",
177+
time_type = "day",
178+
geo_values = "*",
179+
time_values = epidatr::epirange(start_day, Sys.Date())
180+
) %>%
181+
filter(!(.data$geo_value %in% exclude_geos)) %>%
182+
dplyr::select(.data$time_value, .data$value) %>%
183+
dplyr::rename(target_end_date = .data$time_value) %>%
184+
mutate(data_source = "jhu")
158185
td1.max <- td1 %>%
159186
summarize(max_value = max(value)) %>%
160187
pull(max_value)
161188
td2.max <- td2 %>%
162189
summarize(max_value = max(value)) %>%
163190
pull(max_value)
164-
td2 <- td2 %>%
165-
mutate(scaled_value = value * td1.max / td2.max)
191+
td2 <- td2 %>% mutate(scaled_value = value * td1.max / td2.max)
166192

167193
# Setup plot
168194
g <- ggplot(td1, mapping = aes(x = .data$target_end_date))
169-
170-
quantiles_df <- get_quantiles_df(predictions_cards)
171-
g <- plot_quantiles(g, quantiles_df)
172-
173-
points_df <- get_points_df(predictions_cards)
174-
g <- plot_points(g, points_df)
175-
176-
# Plot truth data by geo
195+
g <- plot_quantiles(g, get_quantiles_df(predictions_cards))
196+
g <- plot_points(g, get_points_df(predictions_cards))
177197
g <- g +
178198
geom_line(mapping = aes(y = .data$value, color = "confirmed admissions")) +
179199
geom_line(data = td2, mapping = aes(x = .data$target_end_date, y = .data$scaled_value, color = "7day case sum")) +

scripts/covid_hosp_prod.Rmd

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,9 @@ body .main-container {
2828

2929
```{r setup, include=FALSE}
3030
library(dplyr)
31-
library(evalcast)
3231
library(here)
3332
library(magrittr)
3433
library(rlang)
35-
library(targets)
3634
library(tidyr)
3735
source(here("R", "plotting.R"))
3836

0 commit comments

Comments
 (0)