Skip to content

Commit afc0fe1

Browse files
dsweber2 and dshemetov authored
prod hotfix (#166)
* always run the data download
* same for flu
* add plain AR forecaster for covid
* fixes: update weights, update Makefile pull/push, fix a few filters

---------

Co-authored-by: Dmitry Shemetov <[email protected]>
1 parent b992f25 commit afc0fe1

File tree

6 files changed

+82
-71
lines changed

6 files changed

+82
-71
lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,6 @@ aux_data
1515
.netlify
1616
reports/*.html
1717
reports/report.md
18-
cache
18+
cache/
19+
data/
20+
.vscode/

Makefile

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,34 +24,38 @@ prod: prod-covid prod-flu update_site netlify
2424

2525
submit-covid:
2626
cd ../covid19-forecast-hub; \
27-
git pull delphi main; \
27+
git pull origin main; \
2828
git add model-output/CMU-TimeSeries/*; \
29+
git add model-output/CMU-climate_baseline/*; \
2930
git commit -am "CMU-Delphi submission $(current_date)"; \
3031
git push delphi main; \
3132
gh pr create --title "CMU-TimeSeries $(current_date)" --repo cdcgov/covid19-forecast-hub
3233

3334
submit-flu:
3435
cd ../FluSight-forecast-hub; \
35-
git pull delphi main; \
36+
git pull origin main; \
3637
git add model-output/CMU-TimeSeries/*; \
38+
git add model-output/CMU-climate_baseline/*; \
3739
git commit -am "CMU-Delphi submission $(current_date)"; \
38-
git push delphi; \
40+
git push delphi main; \
3941
gh pr create --title "CMU-TimeSeries $(current_date)" --repo cdcepi/FluSight-forecast-hub
4042

4143
submit-covid-dry:
4244
cd ../covid19-forecast-hub; \
43-
git pull delphi main; \
45+
git pull origin main; \
4446
git add model-output/CMU-TimeSeries/*; \
47+
git add model-output/CMU-climate_baseline/*; \
4548
git commit -am "CMU-Delphi submission $(current_date)"; \
4649
git push delphi main; \
4750
gh pr create --title "CMU-TimeSeries $(current_date)" --repo cdcgov/covid19-forecast-hub --dry-run
4851

4952
submit-flu-dry:
5053
cd ../FluSight-forecast-hub; \
51-
git pull delphi main; \
54+
git pull origin main; \
5255
git add model-output/CMU-TimeSeries/*; \
56+
git add model-output/CMU-climate_baseline/*; \
5357
git commit -am "CMU-Delphi submission $(current_date)"; \
54-
git push delphi; \
58+
git push delphi main; \
5559
gh pr create --title "CMU-TimeSeries $(current_date)" --repo cdcepi/FluSight-forecast-hub --dry-run
5660

5761
submit: submit-covid submit-flu

covid_geo_exclusions.csv

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@ forecast_date,forecaster,geo_value,weight
88
"2024-10-01", "climate_base", "all", 2
99
"2024-10-01", "climate_geo_agged", "all", 0.5
1010
"2024-10-01", "climate_quantile_extrapolated", "all", 0
11+
# feb 5
12+
2025-02-05, all, mp, 0
13+
2025-02-05, windowed_seasonal, all, 3
14+
2025-02-05, windowed_seasonal_extra_sources, all, 3
15+
2025-02-05, linear, all, 0.5
16+
2025-02-05, linearlog, all, 0
17+
2025-02-05, climate_base, all, 0
18+
2025-02-05, climate_geo_agged, all, 0.0
1119
# dec 4th
1220
"2024-12-04", "all", "mp", 0
1321
"2024-12-04", "linear", "all", 3

flu_geo_exclusions.csv

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,14 @@ forecast_date,forecaster,geo_value,weight
77
2024-10-01, climate_base, all, 0.5
88
2024-10-01, climate_geo_agged, all, 0.25
99
2024-10-01, climate_quantile_extrapolated, all, 0
10+
# feb 5
11+
2025-02-05, all, mp, 0
12+
2025-02-05, windowed_seasonal, all, 3
13+
2025-02-05, windowed_seasonal_extra_sources, all, 3
14+
2025-02-05, linear, all, 0.5
15+
2025-02-05, linearlog, all, 0
16+
2025-02-05, climate_base, all, 0
17+
2025-02-05, climate_geo_agged, all, 0.0
1018
# jan 8
1119
2025-01-08, all, mp, 0
1220
2025-01-08, windowed_seasonal, all, 3

scripts/covid_hosp_prod.R

Lines changed: 26 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,22 @@ forecaster_fns <- list2(
3939
geo_agg = TRUE
4040
)
4141
},
42+
windowed_seasonal = function(epi_data, ahead, extra_data, ...) {
43+
fcst <-
44+
epi_data %>%
45+
scaled_pop_seasonal(
46+
outcome = "value",
47+
ahead = ahead * 7,
48+
...,
49+
seasonal_method = "none",
50+
trainer = epipredict::quantile_reg(),
51+
drop_non_seasons = TRUE,
52+
pop_scaling = FALSE,
53+
lags = list(c(0, 7))
54+
) %>%
55+
mutate(target_end_date = target_end_date + 3)
56+
fcst
57+
},
4258
windowed_seasonal_extra_sources = function(epi_data, ahead, extra_data, ...) {
4359
fcst <-
4460
epi_data %>%
@@ -54,7 +70,9 @@ forecaster_fns <- list2(
5470
pop_scaling = FALSE,
5571
lags = list(c(0, 7), c(0, 7))
5672
) %>%
57-
mutate(target_end_date = target_end_date + 3)
73+
mutate(target_end_date = target_end_date + 3) %>%
74+
# Wyoming has no data for NSSP since July 2024
75+
filter(geo_value != "wy")
5876
fcst
5977
}
6078
)
@@ -64,7 +82,7 @@ rlang::list2(
6482
tar_target(aheads, command = -1:3),
6583
tar_target(forecasters, command = indices),
6684
tar_target(
67-
download_latest,
85+
name = nhsn_latest_data,
6886
command = {
6987
if (wday(Sys.Date()) < 6 & wday(Sys.Date()) > 3) {
7088
# download from the preliminary data source from Wednesday to Friday
@@ -78,28 +96,9 @@ rlang::list2(
7896
filter(disease == "nhsn_covid") %>%
7997
select(-disease) %>%
8098
filter(geo_value %nin% insufficient_data_geos)
81-
# if there's not already a result we need to save it no matter what
82-
if (file.exists(here::here(".nhsn_covid_cache.parquet"))) {
83-
previous_result <- qs::qread(here::here(".nhsn_covid_cache.parquet"))
84-
} else
85-
# if something is different, update the file
86-
if (!isTRUE(all.equal(previous_result, most_recent_result))) {
87-
qs::qsave(most_recent_result, here::here(".nhsn_covid_cache.parquet"))
88-
} else {
89-
qs::qsave(most_recent_result, here::here(".nhsn_covid_cache.parquet"))
90-
}
91-
NULL
92-
},
93-
description = "Download the result, and update the file only if it's actually different",
94-
priority = 1,
95-
cue = tar_cue(mode = "always")
96-
),
97-
tar_change(
98-
name = nhsn_latest_data,
99-
command = {
100-
qs::qread(here::here(".nhsn_covid_cache.parquet"))
99+
most_recent_result
101100
},
102-
change = tools::md5sum(here::here(".nhsn_covid_cache.parquet"))
101+
cue = tar_cue("always")
103102
),
104103
tar_target(
105104
name = nhsn_archive_data,
@@ -138,7 +137,7 @@ rlang::list2(
138137
}
139138
),
140139
tar_target(
141-
forecast_res,
140+
name = forecast_res,
142141
command = {
143142
if (as.Date(forecast_generation_date_int) < Sys.Date()) {
144143
train_data <- nhsn_archive_data %>%
@@ -152,13 +151,13 @@ rlang::list2(
152151
train_data <-
153152
nhsn_latest_data %>%
154153
data_substitutions(disease = "covid") %>%
155-
as_epi_df(as_of = as.Date(forecast_date_int))
154+
as_epi_df(as_of = as.Date(forecast_date_int)) %>%
155+
mutate(time_value = time_value - 3)
156156
}
157157
nssp <- current_nssp_archive %>%
158158
epix_as_of(min(forecast_date, current_nssp_archive$versions_end)) %>%
159159
mutate(time_value = time_value)
160160
attributes(train_data)$metadata$as_of <- as.Date(forecast_date_int)
161-
print(names(forecaster_fns[forecasters]))
162161
train_data %>%
163162
forecaster_fns[[forecasters]](ahead = aheads, extra_data = nssp) %>%
164163
mutate(
@@ -196,7 +195,7 @@ rlang::list2(
196195
filter(geo_value %nin% geo_exclusions) %>%
197196
ungroup() %>%
198197
bind_rows(forecast_res %>%
199-
filter(forecaster == "windowed_seasonal_extra_sources") %>%
198+
filter(forecaster %in% c("windowed_seasonal", "windowed_seasonal_extra_sources")) %>%
200199
filter(forecast_date < target_end_date)) %>% # don't use for neg aheads
201200
group_by(geo_value, forecast_date, target_end_date, quantile) %>%
202201
summarize(value = mean(value, na.rm = TRUE), .groups = "drop") %>%

scripts/flu_hosp_prod.R

Lines changed: 27 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ rlang::list2(
125125
}
126126
),
127127
tar_target(
128-
download_latest_nhsn,
128+
nhsn_latest_data,
129129
command = {
130130
if (wday(Sys.Date()) < 6 & wday(Sys.Date()) > 3) {
131131
# download from the preliminary data source from Wednesday to Friday
@@ -148,30 +148,11 @@ rlang::list2(
148148
select(-version) %>%
149149
data_substitutions(disease = "flu") %>%
150150
as_epi_df(other_keys = "source", as_of = Sys.Date())
151-
# if there's not already a result we need to save it no matter what
152-
if (file.exists(here::here(".nhsn_flu_cache.parquet"))) {
153-
previous_result <- qs::qread(here::here(".nhsn_flu_cache.parquet"))
154-
# if something is different, update the file
155-
# !isTRUE(all.equal) is true iff there's at least one difference
156-
# can't use isFALSE(all.equal) because a bunch of strings are not, in fact, false
157-
if (!isTRUE(all.equal(previous_result, most_recent_result))) {
158-
qs::qsave(most_recent_result, here::here(".nhsn_flu_cache.parquet"))
159-
}
160-
} else {
161-
qs::qsave(most_recent_result, here::here(".nhsn_flu_cache.parquet"))
162-
}
163-
NULL
151+
most_recent_result
164152
},
165153
description = "Download the result, and update the file only if it's actually different",
166154
priority = 1,
167-
cue = tar_cue(mode="always")
168-
),
169-
tar_change(
170-
name = nhsn_latest_data,
171-
command = {
172-
qs::qread(here::here(".nhsn_flu_cache.parquet"))
173-
},
174-
change = tools::md5sum(here::here(".nhsn_flu_cache.parquet"))
155+
cue = tar_cue(mode = "always")
175156
),
176157
tar_map(
177158
# Because targets relies on R metaprogramming, it loses the Date class.
@@ -275,38 +256,47 @@ rlang::list2(
275256
ensemble_linear_climate(aheads, other_weights = geo_forecasters_weights) %>%
276257
filter(geo_value %nin% geo_exclusions) %>%
277258
ungroup() %>%
278-
# Ensemble with windowed_seasonal
279-
bind_rows(forecast_res %>% filter(forecaster == "windowed_seasonal")) %>%
259+
sort_by_quantile()
260+
}
261+
),
262+
tar_target(
263+
name = ens_climate_linear_window_season,
264+
command = {
265+
climate_linear %>%
266+
# Ensemble with windowed_seasonal and windowed_seasonal_extra_sources
267+
bind_rows(forecast_res %>% filter(forecaster %in% c("windowed_seasonal", "windowed_seasonal_extra_sources"))) %>%
280268
group_by(geo_value, forecast_date, target_end_date, quantile) %>%
281269
summarize(value = mean(value, na.rm = TRUE), .groups = "drop") %>%
282270
sort_by_quantile()
283271
}
284272
),
285273
tar_target(
286-
name = ens_climate_linear_window_season,
274+
name = ens_ar_only,
287275
command = {
288276
forecast_res %>%
289-
# Apply the ahead-by-quantile weighting scheme
290-
ensemble_linear_climate(aheads, other_weights = geo_forecasters_weights) %>%
291-
filter(geo_value %nin% geo_exclusions) %>%
292-
ungroup() %>%
293-
# Ensemble with windowed_seasonal
294-
bind_rows(forecast_res %>% filter(forecaster == "windowed_seasonal", forecaster == "windowed_seasonal_extra_sources")) %>%
277+
filter(forecaster %in% c("windowed_seasonal", "windowed_seasonal_extra_sources")) %>%
295278
group_by(geo_value, forecast_date, target_end_date, quantile) %>%
296279
summarize(value = mean(value, na.rm = TRUE), .groups = "drop") %>%
297280
sort_by_quantile()
298281
}
299282
),
300283
tar_target(
301-
name = ens_climate_linear_window_season_ave_data,
284+
name = climate_linear_modified,
302285
command = {
303286
forecast_res_modified %>%
304287
# Apply the ahead-by-quantile weighting scheme
305288
ensemble_linear_climate(aheads, other_weights = geo_forecasters_weights) %>%
306289
filter(geo_value %nin% geo_exclusions) %>%
307290
ungroup() %>%
291+
sort_by_quantile()
292+
}
293+
),
294+
tar_target(
295+
name = ens_climate_linear_window_season_modified,
296+
command = {
297+
climate_linear_modified %>%
308298
# Ensemble with windowed_seasonal
309-
bind_rows(forecast_res_modified %>% filter(forecaster == "windowed_seasonal")) %>%
299+
bind_rows(forecast_res_modified %>% filter(forecaster %in% c("windowed_seasonal", "windowed_seasonal_extra_sources"))) %>%
310300
group_by(geo_value, forecast_date, target_end_date, quantile) %>%
311301
summarize(value = mean(value, na.rm = TRUE), .groups = "drop") %>%
312302
sort_by_quantile()
@@ -316,7 +306,7 @@ rlang::list2(
316306
name = combo_ens_climate_linear_window_season,
317307
command = {
318308
inner_join(
319-
ens_climate_linear_window_season, ens_climate_linear_window_season_ave_data,
309+
ens_climate_linear_window_season, ens_climate_linear_window_season_modified,
320310
by = join_by(geo_value, forecast_date, target_end_date, quantile)
321311
) %>%
322312
rowwise() %>%
@@ -332,10 +322,10 @@ rlang::list2(
332322
command = {
333323
bind_rows(
334324
forecast_res,
335-
climate_linear %>% mutate(forecaster = "ensemble"),
325+
climate_linear %>% mutate(forecaster = "climate_linear"),
326+
ens_ar_only %>% mutate(forecaster = "ens_ar_only"),
336327
ens_climate_linear_window_season %>% mutate(forecaster = "ensemble_linclim_windowed_seasonal"),
337-
ens_climate_linear_window_season_ave_data %>% mutate(forecaster = "ensemble_ave_data"),
338-
combo_ens_climate_linear_window_season %>% mutate(forecaster = "ensemble_overall")
328+
combo_ens_climate_linear_window_season %>% mutate(forecaster = "ensemble_combo")
339329
)
340330
}
341331
),

0 commit comments

Comments
 (0)