Skip to content

Commit f12ab6f

Browse files
authored
Merge pull request #277 from cmu-delphi/release/7.0.0
Release 7.0.0
2 parents de9232f + 12059e8 commit f12ab6f

File tree

10 files changed

+458
-377
lines changed

10 files changed

+458
-377
lines changed

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 6.1.0
2+
current_version = 7.0.0
33
commit = False
44
tag = False
55

DESCRIPTION

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: forecasteval
22
Title: Forecast Evaluation Dashboard
3-
Version: 6.1.0
3+
Version: 7.0.0
44
Authors@R: c(person("Kate", "Harwood", email = "[email protected]", role = "cre"),
55
person("Chris", "Scott", role = "ctb"),
66
person("Jed", "Grabman", role = "ctb"))
@@ -24,7 +24,9 @@ Imports:
2424
covidcast,
2525
stringr,
2626
markdown,
27-
memoise
27+
memoise,
28+
purrr,
29+
data.table
2830
Suggests:
2931
styler,
3032
lintr,

Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
SHELL:=/bin/bash
2+
PWD=$(shell pwd)
3+
14
.DEFAULT_GOAL:=build
25
S3_URL=https://forecast-eval.s3.us-east-2.amazonaws.com
36
S3_BUCKET=s3://forecast-eval
@@ -10,7 +13,7 @@ r_build:
1013
%.rds: dist
1114
test -f dist/$@ || curl -o dist/$@ $(S3_URL)/$@
1215

13-
pull_data: score_cards_state_deaths.rds score_cards_state_cases.rds score_cards_nation_cases.rds score_cards_nation_deaths.rds score_cards_state_hospitalizations.rds score_cards_nation_hospitalizations.rds datetime_created_utc.rds
16+
pull_data: score_cards_state_deaths.rds score_cards_state_cases.rds score_cards_nation_cases.rds score_cards_nation_deaths.rds score_cards_state_hospitalizations.rds score_cards_nation_hospitalizations.rds datetime_created_utc.rds predictions_cards.rds
1417

1518
dist:
1619
mkdir $@

app/R/data.R

Lines changed: 56 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ library(aws.s3)
55
shinyOptions(cache = cachem::cache_mem(max_size = 1000 * 1024^2, evict = "lru"))
66
cache <- getShinyOption("cache")
77

8-
# Since covidcast data updates about once a day. Add date arg to
8+
# Since covidcast data updates about once a day, add date arg to
99
# covidcast_signal so caches aren't used after that.
1010
covidcast_signal_mem <- function(..., date = Sys.Date()) {
1111
return(covidcast_signal(...))
@@ -67,65 +67,74 @@ getCreationDate <- function(loadFile) {
6767
}
6868

6969

70-
getAllData <- function(loadFile) {
71-
dfStateCases <- loadFile("score_cards_state_cases.rds")
72-
dfStateDeaths <- loadFile("score_cards_state_deaths.rds")
73-
dfStateHospitalizations <- loadFile("score_cards_state_hospitalizations.rds")
74-
dfNationCases <- loadFile("score_cards_nation_cases.rds")
75-
dfNationDeaths <- loadFile("score_cards_nation_deaths.rds")
76-
dfNationHospitalizations <- loadFile("score_cards_nation_hospitalizations.rds")
70+
getAllData <- function(loadFile, targetVariable) {
71+
df <- switch(targetVariable,
72+
"Deaths" = bind_rows(
73+
loadFile("score_cards_state_deaths.rds"),
74+
loadFile("score_cards_nation_deaths.rds")
75+
),
76+
"Cases" = bind_rows(
77+
loadFile("score_cards_state_cases.rds"),
78+
loadFile("score_cards_nation_cases.rds")
79+
),
80+
"Hospitalizations" = bind_rows(
81+
loadFile("score_cards_state_hospitalizations.rds"),
82+
loadFile("score_cards_nation_hospitalizations.rds")
83+
)
84+
)
7785

78-
# Pick out expected columns only
79-
covCols <- paste0("cov_", COVERAGE_INTERVALS)
80-
expectedCols <- c(
86+
# The names of the `covCols` elements become the new names of those columns
87+
# when we use this vector in the `select` below.
88+
covCols <- setNames(paste0("cov_", COVERAGE_INTERVALS), COVERAGE_INTERVALS)
89+
keepCols <- c(
8190
"ahead", "geo_value", "forecaster", "forecast_date",
8291
"data_source", "signal", "target_end_date", "incidence_period",
8392
"actual", "wis", "sharpness", "ae", "value_50",
8493
covCols
8594
)
86-
87-
df <- bind_rows(
88-
dfStateCases %>% select(all_of(expectedCols)),
89-
dfStateDeaths %>% select(all_of(expectedCols)),
90-
dfStateHospitalizations %>% select(all_of(expectedCols)),
91-
dfNationCases %>% select(all_of(expectedCols)),
92-
dfNationDeaths %>% select(all_of(expectedCols)),
93-
dfNationHospitalizations %>% select(all_of(expectedCols))
94-
)
95-
df <- df %>% rename(
96-
"10" = cov_10, "20" = cov_20, "30" = cov_30,
97-
"40" = cov_40, "50" = cov_50, "60" = cov_60, "70" = cov_70,
98-
"80" = cov_80, "90" = cov_90, "95" = cov_95, "98" = cov_98
99-
)
95+
df <- select(df, all_of(keepCols))
10096

10197
return(df)
10298
}
10399

104100
createS3DataLoader <- function() {
101+
# Cached connection info
105102
s3bucket <- getS3Bucket()
106-
df <- data.frame()
103+
s3DataFetcher <- createS3DataFactory(s3bucket)
104+
s3Contents <- s3bucket[attr(s3bucket, "names", exact = TRUE)]
105+
106+
# Cached data
107+
df_list <- list()
107108
dataCreationDate <- as.Date(NA)
108109

109-
getRecentData <- function() {
110-
newS3bucket <- getS3Bucket()
110+
getRecentData <- function(targetVariable = TARGET_OPTIONS) {
111+
targetVariable <- match.arg(targetVariable)
111112

112-
s3Contents <- s3bucket[attr(s3bucket, "names", exact = TRUE)]
113+
newS3bucket <- getS3Bucket()
113114
newS3Contents <- newS3bucket[attr(newS3bucket, "names", exact = TRUE)]
115+
s3BucketHasChanged <- !identical(s3Contents, newS3Contents)
114116

115-
# Fetch new score data if contents of S3 bucket has changed (including file
117+
# Fetch new data if contents of S3 bucket has changed (including file
116118
# names, sizes, and last modified timestamps). Ignores characteristics of
117-
# bucket and request, including bucket region, name, content type, request
118-
# date, request ID, etc.
119-
if (nrow(df) == 0 || !identical(s3Contents, newS3Contents)) {
120-
# Save new data and new bucket connection info to vars in env of
121-
# `getRecentDataHelper`. They persist between calls to `getRecentData` a
122-
# la https://stackoverflow.com/questions/1088639/static-variables-in-r
119+
# bucket and request, including bucket region, name, content type,
120+
# request date, request ID, etc.
121+
#
122+
# Save new score data and new bucket connection info to vars in env of
123+
# `createS3DataLoader`. They persist between calls to `getRecentData` a
124+
# la https://stackoverflow.com/questions/1088639/static-variables-in-r
125+
if (s3BucketHasChanged) {
123126
s3bucket <<- newS3bucket
124-
df <<- getAllData(createS3DataFactory(s3bucket))
125-
dataCreationDate <<- getCreationDate(createS3DataFactory(s3bucket))
127+
s3DataFetcher <<- createS3DataFactory(newS3bucket)
128+
s3Contents <<- newS3Contents
129+
}
130+
if (s3BucketHasChanged ||
131+
!(targetVariable %chin% names(df_list)) ||
132+
nrow(df_list[[targetVariable]]) == 0) {
133+
df_list[[targetVariable]] <<- getAllData(s3DataFetcher, targetVariable)
134+
dataCreationDate <<- getCreationDate(s3DataFetcher)
126135
}
127136

128-
return(list(df = df, dataCreationDate = dataCreationDate))
137+
return(list(df_list = df_list, dataCreationDate = dataCreationDate))
129138
}
130139

131140
return(getRecentData)
@@ -134,12 +143,17 @@ createS3DataLoader <- function() {
134143

135144
#' create a data loader with fallback data only
136145
createFallbackDataLoader <- function() {
137-
df <- getAllData(getFallbackData)
146+
df_list <- list()
147+
for (targetVariable in TARGET_OPTIONS) {
148+
df_list[[targetVariable]] <- getAllData(getFallbackData, targetVariable)
149+
}
150+
dataCreationDate <- getCreationDate(getFallbackData)
138151

139152
dataLoader <- function() {
140-
df
153+
return(list(df_list = df_list, dataCreationDate = dataCreationDate))
141154
}
142-
dataLoader
155+
156+
return(dataLoader)
143157
}
144158

145159

app/R/data_manipulation.R

Lines changed: 19 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,18 @@ renameScoreCol <- function(filteredScoreDf, scoreType, coverageInterval) {
1414

1515
filterOverAllLocations <- function(filteredScoreDf, scoreType, hasAsOfData = FALSE, filterDate) {
1616
locationsIntersect <- list()
17-
filteredScoreDf <- filteredScoreDf %>% filter(!is.na(Score) | target_end_date >= filterDate)
17+
filteredScoreDf <- filter(filteredScoreDf, !is.na(Score) | target_end_date >= filterDate)
1818
# Create df with col for all locations across each unique date, ahead and forecaster combo
1919
locationDf <- filteredScoreDf %>%
2020
group_by(forecaster, target_end_date, ahead) %>%
2121
summarize(location_list = paste(sort(unique(geo_value)), collapse = ","))
22-
locationDf <- locationDf %>% filter(location_list != c("us"))
22+
locationDf <- filter(locationDf, location_list != c("us"))
2323
# Create a list containing each row's location list
2424
locationList <- sapply(locationDf$location_list, function(x) strsplit(x, ","))
2525
locationList <- lapply(locationList, function(x) x[x != "us"])
2626
# Get the intersection of all the locations in these lists
2727
locationsIntersect <- unique(Reduce(intersect, locationList))
28-
filteredScoreDf <- filteredScoreDf %>% filter(geo_value %in% locationsIntersect)
28+
filteredScoreDf <- filter(filteredScoreDf, geo_value %chin% locationsIntersect)
2929
if (scoreType == "coverage") {
3030
if (hasAsOfData) {
3131
filteredScoreDf <- filteredScoreDf %>%
@@ -56,40 +56,23 @@ filterOverAllLocations <- function(filteredScoreDf, scoreType, hasAsOfData = FAL
5656
# Only use weekly aheads for hospitalizations
5757
# May change in the future
5858
filterHospitalizationsAheads <- function(scoreDf) {
59-
scoreDf["weekday"] <- weekdays(as.Date(scoreDf$target_end_date))
60-
scoreDf <- scoreDf %>% filter(weekday == HOSPITALIZATIONS_TARGET_DAY)
59+
days_list <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
60+
# Make sure to use `data.table`'s `wday`; `lubridate` has a function of the same name.
61+
scoreDf["weekday"] <- days_list[data.table::wday(as.Date(scoreDf$target_end_date, "%Y-%m-%d"))]
62+
scoreDf <- filter(scoreDf, weekday == HOSPITALIZATIONS_TARGET_DAY)
63+
scoreDf$ahead_group <- case_when(
64+
scoreDf$ahead >= HOSPITALIZATIONS_OFFSET & scoreDf$ahead < 7 + HOSPITALIZATIONS_OFFSET ~ 1L,
65+
scoreDf$ahead >= 7 + HOSPITALIZATIONS_OFFSET & scoreDf$ahead < 14 + HOSPITALIZATIONS_OFFSET ~ 2L,
66+
scoreDf$ahead >= 14 + HOSPITALIZATIONS_OFFSET & scoreDf$ahead < 21 + HOSPITALIZATIONS_OFFSET ~ 3L,
67+
scoreDf$ahead >= 21 + HOSPITALIZATIONS_OFFSET & scoreDf$ahead < 28 + HOSPITALIZATIONS_OFFSET ~ 4L,
68+
TRUE ~ NA_integer_
69+
)
6170

62-
oneAheadDf <- scoreDf %>%
63-
filter(ahead >= HOSPITALIZATIONS_OFFSET) %>%
64-
filter(ahead < 7 + HOSPITALIZATIONS_OFFSET) %>%
65-
group_by(target_end_date, forecaster) %>%
66-
filter(ahead == min(ahead)) %>%
67-
mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[1])
68-
69-
return(bind_rows(
70-
scoreDf %>%
71-
filter(ahead >= HOSPITALIZATIONS_OFFSET) %>%
72-
filter(ahead < 7 + HOSPITALIZATIONS_OFFSET) %>%
73-
group_by(target_end_date, forecaster) %>%
74-
filter(ahead == min(ahead)) %>%
75-
mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[1]),
76-
scoreDf %>%
77-
filter(ahead >= 7 + HOSPITALIZATIONS_OFFSET) %>%
78-
filter(ahead < 14 + HOSPITALIZATIONS_OFFSET) %>%
79-
group_by(target_end_date, forecaster) %>%
80-
filter(ahead == min(ahead)) %>%
81-
mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[2]),
82-
scoreDf %>%
83-
filter(ahead >= 14 + HOSPITALIZATIONS_OFFSET) %>%
84-
filter(ahead < 21 + HOSPITALIZATIONS_OFFSET) %>%
85-
group_by(target_end_date, forecaster) %>%
86-
filter(ahead == min(ahead)) %>%
87-
mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[3]),
71+
return(
8872
scoreDf %>%
89-
filter(ahead >= 21 + HOSPITALIZATIONS_OFFSET) %>%
90-
filter(ahead < 28 + HOSPITALIZATIONS_OFFSET) %>%
91-
group_by(target_end_date, forecaster) %>%
73+
filter(!is.na(ahead_group)) %>%
74+
group_by(target_end_date, forecaster, ahead_group) %>%
9275
filter(ahead == min(ahead)) %>%
93-
mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[4])
94-
))
76+
mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[ahead_group])
77+
)
9578
}

app/R/exportScores.R

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,20 @@ exportScoresUI <- function(id = "exportScores") {
66
}
77

88
createExportScoresDataFrame <- function(scoreDf, targetVariable, scoreType, forecasters, loc, coverageInterval) {
9-
signalFilter <- CASE_FILTER
10-
if (targetVariable == "Deaths") {
11-
signalFilter <- DEATH_FILTER
12-
} else if (targetVariable == "Hospitalizations") {
13-
signalFilter <- HOSPITALIZATIONS_FILTER
14-
}
9+
scoreDf <- filter(
10+
scoreDf[[targetVariable]],
11+
forecaster %chin% forecasters
12+
)
1513
scoreDf <- renameScoreCol(scoreDf, scoreType, coverageInterval)
16-
scoreDf <- scoreDf %>%
17-
filter(signal == signalFilter) %>%
18-
filter(forecaster %in% forecasters)
14+
1915
if (loc == TOTAL_LOCATIONS || scoreType == "coverage") {
20-
if (signalFilter == HOSPITALIZATIONS_FILTER) {
16+
if (targetVariable == "Hospitalizations") {
2117
scoreDf <- filterHospitalizationsAheads(scoreDf)
2218
}
2319
scoreDf <- filterOverAllLocations(scoreDf, scoreType)
2420
return(scoreDf[[1]])
2521
} else {
26-
scoreDf <- scoreDf %>% filter(geo_value == tolower(loc))
22+
scoreDf <- filter(scoreDf, geo_value == tolower(loc))
2723
scoreDf <- scoreDf[c(
2824
"ahead", "geo_value", "forecaster", "forecast_date",
2925
"data_source", "target_end_date", "Score", "actual"

app/global.R

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,26 @@ library(shiny)
22
library(shinyjs)
33
library(plotly)
44
library(tidyr)
5+
library(purrr)
56
library(dplyr, warn.conflicts = FALSE)
67
library(lubridate)
78
library(viridis)
89
library(tsibble)
910
library(covidcast)
11+
library(data.table)
1012

11-
appVersion <- "6.1.0"
13+
appVersion <- "7.0.0"
1214

1315
COVERAGE_INTERVALS <- c("10", "20", "30", "40", "50", "60", "70", "80", "90", "95", "98")
14-
DEATH_FILTER <- "deaths_incidence_num"
15-
CASE_FILTER <- "confirmed_incidence_num"
1616
CASES_DEATHS_TARGET_DAY <- "Saturday"
17-
HOSPITALIZATIONS_FILTER <- "confirmed_admissions_covid_1d"
1817
HOSPITALIZATIONS_TARGET_DAY <- "Wednesday"
1918
TOTAL_LOCATIONS <- "Totaled Over States*"
2019
AHEAD_OPTIONS <- c(1, 2, 3, 4)
2120

21+
INIT_SCORE_TYPE <- "wis"
22+
INIT_TARGET <- "Hospitalizations"
23+
TARGET_OPTIONS <- c("Deaths", "Cases", "Hospitalizations")
24+
2225
# Num days to offset the forecast week by
2326
# Example: if HOSPITALIZATIONS_TARGET_DAY is Wednesday and HOSPITALIZATIONS_OFFSET is 2,
2427
# ahead 1 has to have forecast date of Monday or earlier,
@@ -29,8 +32,18 @@ HOSPITALIZATIONS_AHEAD_OPTIONS <- c(
2932
HOSPITALIZATIONS_OFFSET + 14, HOSPITALIZATIONS_OFFSET + 21
3033
)
3134

32-
# Sets the previous target to be the same as the first one, Deaths
33-
PREV_TARGET <- "Deaths"
35+
CURRENT_TAB_SUFFIX <- ""
36+
ARCHIVE_TAB_SUFFIX <- "_archive"
37+
38+
39+
TARGET_VARS_BY_TAB <- list()
40+
TARGET_VARS_BY_TAB[[paste0("evaluations", CURRENT_TAB_SUFFIX)]] <- c(
41+
"Hospital Admissions" = "Hospitalizations"
42+
)
43+
TARGET_VARS_BY_TAB[[paste0("evaluations", ARCHIVE_TAB_SUFFIX)]] <- c(
44+
"Incident Deaths" = "Deaths",
45+
"Incident Cases" = "Cases"
46+
)
3447

3548
# When RE_RENDER_TRUTH = TRUE
3649
# summaryPlot will be called only to update TruthPlot

0 commit comments

Comments
 (0)