1
1
library(dplyr )
2
- library(evalcast )
3
2
library(ggplot2 )
4
3
library(magrittr )
5
4
library(tidyr )
6
5
7
6
8
- get_truth_data <- function (exclude_geos , ... ) {
9
- download_args <- list (... )
10
- truth_data <- do.call(download_signal , download_args )
11
- truth_data %<> % filter(! (.data $ geo_value %in% exclude_geos ))
12
- truth_data <- truth_data %> %
13
- dplyr :: select(.data $ geo_value , .data $ time_value , .data $ value ) %> %
14
- dplyr :: rename(target_end_date = .data $ time_value )
15
- return (truth_data %> % tibble())
16
- }
17
-
18
7
get_quantiles_df <- function (predictions_cards , intervals = c(.5 , .9 ), ... ) {
19
8
predictions_cards <- predictions_cards %> %
20
9
dplyr :: select(
21
- .data $ geo_value , .data $ quantile ,
22
- .data $ value , .data $ forecaster , .data $ forecast_date ,
23
- .data $ target_end_date
10
+ geo_value ,
11
+ quantile ,
12
+ value ,
13
+ forecaster ,
14
+ forecast_date ,
15
+ target_end_date
24
16
)
25
17
26
18
lower_bounds <- predictions_cards %> %
@@ -103,15 +95,36 @@ plot_points <- function(g, points_df) {
103
95
return (g )
104
96
}
105
97
106
- plot_state_forecasters <- function (predictions_cards , exclude_geos = c(), start_day = NULL , ncol = 5 , offline_signal_dir = NULL ) {
98
+ plot_state_forecasters <- function (predictions_cards , exclude_geos = c(), start_day = NULL , ncol = 5 ) {
107
99
if (nrow(predictions_cards ) == 0 ) {
108
100
return (NULL )
109
101
}
110
102
111
- td1 <- get_truth_data(exclude_geos = exclude_geos , data_source = " hhs" , signal = " confirmed_admissions_covid_1d" , start_day = start_day , geo_type = " state" , offline_signal_dir = offline_signal_dir )
112
- td1 $ data_source <- " hhs"
113
- td2 <- get_truth_data(exclude_geos = exclude_geos , data_source = " jhu-csse" , signal = " confirmed_7dav_incidence_num" , start_day = start_day , geo_type = " state" , offline_signal_dir = offline_signal_dir )
114
- td2 $ data_source <- " jhu"
103
+ td1 <- epidatr :: pub_covidcast(
104
+ source = " hhs" ,
105
+ signals = " confirmed_admissions_covid_1d" ,
106
+ geo_type = " state" ,
107
+ time_type = " day" ,
108
+ geo_values = " *" ,
109
+ time_values = epidatr :: epirange(start_day , Sys.Date())
110
+ ) %> %
111
+ filter(! (.data $ geo_value %in% exclude_geos )) %> %
112
+ dplyr :: select(.data $ geo_value , .data $ time_value , .data $ value ) %> %
113
+ dplyr :: rename(target_end_date = .data $ time_value ) %> %
114
+ mutate(data_source = " hhs" )
115
+ td2 <- epidatr :: pub_covidcast(
116
+ source = " jhu-csse" ,
117
+ signals = " confirmed_7dav_incidence_num" ,
118
+ geo_type = " state" ,
119
+ time_type = " day" ,
120
+ geo_values = " *" ,
121
+ time_values = epidatr :: epirange(start_day , Sys.Date())
122
+ ) %> %
123
+ filter(! (.data $ geo_value %in% exclude_geos )) %> %
124
+ dplyr :: select(.data $ geo_value , .data $ time_value , .data $ value ) %> %
125
+ dplyr :: rename(target_end_date = .data $ time_value ) %> %
126
+ mutate(data_source = " jhu" )
127
+
115
128
td1.max <- td1 %> %
116
129
group_by(geo_value ) %> %
117
130
summarize(max_value = max(value ))
@@ -129,14 +142,8 @@ plot_state_forecasters <- function(predictions_cards, exclude_geos = c(), start_
129
142
130
143
# Setup plot
131
144
g <- ggplot(td1 , mapping = aes(x = .data $ target_end_date , color = .data $ forecaster , fill = .data $ forecaster ))
132
-
133
- points_df <- get_points_df(predictions_cards )
134
- g <- plot_points(g , points_df )
135
-
136
- quantiles_df <- get_quantiles_df(predictions_cards )
137
- g <- plot_quantiles(g , quantiles_df )
138
-
139
- # Plot truth data by geo
145
+ g <- plot_points(g , get_points_df(predictions_cards ))
146
+ g <- plot_quantiles(g , get_quantiles_df(predictions_cards ))
140
147
g <- g +
141
148
geom_line(mapping = aes(y = .data $ value )) +
142
149
geom_line(data = td2 , mapping = aes(x = .data $ target_end_date , y = .data $ scaled_value )) +
@@ -146,34 +153,47 @@ plot_state_forecasters <- function(predictions_cards, exclude_geos = c(), start_
146
153
return (g )
147
154
}
148
155
149
- plot_nation_forecasters <- function (predictions_cards , exclude_geos = c(), start_day = NULL , ncol = 5 , offline_signal_dir = NULL ) {
156
+ plot_nation_forecasters <- function (predictions_cards , exclude_geos = c(), start_day = NULL , ncol = 5 ) {
150
157
if (nrow(predictions_cards ) == 0 ) {
151
158
return (NULL )
152
159
}
153
160
154
- td1 <- get_truth_data(exclude_geos = exclude_geos , data_source = " hhs" , signal = " confirmed_admissions_covid_1d" , start_day = start_day , geo_type = " nation" , offline_signal_dir = offline_signal_dir )
155
- td1 $ data_source <- " hhs"
156
- td2 <- get_truth_data(exclude_geos = exclude_geos , data_source = " jhu-csse" , signal = " confirmed_7dav_incidence_num" , start_day = start_day , geo_type = " nation" , offline_signal_dir = offline_signal_dir )
157
- td2 $ data_source <- " jhu"
161
+ td1 <- epidatr :: pub_covidcast(
162
+ source = " hhs" ,
163
+ signals = " confirmed_admissions_covid_1d" ,
164
+ geo_type = " nation" ,
165
+ time_type = " day" ,
166
+ geo_values = " *" ,
167
+ time_values = epidatr :: epirange(start_day , Sys.Date())
168
+ ) %> %
169
+ filter(! (.data $ geo_value %in% exclude_geos )) %> %
170
+ dplyr :: select(.data $ time_value , .data $ value ) %> %
171
+ dplyr :: rename(target_end_date = .data $ time_value ) %> %
172
+ mutate(data_source = " hhs" )
173
+ td2 <- epidatr :: pub_covidcast(
174
+ source = " jhu-csse" ,
175
+ signals = " confirmed_7dav_incidence_num" ,
176
+ geo_type = " nation" ,
177
+ time_type = " day" ,
178
+ geo_values = " *" ,
179
+ time_values = epidatr :: epirange(start_day , Sys.Date())
180
+ ) %> %
181
+ filter(! (.data $ geo_value %in% exclude_geos )) %> %
182
+ dplyr :: select(.data $ time_value , .data $ value ) %> %
183
+ dplyr :: rename(target_end_date = .data $ time_value ) %> %
184
+ mutate(data_source = " jhu" )
158
185
td1.max <- td1 %> %
159
186
summarize(max_value = max(value )) %> %
160
187
pull(max_value )
161
188
td2.max <- td2 %> %
162
189
summarize(max_value = max(value )) %> %
163
190
pull(max_value )
164
- td2 <- td2 %> %
165
- mutate(scaled_value = value * td1.max / td2.max )
191
+ td2 <- td2 %> % mutate(scaled_value = value * td1.max / td2.max )
166
192
167
193
# Setup plot
168
194
g <- ggplot(td1 , mapping = aes(x = .data $ target_end_date ))
169
-
170
- quantiles_df <- get_quantiles_df(predictions_cards )
171
- g <- plot_quantiles(g , quantiles_df )
172
-
173
- points_df <- get_points_df(predictions_cards )
174
- g <- plot_points(g , points_df )
175
-
176
- # Plot truth data by geo
195
+ g <- plot_quantiles(g , get_quantiles_df(predictions_cards ))
196
+ g <- plot_points(g , get_points_df(predictions_cards ))
177
197
g <- g +
178
198
geom_line(mapping = aes(y = .data $ value , color = " confirmed admissions" )) +
179
199
geom_line(data = td2 , mapping = aes(x = .data $ target_end_date , y = .data $ scaled_value , color = " 7day case sum" )) +
0 commit comments