Skip to content

Commit e1cbd56

Browse files
committed
Merge branch 'main' of https://github.com/dajmcdon/epiprocess into km/remove-gginnards
2 parents 74761ac + 0b16591 commit e1cbd56

25 files changed

+495
-0
lines changed

.Rbuildignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@
55
^docs$
66
^_pkgdown.yml
77
^index\.md$
8+
^data-raw$

DESCRIPTION

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,6 @@ Encoding: UTF-8
5252
LazyData: true
5353
Roxygen: list(markdown = TRUE)
5454
RoxygenNote: 7.1.2
55+
Depends:
56+
R (>= 2.10)
57+

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ importFrom(dplyr,group_modify)
5959
importFrom(dplyr,mutate)
6060
importFrom(dplyr,relocate)
6161
importFrom(dplyr,rename)
62+
importFrom(dplyr,select)
6263
importFrom(dplyr,slice)
6364
importFrom(dplyr,ungroup)
6465
importFrom(lubridate,days)

R/archive.R

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,23 @@ epi_archive =
448448
#' ```
449449
#'
450450
#' @export
451+
#' @examples
452+
#' df <- data.frame (geo_value = c(replicate(2, "ca"), replicate(2, "fl")),
453+
#' county = c(1, 3, 2, 5),
454+
#' time_value = c("2020-06-01",
455+
#' "2020-06-02",
456+
#' "2020-06-01",
457+
#' "2020-06-02"),
458+
#' version = c("2020-06-02",
459+
#' "2020-06-03",
460+
#' "2020-06-02",
461+
#' "2020-06-03"),
462+
#' cases = c(1, 2, 3, 4),
463+
#' cases_rate = c(0.01, 0.02, 0.01, 0.05))
464+
#'
465+
#' x <- df %>% as_epi_archive(geo_type = "state",
466+
#' time_type = "day",
467+
#' other_keys = "county")
451468
as_epi_archive = function(x, geo_type, time_type, other_keys,
452469
additional_metadata = list()) {
453470
epi_archive$new(x, geo_type, time_type, other_keys, additional_metadata)
@@ -459,6 +476,9 @@ as_epi_archive = function(x, geo_type, time_type, other_keys,
459476
#' @return `TRUE` if the object inherits from `epi_archive`.
460477
#'
461478
#' @export
479+
#' @examples
480+
#' is_epi_archive(jhu_csse_daily) # FALSE
481+
#' is_epi_archive(archive_cases_dv) # TRUE
462482
is_epi_archive = function(x) {
463483
inherits(x, "epi_archive")
464484
}

R/correlation.R

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,33 @@
4040
#' @importFrom rlang .data !! !!! enquo syms
4141
#' @importFrom tidyselect eval_select
4242
#' @export
43+
#' @examples
44+
#'
45+
#' # linear association of case and death rates on any given day
46+
#' epi_cor(x = jhu_csse_daily,
47+
#' var1 = case_rate_7d_av,
48+
#' var2 = death_rate_7d_av,
49+
#' cor_by = "time_value")
50+
#'
51+
#' # correlation of death rates and lagged case rates
52+
#' epi_cor(x = jhu_csse_daily,
53+
#' var1 = case_rate_7d_av,
54+
#' var2 = death_rate_7d_av,
55+
#' cor_by = time_value,
56+
#' dt1 = -10)
57+
#'
58+
#' # correlation grouped by location
59+
#' epi_cor(x = jhu_csse_daily,
60+
#' var1 = case_rate_7d_av,
61+
#' var2 = death_rate_7d_av,
62+
#' cor_by = geo_value)
63+
#'
64+
#' # correlation grouped by location and incorporates lagged cases rates
65+
#' epi_cor(x = jhu_csse_daily,
66+
#' var1 = case_rate_7d_av,
67+
#' var2 = death_rate_7d_av,
68+
#' cor_by = geo_value,
69+
#' dt1 = -10)
4370
epi_cor = function(x, var1, var2, dt1 = 0, dt2 = 0, shift_by = geo_value,
4471
cor_by = geo_value, use = "na.or.complete",
4572
method = c("pearson", "kendall", "spearman")) {

R/data.R

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#' JHU daily cases and deaths from California and Florida
2+
#'
3+
#' This data source of confirmed COVID-19 cases and deaths
4+
#' is based on reports made available by the Center for
5+
#' Systems Science and Engineering at Johns Hopkins University.
6+
#' This example data ranges from June 1 to June 15, 2020.
7+
#'
8+
#' @format A tibble with 30 rows and 6 variables:
9+
#' \describe{
10+
#' \item{geo_value}{the geographic value associated with each row of measurements.}
11+
#' \item{time_value}{the time value associated with each row of measurements.}
12+
#' \item{case_rate_7d_av}{7-day average signal of number of new confirmed COVID-19 cases per 100,000 population, daily}
13+
#' \item{death_rate_7d_av}{7-day average signal of number of new confirmed deaths due to COVID-19 per 100,000 population, daily}
14+
#' \item{cases}{Number of new confirmed COVID-19 cases, daily}
15+
#' \item{cases_7d_av}{7-day average signal of number of new confirmed COVID-19 cases, daily}
16+
#' }
17+
#' @source COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University.
18+
#' \url{https://github.com/CSSEGISandData/COVID-19}
19+
"jhu_csse_daily"
20+
21+
22+
#' Daily doctor visits and cases from California and Florida in archive format
23+
#'
24+
#' This data source is based on information about outpatient visits,
25+
#' provided to us by health system partners, and also contains confirmed
26+
#' COVID-19 cases based on reports made available by the Center for
27+
#' Systems Science and Engineering at Johns Hopkins University.
28+
#' This example data ranges from June 1 to June 15, 2020.
29+
#'
30+
#' @format An `epi_archive` data format. The data table DT has 160 rows and 5 columns:
31+
#' \describe{
32+
#' \item{geo_value}{the geographic value associated with each row of measurements.}
33+
#' \item{time_value}{the time value associated with each row of measurements.}
34+
#' \item{version}{ the time value specifying the version for each row of measurements. }
35+
#' \item{percent_cli}{percentage of doctor’s visits with CLI (COVID-like illness) computed from medical insurance claims}
36+
#' \item{case_rate}{7-day average signal of number of new confirmed deaths due to COVID-19 per 100,000 population, daily}
37+
#' }
38+
#' @source These data sources are provided under the terms of the
39+
#' \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution license:}
40+
#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{Doctor Visits}
41+
#'
42+
#' COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University.
43+
#' \url{https://github.com/CSSEGISandData/COVID-19}
44+
"archive_cases_dv"
45+

R/methods-epi_archive.R

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@
2727
#' ```
2828
#'
2929
#' @export
30+
#' @examples
31+
#' epix_as_of(x = archive_cases_dv,
32+
#' max_version = max(archive_cases_dv$DT$version))
3033
epix_as_of = function(x, max_version, min_time_value = -Inf) {
3134
if (!inherits(x, "epi_archive")) Abort("`x` must be of class `epi_archive`.")
3235
return(x$as_of(max_version, min_time_value))
@@ -67,6 +70,17 @@ epix_as_of = function(x, max_version, min_time_value = -Inf) {
6770
#' ```
6871
#'
6972
#' @export
73+
#' @examples
74+
#' # create two example epi_archive datasets
75+
#' x <- archive_cases_dv$DT %>%
76+
#' dplyr::select(geo_value,time_value,version,case_rate) %>%
77+
#' as_epi_archive()
78+
#' y <- archive_cases_dv$DT %>%
79+
#' dplyr::select(geo_value,time_value,version,percent_cli) %>%
80+
#' as_epi_archive()
81+
#'
82+
#' # a full join stored in x
83+
#' epix_merge(x, y, all = TRUE)
7084
epix_merge = function(x, y, ..., locf = TRUE, nan = NA) {
7185
if (!inherits(x, "epi_archive")) Abort("`x` must be of class `epi_archive`.")
7286
return(x$merge(y, ..., locf = locf, nan = nan))
@@ -167,6 +181,17 @@ epix_merge = function(x, y, ..., locf = TRUE, nan = NA) {
167181
#'
168182
#' @importFrom rlang enquo
169183
#' @export
184+
#' @examples
185+
#' # every date is a reference time point for the 3 day average sliding window
186+
#' fc_time_values <- seq(as.Date("2020-06-01"),
187+
#' as.Date("2020-06-15"),
188+
#' by = "1 day")
189+
#' epix_slide(x = archive_cases_dv,
190+
#' f = ~ mean(.x$case_rate),
191+
#' n = 3,
192+
#' group_by = geo_value,
193+
#' ref_time_values = fc_time_values,
194+
#' new_col_name = 'case_rate_3d_av')
170195
epix_slide = function(x, f, ..., n = 7, group_by, ref_time_values,
171196
time_step, new_col_name = "slide_value",
172197
as_list_col = FALSE, names_sep = "_", all_rows = FALSE) {

R/outliers.R

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,33 @@
4444
#' STL decomposition.
4545
#'
4646
#' @export
47+
#' @importFrom dplyr select
48+
#' @examples
49+
#' detection_methods = dplyr::bind_rows(
50+
#' dplyr::tibble(method = "rm",
51+
#' args = list(list(detect_negatives = TRUE,
52+
#' detection_multiplier = 2.5)),
53+
#' abbr = "rm"),
54+
#' dplyr::tibble(method = "stl",
55+
#' args = list(list(detect_negatives = TRUE,
56+
#' detection_multiplier = 2.5,
57+
#' seasonal_period = 7)),
58+
#' abbr = "stl_seasonal"),
59+
#' dplyr::tibble(method = "stl",
60+
#' args = list(list(detect_negatives = TRUE,
61+
#' detection_multiplier = 2.5,
62+
#' seasonal_period = NULL)),
63+
#' abbr = "stl_nonseasonal"))
64+
#'
65+
#' x <- jhu_csse_daily %>%
66+
#' dplyr::select(geo_value,time_value,cases) %>%
67+
#' as_epi_df()%>%
68+
#' group_by(geo_value) %>%
69+
#' mutate(outlier_info = detect_outlr(
70+
#' x = time_value, y = cases,
71+
#' methods = detection_methods,
72+
#' combiner = "median")) %>%
73+
#' unnest(outlier_info)
4774
detect_outlr = function(x = seq_along(y), y,
4875
methods = tibble::tibble(method = "rm",
4976
args = list(list()),
@@ -120,6 +147,15 @@ detect_outlr = function(x = seq_along(y), y,
120147
#' `lower`, `upper`, and `replacement`.
121148
#'
122149
#' @export
150+
#' @examples
151+
# # Detect outliers based on a rolling median
152+
#' jhu_csse_daily %>%
153+
#' dplyr::select(geo_value,time_value,cases) %>%
154+
#' as_epi_df()%>%
155+
#' group_by(geo_value) %>%
156+
#' mutate(outlier_info = detect_outlr_rm(
157+
#' x = time_value, y = cases)) %>%
158+
#' unnest(outlier_info)
123159
detect_outlr_rm = function(x = seq_along(y), y, n = 21,
124160
log_transform = FALSE,
125161
detect_negatives = FALSE,
@@ -208,6 +244,16 @@ detect_outlr_rm = function(x = seq_along(y), y, n = 21,
208244
#' @importFrom stats median
209245
#' @importFrom tidyselect starts_with
210246
#' @export
247+
#' @examples
248+
# # Detects outliers based on a seasonal-trend decomposition using LOESS
249+
#' jhu_csse_daily %>%
250+
#' dplyr::select(geo_value,time_value,cases) %>%
251+
#' as_epi_df()%>%
252+
#' group_by(geo_value) %>%
253+
#' mutate(outlier_info = detect_outlr_stl(
254+
#' x = time_value, y = cases,
255+
#' seasonal_period = 7 )) %>% # weekly seasonality for daily data
256+
#' unnest(outlier_info)
211257
detect_outlr_stl = function(x = seq_along(y), y,
212258
n_trend = 21,
213259
n_seasonal = 21,

R/slide.R

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,25 @@
8888
#' @importFrom lubridate days weeks
8989
#' @importFrom rlang .data .env !! enquo enquos sym
9090
#' @export
91+
#' @examples
92+
#' # slide a 7-day trailing average formula on cases
93+
#' jhu_csse_daily %>%
94+
#' group_by(geo_value) %>%
95+
#' epi_slide(cases_7dav = mean(cases), n = 7,
96+
#' align = "right")
97+
#'
98+
#' # slide a left-aligned 7-day trailing average
99+
#' jhu_csse_daily %>%
100+
#' group_by(geo_value) %>%
101+
#' epi_slide(cases_7dav = mean(cases), n = 7,
102+
#' align = "left")
103+
#'
104+
#' # nested new columns
105+
#' jhu_csse_daily %>%
106+
#' group_by(geo_value) %>%
107+
#' epi_slide(a = data.frame(cases_2dav = mean(cases),
108+
#' cases_2dma = mad(cases)),
109+
#' n = 2, as_list_col = TRUE)
91110
epi_slide = function(x, f, ..., n = 7, ref_time_values,
92111
align = c("right", "center", "left"), before, time_step,
93112
new_col_name = "slide_value", as_list_col = FALSE,

data-raw/archive_cases_dv.R

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
library(delphi.epidata)
2+
library(epiprocess)
3+
library(data.table)
4+
library(dplyr)
5+
6+
archive_cases_dv <- covidcast(
7+
data_source = "doctor-visits",
8+
signals = "smoothed_adj_cli",
9+
time_type = "day",
10+
geo_type = "state",
11+
time_value = epirange(20200601, 20200615),
12+
geo_values = "ca,fl",
13+
issues = epirange(20200601, 20200615)
14+
) %>%
15+
fetch_tbl() %>%
16+
select(geo_value, time_value, version = issue, percent_cli = value) %>%
17+
as_epi_archive()
18+
19+
case_rate <- covidcast(
20+
data_source = "jhu-csse",
21+
signals = "confirmed_7dav_incidence_prop",
22+
time_type = "day",
23+
geo_type = "state",
24+
time_value = epirange(20200601, 20200615),
25+
geo_values = "ca,fl",
26+
issues = epirange(20200601, 20200615)
27+
) %>%
28+
fetch_tbl() %>%
29+
select(geo_value, time_value, version = issue, case_rate = value) %>%
30+
as_epi_archive()
31+
32+
epix_merge(archive_cases_dv, case_rate, all = TRUE)
33+
34+
usethis::use_data(archive_cases_dv, overwrite = TRUE)

data-raw/jhu_csse_daily.R

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
library(delphi.epidata)
2+
library(epiprocess)
3+
library(dplyr)
4+
5+
confirmed_7dav_incidence_prop <- covidcast(
6+
data_source = "jhu-csse",
7+
signals = "confirmed_7dav_incidence_prop",
8+
time_type = "day",
9+
geo_type = "state",
10+
time_values = epirange(20200601, 20200615),
11+
geo_values = "ca,fl"
12+
) %>%
13+
fetch_tbl() %>%
14+
select(geo_value, time_value, case_rate_7d_av = value) %>%
15+
arrange(geo_value, time_value)
16+
17+
deaths_7dav_incidence_prop <- covidcast(
18+
data_source = "jhu-csse",
19+
signals = "deaths_7dav_incidence_prop",
20+
time_type = "day",
21+
geo_type = "state",
22+
time_values = epirange(20200601, 20200615),
23+
geo_values = "ca,fl"
24+
) %>%
25+
fetch_tbl() %>%
26+
select(geo_value, time_value, death_rate_7d_av = value) %>%
27+
arrange(geo_value, time_value)
28+
29+
confirmed_incidence_num <- covidcast(
30+
data_source = "jhu-csse",
31+
signals = "confirmed_incidence_num",
32+
time_type = "day",
33+
geo_type = "state",
34+
time_values = epirange(20200601, 20200615),
35+
geo_values = "ca,fl"
36+
) %>%
37+
fetch_tbl() %>%
38+
select(geo_value, time_value, cases = value) %>%
39+
arrange(geo_value, time_value)
40+
41+
confirmed_7dav_incidence_num <- covidcast(
42+
data_source = "jhu-csse",
43+
signals = "confirmed_7dav_incidence_num",
44+
time_type = "day",
45+
geo_type = "state",
46+
time_values = epirange(20200601, 20200615),
47+
geo_values = "ca,fl"
48+
) %>%
49+
fetch_tbl() %>%
50+
select(geo_value, time_value, cases_7d_av = value) %>%
51+
arrange(geo_value, time_value)
52+
53+
jhu_csse_daily <- confirmed_7dav_incidence_prop %>%
54+
full_join(deaths_7dav_incidence_prop,
55+
by = c("geo_value", "time_value")) %>%
56+
full_join(confirmed_incidence_num,
57+
by = c("geo_value", "time_value")) %>%
58+
full_join(confirmed_7dav_incidence_num,
59+
by = c("geo_value", "time_value")) %>%
60+
as_epi_df()
61+
62+
usethis::use_data(jhu_csse_daily, overwrite = TRUE)

data/archive_cases_dv.rda

39.5 KB
Binary file not shown.

data/jhu_csse_daily.rda

1.33 KB
Binary file not shown.

0 commit comments

Comments
 (0)