Skip to content

Commit b62552e

Browse files
authored
Merge pull request #10 from dajmcdon/cy-add-roxygen-example
Change vignette dataset
2 parents 3c83c72 + 6c8f252 commit b62552e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+601
-262
lines changed

R/archive.R

+2-2
Original file line numberDiff line numberDiff line change
@@ -477,8 +477,8 @@ as_epi_archive = function(x, geo_type, time_type, other_keys,
477477
#'
478478
#' @export
479479
#' @examples
480-
#' is_epi_archive(jhu_csse_daily) # FALSE (this is an epi_df, not epi_archive)
481-
#' is_epi_archive(archive_cases_dv) # TRUE
480+
#' is_epi_archive(jhu_csse_daily_subset) # FALSE (this is an epi_df, not epi_archive)
481+
#' is_epi_archive(archive_cases_dv_subset) # TRUE
482482
is_epi_archive = function(x) {
483483
inherits(x, "epi_archive")
484484
}

R/correlation.R

+6-6
Original file line numberDiff line numberDiff line change
@@ -43,30 +43,30 @@
4343
#' @examples
4444
#'
4545
#' # linear association of case and death rates on any given day
46-
#' epi_cor(x = jhu_csse_daily,
46+
#' epi_cor(x = jhu_csse_daily_subset,
4747
#' var1 = case_rate_7d_av,
4848
#' var2 = death_rate_7d_av,
4949
#' cor_by = "time_value")
5050
#'
5151
#' # correlation of death rates and lagged case rates
52-
#' epi_cor(x = jhu_csse_daily,
52+
#' epi_cor(x = jhu_csse_daily_subset,
5353
#' var1 = case_rate_7d_av,
5454
#' var2 = death_rate_7d_av,
5555
#' cor_by = time_value,
56-
#' dt1 = -10)
56+
#' dt1 = -2)
5757
#'
5858
#' # correlation grouped by location
59-
#' epi_cor(x = jhu_csse_daily,
59+
#' epi_cor(x = jhu_csse_daily_subset,
6060
#' var1 = case_rate_7d_av,
6161
#' var2 = death_rate_7d_av,
6262
#' cor_by = geo_value)
6363
#'
6464
#' # correlation grouped by location and incorporates lagged cases rates
65-
#' epi_cor(x = jhu_csse_daily,
65+
#' epi_cor(x = jhu_csse_daily_subset,
6666
#' var1 = case_rate_7d_av,
6767
#' var2 = death_rate_7d_av,
6868
#' cor_by = geo_value,
69-
#' dt1 = -10)
69+
#' dt1 = -2)
7070
epi_cor = function(x, var1, var2, dt1 = 0, dt2 = 0, shift_by = geo_value,
7171
cor_by = geo_value, use = "na.or.complete",
7272
method = c("pearson", "kendall", "spearman")) {

R/data.R

+77-17
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
#' JHU daily cases and deaths from California and Florida
1+
#' Subset of JHU daily cases and deaths from California, Florida, Texas, New York, Georgia, and Pennsylvania
22
#'
33
#' This data source of confirmed COVID-19 cases and deaths
44
#' is based on reports made available by the Center for
55
#' Systems Science and Engineering at Johns Hopkins University.
6-
#' This example data ranges from June 1 to June 15, 2020.
6+
#' This example data ranges from Mar 1, 2020 to Dec 31, 2021, and is limited to California, Florida, Texas, New York, Georgia, and Pennsylvania.
77
#'
8-
#' @format A tibble with 30 rows and 6 variables:
8+
#' @format A tibble with 4026 rows and 6 variables:
99
#' \describe{
1010
#' \item{geo_value}{the geographic value associated with each row of measurements.}
1111
#' \item{time_value}{the time value associated with each row of measurements.}
@@ -14,32 +14,92 @@
1414
#' \item{cases}{Number of new confirmed COVID-19 cases, daily}
1515
#' \item{cases_7d_av}{7-day average signal of number of new confirmed COVID-19 cases, daily}
1616
#' }
17-
#' @source COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University.
18-
#' \url{https://github.com/CSSEGISandData/COVID-19}
19-
"jhu_csse_daily"
17+
#' @source This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the
18+
#' \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
19+
#' by the Johns Hopkins University on behalf of its Center for Systems Science in Engineering.
20+
#' Copyright Johns Hopkins University 2020.
21+
#'
22+
#' Modifications:
23+
#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: These signals are taken directly from the JHU CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository} without changes. The 7-day average signals are computed by Delphi by calculating moving averages of the preceding 7 days, so the signal for June 7 is the average of the underlying data for June 1 through 7, inclusive.
24+
#' * Furthermore, the data has been limited to a very small number of rows, the signal names slightly altered, and formatted into a tibble.
25+
"jhu_csse_daily_subset"
2026

2127

22-
#' Daily doctor visits and cases from California and Florida in archive format
28+
#' Subset of daily doctor visits and cases from California, Florida, Texas, and New York in archive format
2329
#'
2430
#' This data source is based on information about outpatient visits,
2531
#' provided to us by health system partners, and also contains confirmed
2632
#' COVID-19 cases based on reports made available by the Center for
2733
#' Systems Science and Engineering at Johns Hopkins University.
28-
#' This example data ranges from June 1 to June 15, 2020.
34+
#' This example data ranges from June 1, 2020 to Dec 1, 2021, and is also limited to California, Florida, Texas, and New York.
2935
#'
30-
#' @format An `epi_archive` data format. The data table DT has 160 rows and 5 columns:
36+
#' @format An `epi_archive` data format. The data table DT has 129,638 rows and 5 columns:
3137
#' \describe{
3238
#' \item{geo_value}{the geographic value associated with each row of measurements.}
3339
#' \item{time_value}{the time value associated with each row of measurements.}
34-
#' \item{version}{ the time value specifying the version for each row of measurements. }
40+
#' \item{version}{the time value specifying the version for each row of measurements. }
3541
#' \item{percent_cli}{percentage of doctor’s visits with CLI (COVID-like illness) computed from medical insurance claims}
36-
#' \item{case_rate}{7-day average signal of number of new confirmed deaths due to COVID-19 per 100,000 population, daily}
42+
#' \item{case_rate_7d_av}{7-day average signal of number of new confirmed COVID-19 cases per 100,000 population, daily}
43+
#' }
44+
#' @source
45+
#' This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the
46+
#' \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
47+
#' by Johns Hopkins University on behalf of its Center for Systems Science in Engineering.
48+
#' Copyright Johns Hopkins University 2020.
49+
#'
50+
#' Modifications:
51+
#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{From the COVIDcast Epidata Doctor Visits API}: These signals are taken directly from the JHU CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository} without changes. The 7-day average signals are computed by Delphi by calculating moving averages of the preceding 7 days, so the signal for June 7 is the average of the underlying data for June 1 through 7, inclusive.
52+
#' * Furthermore, the data has been limited to a very small number of rows, the signal names slightly altered, and formatted into a tibble.
53+
"archive_cases_dv_subset"
54+
55+
56+
#' Subset of JHU daily cases from Florida and New Jersey
57+
#'
58+
#' This data source of confirmed COVID-19 cases
59+
#' is based on reports made available by the Center for
60+
#' Systems Science and Engineering at Johns Hopkins University.
61+
#' This example data is a snapshot as of Oct 28, 2021 and captures the cases from June 1, 2020 to May 31, 2021
62+
#' and is limited to Florida and New Jersey.
63+
#'
64+
#' @format A tibble with 730 rows and 3 variables:
65+
#' \describe{
66+
#' \item{geo_value}{the geographic value associated with each row of measurements.}
67+
#' \item{time_value}{the time value associated with each row of measurements.}
68+
#' \item{cases}{Number of new confirmed COVID-19 cases, daily}
69+
#' }
70+
#' @source This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the
71+
#' \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
72+
#' by the Johns Hopkins University on behalf of its Center for Systems Science in Engineering.
73+
#' Copyright Johns Hopkins University 2020.
74+
#'
75+
#' Modifications:
76+
#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:
77+
#' These signals are taken directly from the JHU CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository} without changes.
78+
#' * Furthermore, the data has been limited to a very small number of rows, the signal names slightly altered, and formatted into a tibble.
79+
"incidence_num_outlier_example"
80+
81+
#' Subset of JHU daily cases from counties in Massachusetts and Vermont
82+
#'
83+
#' This data source of confirmed COVID-19 cases
84+
#' is based on reports made available by the Center for
85+
#' Systems Science and Engineering at Johns Hopkins University.
86+
#' This example data ranges from June 1, 2020 to Dec 31, 2021, and is limited to counties in Massachusetts and Vermont.
87+
#'
88+
#' @format A tibble with 16,212 rows and 5 variables:
89+
#' \describe{
90+
#' \item{geo_value}{the geographic value associated with each row of measurements.}
91+
#' \item{time_value}{the time value associated with each row of measurements.}
92+
#' \item{cases}{Number of new confirmed COVID-19 cases, daily}
93+
#' \item{county_name}{the name of the county}
94+
#' \item{state_name}{the full name of the state}
3795
#' }
38-
#' @source These data sources are provided under the terms of the
39-
#' \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution license:}
40-
#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{Doctor Visits}
96+
#' @source This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the
97+
#' \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
98+
#' by the Johns Hopkins University on behalf of its Center for Systems Science in Engineering.
99+
#' Copyright Johns Hopkins University 2020.
41100
#'
42-
#' COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University.
43-
#' \url{https://github.com/CSSEGISandData/COVID-19}
44-
"archive_cases_dv"
101+
#' Modifications:
102+
#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: These signals are taken directly from the JHU CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository} without changes. The 7-day average signals are computed by Delphi by calculating moving averages of the preceding 7 days, so the signal for June 7 is the average of the underlying data for June 1 through 7, inclusive.
103+
#' * Furthermore, the data has been limited to a very small number of rows, the signal names slightly altered, and formatted into a tibble.
45104

105+
"jhu_csse_county_level_subset"

R/methods-epi_archive.R

+17-7
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,12 @@
2828
#'
2929
#' @export
3030
#' @examples
31-
#' epix_as_of(x = archive_cases_dv,
32-
#' max_version = max(archive_cases_dv$DT$version))
31+
#' # warning message of data latency shown
32+
#' epix_as_of(x = archive_cases_dv_subset,
33+
#' max_version = max(archive_cases_dv_subset$DT$version))
34+
#'
35+
#' # no warning shown
36+
#' epix_as_of(archive_cases_dv_subset, max_version = as.Date("2020-06-10"))
3337
epix_as_of = function(x, max_version, min_time_value = -Inf) {
3438
if (!inherits(x, "epi_archive")) Abort("`x` must be of class `epi_archive`.")
3539
return(x$as_of(max_version, min_time_value))
@@ -72,10 +76,10 @@ epix_as_of = function(x, max_version, min_time_value = -Inf) {
7276
#' @export
7377
#' @examples
7478
#' # create two example epi_archive datasets
75-
#' x <- archive_cases_dv$DT %>%
76-
#' dplyr::select(geo_value,time_value,version,case_rate) %>%
79+
#' x <- archive_cases_dv_subset$DT %>%
80+
#' dplyr::select(geo_value,time_value,version,case_rate_7d_av) %>%
7781
#' as_epi_archive()
78-
#' y <- archive_cases_dv$DT %>%
82+
#' y <- archive_cases_dv_subset$DT %>%
7983
#' dplyr::select(geo_value,time_value,version,percent_cli) %>%
8084
#' as_epi_archive()
8185
#'
@@ -182,11 +186,17 @@ epix_merge = function(x, y, ..., locf = TRUE, nan = NA) {
182186
#' @importFrom rlang enquo
183187
#' @export
184188
#' @examples
185-
#' # every date is a reference time point for the 3 day average sliding window
189+
#' # these dates are reference time points for the 3 day average sliding window
190+
#' # The resulting epi_archive ends up including data averaged from:
191+
#' # 0 day which has no results, for 2020-06-01
192+
#' # 1 day, for 2020-06-02
193+
#' # 2 days, for the rest of the results
194+
#' # never 3 days dur to data latency
195+
#'
186196
#' time_values <- seq(as.Date("2020-06-01"),
187197
#' as.Date("2020-06-15"),
188198
#' by = "1 day")
189-
#' epix_slide(x = archive_cases_dv,
199+
#' epix_slide(x = archive_cases_dv_subset,
190200
#' f = ~ mean(.x$case_rate_7d_av),
191201
#' n = 3,
192202
#' group_by = geo_value,

R/outliers.R

+3-3
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
#' seasonal_period = NULL)),
6363
#' abbr = "stl_nonseasonal"))
6464
#'
65-
#' x <- jhu_csse_daily %>%
65+
#' x <- incidence_num_outlier_example %>%
6666
#' dplyr::select(geo_value,time_value,cases) %>%
6767
#' as_epi_df() %>%
6868
#' group_by(geo_value) %>%
@@ -149,7 +149,7 @@ detect_outlr = function(x = seq_along(y), y,
149149
#' @export
150150
#' @examples
151151
#' # Detect outliers based on a rolling median
152-
#' jhu_csse_daily %>%
152+
#' incidence_num_outlier_example %>%
153153
#' dplyr::select(geo_value,time_value,cases) %>%
154154
#' as_epi_df() %>%
155155
#' group_by(geo_value) %>%
@@ -246,7 +246,7 @@ detect_outlr_rm = function(x = seq_along(y), y, n = 21,
246246
#' @export
247247
#' @examples
248248
#' # Detects outliers based on a seasonal-trend decomposition using LOESS
249-
#' jhu_csse_daily %>%
249+
#' incidence_num_outlier_example %>%
250250
#' dplyr::select(geo_value,time_value,cases) %>%
251251
#' as_epi_df() %>%
252252
#' group_by(geo_value) %>%

R/slide.R

+4-4
Original file line numberDiff line numberDiff line change
@@ -90,19 +90,19 @@
9090
#' @export
9191
#' @examples
9292
#' # slide a 7-day trailing average formula on cases
93-
#' jhu_csse_daily %>%
93+
#' jhu_csse_daily_subset %>%
9494
#' group_by(geo_value) %>%
9595
#' epi_slide(cases_7dav = mean(cases), n = 7,
9696
#' align = "right")
9797
#'
98-
#' # slide a left-aligned 7-day trailing average
99-
#' jhu_csse_daily %>%
98+
#' # slide a left-aligned 7-day average
99+
#' jhu_csse_daily_subset %>%
100100
#' group_by(geo_value) %>%
101101
#' epi_slide(cases_7dav = mean(cases), n = 7,
102102
#' align = "left")
103103
#'
104104
#' # nested new columns
105-
#' jhu_csse_daily %>%
105+
#' jhu_csse_daily_subset %>%
106106
#' group_by(geo_value) %>%
107107
#' epi_slide(a = data.frame(cases_2dav = mean(cases),
108108
#' cases_2dma = mad(cases)),

data-raw/archive_cases_dv.R

-34
This file was deleted.

data-raw/archive_cases_dv_subset.R

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
library(delphi.epidata)
2+
library(epiprocess)
3+
library(data.table)
4+
library(dplyr)
5+
6+
archive_cases_dv_subset <- covidcast(
7+
data_source = "doctor-visits",
8+
signals = "smoothed_adj_cli",
9+
time_type = "day",
10+
geo_type = "state",
11+
time_value = epirange(20200601, 20211201),
12+
geo_values = "ca,fl,ny,tx",
13+
issues = epirange(20200601, 20211201)
14+
) %>%
15+
fetch_tbl() %>%
16+
select(geo_value, time_value, version = issue, percent_cli = value) %>%
17+
as_epi_archive()
18+
19+
case_rate_subset <- covidcast(
20+
data_source = "jhu-csse",
21+
signals = "confirmed_7dav_incidence_prop",
22+
time_type = "day",
23+
geo_type = "state",
24+
time_value = epirange(20200601, 20211201),
25+
geo_values = "ca,fl,ny,tx",
26+
issues = epirange(20200601, 20211201)
27+
) %>%
28+
fetch_tbl() %>%
29+
select(geo_value, time_value, version = issue, case_rate_7d_av = value) %>%
30+
as_epi_archive()
31+
32+
epix_merge(archive_cases_dv_subset, case_rate_subset, all = TRUE)
33+
34+
usethis::use_data(archive_cases_dv_subset, overwrite = TRUE)
+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
library(delphi.epidata)
2+
library(epiprocess)
3+
library(dplyr)
4+
library(tidyr)
5+
6+
incidence_num_outlier_example <- covidcast(
7+
data_source = "jhu-csse",
8+
signals = "confirmed_incidence_num",
9+
time_type = "day",
10+
geo_type = "state",
11+
time_values = epirange(20200601, 20210531),
12+
geo_values = "fl,nj",
13+
as_of = 20211028
14+
) %>%
15+
fetch_tbl() %>%
16+
select(geo_value, time_value, cases = value) %>%
17+
as_epi_df()
18+
19+
usethis::use_data(incidence_num_outlier_example, overwrite = TRUE)
+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
library(delphi.epidata)
2+
library(covidcast)
3+
library(epiprocess)
4+
library(dplyr)
5+
6+
# Use covidcast::county_census to get the county and state names
7+
y <- covidcast::county_census %>%
8+
filter(STNAME %in% c("Massachusetts", "Vermont"), STNAME != CTYNAME) %>%
9+
select(geo_value = FIPS, county_name = CTYNAME, state_name = STNAME)
10+
11+
# Fetch only counties from Massachusetts and Vermont, then append names columns as well
12+
jhu_csse_county_level_subset <- covidcast(
13+
data_source = "jhu-csse",
14+
signals = "confirmed_incidence_num",
15+
time_type = "day",
16+
geo_type = "county",
17+
time_values = epirange(20200601, 20211231),
18+
geo_values = paste(y$geo_value, collapse = ",")
19+
) %>%
20+
fetch_tbl() %>%
21+
select(geo_value, time_value, cases = value) %>%
22+
full_join(y, by = "geo_value") %>%
23+
as_epi_df()
24+
25+
usethis::use_data(jhu_csse_county_level_subset, overwrite = TRUE)

0 commit comments

Comments
 (0)