Skip to content

Commit dbd72e6

Browse files
authored
Merge pull request #4 from cmu-delphi/ndefries/match-scripts-to-epiprocess
Move data script formatting improvements from `epiprocess`
2 parents ca86f03 + 7c0c12d commit dbd72e6

6 files changed

+48
-43
lines changed

DESCRIPTION

+1-1
Original file line number | Diff line number | Diff line change
@@ -28,5 +28,5 @@ Remotes:
2828
cmu-delphi/epiprocess
2929
Encoding: UTF-8
3030
LazyData: true
31-
RoxygenNote: 7.2.3
31+
RoxygenNote: 7.3.1
3232
URL: https://cmu-delphi.github.io/epidatasets/

data-raw/archive_cases_dv_subset_dt.R

+8-7
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,15 @@
1-
library(dplyr)
21
library(epidatr)
32
library(epiprocess)
3+
library(data.table)
4+
library(dplyr)
45

56
dv_subset <- pub_covidcast(
67
source = "doctor-visits",
78
signals = "smoothed_adj_cli",
8-
time_type = "day",
99
geo_type = "state",
10-
time_values = epirange(20200601, 20211201),
10+
time_type = "day",
1111
geo_values = "ca,fl,ny,tx",
12+
time_values = epirange(20200601, 20211201),
1213
issues = epirange(20200601, 20211201)
1314
) %>%
1415
select(geo_value, time_value, version = issue, percent_cli = value) %>%
@@ -19,16 +20,16 @@ dv_subset <- pub_covidcast(
1920
case_rate_subset <- pub_covidcast(
2021
source = "jhu-csse",
2122
signals = "confirmed_7dav_incidence_prop",
22-
time_type = "day",
2323
geo_type = "state",
24-
time_values = epirange(20200601, 20211201),
24+
time_type = "day",
2525
geo_values = "ca,fl,ny,tx",
26+
time_values = epirange(20200601, 20211201),
2627
issues = epirange(20200601, 20211201)
2728
) %>%
2829
select(geo_value, time_value, version = issue, case_rate_7d_av = value) %>%
2930
as_epi_archive(compactify = FALSE)
3031

31-
archive_cases_dv_subset = epix_merge(
32+
archive_cases_dv_subset <- epix_merge(
3233
dv_subset, case_rate_subset,
3334
sync = "locf",
3435
compactify = FALSE)
@@ -39,4 +40,4 @@ archive_cases_dv_subset = epix_merge(
3940
# objects; store the DT and construct the R6 object on request.
4041
archive_cases_dv_subset_dt <- archive_cases_dv_subset$DT
4142

42-
usethis::use_data(archive_cases_dv_subset_dt, overwrite = TRUE)
43+
usethis::use_data(archive_cases_dv_subset_dt, overwrite = TRUE, internal = TRUE)

data-raw/cases_deaths_subset.R

+30-27
Original file line number | Diff line number | Diff line change
@@ -1,58 +1,61 @@
1-
library(dplyr)
21
library(epidatr)
32
library(epiprocess)
3+
library(dplyr)
44

5-
confirmed_7dav_incidence_prop <- pub_covidcast(
5+
confirmed_incidence_num <- pub_covidcast(
66
source = "jhu-csse",
7-
signals = "confirmed_7dav_incidence_prop",
8-
time_type = "day",
7+
signals = "confirmed_incidence_num",
98
geo_type = "state",
9+
time_type = "day",
10+
geo_values = "ca,fl,ny,tx,ga,pa",
1011
time_values = epirange(20200301, 20211231),
11-
geo_values = "ca,fl,ny,tx,ga,pa"
1212
) %>%
13-
select(geo_value, time_value, case_rate_7d_av = value) %>%
13+
select(geo_value, time_value, cases = value) %>%
1414
arrange(geo_value, time_value)
1515

16-
deaths_7dav_incidence_prop <- pub_covidcast(
16+
confirmed_7dav_incidence_num <- pub_covidcast(
1717
source = "jhu-csse",
18-
signals = "deaths_7dav_incidence_prop",
19-
time_type = "day",
18+
signals = "confirmed_7dav_incidence_num",
2019
geo_type = "state",
20+
time_type = "day",
21+
geo_values = "ca,fl,ny,tx,ga,pa",
2122
time_values = epirange(20200301, 20211231),
22-
geo_values = "ca,fl,ny,tx,ga,pa"
2323
) %>%
24-
select(geo_value, time_value, death_rate_7d_av = value) %>%
24+
select(geo_value, time_value, cases_7d_av = value) %>%
2525
arrange(geo_value, time_value)
2626

27-
confirmed_incidence_num <- pub_covidcast(
27+
confirmed_7dav_incidence_prop <- pub_covidcast(
2828
source = "jhu-csse",
29-
signals = "confirmed_incidence_num",
30-
time_type = "day",
29+
signals = "confirmed_7dav_incidence_prop",
3130
geo_type = "state",
31+
time_type = "day",
32+
geo_values = "ca,fl,ny,tx,ga,pa",
3233
time_values = epirange(20200301, 20211231),
33-
geo_values = "ca,fl,ny,tx,ga,pa"
3434
) %>%
35-
select(geo_value, time_value, cases = value) %>%
35+
select(geo_value, time_value, case_rate_7d_av = value) %>%
3636
arrange(geo_value, time_value)
3737

38-
confirmed_7dav_incidence_num <- pub_covidcast(
38+
deaths_7dav_incidence_prop <- pub_covidcast(
3939
source = "jhu-csse",
40-
signals = "confirmed_7dav_incidence_num",
41-
time_type = "day",
40+
signals = "deaths_7dav_incidence_prop",
4241
geo_type = "state",
42+
time_type = "day",
43+
geo_values = "ca,fl,ny,tx,ga,pa",
4344
time_values = epirange(20200301, 20211231),
44-
geo_values = "ca,fl,ny,tx,ga,pa"
4545
) %>%
46-
select(geo_value, time_value, cases_7d_av = value) %>%
46+
select(geo_value, time_value, death_rate_7d_av = value) %>%
4747
arrange(geo_value, time_value)
4848

49-
cases_deaths_subset <- confirmed_7dav_incidence_prop %>%
50-
full_join(deaths_7dav_incidence_prop,
51-
by = c("geo_value", "time_value")) %>%
52-
full_join(confirmed_incidence_num,
53-
by = c("geo_value", "time_value")) %>%
49+
cases_deaths_subset <- confirmed_incidence_num %>%
5450
full_join(confirmed_7dav_incidence_num,
55-
by = c("geo_value", "time_value")) %>%
51+
by = c("geo_value", "time_value")
52+
) %>%
53+
full_join(confirmed_7dav_incidence_prop,
54+
by = c("geo_value", "time_value")
55+
) %>%
56+
full_join(deaths_7dav_incidence_prop,
57+
by = c("geo_value", "time_value")
58+
) %>%
5659
as_epi_df()
5760

5861
usethis::use_data(cases_deaths_subset, overwrite = TRUE)

data-raw/covid_incidence_county_subset.R

+5-5
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
1-
# Use covidcast::county_census to get the county and state names
2-
library(dplyr)
3-
library(covidcast)
41
library(epidatr)
2+
library(covidcast)
53
library(epiprocess)
4+
library(dplyr)
65

6+
# Use covidcast::county_census to get the county and state names
77
y <- covidcast::county_census %>%
88
filter(STNAME %in% c("Massachusetts", "Vermont"), STNAME != CTYNAME) %>%
99
select(geo_value = FIPS, county_name = CTYNAME, state_name = STNAME)
@@ -12,10 +12,10 @@ y <- covidcast::county_census %>%
1212
covid_incidence_county_subset <- pub_covidcast(
1313
source = "jhu-csse",
1414
signals = "confirmed_incidence_num",
15-
time_type = "day",
1615
geo_type = "county",
16+
time_type = "day",
17+
geo_values = paste(y$geo_value, collapse = ","),
1718
time_values = epirange(20200601, 20211231),
18-
geo_values = paste(y$geo_value, collapse = ",")
1919
) %>%
2020
select(geo_value, time_value, cases = value) %>%
2121
full_join(y, by = "geo_value") %>%

data-raw/covid_incidence_outliers.R

+4-3
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,15 @@
1-
library(dplyr)
21
library(epidatr)
32
library(epiprocess)
3+
library(dplyr)
4+
library(tidyr)
45

56
covid_incidence_outliers <- pub_covidcast(
67
source = "jhu-csse",
78
signals = "confirmed_incidence_num",
8-
time_type = "day",
99
geo_type = "state",
10-
time_values = epirange(20200601, 20210531),
10+
time_type = "day",
1111
geo_values = "fl,nj",
12+
time_values = epirange(20200601, 20210531),
1213
as_of = 20211028
1314
) %>%
1415
select(geo_value, time_value, cases = value) %>%

data/cases_deaths_subset.rda

332 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)