Skip to content

Commit 0428c40

Browse files
committed
make the vignette render finally
1 parent 3bf0c58 commit 0428c40

File tree

8 files changed

+26
-46
lines changed

8 files changed

+26
-46
lines changed

NAMESPACE

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,5 @@ importFrom(stats,predict)
137137
importFrom(stats,qnorm)
138138
importFrom(stats,quantile)
139139
importFrom(stats,residuals)
140-
importFrom(stats,setNames)
141140
importFrom(tibble,is_tibble)
142141
importFrom(tibble,tibble)

R/data.R

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,12 @@
3636

3737
#' Subset of Statistics Canada median employment income for postsecondary graduates
3838
#'
39-
#' @format A tibble with 10193 rows and 8 variables:
39+
#' @format A tibble with 1607 rows and 7 variables:
4040
#' \describe{
4141
#' \item{geo_value}{The province in Canada associated with each
4242
#' row of measurements.}
4343
#' \item{time_value}{The time value, a year integer in YYYY format}
4444
#' \item{edu_qual}{The education qualification}
45-
#' \item{fos}{The field of study}
4645
#' \item{age_group}{The age group; either 15 to 34 or 35 to 64}
4746
#' \item{num_graduates}{The number of graduates for the given row of characteristics}
4847
#' \item{med_income_2y}{The median employment income two years after graduation}

data-raw/grad_employ_subset.R

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,9 @@ gemploy <- statcan_grad_employ %>%
7474
# Drop aggregates for some columns
7575
geo_value != "Canada" &
7676
age_group != "15 to 64 years" &
77-
fos != "Total, field of study" &
7877
edu_qual != "Total, educational qualification" &
7978
# Keep only the aggregate rows for keys we are dropping
79+
fos == "Total, field of study" &
8080
gender == "Total, gender" &
8181
student_status == "Canadian and international students" &
8282
# Since we're looking at 2y and 5y employment income, the only
@@ -89,7 +89,7 @@ gemploy <- statcan_grad_employ %>%
8989
is.na(status) &
9090
# Drop NA value rows
9191
!is.na(num_graduates) & !is.na(med_income_2y) & !is.na(med_income_5y)) %>%
92-
select(-c(status, gender, student_status, grad_charac))
92+
select(-c(status, gender, student_status, grad_charac, fos))
9393

9494
# gemploy$time_value %>% unique()
9595
# class(gemploy$fos)
@@ -99,16 +99,13 @@ gemploy <- statcan_grad_employ %>%
9999
nrow(gemploy)
100100
ncol(gemploy)
101101

102-
gemploy$grad_charac %>% unique()
103-
gemploy %>% group_by(grad_charac) %>% slice(1)
104-
105102
grad_employ_subset <- gemploy %>%
106103
tsibble::as_tsibble(
107104
index=time_value,
108-
key=c(geo_value, age_group, fos, edu_qual)) %>%
105+
key=c(geo_value, age_group, edu_qual)) %>%
109106
as_epi_df(
110107
geo_type = "custom", time_type = "year", as_of = "2022-07-19",
111-
additional_metadata=c(other_keys=list("age_group", "fos", "edu_qual")))
108+
additional_metadata = list(other_keys = c("age_group", "edu_qual")))
112109
usethis::use_data(grad_employ_subset, overwrite = TRUE)
113110

114111
# ================== EDA ==================

data/grad_employ_subset.rda

-31.4 KB
Binary file not shown.

man/bake.Rd

Lines changed: 6 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/epi_juice.Rd

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/grad_employ_subset.Rd

Lines changed: 1 addition & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vignettes/panel-data.Rmd

Lines changed: 11 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,9 @@ gemploy <- statcan_grad_employ %>%
122122
# Drop aggregates for some columns
123123
geo_value != "Canada" &
124124
age_group != "15 to 64 years" &
125-
fos != "Total, field of study" &
126125
edu_qual != "Total, educational qualification" &
127126
# Keep only the aggregate rows for keys we are dropping
127+
fos == "Total, field of study" &
128128
gender == "Total, gender" &
129129
student_status == "Canadian and international students" &
130130
# Since we're looking at 2y and 5y employment income, the only
@@ -138,7 +138,7 @@ gemploy <- statcan_grad_employ %>%
138138
# Drop NA value rows
139139
!is.na(num_graduates) & !is.na(med_income_2y) & !is.na(med_income_5y)
140140
) %>%
141-
select(-c(status, gender, student_status, grad_charac))
141+
select(-c(status, gender, student_status, grad_charac, fos))
142142
```
143143

144144
To use this data with `epipredict`, we need to convert it into `epi_df` format
@@ -156,11 +156,11 @@ a list of all the `time_type`s available.
156156
grad_employ_subset <- gemploy %>%
157157
tsibble::as_tsibble(
158158
index = time_value,
159-
key = c(geo_value, age_group, fos, edu_qual)
159+
key = c(geo_value, age_group, edu_qual)
160160
) %>%
161161
as_epi_df(
162162
geo_type = "custom", time_type = "year",
163-
additional_metadata = c(other_keys = list("age_group", "fos", "edu_qual"))
163+
additional_metadata = c(other_keys = list("age_group", "edu_qual"))
164164
)
165165
```
166166

@@ -202,34 +202,24 @@ As a simple example, let's work with the `num_graduates` column for now.
202202

203203
```{r employ-small, include=T}
204204
employ_small <- employ %>%
205-
group_by(geo_value, time_value, age_group, edu_qual) %>%
206-
summarise_if(is.numeric, sum) %>%
207-
ungroup() %>%
208205
# Incomplete data - exclude
209206
filter(geo_value != "Territories") %>%
210207
# Select groups where there are complete timeseries values
211208
group_by(geo_value, age_group, edu_qual) %>%
212209
filter(n() >= 6) %>%
213210
mutate(
214-
num_graduates_prop = num_graduates / sum(num_graduates)
215-
) %>%
216-
# med_income_2y_prop = med_income_2y / sum(med_income_2y),
217-
# med_income_5y_prop = med_income_5y / sum(med_income_5y)) %>%
218-
ungroup() %>%
219-
# select(-c(med_income_2y, med_income_5y, num_graduates)) %>%
220-
# Bug: shouldn't have to cast back to epi_df
221-
as_epi_df(
222-
geo_type = "custom",
223-
time_type = "year",
224-
additional_metadata = c(other_keys = list("age_group", "edu_qual")))
211+
num_graduates_prop = num_graduates / sum(num_graduates),
212+
med_income_2y_prop = med_income_2y / sum(med_income_2y),
213+
med_income_5y_prop = med_income_5y / sum(med_income_5y)) %>%
214+
ungroup()
225215
head(employ_small)
226216
```
227217

228218
Below is a visualization for a sample of the small data. Note that some groups
229219
do not have any time series information since we filtered out all time series
230220
with incomplete dates.
231221

232-
```{r employ-small-graph, include=F, eval=F}
222+
```{r employ-small-graph, include=T, eval=T}
233223
employ_small %>%
234224
filter(geo_value %in% c("British Columbia", "Ontario")) %>%
235225
filter(grepl("degree", edu_qual, fixed = T)) %>%
@@ -263,13 +253,7 @@ Our `epi_recipe` should add one `ahead` column representing $x_{t+1}$ and
263253
since we specified our `time_type` to be `year`, our `lag` and `lead`
264254
values are both in years.
265255

266-
```{r make-recipe, include=T, eval=F}
267-
# r <- epi_recipe(employ) %>%
268-
# step_epi_ahead(num_graduates, ahead = 1) %>% # lag & ahead units in years
269-
# step_epi_lag(num_graduates, lag = 0:2) %>%
270-
# step_epi_naomit()
271-
# r
272-
256+
```{r make-recipe, include=T, eval=T}
273257
r <- epi_recipe(employ_small) %>%
274258
step_epi_ahead(num_graduates_prop, ahead = 1) %>% # lag & ahead units in years
275259
step_epi_lag(num_graduates_prop, lag = 0:2) %>%
@@ -290,7 +274,7 @@ baked_sample <- r %>%
290274
prep() %>%
291275
bake(new_data = employ_small) %>%
292276
sample_n(5)
293-
# baked_sample
277+
baked_sample
294278
```
295279

296280
We can see that the `prep` and `bake` steps created new columns according to

0 commit comments

Comments
 (0)