From eb206efc635b4487fed1a921fcc05ff27d906836 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 6 May 2022 14:30:44 -0700 Subject: [PATCH 01/36] Taken from Chloe's branch. --- data/ca_daily_cases.rda | Bin 0 -> 1775 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 data/ca_daily_cases.rda diff --git a/data/ca_daily_cases.rda b/data/ca_daily_cases.rda new file mode 100644 index 0000000000000000000000000000000000000000..bc9839aba89c778a70dc014a78c80091a13d8232 GIT binary patch literal 1775 zcmai!do0x&U2pc_dL%z-#@o+Ra6XvQ;@#*$Jxdhf?>9oJ1_o2||5W%5=+=}HH5b7DYd(@CIy8-^ z=vf5-0|5d61dxv50*2@z{Ek#AvIt{+*9}M{9!WaFTmoz)d>B!{<8@y z%7#tP*wFPqyugSk-th2(^#vIDfsD(X0;^L*1NC9lk%EzP3b9EAl_`;kLF=g~Zy6uO z66hK8U;}V}EA>tSOi;maTO{89k?u6qx>46W1`0x?zrCj-&BP~}m?|Tyt?W{x6?v3mZ+4*yS z+;O`aSs1PBs`BZX_)t8*a%}jRuX%turo0H}ULw6V{16PSCL!X5v)I*B!XT+&r87&E z?xKG;pWYUtp&{?Z>T-JU6X1`~Cvp_*VjkxWVL|@*ud^U!;a;S0j zcI=wu#buysRhtGJj-`Kx=3j~s4Xmr7w*_^k4_+r=)od4Eqhg&c?l^0;B??51q=%7Wp8bSmV zUMgjbr+iYj-*>EgsK`u$w$S3z(SA5dKPc#SI3oz1G}x^rQ#(PWqdTXAZSO62g9@!| zCCv=F@RD$%1$k9rTq?L^noa;-fB8+~w&i29EfCfBsUiO3pLKe6V^@Vs3$bByX=7B2 z_EJt@BPl5k2CZLBjl3hHmv_0sVbZj)PZQI=$Nu^tozg*%$XDcHsY%Rj9k0QNz3)Mj zYJa?&0$5qLPiUKugsIdy8iB&bDp$UuNY;ry39r&D1)a>yizrf2!lt6b_gFb@!mT(T z-V}e_(rT!;C*U-h-==Qs4u2xq_&S%gJG%dFX}M$Ki^pGk&nCZ9(Fbs!z|#vJh`^?; zjI^X6)jAwXqFo4ADww*@RF9+`3b6U_aoS>9zUSKiA1sy#=>#vbyW-zn;+W61I`o}n zc`Z-LHKQ*RXUfW>1wXu<))iIQm8DNbqFRW8Fg`0UdM4BQqlMwRnIWm@?nOn`4qx|$ zLd}Zj*y==bgg2UOm;a&sk5=bfb2W3yo_&nIbfg0W3%;2~-RBbeh##~#1tumLu^r&P zokH6aTGE-&347IX98DUk&bU)-lyW83B=Ui;A3OnlZ+lf7RA5}8B1aZte@&7+$GbRq zINr#n>^A@9t(EK3oI*^?i&8%W+o2k4G?Tn`4&efcCCU9Kqw4CHj(VTT06!a*KTOO# zrCgBGKaL=5;&Wly1cv8PUshhuY?dZFaM$miNd)i08eF&VFRl}+I=s#eMG?K|2HyB^ z*cY#QS`%9dEn~bBpU>-)4Yx)=R;DIq4sDP3SnYv`VqcY(e`wR7nA1g{GV5rH!y+-7 zpW*?V>deQ_^?_;iF@bTET)V z(LZqhfaeRdUe74`*mMK5YT>-?XkzEEUv4N}L$^H0E+g1+%77-@?b$D!n(G+aEB13l o`j2v*23vojVBZnC!1gKfMjE0uJ#A7~x1_0b?P8ax5^DDR2Pb(HnE(I) literal 0 HcmV?d00001 From a1b734c9829d8e2e0f7a39d70ab93c474fddc49b Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 9 May 2022 09:13:47 -0700 Subject: [PATCH 02/36] Updated growth_rate with an example. --- R/growth_rate.R | 52 +++++++++++++++++++++++++++++++++++++++++++++- man/growth_rate.Rd | 3 +++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/R/growth_rate.R b/R/growth_rate.R index 6f3d38f2..a84c522f 100644 --- a/R/growth_rate.R +++ b/R/growth_rate.R @@ -100,12 +100,62 @@ #' user. #' #' @export +#' @examples +#' library(delphi.epidata) +#' library(epiprocess) +#' library(dplyr) +#' library(tidyr) +#' +#' # We then want to load the dataset for this example. +#' x <- covidcast( +#' data_source = "jhu-csse", +#' signals = "confirmed_7dav_incidence_num", +#' time_type = "day", +#' geo_type = "state", +#' time_values = epirange(20200601, 20211231), +#' geo_values = "ga,pa" +#' ) %>% +#' fetch_tbl() %>% +#' select(geo_value, time_value, cases = value) %>% +#' arrange(geo_value, time_value) %>% +#' as_epi_df() +#' +#' # We want to group by geographical values and make a new column using mutate to +#' # calculate the growth rate of cases. +#' +#' x <- x %>% +#' group_by(geo_value) %>% +#' mutate(cases_gr1 = growth_rate(time_value, cases)) +#' +#' head(x) +#' +#' # Note how these values can be used to denote growth and decline rates, such as +#' # in this graph below, with red denoting growth and blue denoting decline. +#' +#' library(ggplot2) +#' theme_set(theme_bw()) +#' +#' upper = 0.01 +#' lower = -0.01 +#' +#' ggplot(x, aes(x = time_value, y = cases)) + +#' geom_tile(data = x %>% filter(cases_gr1 >= upper), +#' aes(x = time_value, y = 0, width = 7, height = Inf), +#' fill = 2, alpha = 0.08) + +#' geom_tile(data = x %>% filter(cases_gr1 <= lower), +#' aes(x = time_value, y = 0, width = 7, height = Inf), +#' fill = 4, alpha = 0.08) + +#' geom_line() + +#' facet_wrap(vars(geo_value), scales = "free_y") + +#' scale_x_date(minor_breaks = "month", date_labels = "%b %y") + +#' labs(x = "Date", y = "Reported COVID-19 cases") + growth_rate = function(x = seq_along(y), y, x0 = x, method = c("rel_change", "linear_reg", "smooth_spline", "trend_filter"), h = 7, log_scale = FALSE, dup_rm = FALSE, na_rm = FALSE, ...) { - # Check x, y, x0 +# Check x, y, x0 if (length(x) != length(y)) Abort("`x` and `y` must have the same length.") if (!all(x0 %in% x)) Abort("`x0` must be a subset of `x`.") diff --git a/man/growth_rate.Rd b/man/growth_rate.Rd index 0ce68771..652e8b95 100644 --- a/man/growth_rate.Rd +++ b/man/growth_rate.Rd @@ -134,3 +134,6 @@ user. } } +\examples{ +x = rnorm(5) +} From 8eb16cf6ad2d52385268bbbe5c3e0c4b573bbc43 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 9 May 2022 10:37:09 -0700 Subject: [PATCH 03/36] Made example much shorter. --- R/growth_rate.R | 51 ++-------------------------------------------- man/growth_rate.Rd | 2 +- 2 files changed, 3 insertions(+), 50 deletions(-) diff --git a/R/growth_rate.R b/R/growth_rate.R index a84c522f..e41ff8e9 100644 --- a/R/growth_rate.R +++ b/R/growth_rate.R @@ -101,61 +101,14 @@ #' #' @export #' @examples -#' library(delphi.epidata) -#' library(epiprocess) -#' library(dplyr) -#' library(tidyr) -#' -#' # We then want to load the dataset for this example. -#' x <- covidcast( -#' data_source = "jhu-csse", -#' signals = "confirmed_7dav_incidence_num", -#' time_type = "day", -#' geo_type = "state", -#' time_values = epirange(20200601, 20211231), -#' geo_values = "ga,pa" -#' ) %>% -#' fetch_tbl() %>% -#' select(geo_value, time_value, cases = value) %>% -#' arrange(geo_value, time_value) %>% -#' as_epi_df() -#' -#' # We want to group by geographical values and make a new column using mutate to -#' # calculate the growth rate of cases. -#' -#' x <- x %>% -#' group_by(geo_value) %>% -#' mutate(cases_gr1 = growth_rate(time_value, cases)) -#' -#' head(x) -#' -#' # Note how these values can be used to denote growth and decline rates, such as -#' # in this graph below, with red denoting growth and blue denoting decline. -#' -#' library(ggplot2) -#' theme_set(theme_bw()) -#' -#' upper = 0.01 -#' lower = -0.01 -#' -#' ggplot(x, aes(x = time_value, y = cases)) + -#' geom_tile(data = x %>% filter(cases_gr1 >= upper), -#' aes(x = time_value, y = 0, width = 7, height = Inf), -#' fill = 2, alpha = 0.08) + -#' geom_tile(data = x %>% filter(cases_gr1 <= lower), -#' aes(x = time_value, y = 0, width = 7, height = Inf), -#' fill = 4, alpha = 0.08) + -#' geom_line() + -#' facet_wrap(vars(geo_value), scales = "free_y") + -#' scale_x_date(minor_breaks = "month", date_labels = "%b %y") + -#' labs(x = "Date", y = "Reported COVID-19 cases") +#' head(growth_rate(ca_daily_cases$time_value, ca_daily_cases$cases)) growth_rate = function(x = seq_along(y), y, x0 = x, method = c("rel_change", "linear_reg", "smooth_spline", "trend_filter"), h = 7, log_scale = FALSE, dup_rm = FALSE, na_rm = FALSE, ...) { -# Check x, y, x0 + # Check x, y, x0 if (length(x) != length(y)) Abort("`x` and `y` must have the same length.") if (!all(x0 %in% x)) Abort("`x0` must be a subset of `x`.") diff --git a/man/growth_rate.Rd b/man/growth_rate.Rd index 652e8b95..25478528 100644 --- a/man/growth_rate.Rd +++ b/man/growth_rate.Rd @@ -135,5 +135,5 @@ user. } \examples{ -x = rnorm(5) +head(growth_rate(ca_daily_cases$time_value, ca_daily_cases$cases)) } From 8bbbf78e6c5b098ffbccb9fbb767cff196fc6a77 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 9 May 2022 13:10:20 -0700 Subject: [PATCH 04/36] Shortened growth_rate example. --- R/growth_rate.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/growth_rate.R b/R/growth_rate.R index e41ff8e9..cc61f4f4 100644 --- a/R/growth_rate.R +++ b/R/growth_rate.R @@ -101,7 +101,7 @@ #' #' @export #' @examples -#' head(growth_rate(ca_daily_cases$time_value, ca_daily_cases$cases)) +#' growth_rate(ca_daily_cases$time_value, ca_daily_cases$cases) growth_rate = function(x = seq_along(y), y, x0 = x, method = c("rel_change", "linear_reg", From 84eaba9cd7331705b3197273f464d22880477cb2 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 9 May 2022 13:10:53 -0700 Subject: [PATCH 05/36] Shortened example for growth_rate after using document(). --- man/growth_rate.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/growth_rate.Rd b/man/growth_rate.Rd index 25478528..22c39036 100644 --- a/man/growth_rate.Rd +++ b/man/growth_rate.Rd @@ -135,5 +135,5 @@ user. } \examples{ -head(growth_rate(ca_daily_cases$time_value, ca_daily_cases$cases)) +growth_rate(ca_daily_cases$time_value, ca_daily_cases$cases) } From cb65ca386513c00a143bb723cbc03dd5a8175562 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 10 May 2022 13:17:33 -0700 Subject: [PATCH 06/36] Updated examples to be more specific. --- R/growth_rate.R | 12 +++++++++++- man/growth_rate.Rd | 12 +++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/R/growth_rate.R b/R/growth_rate.R index cc61f4f4..c84bad00 100644 --- a/R/growth_rate.R +++ b/R/growth_rate.R @@ -101,7 +101,17 @@ #' #' @export #' @examples -#' growth_rate(ca_daily_cases$time_value, ca_daily_cases$cases) +#' Y <- ca_daily_cases$cases +#' z <- growth_rate(y=Y) # Simple example +#' # The values of smooth splines differ from the default rel_change +#' plot(z, growth_rate(y=Y,method="smooth_spline")) +#' # More use of different variables: +#' growth_rate( +#' x = ca_daily_cases$time_value, # Case dates +#' y = Y, # Number of cases is what we measure, as always +#' ord = 4, # order (degree) 4 polynomial +#' k = 6 # Cross validate with 6 folds +#' ) growth_rate = function(x = seq_along(y), y, x0 = x, method = c("rel_change", "linear_reg", diff --git a/man/growth_rate.Rd b/man/growth_rate.Rd index 22c39036..1639a9bd 100644 --- a/man/growth_rate.Rd +++ b/man/growth_rate.Rd @@ -135,5 +135,15 @@ user. } \examples{ -growth_rate(ca_daily_cases$time_value, ca_daily_cases$cases) +Y <- ca_daily_cases$cases +z <- growth_rate(y=Y) # Simple example +# The values of smooth splines differ from the default rel_change +plot(z, growth_rate(y=Y,method="smooth_spline")) +# More use of different variables: +growth_rate( + x = ca_daily_cases$time_value, # Case dates + y = Y, # Number of cases is what we measure, as always + ord = 4, # order (degree) 4 polynomial + k = 6 # Cross validate with 6 folds +) } From c6a88f36cfce8434339c82085df4b446789ae9a1 Mon Sep 17 00:00:00 2001 From: admin Date: Tue, 10 May 2022 16:53:16 -0700 Subject: [PATCH 07/36] Added \n to last sprintf statement --- R/archive.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/archive.R b/R/archive.R index f436d509..69d5c208 100644 --- a/R/archive.R +++ b/R/archive.R @@ -180,7 +180,7 @@ epi_archive = cat(sprintf("Data archive (stored in DT field): %i x %i\n", nrow(self$DT), ncol(self$DT))) cat("----------\n") - cat(sprintf("Public methods: %s", + cat(sprintf("Public methods: %s\n", paste(names(epi_archive$public_methods), collapse = ", "))) }, From a5780ed454666f7de394640e472c1d8144271ee7 Mon Sep 17 00:00:00 2001 From: admin Date: Tue, 10 May 2022 19:24:17 -0700 Subject: [PATCH 08/36] Testing out creation of error message (note not yet refined) --- R/slide.R | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/R/slide.R b/R/slide.R index ce563187..05ab50b6 100644 --- a/R/slide.R +++ b/R/slide.R @@ -102,8 +102,7 @@ epi_slide = function(x, f, ..., n = 7, ref_time_values, # intersect with observed time values if (missing(ref_time_values)) { ref_time_values = unique(x$time_value) - } - else { + } else { ref_time_values = ref_time_values[ref_time_values %in% unique(x$time_value)] } @@ -142,9 +141,18 @@ epi_slide = function(x, f, ..., n = 7, ref_time_values, } # Now set up starts and stops for sliding/hopping - time_range = range(unique(x$time_value)) + time_range = range(unique(x$time_value)) #%% + #print(time_range) starts = in_range(ref_time_values - before_num, time_range) + print(starts) + print(length(starts)) + #class(starts) stops = in_range(ref_time_values + after_num, time_range) + print(stops) + + if(length(starts) == 0 & length(stops) == 0){ + Abort("The starting and/or stopping dates used to construct the slide windows are out of bounds with respect to the time range in your data. Check the values used for ref_time_values and align (or before, depending on which of align or before you've used).") + } # Symbolize new column name new_col = sym(new_col_name) From 2c84c10657f4bdd28359c2cf87cdeea3da0308f5 Mon Sep 17 00:00:00 2001 From: admin Date: Tue, 10 May 2022 20:00:18 -0700 Subject: [PATCH 09/36] Probs should be or --- R/slide.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/slide.R b/R/slide.R index 05ab50b6..c2b781d0 100644 --- a/R/slide.R +++ b/R/slide.R @@ -150,7 +150,7 @@ epi_slide = function(x, f, ..., n = 7, ref_time_values, stops = in_range(ref_time_values + after_num, time_range) print(stops) - if(length(starts) == 0 & length(stops) == 0){ + if(length(starts) == 0 | length(stops) == 0){ Abort("The starting and/or stopping dates used to construct the slide windows are out of bounds with respect to the time range in your data. Check the values used for ref_time_values and align (or before, depending on which of align or before you've used).") } From c3e32b8d354fd012e336539410265b405bdb664b Mon Sep 17 00:00:00 2001 From: admin Date: Wed, 11 May 2022 01:16:42 -0700 Subject: [PATCH 10/36] Updated error message --- R/slide.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/slide.R b/R/slide.R index c2b781d0..55129e1d 100644 --- a/R/slide.R +++ b/R/slide.R @@ -144,14 +144,14 @@ epi_slide = function(x, f, ..., n = 7, ref_time_values, time_range = range(unique(x$time_value)) #%% #print(time_range) starts = in_range(ref_time_values - before_num, time_range) - print(starts) - print(length(starts)) + #print(starts) + #print(length(starts)) #class(starts) stops = in_range(ref_time_values + after_num, time_range) - print(stops) + #print(stops) if(length(starts) == 0 | length(stops) == 0){ - Abort("The starting and/or stopping dates used to construct the slide windows are out of bounds with respect to the time range in your data. Check the values used for ref_time_values and align (or before, depending on which of align or before you've used).") + Abort("The starting and/or stopping times for sliding are out of bounds with respect to the range of times in your data. Check the values used for ref_time_values and align (or before, depending on which of align or before you've used).") } # Symbolize new column name From 7ba7434891b3aa76991db7f29b56e57042082daf Mon Sep 17 00:00:00 2001 From: admin Date: Wed, 11 May 2022 01:19:29 -0700 Subject: [PATCH 11/36] Update error message again --- R/slide.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/slide.R b/R/slide.R index 55129e1d..11670336 100644 --- a/R/slide.R +++ b/R/slide.R @@ -151,7 +151,7 @@ epi_slide = function(x, f, ..., n = 7, ref_time_values, #print(stops) if(length(starts) == 0 | length(stops) == 0){ - Abort("The starting and/or stopping times for sliding are out of bounds with respect to the range of times in your data. Check the values used for ref_time_values and align (or before, depending on which of align or before you've used).") + Abort("The starting and/or stopping times for sliding are out of bounds with respect to the range of times in your data. Check the values used for ref_time_values and align (or before if that was specified instead).") } # Symbolize new column name From 2ca40221693161faf9defcc0ca49f1dfba95d1d5 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 12 May 2022 11:42:52 -0700 Subject: [PATCH 12/36] Updated examples for less redundancy. --- R/growth_rate.R | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/R/growth_rate.R b/R/growth_rate.R index c84bad00..e41eb068 100644 --- a/R/growth_rate.R +++ b/R/growth_rate.R @@ -101,14 +101,13 @@ #' #' @export #' @examples -#' Y <- ca_daily_cases$cases -#' z <- growth_rate(y=Y) # Simple example -#' # The values of smooth splines differ from the default rel_change -#' plot(z, growth_rate(y=Y,method="smooth_spline")) -#' # More use of different variables: +#' # Example adding California's growth rate in COVID cases +#' mutate(ca_daily_cases, gr = growth_rate(time_value,cases)) +#' +#' # Example using degree 4 polynomial and 6-fold cross validation #' growth_rate( #' x = ca_daily_cases$time_value, # Case dates -#' y = Y, # Number of cases is what we measure, as always +#' y = ca_daily_cases$cases # Cases each day #' ord = 4, # order (degree) 4 polynomial #' k = 6 # Cross validate with 6 folds #' ) From 330a649c72fce598469aed41d528a84a0baee6b3 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 12 May 2022 11:44:44 -0700 Subject: [PATCH 13/36] Commit with updated files and fewer example redundancies. --- R/growth_rate.R | 10 ++++------ man/growth_rate.Rd | 15 ++++++--------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/R/growth_rate.R b/R/growth_rate.R index e41eb068..2fcb8863 100644 --- a/R/growth_rate.R +++ b/R/growth_rate.R @@ -101,15 +101,13 @@ #' #' @export #' @examples -#' # Example adding California's growth rate in COVID cases +#' # California's growth rate in COVID cases #' mutate(ca_daily_cases, gr = growth_rate(time_value,cases)) #' -#' # Example using degree 4 polynomial and 6-fold cross validation +#' # Log scale, degree 4 polynomial and 6-fold cross validation #' growth_rate( -#' x = ca_daily_cases$time_value, # Case dates -#' y = ca_daily_cases$cases # Cases each day -#' ord = 4, # order (degree) 4 polynomial -#' k = 6 # Cross validate with 6 folds +#' x = ca_daily_cases$time_value, y = ca_daily_cases$cases, +#' log_scale = TRUE, ord = 4, k = 6 #' ) growth_rate = function(x = seq_along(y), y, x0 = x, diff --git a/man/growth_rate.Rd b/man/growth_rate.Rd index 1639a9bd..3bb29aa0 100644 --- a/man/growth_rate.Rd +++ b/man/growth_rate.Rd @@ -135,15 +135,12 @@ user. } \examples{ -Y <- ca_daily_cases$cases -z <- growth_rate(y=Y) # Simple example -# The values of smooth splines differ from the default rel_change -plot(z, growth_rate(y=Y,method="smooth_spline")) -# More use of different variables: +# California's growth rate in COVID cases + mutate(ca_daily_cases, gr = growth_rate(time_value,cases)) + +# Log scale, degree 4 polynomial and 6-fold cross validation growth_rate( - x = ca_daily_cases$time_value, # Case dates - y = Y, # Number of cases is what we measure, as always - ord = 4, # order (degree) 4 polynomial - k = 6 # Cross validate with 6 folds + x = ca_daily_cases$time_value, y = ca_daily_cases$cases, + log_scale = TRUE, ord = 4, k = 6 ) } From 648ee625560542b460567e2f80b296866e14f1e0 Mon Sep 17 00:00:00 2001 From: admin Date: Thu, 12 May 2022 15:27:52 -0700 Subject: [PATCH 14/36] Took out print statements that were used for testing --- R/slide.R | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/R/slide.R b/R/slide.R index 11670336..06fcb9b0 100644 --- a/R/slide.R +++ b/R/slide.R @@ -142,13 +142,18 @@ epi_slide = function(x, f, ..., n = 7, ref_time_values, # Now set up starts and stops for sliding/hopping time_range = range(unique(x$time_value)) #%% - #print(time_range) + print(time_range[1]) + print(time_range[2]) + print(ref_time_values - before_num) starts = in_range(ref_time_values - before_num, time_range) - #print(starts) + #print(ref_time_values) + print(starts) #print(length(starts)) #class(starts) stops = in_range(ref_time_values + after_num, time_range) - #print(stops) + print(stops) + print(before_num) + print(after_num) if(length(starts) == 0 | length(stops) == 0){ Abort("The starting and/or stopping times for sliding are out of bounds with respect to the range of times in your data. Check the values used for ref_time_values and align (or before if that was specified instead).") From 7c636a7415a490ca8ab2d4b49f261ee46014848a Mon Sep 17 00:00:00 2001 From: admin Date: Thu, 12 May 2022 15:28:21 -0700 Subject: [PATCH 15/36] Took out print statement and clarified error message --- R/slide.R | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/R/slide.R b/R/slide.R index 06fcb9b0..55b11f42 100644 --- a/R/slide.R +++ b/R/slide.R @@ -141,22 +141,12 @@ epi_slide = function(x, f, ..., n = 7, ref_time_values, } # Now set up starts and stops for sliding/hopping - time_range = range(unique(x$time_value)) #%% - print(time_range[1]) - print(time_range[2]) - print(ref_time_values - before_num) + time_range = range(unique(x$time_value)) starts = in_range(ref_time_values - before_num, time_range) - #print(ref_time_values) - print(starts) - #print(length(starts)) - #class(starts) stops = in_range(ref_time_values + after_num, time_range) - print(stops) - print(before_num) - print(after_num) if(length(starts) == 0 | length(stops) == 0){ - Abort("The starting and/or stopping times for sliding are out of bounds with respect to the range of times in your data. Check the values used for ref_time_values and align (or before if that was specified instead).") + Abort("The starting and/or stopping times for sliding are out of bounds with respect to the range of times in your data. Check your settings for ref_time_values and align (and before, if specified).") } # Symbolize new column name From 7ddcbbe46bce7ee1760d76c6a2cdfd764a5b9d80 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 13 May 2022 11:09:10 -0700 Subject: [PATCH 16/36] Updated with new dataset. --- R/growth_rate.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/growth_rate.R b/R/growth_rate.R index 2fcb8863..c2bedbed 100644 --- a/R/growth_rate.R +++ b/R/growth_rate.R @@ -102,11 +102,11 @@ #' @export #' @examples #' # California's growth rate in COVID cases -#' mutate(ca_daily_cases, gr = growth_rate(time_value,cases)) +#' mutate(jhu_csse_daily, gr = growth_rate(time_value,cases)) #' #' # Log scale, degree 4 polynomial and 6-fold cross validation #' growth_rate( -#' x = ca_daily_cases$time_value, y = ca_daily_cases$cases, +#' x = jhu_csse_daily$time_value, y = jhu_csse_daily$cases, #' log_scale = TRUE, ord = 4, k = 6 #' ) From a1a0ec27d863c519aea971eea2c1b1575ec8f8e3 Mon Sep 17 00:00:00 2001 From: admin Date: Fri, 13 May 2022 15:06:31 -0700 Subject: [PATCH 17/36] Added testing and made stylistic changes as per pull request comments --- R/archive.R | 99 ++++++++++++++++----------------- R/slide.R | 2 +- tests/testthat/test-epi_slide.R | 23 ++++++++ 3 files changed, 73 insertions(+), 51 deletions(-) create mode 100644 tests/testthat/test-epi_slide.R diff --git a/R/archive.R b/R/archive.R index 7154ef4f..f436d509 100644 --- a/R/archive.R +++ b/R/archive.R @@ -16,7 +16,7 @@ #' @details An `epi_archive` is an R6 class which contains a data table `DT`, of #' class `data.table` from the `data.table` package, with (at least) the #' following columns: -#' +#' #' * `geo_value`: the geographic value associated with each row of measurements. #' * `time_value`: the time value associated with each row of measurements. #' * `version`: the time value specifying the version for each row of @@ -31,7 +31,7 @@ #' on `DT` directly). There can only be a single row per unique combination of #' key variables, and thus the key variables are critical for figuring out how #' to generate a snapshot of data from the archive, as of a given version. -#' +#' #' In general, last observation carried forward (LOCF) is used to data in #' between recorded versions. Currently, deletions must be represented as #' revising a row to a special state (e.g., making the entries `NA` or @@ -43,7 +43,7 @@ #' reference semantics. A primary consequence of this is that objects are not #' copied when modified. You can read more about this in Hadley Wickham's #' [Advanced R](https://adv-r.hadley.nz/r6.html#r6-semantics) book. -#' +#' #' @section Metadata: #' The following pieces of metadata are included as fields in an `epi_archive` #' object: @@ -75,8 +75,7 @@ #' sliding computation at any given reference time point t is performed on #' **data that would have been available as of t**. More details on `slide()` #' are documented in the wrapper function `epix_slide()`. -#' -#' @importFrom R6 R6Class +#' #' @export epi_archive = R6::R6Class( @@ -89,7 +88,7 @@ epi_archive = additional_metadata = NULL, #' @description Creates a new `epi_archive` object. #' @param x A data frame, data table, or tibble, with columns `geo_value`, -#' `time_value`, `version`, and then any additional number of columns. +#' `time_value`, `version`, and then any additional number of columns. #' @param geo_type Type for the geo values. If missing, then the function will #' attempt to infer it from the geo values present; if this fails, then it #' will be set to "custom". @@ -105,12 +104,12 @@ epi_archive = #' @return An `epi_archive` object. #' @importFrom data.table as.data.table key setkeyv initialize = function(x, geo_type, time_type, other_keys, - additional_metadata) { + additional_metadata) { # Check that we have a data frame if (!is.data.frame(x)) { Abort("`x` must be a data frame.") } - + # Check that we have geo_value, time_value, version columns if (!("geo_value" %in% names(x))) { Abort("`x` must contain a `geo_value` column.") @@ -121,7 +120,7 @@ epi_archive = if (!("version" %in% names(x))) { Abort("`x` must contain a `version` column.") } - + # If geo type is missing, then try to guess it if (missing(geo_type)) { geo_type = guess_geo_type(x$geo_value) @@ -131,7 +130,7 @@ epi_archive = if (missing(time_type)) { time_type = guess_time_type(x$time_value) } - + # Finish off with small checks on keys variables and metadata if (missing(other_keys)) other_keys = NULL if (missing(additional_metadata)) additional_metadata = list() @@ -145,7 +144,7 @@ epi_archive = c("geo_type", "time_type"))) { Warn("`additional_metadata` names overlap with existing metadata fields \"geo_type\", \"time_type\".") } - + # Create the data table; if x was an un-keyed data.table itself, # then the call to as.data.table() will fail to set keys, so we # need to check this, then do it manually if needed @@ -163,8 +162,8 @@ epi_archive = cat("An `epi_archive` object, with metadata:\n") cat(sprintf("* %-9s = %s\n", "geo_type", self$geo_type)) cat(sprintf("* %-9s = %s\n", "time_type", self$time_type)) - if (!is.null(self$additional_metadata)) { - sapply(self$additional_metadata, function(m) { + if (!is.null(self$additional_metadata)) { + sapply(self$additional_metadata, function(m) { cat(sprintf("* %-9s = %s\n", names(m), m)) }) } @@ -178,7 +177,7 @@ epi_archive = cat(sprintf("* %-14s = %s\n", "max version", max(self$DT$version))) cat("----------\n") - cat(sprintf("Data archive (stored in DT field): %i x %i\n", + cat(sprintf("Data archive (stored in DT field): %i x %i\n", nrow(self$DT), ncol(self$DT))) cat("----------\n") cat(sprintf("Public methods: %s", @@ -195,7 +194,7 @@ epi_archive = other_keys = setdiff(key(self$DT), c("geo_value", "time_value", "version")) if (length(other_keys) == 0) other_keys = NULL - + # Check a few things on max_version if (!identical(class(max_version), class(self$DT$version))) { Abort("`max_version` and `DT$version` must have same class.") @@ -209,17 +208,17 @@ epi_archive = if (max_version == self_max) { Warn("Getting data as of the latest version possible. For a variety of reasons, it is possible that we only have a preliminary picture of this version (e.g., the upstream source has updated it but we have not seen it due to latency in synchronization). Thus, the snapshot that we produce here might not be reproducible at a later time (e.g., when the archive has caught up in terms of synchronization).") } - + # Filter by version and return return( - # Make sure to use data.table ways of filtering and selecting + # Make sure to use data.table ways of filtering and selecting self$DT[between(time_value, min_time_value, max_version) & version <= max_version, ] %>% unique(by = c("geo_value", "time_value", other_keys), fromLast = TRUE) %>% - tibble::as_tibble() %>% + tibble::as_tibble() %>% dplyr::select(-.data$version) %>% as_epi_df(geo_type = self$geo_type, time_type = self$time_type, @@ -227,7 +226,7 @@ epi_archive = additional_metadata = c(self$additional_metadata, other_keys = other_keys)) ) - }, + }, ##### #' @description Merges another `data.table` with the current one, and allows for #' a post-filling of `NA` values by last observation carried forward (LOCF). @@ -236,7 +235,7 @@ epi_archive = merge = function(y, ..., locf = TRUE, nan = NA) { # Check we have a `data.table` object if (!(inherits(y, "data.table") || inherits(y, "epi_archive"))) { - Abort("`y` must be of class `data.table` or `epi_archive`.") + Abort("`y` must be of class `data.table` or `epi_archive`.") } # Use the data.table merge function, carrying through ... args @@ -251,25 +250,25 @@ epi_archive = # Important: use nafill and not setnafill because the latter # returns the entire data frame by reference, and the former can - # be set to act on particular columns by reference using := + # be set to act on particular columns by reference using := self$DT[, - (cols) := nafill(.SD, type = "locf", nan = nan), - .SDcols = cols, + (cols) := nafill(.SD, type = "locf", nan = nan), + .SDcols = cols, by = by] } - }, + }, ##### #' @description Slides a given function over variables in an `epi_archive` #' object. See the documentation for the wrapper function `epix_as_of()` for -#' details. +#' details. #' @importFrom data.table key #' @importFrom rlang !! !!! enquo enquos is_quosure sym syms - slide = function(f, ..., n = 7, group_by, ref_time_values, + slide = function(f, ..., n = 7, group_by, ref_time_values, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", - all_rows = FALSE) { + all_rows = FALSE) { # If missing, then set ref time values to be everything; else make - # sure we intersect with observed time values + # sure we intersect with observed time values if (missing(ref_time_values)) { ref_time_values = unique(self$DT$time_value) } @@ -277,16 +276,16 @@ epi_archive = ref_time_values = ref_time_values[ref_time_values %in% unique(self$DT$time_value)] } - - # If a custom time step is specified, then redefine units + + # If a custom time step is specified, then redefine units before_num = n-1 if (!missing(time_step)) before_num = time_step(n-1) - + # What to group by? If missing, set according to internal keys if (missing(group_by)) { group_by = setdiff(key(self$DT), c("time_value", "version")) } - + # Symbolize column name, defuse grouping variables. We have to do # the middle step here which is a bit complicated (unfortunately) # since the function epix_slide() could have called the current one, @@ -298,20 +297,20 @@ epi_archive = # Key variable names, apart from time value and version key_vars = setdiff(key(self$DT), c("time_value", "version")) - + # Computation for one group, one time value comp_one_grp = function(.data_group, - f, ..., + f, ..., time_value, key_vars, new_col) { - # Carry out the specified computation + # Carry out the specified computation comp_value = f(.data_group, ...) # Count the number of appearances of the reference time value. # Note: ideally, we want to directly count occurrences of the ref # time value but due to latency, this will often not appear in the - # data group. So we count the number of unique key values, outside + # data group. So we count the number of unique key values, outside # of the time value column count = sum(!duplicated(.data_group[, key_vars])) @@ -345,23 +344,23 @@ epi_archive = else { Abort("The slide computation must return an atomic vector or a data frame.") } - + # Note that we've already recycled comp value to make size stable, # so tibble() will just recycle time value appropriately - return(tibble::tibble(time_value = time_value, + return(tibble::tibble(time_value = time_value, !!new_col := comp_value)) } - + # If f is not missing, then just go ahead, slide by group if (!missing(f)) { if (rlang::is_formula(f)) f = rlang::as_function(f) - + x = purrr::map_dfr(ref_time_values, function(t) { self$as_of(t, min_time_value = t - before_num) %>% - tibble::as_tibble() %>% + tibble::as_tibble() %>% dplyr::group_by(!!!group_by) %>% dplyr::group_modify(comp_one_grp, - f = f, ..., + f = f, ..., time_value = t, key_vars = key_vars, new_col = new_col, @@ -379,14 +378,14 @@ epi_archive = if (length(quos) > 1) { Abort("If `f` is missing then only a single computation can be specified via `...`.") } - + quo = quos[[1]] f = function(x, quo, ...) rlang::eval_tidy(quo, x) new_col = sym(names(rlang::quos_auto_name(quos))) x = purrr::map_dfr(ref_time_values, function(t) { self$as_of(t, min_time_value = t - before_num) %>% - tibble::as_tibble() %>% + tibble::as_tibble() %>% dplyr::group_by(!!!group_by) %>% dplyr::group_modify(comp_one_grp, f = f, quo = quo, @@ -397,12 +396,12 @@ epi_archive = dplyr::ungroup() }) } - + # Unnest if we need to if (!as_list_col) { x = tidyr::unnest(x, !!new_col, names_sep = names_sep) } - + # Join to get all rows, if we need to, then return if (all_rows) { cols = c(as.character(group_by), "time_value") @@ -413,7 +412,7 @@ epi_archive = } ) ) - + #' Convert to `epi_archive` format #' #' Converts a data frame, data table, or tibble into an `epi_archive` @@ -449,15 +448,15 @@ epi_archive = #' #' @export as_epi_archive = function(x, geo_type, time_type, other_keys, - additional_metadata = list()) { - epi_archive$new(x, geo_type, time_type, other_keys, additional_metadata) + additional_metadata = list()) { + epi_archive$new(x, geo_type, time_type, other_keys, additional_metadata) } #' Test for `epi_archive` format #' #' @param x An object. #' @return `TRUE` if the object inherits from `epi_archive`. -#' +#' #' @export is_epi_archive = function(x) { inherits(x, "epi_archive") diff --git a/R/slide.R b/R/slide.R index 55b11f42..8bcce8c6 100644 --- a/R/slide.R +++ b/R/slide.R @@ -145,7 +145,7 @@ epi_slide = function(x, f, ..., n = 7, ref_time_values, starts = in_range(ref_time_values - before_num, time_range) stops = in_range(ref_time_values + after_num, time_range) - if(length(starts) == 0 | length(stops) == 0){ + if( length(starts) == 0 || length(stops) == 0 ) { Abort("The starting and/or stopping times for sliding are out of bounds with respect to the range of times in your data. Check your settings for ref_time_values and align (and before, if specified).") } diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R new file mode 100644 index 00000000..ed801e1a --- /dev/null +++ b/tests/testthat/test-epi_slide.R @@ -0,0 +1,23 @@ +## --- These cases generate the error: --- +test_that("`ref_time_values` + `align` that result in no slide data, generate the error", { + expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")), + "starting and/or stopping times for sliding are out of bounds") # before the first, no data in the slide windows + expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")+207L), + "starting and/or stopping times for sliding are out of bounds") # beyond the last, no data in window +}) + +test_that("`ref_time_values` + `align` that have some slide data, but generate the error due to ref. time being out of time range", { + expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01"), align="left"), + "starting and/or stopping times for sliding are out of bounds") # before the first, but we'd expect there to be data in the window + expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")+201L), "starting and/or stopping times for sliding are out of bounds") # beyond the last, but still with data in window +}) + +## --- This case doesn't generate the error: --- +test_that("these doesn't produce an error; the error appears only if the ref time values are out of the range for every group", { + expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")+200L) %>% dplyr::select("geo_value","slide_value_value"), + tibble(geo_value = "ak", slide_value_value = 199) %>% group_by(geo_value)) # out of range for one group + expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-04")) %>% dplyr::select("geo_value","slide_value_value"), + tibble(geo_value = c("ak", "al"), slide_value_value = c(2, -2)) %>% group_by(geo_value)) # not out of range for either group + + +}) \ No newline at end of file From d95a7b6ebd7f7c53846cc30c93de418f6595adfe Mon Sep 17 00:00:00 2001 From: admin Date: Fri, 13 May 2022 15:28:22 -0700 Subject: [PATCH 18/36] Added code to make edf and f --- tests/testthat/test-epi_slide.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index ed801e1a..f142f1b0 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -1,3 +1,11 @@ +edf = bind_rows( + tibble(geo_value = "ak", time_value = as.Date("2020-01-01") + 1:200, value=1:200), + tibble(geo_value = "al", time_value=as.Date("2020-01-01") + 1:5, value=-(1:5)) +) %>% + as_epi_df() + +f = function(x, ...) tibble(value=mean(x$value), count=length(x$value)) + ## --- These cases generate the error: --- test_that("`ref_time_values` + `align` that result in no slide data, generate the error", { expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")), From bcbd37a6c74b00b3b458a2a0e3cf73921c986ac0 Mon Sep 17 00:00:00 2001 From: admin Date: Fri, 13 May 2022 15:28:50 -0700 Subject: [PATCH 19/36] Re-worded comment a bit --- tests/testthat/test-epi_slide.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index f142f1b0..48cf7720 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -20,7 +20,7 @@ test_that("`ref_time_values` + `align` that have some slide data, but generate t expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")+201L), "starting and/or stopping times for sliding are out of bounds") # beyond the last, but still with data in window }) -## --- This case doesn't generate the error: --- +## --- These cases doesn't generate the error: --- test_that("these doesn't produce an error; the error appears only if the ref time values are out of the range for every group", { expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")+200L) %>% dplyr::select("geo_value","slide_value_value"), tibble(geo_value = "ak", slide_value_value = 199) %>% group_by(geo_value)) # out of range for one group From fa98b617cf75e6d2aa4b1ea2bde5ee393f54b5b9 Mon Sep 17 00:00:00 2001 From: admin Date: Fri, 13 May 2022 19:41:41 -0700 Subject: [PATCH 20/36] Created helper file for testing --- NAMESPACE | 1 - tests/testthat/helper-epi_slide.R | 9 +++++++++ tests/testthat/test-epi_slide.R | 8 -------- 3 files changed, 9 insertions(+), 9 deletions(-) create mode 100644 tests/testthat/helper-epi_slide.R diff --git a/NAMESPACE b/NAMESPACE index 11f488e4..a7cc20b0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,7 +45,6 @@ export(rename) export(slice) export(ungroup) export(unnest) -importFrom(R6,R6Class) importFrom(data.table,as.data.table) importFrom(data.table,between) importFrom(data.table,key) diff --git a/tests/testthat/helper-epi_slide.R b/tests/testthat/helper-epi_slide.R new file mode 100644 index 00000000..ab4e4641 --- /dev/null +++ b/tests/testthat/helper-epi_slide.R @@ -0,0 +1,9 @@ +## Create an epi. df and a function to test epi_slide with + +edf = bind_rows( + tibble(geo_value = "ak", time_value = as.Date("2020-01-01") + 1:200, value=1:200), + tibble(geo_value = "al", time_value=as.Date("2020-01-01") + 1:5, value=-(1:5)) +) %>% + as_epi_df() + +f = function(x, ...) tibble(value=mean(x$value), count=length(x$value)) diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index 48cf7720..20a29e22 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -1,11 +1,3 @@ -edf = bind_rows( - tibble(geo_value = "ak", time_value = as.Date("2020-01-01") + 1:200, value=1:200), - tibble(geo_value = "al", time_value=as.Date("2020-01-01") + 1:5, value=-(1:5)) -) %>% - as_epi_df() - -f = function(x, ...) tibble(value=mean(x$value), count=length(x$value)) - ## --- These cases generate the error: --- test_that("`ref_time_values` + `align` that result in no slide data, generate the error", { expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")), From 29678d580a0f21ddbc9f40abc3c682367d136a39 Mon Sep 17 00:00:00 2001 From: admin Date: Fri, 13 May 2022 20:05:07 -0700 Subject: [PATCH 21/36] Made sure dplyr fun can be accessed in tests --- tests/testthat/helper-epi_slide.R | 8 ++++---- tests/testthat/test-epi_slide.R | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/testthat/helper-epi_slide.R b/tests/testthat/helper-epi_slide.R index ab4e4641..7ae570a6 100644 --- a/tests/testthat/helper-epi_slide.R +++ b/tests/testthat/helper-epi_slide.R @@ -1,9 +1,9 @@ ## Create an epi. df and a function to test epi_slide with -edf = bind_rows( - tibble(geo_value = "ak", time_value = as.Date("2020-01-01") + 1:200, value=1:200), - tibble(geo_value = "al", time_value=as.Date("2020-01-01") + 1:5, value=-(1:5)) +edf = dplyr::bind_rows( + dplyr::tibble(geo_value = "ak", time_value = as.Date("2020-01-01") + 1:200, value=1:200), + dplyr::tibble(geo_value = "al", time_value=as.Date("2020-01-01") + 1:5, value=-(1:5)) ) %>% as_epi_df() -f = function(x, ...) tibble(value=mean(x$value), count=length(x$value)) +f = function(x, ...) dplyr::tibble(value=mean(x$value), count=length(x$value)) diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index 20a29e22..baa338fd 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -15,9 +15,9 @@ test_that("`ref_time_values` + `align` that have some slide data, but generate t ## --- These cases doesn't generate the error: --- test_that("these doesn't produce an error; the error appears only if the ref time values are out of the range for every group", { expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")+200L) %>% dplyr::select("geo_value","slide_value_value"), - tibble(geo_value = "ak", slide_value_value = 199) %>% group_by(geo_value)) # out of range for one group + dplyr::tibble(geo_value = "ak", slide_value_value = 199) %>% group_by(geo_value)) # out of range for one group expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-04")) %>% dplyr::select("geo_value","slide_value_value"), - tibble(geo_value = c("ak", "al"), slide_value_value = c(2, -2)) %>% group_by(geo_value)) # not out of range for either group + dplyr::tibble(geo_value = c("ak", "al"), slide_value_value = c(2, -2)) %>% group_by(geo_value)) # not out of range for either group }) \ No newline at end of file From d436ae981131264d4dea978764e928a855d58bd5 Mon Sep 17 00:00:00 2001 From: admin Date: Fri, 13 May 2022 22:11:53 -0700 Subject: [PATCH 22/36] Printed column names of DT as requested --- R/archive.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/archive.R b/R/archive.R index 69d5c208..2f1e5b05 100644 --- a/R/archive.R +++ b/R/archive.R @@ -180,6 +180,9 @@ epi_archive = cat(sprintf("Data archive (stored in DT field): %i x %i\n", nrow(self$DT), ncol(self$DT))) cat("----------\n") + cat(sprintf("Column names in DT: %s\n", + paste(colnames(self$DT), collapse = ", "))) + cat("----------\n") cat(sprintf("Public methods: %s\n", paste(names(epi_archive$public_methods), collapse = ", "))) From 8c1307af16084a253ac00c31c5a22eefee2816aa Mon Sep 17 00:00:00 2001 From: admin Date: Sun, 15 May 2022 18:29:16 -0700 Subject: [PATCH 23/36] Updated sprintf statement to better accomodate many cols --- R/archive.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/archive.R b/R/archive.R index 7b765e24..e9be87a6 100644 --- a/R/archive.R +++ b/R/archive.R @@ -181,8 +181,11 @@ epi_archive = cat(sprintf("Data archive (stored in DT field): %i x %i\n", nrow(self$DT), ncol(self$DT))) cat("----------\n") - cat(sprintf("Column names in DT: %s\n", - paste(colnames(self$DT), collapse = ", "))) + cat(sprintf("Column names in DT: %s\n", paste(ifelse(length(colnames(self$DT))<=10, paste(colnames(self$DT), collapse = ", "), + paste(paste(colnames(self$DT)[1:3], collapse = ", "), ",\n", paste(colnames(self$DT)[4:10], collapse = ", "), sep = "")), + "\nand", length(colnames(self$DT)[11:length(colnames(self$DT))]), "more columns"))) + #cat(sprintf("Column names in DT: %s\n", + # paste(ifelse(length(colnames(self$DT))<=10, list(colnames(self$DT)), list(c(colnames(self$DT)[1:5], "\n",6)))[[1]], collapse = ", "))) cat("----------\n") cat(sprintf("Public methods: %s\n", paste(names(epi_archive$public_methods), From 7e757ee08b9c0d93bb944da60d604f38a4f49a40 Mon Sep 17 00:00:00 2001 From: admin Date: Sun, 15 May 2022 18:32:36 -0700 Subject: [PATCH 24/36] Deleted commented out old code --- R/archive.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/archive.R b/R/archive.R index e9be87a6..23697906 100644 --- a/R/archive.R +++ b/R/archive.R @@ -184,8 +184,6 @@ epi_archive = cat(sprintf("Column names in DT: %s\n", paste(ifelse(length(colnames(self$DT))<=10, paste(colnames(self$DT), collapse = ", "), paste(paste(colnames(self$DT)[1:3], collapse = ", "), ",\n", paste(colnames(self$DT)[4:10], collapse = ", "), sep = "")), "\nand", length(colnames(self$DT)[11:length(colnames(self$DT))]), "more columns"))) - #cat(sprintf("Column names in DT: %s\n", - # paste(ifelse(length(colnames(self$DT))<=10, list(colnames(self$DT)), list(c(colnames(self$DT)[1:5], "\n",6)))[[1]], collapse = ", "))) cat("----------\n") cat(sprintf("Public methods: %s\n", paste(names(epi_archive$public_methods), From 34b0b1dbf81df5c80cbb4e48e1fda41c6aac4396 Mon Sep 17 00:00:00 2001 From: admin Date: Sun, 15 May 2022 21:25:27 -0700 Subject: [PATCH 25/36] Re-wrote explan. a bit. --- R/slide.R | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/R/slide.R b/R/slide.R index 07652c3d..06abde9d 100644 --- a/R/slide.R +++ b/R/slide.R @@ -84,7 +84,27 @@ #' tidy evaluation (first example, above), then the name for the new column is #' inferred from the given expression and overrides any name passed explicitly #' through the `new_col_name` argument. -#' +#' +#' If a tibble that does not have a designated grouping variable is passed in +#' as the method argument to `f`, to prevent the specified method for `f` +#' from being overridden, include a parameter for the grouping-variable in +#' function() just prior to specifying the method. For example: +#' ``` +#' # Construct an tibble with an unnamed grouping-variable +#' edf = bind_rows(tibble(geo_value = "ak", time_value = as.Date("2020-01-01") +#' + 1:10, x1=1:10, y=1:10 + rnorm(10L))) %>% +#' as_epi_df() +#' +#' # Now, include a row parameter for the grouping-variable in the tibble, +#' # which we denote as g, just prior to method = "qr" +#' # Note that if g was not included below, then the method = "qr" would be +#' # overridden, as described above +#' edf %>% +#' group_by(geo_value) %>% +#' epi_slide(function(x, g, method="qr", ...) tibble(model=list( +#' lm(y ~ x1, x, method=method))), n=7L) +#' ``` +#' #' @importFrom lubridate days weeks #' @importFrom rlang .data .env !! enquo enquos sym #' @export From 75d6d685756bdebde51071c380a7c401fed6ea18 Mon Sep 17 00:00:00 2001 From: admin Date: Sun, 15 May 2022 21:25:45 -0700 Subject: [PATCH 26/36] Re-wrote explan. some more --- R/slide.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/R/slide.R b/R/slide.R index 06abde9d..26223c3e 100644 --- a/R/slide.R +++ b/R/slide.R @@ -85,17 +85,18 @@ #' inferred from the given expression and overrides any name passed explicitly #' through the `new_col_name` argument. #' -#' If a tibble that does not have a designated grouping variable is passed in -#' as the method argument to `f`, to prevent the specified method for `f` -#' from being overridden, include a parameter for the grouping-variable in -#' function() just prior to specifying the method. For example: +#' When `f` is a named function with arguments, if a tibble that does not have a +#' designated grouping variable is passed in as the method argument to `f`, +#' to prevent the specified method for `f` from being overridden, include a +#' parameter for the grouping-variable in function() just prior to specifying +#' the method. For example: #' ``` -#' # Construct an tibble with an unnamed grouping-variable +#' # Construct an tibble with an unnamed grouping variable #' edf = bind_rows(tibble(geo_value = "ak", time_value = as.Date("2020-01-01") #' + 1:10, x1=1:10, y=1:10 + rnorm(10L))) %>% #' as_epi_df() #' -#' # Now, include a row parameter for the grouping-variable in the tibble, +#' # Now, include a row parameter for the grouping variable in the tibble, #' # which we denote as g, just prior to method = "qr" #' # Note that if g was not included below, then the method = "qr" would be #' # overridden, as described above From 434f7ebfedb24815c06a46ef9e70f400b8f1430d Mon Sep 17 00:00:00 2001 From: admin Date: Mon, 16 May 2022 14:02:34 -0700 Subject: [PATCH 27/36] Some minor re-wording --- R/slide.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/slide.R b/R/slide.R index 26223c3e..9ce9f181 100644 --- a/R/slide.R +++ b/R/slide.R @@ -85,11 +85,11 @@ #' inferred from the given expression and overrides any name passed explicitly #' through the `new_col_name` argument. #' -#' When `f` is a named function with arguments, if a tibble that does not have a -#' designated grouping variable is passed in as the method argument to `f`, -#' to prevent the specified method for `f` from being overridden, include a -#' parameter for the grouping-variable in function() just prior to specifying -#' the method. For example: +#' If `f` is a named function with arguments and a tibble with an unnamed +#' grouping variable is passed in as its method argument, then naturally +#' any specified method for `f` would be overridden. To prevent this, include +#' a parameter for the grouping variable in `function()` just prior to +#' specifying the method. For example: #' ``` #' # Construct an tibble with an unnamed grouping variable #' edf = bind_rows(tibble(geo_value = "ak", time_value = as.Date("2020-01-01") From efe3a2df1f9f817380a67340e5ac252cc97203dc Mon Sep 17 00:00:00 2001 From: admin Date: Mon, 16 May 2022 14:02:46 -0700 Subject: [PATCH 28/36] More minor re-wording --- R/slide.R | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/R/slide.R b/R/slide.R index 9ce9f181..d7877879 100644 --- a/R/slide.R +++ b/R/slide.R @@ -85,11 +85,10 @@ #' inferred from the given expression and overrides any name passed explicitly #' through the `new_col_name` argument. #' -#' If `f` is a named function with arguments and a tibble with an unnamed -#' grouping variable is passed in as its method argument, then naturally -#' any specified method for `f` would be overridden. To prevent this, include -#' a parameter for the grouping variable in `function()` just prior to -#' specifying the method. For example: +#' When `f` is a named function with arguments, if a tibble with an unnamed +#' grouping variable is passed in as the method argument to `f`, include a +#' parameter for the grouping-variable in `function()` just prior to +#' specifying the method to prevent that from being overridden. For example: #' ``` #' # Construct an tibble with an unnamed grouping variable #' edf = bind_rows(tibble(geo_value = "ak", time_value = as.Date("2020-01-01") From cccfea92eb171c6a9c9bd96b511dc21ed1c2b0d7 Mon Sep 17 00:00:00 2001 From: admin Date: Mon, 16 May 2022 14:12:28 -0700 Subject: [PATCH 29/36] Converted roxygen comments to Rd file --- man/epi_slide.Rd | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd index 1e42dbf3..5a424bee 100644 --- a/man/epi_slide.Rd +++ b/man/epi_slide.Rd @@ -115,6 +115,24 @@ Thus, to be clear, when the computation is specified via an expression for tidy evaluation (first example, above), then the name for the new column is inferred from the given expression and overrides any name passed explicitly through the \code{new_col_name} argument. + +When \code{f} is a named function with arguments, if a tibble with an unnamed +grouping variable is passed in as the method argument to \code{f}, include a +parameter for the grouping-variable in \verb{function()} just prior to +specifying the method to prevent that from being overridden. For example:\preformatted{# Construct an tibble with an unnamed grouping variable +edf = bind_rows(tibble(geo_value = "ak", time_value = as.Date("2020-01-01") + + 1:10, x1=1:10, y=1:10 + rnorm(10L))) \%>\% + as_epi_df() + +# Now, include a row parameter for the grouping variable in the tibble, +# which we denote as g, just prior to method = "qr" +# Note that if g was not included below, then the method = "qr" would be +# overridden, as described above +edf \%>\% +group_by(geo_value) \%>\% +epi_slide(function(x, g, method="qr", ...) tibble(model=list( + lm(y ~ x1, x, method=method))), n=7L) +} } \examples{ # slide a 7-day trailing average formula on cases From 462b950667c06b650eb10aa8d7ed3091a8c9449b Mon Sep 17 00:00:00 2001 From: admin Date: Tue, 17 May 2022 12:49:21 -0700 Subject: [PATCH 30/36] Moved helper file code to test file and deleted helper file --- tests/testthat/helper-epi_slide.R | 9 --------- tests/testthat/test-epi_slide.R | 10 ++++++++++ 2 files changed, 10 insertions(+), 9 deletions(-) delete mode 100644 tests/testthat/helper-epi_slide.R diff --git a/tests/testthat/helper-epi_slide.R b/tests/testthat/helper-epi_slide.R deleted file mode 100644 index 7ae570a6..00000000 --- a/tests/testthat/helper-epi_slide.R +++ /dev/null @@ -1,9 +0,0 @@ -## Create an epi. df and a function to test epi_slide with - -edf = dplyr::bind_rows( - dplyr::tibble(geo_value = "ak", time_value = as.Date("2020-01-01") + 1:200, value=1:200), - dplyr::tibble(geo_value = "al", time_value=as.Date("2020-01-01") + 1:5, value=-(1:5)) -) %>% - as_epi_df() - -f = function(x, ...) dplyr::tibble(value=mean(x$value), count=length(x$value)) diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index baa338fd..df52e059 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -1,3 +1,13 @@ +## Create an epi. df and a function to test epi_slide with + +edf = dplyr::bind_rows( + dplyr::tibble(geo_value = "ak", time_value = as.Date("2020-01-01") + 1:200, value=1:200), + dplyr::tibble(geo_value = "al", time_value=as.Date("2020-01-01") + 1:5, value=-(1:5)) +) %>% + as_epi_df() + +f = function(x, ...) dplyr::tibble(value=mean(x$value), count=length(x$value)) + ## --- These cases generate the error: --- test_that("`ref_time_values` + `align` that result in no slide data, generate the error", { expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")), From 6f463b4e644ad1e69da1708dd3ab8f20360ed5d3 Mon Sep 17 00:00:00 2001 From: admin Date: Tue, 17 May 2022 12:49:51 -0700 Subject: [PATCH 31/36] Fixed some arrangement of code --- tests/testthat/test-epi_slide.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index df52e059..71a180c4 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -19,15 +19,16 @@ test_that("`ref_time_values` + `align` that result in no slide data, generate th test_that("`ref_time_values` + `align` that have some slide data, but generate the error due to ref. time being out of time range", { expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01"), align="left"), "starting and/or stopping times for sliding are out of bounds") # before the first, but we'd expect there to be data in the window - expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")+201L), "starting and/or stopping times for sliding are out of bounds") # beyond the last, but still with data in window + expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")+201L), + "starting and/or stopping times for sliding are out of bounds") # beyond the last, but still with data in window }) ## --- These cases doesn't generate the error: --- test_that("these doesn't produce an error; the error appears only if the ref time values are out of the range for every group", { - expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")+200L) %>% dplyr::select("geo_value","slide_value_value"), + expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")+200L) %>% + dplyr::select("geo_value","slide_value_value"), dplyr::tibble(geo_value = "ak", slide_value_value = 199) %>% group_by(geo_value)) # out of range for one group - expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-04")) %>% dplyr::select("geo_value","slide_value_value"), + expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-04")) %>% + dplyr::select("geo_value","slide_value_value"), dplyr::tibble(geo_value = c("ak", "al"), slide_value_value = c(2, -2)) %>% group_by(geo_value)) # not out of range for either group - - }) \ No newline at end of file From 2999a9e9e1576e7cba0fd21aa7083c8627a448f6 Mon Sep 17 00:00:00 2001 From: admin Date: Tue, 17 May 2022 13:59:34 -0700 Subject: [PATCH 32/36] Simplified code a bit --- R/archive.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/archive.R b/R/archive.R index 23697906..683d63bf 100644 --- a/R/archive.R +++ b/R/archive.R @@ -181,9 +181,10 @@ epi_archive = cat(sprintf("Data archive (stored in DT field): %i x %i\n", nrow(self$DT), ncol(self$DT))) cat("----------\n") - cat(sprintf("Column names in DT: %s\n", paste(ifelse(length(colnames(self$DT))<=10, paste(colnames(self$DT), collapse = ", "), - paste(paste(colnames(self$DT)[1:3], collapse = ", "), ",\n", paste(colnames(self$DT)[4:10], collapse = ", "), sep = "")), - "\nand", length(colnames(self$DT)[11:length(colnames(self$DT))]), "more columns"))) + cat(sprintf("Columns in DT: %s\n", paste(ifelse(length( + colnames(self$DT)) <= 4, paste(colnames(self$DT), collapse = ", "), + paste(paste(colnames(self$DT)[1:4], collapse = ", "), "and", + length(colnames(self$DT)[5:length(colnames(self$DT))]), "more columns"))))) cat("----------\n") cat(sprintf("Public methods: %s\n", paste(names(epi_archive$public_methods), From b6d4dc1730ff40b9f348ce1a4f61979154d10bf9 Mon Sep 17 00:00:00 2001 From: admin Date: Tue, 17 May 2022 14:45:04 -0700 Subject: [PATCH 33/36] R6 class --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index a7cc20b0..11f488e4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,6 +45,7 @@ export(rename) export(slice) export(ungroup) export(unnest) +importFrom(R6,R6Class) importFrom(data.table,as.data.table) importFrom(data.table,between) importFrom(data.table,key) From 3d3b2a27bb694e7440e528a577563e63ef556160 Mon Sep 17 00:00:00 2001 From: rachlobay <42976509+rachlobay@users.noreply.github.com> Date: Tue, 17 May 2022 14:47:27 -0700 Subject: [PATCH 34/36] Update slide.R --- R/slide.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/slide.R b/R/slide.R index c336f3cd..fc8e3345 100644 --- a/R/slide.R +++ b/R/slide.R @@ -121,7 +121,8 @@ epi_slide = function(x, f, ..., n = 7, ref_time_values, # intersect with observed time values if (missing(ref_time_values)) { ref_time_values = unique(x$time_value) - } else { + } + else { ref_time_values = ref_time_values[ref_time_values %in% unique(x$time_value)] } From 4df45a4eb194dade4c4759d6521ee78895b5a5fc Mon Sep 17 00:00:00 2001 From: rachlobay <42976509+rachlobay@users.noreply.github.com> Date: Tue, 17 May 2022 14:50:26 -0700 Subject: [PATCH 35/36] Unsure why that got deleted so re-add that import --- R/archive.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/archive.R b/R/archive.R index 7d2038f6..599c724a 100644 --- a/R/archive.R +++ b/R/archive.R @@ -76,6 +76,7 @@ #' **data that would have been available as of t**. More details on `slide()` #' are documented in the wrapper function `epix_slide()`. #' +#' @importFrom R6 R6Class #' @export epi_archive = R6::R6Class( From 7864cdda47bcbce673b321a14741a1b695218fe5 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 24 Jun 2022 15:17:11 -0700 Subject: [PATCH 36/36] Updated examples from Chloe's comment. --- R/growth_rate.R | 13 +++++++------ man/as_epi_archive.Rd | 12 ++++++++---- man/epi_archive.Rd | 36 ++++++++++++++++++------------------ man/epi_slide.Rd | 18 ++++++++++++------ man/epix_as_of.Rd | 12 ++++++++---- man/epix_merge.Rd | 12 ++++++++---- man/epix_slide.Rd | 12 ++++++++---- man/growth_rate.Rd | 13 +++++++------ 8 files changed, 76 insertions(+), 52 deletions(-) diff --git a/R/growth_rate.R b/R/growth_rate.R index c2bedbed..d3ca9e31 100644 --- a/R/growth_rate.R +++ b/R/growth_rate.R @@ -101,14 +101,15 @@ #' #' @export #' @examples -#' # California's growth rate in COVID cases -#' mutate(jhu_csse_daily, gr = growth_rate(time_value,cases)) +#' # COVID cases growth rate by state using default method relative change +#' jhu_csse_daily_subset %>% +#' group_by(geo_value) %>% +#' mutate(cases_gr = growth_rate(x = time_value, y = cases)) #' #' # Log scale, degree 4 polynomial and 6-fold cross validation -#' growth_rate( -#' x = jhu_csse_daily$time_value, y = jhu_csse_daily$cases, -#' log_scale = TRUE, ord = 4, k = 6 -#' ) +#' jhu_csse_daily_subset %>% +#' group_by(geo_value) %>% +#' mutate(gr_poly = growth_rate( x = time_value, y = cases, log_scale = TRUE, ord = 4, k = 6)) growth_rate = function(x = seq_along(y), y, x0 = x, method = c("rel_change", "linear_reg", diff --git a/man/as_epi_archive.Rd b/man/as_epi_archive.Rd index a95550be..c7098f77 100644 --- a/man/as_epi_archive.Rd +++ b/man/as_epi_archive.Rd @@ -42,11 +42,15 @@ examples. } \details{ This simply a wrapper around the \code{new()} method of the \code{epi_archive} -class, so for example:\preformatted{x <- as_epi_archive(df, geo_type = "state", time_type = "day") -} +class, so for example: -would be equivalent to:\preformatted{x <- epi_archive$new(df, geo_type = "state", time_type = "day") -} +\if{html}{\out{
}}\preformatted{x <- as_epi_archive(df, geo_type = "state", time_type = "day") +}\if{html}{\out{
}} + +would be equivalent to: + +\if{html}{\out{
}}\preformatted{x <- epi_archive$new(df, geo_type = "state", time_type = "day") +}\if{html}{\out{
}} } \examples{ df <- data.frame (geo_value = c(replicate(2, "ca"), replicate(2, "fl")), diff --git a/man/epi_archive.Rd b/man/epi_archive.Rd index fff4e714..5214b01d 100644 --- a/man/epi_archive.Rd +++ b/man/epi_archive.Rd @@ -84,17 +84,17 @@ are documented in the wrapper function \code{epix_slide()}. \section{Methods}{ \subsection{Public methods}{ \itemize{ -\item \href{#method-new}{\code{epi_archive$new()}} -\item \href{#method-print}{\code{epi_archive$print()}} -\item \href{#method-as_of}{\code{epi_archive$as_of()}} -\item \href{#method-merge}{\code{epi_archive$merge()}} -\item \href{#method-slide}{\code{epi_archive$slide()}} -\item \href{#method-clone}{\code{epi_archive$clone()}} +\item \href{#method-epi_archive-new}{\code{epi_archive$new()}} +\item \href{#method-epi_archive-print}{\code{epi_archive$print()}} +\item \href{#method-epi_archive-as_of}{\code{epi_archive$as_of()}} +\item \href{#method-epi_archive-merge}{\code{epi_archive$merge()}} +\item \href{#method-epi_archive-slide}{\code{epi_archive$slide()}} +\item \href{#method-epi_archive-clone}{\code{epi_archive$clone()}} } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-new}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-epi_archive-new}{}}} \subsection{Method \code{new()}}{ Creates a new \code{epi_archive} object. \subsection{Usage}{ @@ -130,8 +130,8 @@ An \code{epi_archive} object. } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-print}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-epi_archive-print}{}}} \subsection{Method \code{print()}}{ \subsection{Usage}{ \if{html}{\out{
}}\preformatted{epi_archive$print()}\if{html}{\out{
}} @@ -139,8 +139,8 @@ An \code{epi_archive} object. } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-as_of}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-epi_archive-as_of}{}}} \subsection{Method \code{as_of()}}{ Generates a snapshot in \code{epi_df} format as of a given version. See the documentation for the wrapper function \code{epix_as_of()} for details. @@ -150,8 +150,8 @@ See the documentation for the wrapper function \code{epix_as_of()} for details. } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-merge}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-epi_archive-merge}{}}} \subsection{Method \code{merge()}}{ Merges another \code{data.table} with the current one, and allows for a post-filling of \code{NA} values by last observation carried forward (LOCF). @@ -162,8 +162,8 @@ See the documentation for the wrapper function \code{epix_merge()} for details. } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-slide}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-epi_archive-slide}{}}} \subsection{Method \code{slide()}}{ Slides a given function over variables in an \code{epi_archive} object. See the documentation for the wrapper function \code{epix_as_of()} for @@ -185,8 +185,8 @@ details. } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-clone}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-epi_archive-clone}{}}} \subsection{Method \code{clone()}}{ The objects of this class are cloneable with this method. \subsection{Usage}{ diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd index 30c61a11..903cb017 100644 --- a/man/epi_slide.Rd +++ b/man/epi_slide.Rd @@ -104,12 +104,16 @@ incomplete windows) is therefore left up to the user, either through the specified function or formula \code{f}, or through post-processing. If \code{f} is missing, then an expression for tidy evaluation can be specified, -for example, as in:\preformatted{epi_slide(x, cases_7dav = mean(cases), n = 7) -} +for example, as in: + +\if{html}{\out{
}}\preformatted{epi_slide(x, cases_7dav = mean(cases), n = 7) +}\if{html}{\out{
}} + +which would be equivalent to: -which would be equivalent to:\preformatted{epi_slide(x, function(x, ...) mean(x$cases), n = 7, +\if{html}{\out{
}}\preformatted{epi_slide(x, function(x, ...) mean(x$cases), n = 7, new_col_name = "cases_7dav") -} +}\if{html}{\out{
}} Thus, to be clear, when the computation is specified via an expression for tidy evaluation (first example, above), then the name for the new column is @@ -119,7 +123,9 @@ through the \code{new_col_name} argument. When \code{f} is a named function with arguments, if a tibble with an unnamed grouping variable is passed in as the method argument to \code{f}, include a parameter for the grouping-variable in \verb{function()} just prior to -specifying the method to prevent that from being overridden. For example:\preformatted{# Construct an tibble with an unnamed grouping variable +specifying the method to prevent that from being overridden. For example: + +\if{html}{\out{
}}\preformatted{# Construct an tibble with an unnamed grouping variable edf = bind_rows(tibble(geo_value = "ak", time_value = as.Date("2020-01-01") + 1:10, x1=1:10, y=1:10 + rnorm(10L))) \%>\% as_epi_df() @@ -132,7 +138,7 @@ edf \%>\% group_by(geo_value) \%>\% epi_slide(function(x, g, method="qr", ...) tibble(model=list( lm(y ~ x1, x, method=method))), n=7L) -} +}\if{html}{\out{
}} } \examples{ # slide a 7-day trailing average formula on cases diff --git a/man/epix_as_of.Rd b/man/epix_as_of.Rd index 658e7169..b5d5969c 100644 --- a/man/epix_as_of.Rd +++ b/man/epix_as_of.Rd @@ -29,11 +29,15 @@ examples. } \details{ This is simply a wrapper around the \code{as_of()} method of the -\code{epi_archive} class, so if \code{x} is an \code{epi_archive} object, then:\preformatted{epix_as_of(x, max_version = v) -} +\code{epi_archive} class, so if \code{x} is an \code{epi_archive} object, then: -is equivalent to:\preformatted{x$as_of(max_version = v) -} +\if{html}{\out{
}}\preformatted{epix_as_of(x, max_version = v) +}\if{html}{\out{
}} + +is equivalent to: + +\if{html}{\out{
}}\preformatted{x$as_of(max_version = v) +}\if{html}{\out{
}} } \examples{ # warning message of data latency shown diff --git a/man/epix_merge.Rd b/man/epix_merge.Rd index 781ef6fe..3d1b2e1c 100644 --- a/man/epix_merge.Rd +++ b/man/epix_merge.Rd @@ -35,11 +35,15 @@ examples. } \details{ This is simply a wrapper around the \code{merge()} method of the -\code{epi_archive} class, so if \code{x} and \code{y} are an \code{epi_archive} objects, then:\preformatted{epix_merge(x, y) -} +\code{epi_archive} class, so if \code{x} and \code{y} are an \code{epi_archive} objects, then: -is equivalent to:\preformatted{x$merge(y) -} +\if{html}{\out{
}}\preformatted{epix_merge(x, y) +}\if{html}{\out{
}} + +is equivalent to: + +\if{html}{\out{
}}\preformatted{x$merge(y) +}\if{html}{\out{
}} } \examples{ # create two example epi_archive datasets diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd index f01a0a71..b6f7a323 100644 --- a/man/epix_slide.Rd +++ b/man/epix_slide.Rd @@ -115,11 +115,15 @@ should never be used in place of \code{epi_slide()}, and only used when version-aware sliding is necessary (as it its purpose). Finally, this is simply a wrapper around the \code{slide()} method of the -\code{epi_archive} class, so if \code{x} is an \code{epi_archive} object, then:\preformatted{epix_slide(x, new_var = comp(old_var), n = 120) -} +\code{epi_archive} class, so if \code{x} is an \code{epi_archive} object, then: -is equivalent to:\preformatted{x$slide(x, new_var = comp(old_var), n = 120) -} +\if{html}{\out{
}}\preformatted{epix_slide(x, new_var = comp(old_var), n = 120) +}\if{html}{\out{
}} + +is equivalent to: + +\if{html}{\out{
}}\preformatted{x$slide(x, new_var = comp(old_var), n = 120) +}\if{html}{\out{
}} } \examples{ # these dates are reference time points for the 3 day average sliding window diff --git a/man/growth_rate.Rd b/man/growth_rate.Rd index 3bb29aa0..173eff43 100644 --- a/man/growth_rate.Rd +++ b/man/growth_rate.Rd @@ -135,12 +135,13 @@ user. } \examples{ -# California's growth rate in COVID cases - mutate(ca_daily_cases, gr = growth_rate(time_value,cases)) +# COVID cases growth rate by state using default method relative change +jhu_csse_daily_subset \%>\% + group_by(geo_value) \%>\% + mutate(cases_gr = growth_rate(x = time_value, y = cases)) # Log scale, degree 4 polynomial and 6-fold cross validation -growth_rate( - x = ca_daily_cases$time_value, y = ca_daily_cases$cases, - log_scale = TRUE, ord = 4, k = 6 -) +jhu_csse_daily_subset \%>\% + group_by(geo_value) \%>\% + mutate(gr_poly = growth_rate( x = time_value, y = cases, log_scale = TRUE, ord = 4, k = 6)) }