@@ -216,11 +216,23 @@ epi_archive <-
216
216
classname = " epi_archive" ,
217
217
# ####
218
218
public = list (
219
+ # ' @field DT (`data.table`)\cr
220
+ # ' the compressed datatable
219
221
DT = NULL ,
222
+ # ' @field geo_type (`character()`)\cr
223
+ # ' the resolution of the geographic label (e.g. state)
220
224
geo_type = NULL ,
225
+ # ' @field time_type (`character()`)\cr
226
+ # ' the resolution of the time column (e.g. day)
221
227
time_type = NULL ,
228
+ # ' @field additional_metadata (`list()`)\cr
229
+ # ' any extra fields, such as `other_keys`
222
230
additional_metadata = NULL ,
231
+ # ' @field clobberable_versions_start (`Date()`)\cr
232
+ # ' the earliest date from which new data should overwrite existing data
223
233
clobberable_versions_start = NULL ,
234
+ # ' @field versions_end (`Date()`)\cr
235
+ # ' the latest version observed
224
236
versions_end = NULL ,
225
237
# ' @description Creates a new `epi_archive` object.
226
238
# ' @param x A data frame, data table, or tibble, with columns `geo_value`,
@@ -679,7 +691,18 @@ epi_archive <-
679
691
680
692
return (invisible (self ))
681
693
},
682
- # ####
694
+ # ' Group an `epi_archive`
695
+ # ' @description
696
+ # ' Groups an `epi_archive` by the given variables or computations.
697
+ # ' @param ... variables or computations to group by. Computations are always
698
+ # ' done on the ungrouped data frame. To perform computations on the grouped
699
+ # ' data, you need to use a separate [`mutate()`] step before the
700
+ # ' [`group_by()`]
701
+ # ' @param .add When `FALSE`, the default, [`group_by()`] will override existing
702
+ # ' groups. To add to the existing groups, use `.add = TRUE`.
703
+ # ' @param .drop Drop groups formed by factor levels that don't appear in the
704
+ # ' data. The default is `TRUE` except when the archive has been previously grouped
705
+ # ' with `.drop = FALSE`. See [`group_by_drop_default()`] for details.
683
706
group_by = function (... , .add = FALSE , .drop = dplyr :: group_by_drop_default(self )) {
684
707
# Delegate to `group_by.epi_archive()`, passing this object (`self`) as the
# first argument and forwarding `...`, `.add`, and `.drop` unchanged.
group_by.epi_archive(self , ... , .add = .add , .drop = .drop )
685
708
},
@@ -688,6 +711,74 @@ epi_archive <-
688
711
# ' details.
689
712
# ' @importFrom data.table key
690
713
# ' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms
714
+ # ' @param f Function, formula, or missing; together with `...` specifies the
715
+ # ' computation to slide. To "slide" means to apply a computation over a
716
+ # ' sliding (a.k.a. "rolling") time window for each data group. The window is
717
+ # ' determined by the `before` parameter described below. One time step is
718
+ # ' typically one day or one week; see [`epi_slide`] details for more
719
+ # ' explanation. If a function, `f` must take an `epi_df` with the same
720
+ # ' column names as the archive's `DT`, minus the `version` column; followed
721
+ # ' by a one-row tibble containing the values of the grouping variables for
722
+ # ' the associated group; followed by a reference time value, usually as a
723
+ # ' `Date` object; followed by any number of named arguments. If a formula,
724
+ # ' `f` can operate directly on columns accessed via `.x$var` or `.$var`, as
725
+ # ' in `~ mean (.x$var)` to compute a mean of a column `var` for each
726
+ # ' group-`ref_time_value` combination. The group key can be accessed via
727
+ # ' `.y` or `.group_key`, and the reference time value can be accessed via
728
+ # ' `.z` or `.ref_time_value`. If `f` is missing, then `...` will specify the
729
+ # ' computation.
730
+ # ' @param ... Additional arguments to pass to the function or formula specified
731
+ # ' via `f`. Alternatively, if `f` is missing, then `...` is interpreted as an
732
+ # ' expression for tidy evaluation; in addition to referring to columns
733
+ # ' directly by name, the expression has access to `.data` and `.env` pronouns
734
+ # ' as in `dplyr` verbs, and can also refer to the `.group_key` and
735
+ # ' `.ref_time_value`. See details of [`epi_slide`].
736
+ # ' @param before How far `before` each `ref_time_value` should the sliding
737
+ # ' window extend? If provided, should be a single, non-NA,
738
+ # ' [integer-compatible][vctrs::vec_cast] number of time steps. This window
739
+ # ' endpoint is inclusive. For example, if `before = 7`, and one time step is
740
+ # ' one day, then to produce a value for a `ref_time_value` of January 8, we
741
+ # ' apply the given function or formula to data (for each group present) with
742
+ # ' `time_value`s from January 1 onward, as they were reported on January 8.
743
+ # ' For typical disease surveillance sources, this will not include any data
744
+ # ' with a `time_value` of January 8, and, depending on the amount of reporting
745
+ # ' latency, may not include January 7 or even earlier `time_value`s. (If
746
+ # ' instead the archive were to hold nowcasts instead of regular surveillance
747
+ # ' data, then we would indeed expect data for `time_value` January 8. If it
748
+ # ' were to hold forecasts, then we would expect data for `time_value`s after
749
+ # ' January 8, and the sliding window would extend as far after each
750
+ # ' `ref_time_value` as needed to include all such `time_value`s.)
751
+ # ' @param ref_time_values Reference time values / versions for sliding
752
+ # ' computations; each element of this vector serves both as the anchor point
753
+ # ' for the `time_value` window for the computation and as the `max_version`
753
+ # ' for the `as_of` with which we fetch data in this window. If missing, then this will be set
755
+ # ' to a regularly-spaced sequence of values set to cover the range of
756
+ # ' `version`s in the `DT` plus the `versions_end`; the spacing of values will
757
+ # ' be guessed (using the GCD of the skips between values).
758
+ # ' @param time_step Optional function used to define the meaning of one time
759
+ # ' step, which if specified, overrides the default choice based on the
760
+ # ' `time_value` column. This function must take a positive integer and return
761
+ # ' an object of class `lubridate::period`. For example, we can use `time_step
762
+ # ' = lubridate::hours` in order to set the time step to be one hour (this
763
+ # ' would only be meaningful if `time_value` is of class `POSIXct`).
764
+ # ' @param new_col_name String indicating the name of the new column that will
765
+ # ' contain the slide values. Default is "slide_value"; note that setting
766
+ # ' `new_col_name` equal to an existing column name will overwrite this column.
767
+ # ' @param as_list_col Should the slide results be held in a list column, or be
768
+ # ' [unchopped][tidyr::unchop]/[unnested][tidyr::unnest]? Default is `FALSE`,
769
+ # ' in which case a list object returned by `f` would be unnested (using
770
+ # ' [`tidyr::unnest()`]), and, if the slide computations output data frames,
771
+ # ' the names of the resulting columns are given by prepending `new_col_name`
772
+ # ' to the names of the list elements.
773
+ # ' @param names_sep String specifying the separator to use in `tidyr::unnest()`
774
+ # ' when `as_list_col = FALSE`. Default is "_". Using `NULL` drops the prefix
775
+ # ' from `new_col_name` entirely.
776
+ # ' @param all_versions (Not the same as `all_rows` parameter of `epi_slide`.) If
777
+ # ' `all_versions = TRUE`, then `f` will be passed the version history (all
778
+ # ' `version <= ref_time_value`) for rows having `time_value` between
779
+ # ' `ref_time_value - before` and `ref_time_value`. Otherwise, `f` will be
780
+ # ' passed only the most recent `version` for every unique `time_value`.
781
+ # ' Default is `FALSE`.
691
782
slide = function (f , ... , before , ref_time_values ,
692
783
time_step , new_col_name = " slide_value" ,
693
784
as_list_col = FALSE , names_sep = " _" ,
0 commit comments