diff --git a/DESCRIPTION b/DESCRIPTION index 7f36ad7c..7b21d628 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: epiprocess Title: Tools for basic signal processing in epidemiology -Version: 0.7.3 +Version: 0.7.4 Authors@R: c( person("Jacob", "Bien", role = "ctb"), person("Logan", "Brooks", role = "aut"), diff --git a/NAMESPACE b/NAMESPACE index 5a1ddfa0..ef55f68c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -69,10 +69,24 @@ export(ungroup) export(unnest) importFrom(R6,R6Class) importFrom(checkmate,anyInfinite) +importFrom(checkmate,anyMissing) importFrom(checkmate,assert) importFrom(checkmate,assert_character) +importFrom(checkmate,assert_class) +importFrom(checkmate,assert_data_frame) importFrom(checkmate,assert_int) +importFrom(checkmate,assert_list) +importFrom(checkmate,assert_logical) +importFrom(checkmate,assert_numeric) +importFrom(checkmate,assert_scalar) +importFrom(checkmate,check_atomic) +importFrom(checkmate,check_data_frame) +importFrom(checkmate,test_set_equal) +importFrom(checkmate,test_subset) +importFrom(checkmate,vname) +importFrom(cli,cli_abort) importFrom(cli,cli_inform) +importFrom(cli,cli_warn) importFrom(data.table,":=") importFrom(data.table,address) importFrom(data.table,as.data.table) diff --git a/NEWS.md b/NEWS.md index e4a404e2..d6aca370 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,146 +6,147 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat ## Improvements -* `epi_slide` computations are now 2-4 times faster after changing how +- `epi_slide` computations are now 2-4 times faster after changing how reference time values, made accessible within sliding functions, are calculated (#397). 
-* regenerated the `jhu_csse_daily_subset` dataset with the latest versions of +- regenerated the `jhu_csse_daily_subset` dataset with the latest versions of the data from the API -* changed approach to versioning, see DEVELOPMENT.md for details -* `select` on grouped `epi_df`s now only drops `epi_df`ness if it makes sense; PR #390 -* Minor documentation updates; PR #393 -* Improved `epi_archive` print method. Compactified metadata and shows a snippet +- changed approach to versioning, see DEVELOPMENT.md for details +- `select` on grouped `epi_df`s now only drops `epi_df`ness if it makes sense; PR #390 +- Minor documentation updates; PR #393 +- Improved `epi_archive` print method. Compactified metadata and shows a snippet of the underlying `DT` (#341). +- Added `autoplot` method for `epi_df` objects, which creates a ggplot2 plot of + the `epi_df` (#382). +- Refactored internals to use `cli` for warnings/errors and `checkmate` for + argument checking (#413). ## Breaking changes -* Switched `epi_df`'s `other_keys` default from `NULL` to `character(0)`; PR #390 +- Switched `epi_df`'s `other_keys` default from `NULL` to `character(0)`; PR #390 # epiprocess 0.7.0 ## Improvements -* Updated vignettes for compatibility with epidatr 1.0.0 in PR #377. +- Updated vignettes for compatibility with epidatr 1.0.0 in PR #377. ## Breaking changes -* Changes to `epi_slide` and `epix_slide`: - * If `f` is a function, it is now required to take at least three arguments. +- Changes to `epi_slide` and `epix_slide`: + - If `f` is a function, it is now required to take at least three arguments. `f` must take an `epi_df` with the same column names as the archive's `DT`, minus the `version` column; followed by a one-row tibble containing the values of the grouping variables for the associated group; followed by a reference time value, usually as a `Date` object. 
Optionally, it can take any number of additional arguments after that, and forward values for those arguments through `epi[x]_slide`'s `...` args. - * To make your existing slide computations work, add a third argument to - your `f` function to accept this new input: e.g., change `f = function(x, - g, ) { }` to `f = function(x, g, rt, ) { }`. + - To make your existing slide computations work, add a third argument to + your `f` function to accept this new input: e.g., change `f = function(x, g, ) { }` + to `f = function(x, g, rt, ) { }`. ## New features -* `epi_slide` and `epix_slide` also make the window data, group key and +- `epi_slide` and `epix_slide` also make the window data, group key and reference time value available to slide computations specified as formulas or tidy evaluation expressions, in additional or completely new ways. - * If `f` is a formula, it can now access the reference time value via `.z` or + - If `f` is a formula, it can now access the reference time value via `.z` or `.ref_time_value`. - * If `f` is missing, the tidy evaluation expression in `...` can now refer to + - If `f` is missing, the tidy evaluation expression in `...` can now refer to the window data as an `epi_df` or `tibble` with `.x`, the group key with `.group_key`, and the reference time value with `.ref_time_value`. The usual `.data` and `.env` pronouns also work, but`pick()` and `cur_data()` are not; work off of `.x` instead. -* `epix_slide` has been made more like `dplyr::group_modify`. It will no longer +- `epix_slide` has been made more like `dplyr::group_modify`. It will no longer perform element/row recycling for size stability, accepts slide computation outputs containing any number of rows, and no longer supports `all_rows`. 
- * To keep the old behavior, manually perform row recycling within `f` + - To keep the old behavior, manually perform row recycling within `f` computations, and/or `left_join` a data frame representing the desired output structure with the current `epix_slide()` result to obtain the desired repetitions and completions expected with `all_rows = TRUE`. -* `epix_slide` will only output grouped or ungrouped tibbles. Previously, it +- `epix_slide` will only output grouped or ungrouped tibbles. Previously, it would sometimes output `epi_df`s, but not consistently, and not always with the metadata desired. Future versions will revisit this design, and consider more closely whether/when/how to output an `epi_df`. - * To keep the old behavior, convert the output of `epix_slide()` to `epi_df` + - To keep the old behavior, convert the output of `epix_slide()` to `epi_df` when desired and set the metadata appropriately. ## Improvements -* `epi_slide` and `epix_slide` now support `as_list_col = TRUE` when the slide +- `epi_slide` and `epix_slide` now support `as_list_col = TRUE` when the slide computations output atomic vectors, and output a list column in "chopped" format (see `tidyr::chop`). -* `epi_slide` now works properly with slide computations that output just a +- `epi_slide` now works properly with slide computations that output just a `Date` vector, rather than converting `slide_value` to a numeric column. -* Fix `?archive_cases_dv_subset` information regarding modifications of upstream +- Fix `?archive_cases_dv_subset` information regarding modifications of upstream data by @brookslogan in (#299). -* Update to use updated `epidatr` (`fetch_tbl` -> `fetch`) by @brookslogan in +- Update to use updated `epidatr` (`fetch_tbl` -> `fetch`) by @brookslogan in (#319). 
# epiprocess 0.6.0 ## Breaking changes -* Changes to both `epi_slide` and `epix_slide`: - * The `n`, `align`, and `before` arguments have been replaced by new `before` +- Changes to both `epi_slide` and `epix_slide`: + - The `n`, `align`, and `before` arguments have been replaced by new `before` and `after` arguments. To migrate to the new version, replace these arguments in every `epi_slide` and `epix_slide` call. If you were only using - the `n` argument, then this means replacing `n = ` with `before = - - 1`. - * `epi_slide`'s time windows now extend `before` time steps before and + the `n` argument, then this means replacing `n = ` with `before = - 1`. + - `epi_slide`'s time windows now extend `before` time steps before and `after` time steps after the corresponding `ref_time_values`. See `?epi_slide` for details on matching old alignments. - * `epix_slide`'s time windows now extend `before` time steps before the + - `epix_slide`'s time windows now extend `before` time steps before the corresponding `ref_time_values` all the way through the latest data available at the corresponding `ref_time_values`. - * Slide functions now keep any grouping of `x` in their results, like + - Slide functions now keep any grouping of `x` in their results, like `mutate` and `group_modify`. - * To obtain the old behavior, `dplyr::ungroup` the slide results immediately. -* Additional `epi_slide` changes: - * When using `as_list_col = TRUE` together with `ref_time_values` and + - To obtain the old behavior, `dplyr::ungroup` the slide results immediately. +- Additional `epi_slide` changes: + - When using `as_list_col = TRUE` together with `ref_time_values` and `all_rows=TRUE`, the marker for excluded computations is now a `NULL` entry in the list column, rather than a `NA`; if you are using `tidyr::unnest()` afterward and want to keep these missing data markers, you will need to replace the `NULL` entries with `NA`s. 
Skipped computations are now more uniformly detectable using `vctrs` methods. -* Additional`epix_slide` changes: - * `epix_slide`'s `group_by` argument has been replaced by `dplyr::group_by` and +- Additional`epix_slide` changes: + - `epix_slide`'s `group_by` argument has been replaced by `dplyr::group_by` and `dplyr::ungroup` S3 methods. The `group_by` method uses "data masking" (also referred to as "tidy evaluation") rather than "tidy selection". - * Old syntax: - * `x %>% epix_slide(, group_by=c(col1, col2))` - * `x %>% epix_slide(, group_by=all_of(colname_vector))` - * New syntax: - * `x %>% group_by(col1, col2) %>% epix_slide()` - * `x %>% group_by(across(all_of(colname_vector))) %>% epix_slide()` - * `epix_slide` no longer defaults to grouping by non-`time_value`, non-`version` + - Old syntax: + - `x %>% epix_slide(, group_by=c(col1, col2))` + - `x %>% epix_slide(, group_by=all_of(colname_vector))` + - New syntax: + - `x %>% group_by(col1, col2) %>% epix_slide()` + - `x %>% group_by(across(all_of(colname_vector))) %>% epix_slide()` + - `epix_slide` no longer defaults to grouping by non-`time_value`, non-`version` key columns, instead considering all data to be in one big group. - * To obtain the old behavior, precede each `epix_slide` call lacking a + - To obtain the old behavior, precede each `epix_slide` call lacking a `group_by` argument with an appropriate `group_by` call. - * `epix_slide` now guesses `ref_time_values` to be a regularly spaced sequence + - `epix_slide` now guesses `ref_time_values` to be a regularly spaced sequence covering all the `DT$version` values and the `version_end`, rather than the distinct `DT$time_value`s. To obtain the old behavior, pass in `ref_time_values = unique($DT$time_value)`. -* `epi_archive`'s `clobberable_versions_start`'s default is now `NA`, so there +- `epi_archive`'s `clobberable_versions_start`'s default is now `NA`, so there will be no warnings by default about potential nonreproducibility. 
To obtain - the old behavior, pass in `clobberable_versions_start = - max_version_with_row_in(x)`. + the old behavior, pass in `clobberable_versions_start = max_version_with_row_in(x)`. ## Potentially-breaking changes -* Fixed `[` on grouped `epi_df`s to maintain the grouping if possible when +- Fixed `[` on grouped `epi_df`s to maintain the grouping if possible when dropping the `epi_df` class (e.g., when removing the `time_value` column). -* Fixed `epi_df` operations to be more consistent about decaying into +- Fixed `epi_df` operations to be more consistent about decaying into non-`epi_df`s when the result of the operation doesn't make sense as an `epi_df` (e.g., when removing the `time_value` column). -* Changed `bind_rows` on grouped `epi_df`s to not drop the `epi_df` class. Like +- Changed `bind_rows` on grouped `epi_df`s to not drop the `epi_df` class. Like with ungrouped `epi_df`s, the metadata of the result is still simply taken from the first result, and may be inappropriate ([#242](https://github.com/cmu-delphi/epiprocess/issues/242)). -* `epi_slide` and `epix_slide` now raise an error rather than silently filtering +- `epi_slide` and `epix_slide` now raise an error rather than silently filtering out `ref_time_values` that don't meet their expectations. ## New features -* `epix_slide`, `$slide` have a new parameter `all_versions`. With +- `epix_slide`, `$slide` have a new parameter `all_versions`. With `all_versions=TRUE`, `epix_slide` will pass a filtered `epi_archive` to each computation rather than an `epi_df` snapshot. 
This enables, e.g., performing pseudoprospective forecasts with a revision-aware forecaster using nested @@ -153,124 +154,124 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat ## Improvements -* Added `dplyr::group_by` and `dplyr::ungroup` S3 methods for `epi_archive` +- Added `dplyr::group_by` and `dplyr::ungroup` S3 methods for `epi_archive` objects, plus corresponding `$group_by` and `$ungroup` R6 methods. The `group_by` implementation supports the `.add` and `.drop` arguments, and `ungroup` supports partial ungrouping with `...`. -* `as_epi_archive`, `epi_archive$new` now perform checks for the key uniqueness +- `as_epi_archive`, `epi_archive$new` now perform checks for the key uniqueness requirement (part of [#154](https://github.com/cmu-delphi/epiprocess/issues/154)). ## Cleanup -* Added a `NEWS.md` file to track changes to the package. -* Implemented `?dplyr::dplyr_extending` for `epi_df`s +- Added a `NEWS.md` file to track changes to the package. +- Implemented `?dplyr::dplyr_extending` for `epi_df`s ([#223](https://github.com/cmu-delphi/epiprocess/issues/223)). -* Fixed various small documentation issues ([#217](https://github.com/cmu-delphi/epiprocess/issues/217)). +- Fixed various small documentation issues ([#217](https://github.com/cmu-delphi/epiprocess/issues/217)). # epiprocess 0.5.0 ## Potentially-breaking changes -* `epix_slide`, `$slide` now feed `f` an `epi_df` rather than +- `epix_slide`, `$slide` now feed `f` an `epi_df` rather than converting to a tibble/`tbl_df` first, allowing use of `epi_df` methods and metadata, and often yielding `epi_df`s out of the slide as a result. To obtain the old behavior, convert to a tibble within `f`. ## Improvements -* Fixed `epix_merge`, `$merge` always raising error on `sync="truncate"`. +- Fixed `epix_merge`, `$merge` always raising error on `sync="truncate"`. 
## Cleanup -* Added `Remotes:` entry for `genlasso`, which was removed from CRAN. -* Added `as_epi_archive` tests. -* Added missing `epix_merge` test for `sync="truncate"`. +- Added `Remotes:` entry for `genlasso`, which was removed from CRAN. +- Added `as_epi_archive` tests. +- Added missing `epix_merge` test for `sync="truncate"`. # epiprocess 0.4.0 ## Potentially-breaking changes -* Fixed `[.epi_df` to not reorder columns, which was incompatible with +- Fixed `[.epi_df` to not reorder columns, which was incompatible with downstream packages. -* Changed `[.epi_df` decay-to-tibble logic to more coherent with `epi_df`s +- Changed `[.epi_df` decay-to-tibble logic to more coherent with `epi_df`s current tolerance of nonunique keys: stopped decaying to a tibble in some cases where a unique key wouldn't have been preserved, since we don't enforce a unique key elsewhere. -* Fixed `[.epi_df` to adjust `"other_keys"` metadata when corresponding +- Fixed `[.epi_df` to adjust `"other_keys"` metadata when corresponding columns are selected out. -* Fixed `[.epi_df` to raise an error if resulting column names would be +- Fixed `[.epi_df` to raise an error if resulting column names would be nonunique. -* Fixed `[.epi_df` to drop metadata if decaying to a tibble (due to removal +- Fixed `[.epi_df` to drop metadata if decaying to a tibble (due to removal of essential columns). ## Improvements -* Added check that `epi_df` `additional_metadata` is list. -* Fixed some incorrect `as_epi_df` examples. +- Added check that `epi_df` `additional_metadata` is list. +- Fixed some incorrect `as_epi_df` examples. ## Cleanup -* Applied rename of upstream package in examples: `delphi.epidata` -> +- Applied rename of upstream package in examples: `delphi.epidata` -> `epidatr`. -* Rounded out `[.epi_df` tests. +- Rounded out `[.epi_df` tests. 
# epiprocess 0.3.0 ## Breaking changes -* `as_epi_archive`, `epi_archive$new`: - * Compactification (see below) by default may change results if working +- `as_epi_archive`, `epi_archive$new`: + - Compactification (see below) by default may change results if working directly with the `epi_archive`'s `DT` field; to disable, pass in `compactify=FALSE`. -* `epi_archive`'s wrappers and R6 methods have been updated to follow these +- `epi_archive`'s wrappers and R6 methods have been updated to follow these rules regarding reference semantics: - * `epix_` will not mutate input `epi_archive`s, but may alias them + - `epix_` will not mutate input `epi_archive`s, but may alias them or alias their fields (which should not be a worry if a user sticks to these `epix_*` functions and "regular" R functions with copy-on-write-like behavior, avoiding mutating functions `[.data.table`). - * `x$` may mutate `x`; if it mutates `x`, it will return `x` + - `x$` may mutate `x`; if it mutates `x`, it will return `x` invisibly (where this makes sense), and, for each of its fields, may either mutate the object to which it refers or reseat the reference (but not both); if `x$` does not mutate `x`, its result may contain aliases to `x` or its fields. -* `epix_merge`, `$merge`: - * Removed `...`, `locf`, and `nan` parameters. - * Changed the default behavior, which now corresponds to using +- `epix_merge`, `$merge`: + - Removed `...`, `locf`, and `nan` parameters. + - Changed the default behavior, which now corresponds to using `by=key(x$DT)` (but demanding that is the same set of column names as `key(y$DT)`), `all=TRUE`, `locf=TRUE`, `nan=NaN` (but with the post-filling step fixed to only apply to gaps, and no longer fill over `NA`s originating from `x$DT` and `y$DT`). - * `x` and `y` are no longer allowed to share names of non-`by` columns. - * `epix_merge` no longer mutates its `x` argument (but `$merge` continues + - `x` and `y` are no longer allowed to share names of non-`by` columns. 
+ - `epix_merge` no longer mutates its `x` argument (but `$merge` continues to do so). - * Removed (undocumented) capability of passing a `data.table` as `y`. -* `epix_slide`: - * Removed inappropriate/misleading `n=7` default argument (due to - reporting latency, `n=7` will *not* yield 7 days of data in a typical + - Removed (undocumented) capability of passing a `data.table` as `y`. +- `epix_slide`: + - Removed inappropriate/misleading `n=7` default argument (due to + reporting latency, `n=7` will _not_ yield 7 days of data in a typical daily-reporting surveillance data source, as one might have assumed). ## New features -* `as_epi_archive`, `epi_archive$new`: - * New `compactify` parameter allows removal of rows that are redundant for the +- `as_epi_archive`, `epi_archive$new`: + - New `compactify` parameter allows removal of rows that are redundant for the purposes of `epi_archive`'s methods, which use the last version of each observation carried forward. - * New `clobberable_versions_start` field allows marking a range of versions + - New `clobberable_versions_start` field allows marking a range of versions that could be "clobbered" (rewritten without assigning new version tags); previously, this was hard-coded as `max($DT$version)`. - * New `versions_end` field allows marking a range of versions beyond + - New `versions_end` field allows marking a range of versions beyond `max($DT$version)` that were observed, but contained no changes. -* `epix_merge`, `$merge`: - * New `sync` parameter controls what to do if `x` and `y` aren't equally +- `epix_merge`, `$merge`: + - New `sync` parameter controls what to do if `x` and `y` aren't equally up to date (i.e., if `x$versions_end` and `y$versions_end` are different). 
-* New function `epix_fill_through_version`, method +- New function `epix_fill_through_version`, method `$fill_through_version`: non-mutating & mutating way to ensure that an archive contains versions at least through some `fill_versions_end`, extrapolating according to `how` if necessary. -* Example archive data object is now constructed on demand from its +- Example archive data object is now constructed on demand from its underlying data, so it will be based on the user's version of `epi_archive` rather than an outdated R6 implementation from whenever the data object was generated. @@ -279,130 +280,130 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat ## Breaking changes -* Removed default `n=7` argument to `epix_slide`. +- Removed default `n=7` argument to `epix_slide`. ## Improvements -* Ignore `NA`s when printing `time_value` range for an `epi_archive`. -* Fixed misleading column naming in `epix_slide` example. -* Trimmed down `epi_slide` examples. -* Synced out-of-date docs. +- Ignore `NA`s when printing `time_value` range for an `epi_archive`. +- Fixed misleading column naming in `epix_slide` example. +- Trimmed down `epi_slide` examples. +- Synced out-of-date docs. ## Cleanup -* Removed dependency of some `epi_archive` tests on an example archive. +- Removed dependency of some `epi_archive` tests on an example archive. object, and made them more understandable by reading without running. -* Fixed `epi_df` tests relying on an S3 method for `epi_df` implemented +- Fixed `epi_df` tests relying on an S3 method for `epi_df` implemented externally to `epiprocess`. -* Added tests for `epi_archive` methods and wrapper functions. -* Removed some dead code. -* Made `.{Rbuild,git}ignore` files more comprehensive. +- Added tests for `epi_archive` methods and wrapper functions. +- Removed some dead code. +- Made `.{Rbuild,git}ignore` files more comprehensive. 
# epiprocess 0.1.2 ## New features -* New `new_epi_df` function is similar to `as_epi_df`, but (i) recalculates, +- New `new_epi_df` function is similar to `as_epi_df`, but (i) recalculates, overwrites, and/or drops most metadata of `x` if it has any, (ii) may still reorder the columns of `x` even if it's already an `epi_df`, and (iii) treats `x` as optional, constructing an empty `epi_df` by default. ## Improvements -* Fixed `geo_type` guessing on alphabetical strings with more than 2 +- Fixed `geo_type` guessing on alphabetical strings with more than 2 characters to yield `"custom"`, not US `"nation"`. -* Fixed `time_type` guessing to actually detect `Date`-class `time_value`s +- Fixed `time_type` guessing to actually detect `Date`-class `time_value`s regularly spaced 7 days apart as `"week"`-type as intended. -* Improved printing of `epi_df`s, `epi_archives`s. -* Fixed `as_of` to not cut off any (forecast-like) data with `time_value > - max_version`. -* Expanded `epi_df` docs to include conversion from `tsibble`/`tbl_ts` objects, +- Improved printing of `epi_df`s, `epi_archives`s. +- Fixed `as_of` to not cut off any (forecast-like) data with `time_value > +max_version`. +- Expanded `epi_df` docs to include conversion from `tsibble`/`tbl_ts` objects, usage of `other_keys`, and pre-processing objects not following the `geo_value`, `time_value` naming scheme. -* Expanded `epi_slide` examples to show how to use an `f` argument with +- Expanded `epi_slide` examples to show how to use an `f` argument with named parameters. -* Updated examples to print relevant columns given a common 80-column +- Updated examples to print relevant columns given a common 80-column terminal width. -* Added growth rate examples. -* Improved `as_epi_archive` and `epi_archive$new`/`$initialize` +- Added growth rate examples. +- Improved `as_epi_archive` and `epi_archive$new`/`$initialize` documentation, including constructing a toy archive. 
## Cleanup -* Added tests for `epi_slide`, `epi_cor`, and internal utility functions. -* Fixed currently-unused internal utility functions `MiddleL`, `MiddleR` to +- Added tests for `epi_slide`, `epi_cor`, and internal utility functions. +- Fixed currently-unused internal utility functions `MiddleL`, `MiddleR` to yield correct results on odd-length vectors. # epiprocess 0.1.1 ## New features -* New example data objects allow one to quickly experiment with `epi_df`s +- New example data objects allow one to quickly experiment with `epi_df`s and `epi_archives` without relying/waiting on an API to fetch data. ## Improvements -* Improved `epi_slide` error messaging. -* Fixed description of the appropriate parameters for an `f` argument to +- Improved `epi_slide` error messaging. +- Fixed description of the appropriate parameters for an `f` argument to `epi_slide`; previous description would give incorrect behavior if `f` had named parameters that did not receive values from `epi_slide`'s `...`. -* Added some examples throughout the package. -* Using example data objects in vignettes also speeds up vignette compilation. +- Added some examples throughout the package. +- Using example data objects in vignettes also speeds up vignette compilation. ## Cleanup -* Set up gh-actions CI. -* Added tests for `epi_df`s. +- Set up gh-actions CI. +- Added tests for `epi_df`s. # epiprocess 0.1.0 ## Implemented core functionality, vignettes -* Classes: - * `epi_df`: specialized `tbl_df` for geotemporal epidemiological time +- Classes: + - `epi_df`: specialized `tbl_df` for geotemporal epidemiological time series data, with optional metadata recording other key columns (e.g., demographic breakdowns) and `as_of` what time/version this data was current/published. Associated functions: - * `as_epi_df` converts to an `epi_df`, guessing the `geo_type`, + - `as_epi_df` converts to an `epi_df`, guessing the `geo_type`, `time_type`, `other_keys`, and `as_of` if not specified. 
- * `as_epi_df.tbl_ts` and `as_tsibble.epi_df` automatically set + - `as_epi_df.tbl_ts` and `as_tsibble.epi_df` automatically set `other_keys` and `key`&`index`, respectively. - * `epi_slide` applies a user-supplied computation to a sliding/rolling + - `epi_slide` applies a user-supplied computation to a sliding/rolling time window and user-specified groups, adding the results as new columns, and recycling/broadcasting results to keep the result size stable. Allows computation to be provided as a function, `purrr`-style formula, or tidyeval dots. Uses `slider` underneath for efficiency. - * `epi_cor` calculates Pearson, Kendall, or Spearman correlations + - `epi_cor` calculates Pearson, Kendall, or Spearman correlations between two (optionally time-shifted) variables in an `epi_df` within user-specified groups. - * Convenience function: `is_epi_df`. - * `epi_archive`: R6 class for version (patch) data for geotemporal + - Convenience function: `is_epi_df`. + - `epi_archive`: R6 class for version (patch) data for geotemporal epidemiological time series data sets. Comes with S3 methods and regular functions that wrap around this functionality for those unfamiliar with R6 methods. Associated functions: - * `as_epi_archive`: prepares an `epi_archive` object from a data frame + - `as_epi_archive`: prepares an `epi_archive` object from a data frame containing snapshots and/or patch data for every available version of the data set. - * `as_of`: extracts a snapshot of the data set as of some requested + - `as_of`: extracts a snapshot of the data set as of some requested version, in `epi_df` format. - * `epix_slide`, `$slide`: similar to `epi_slide`, but for + - `epix_slide`, `$slide`: similar to `epi_slide`, but for `epi_archive`s; for each requested `ref_time_value` and group, applies a time window and user-specified computation to a snapshot of the data as of `ref_time_value`. 
- * `epix_merge`, `$merge`: like `merge` for `epi_archive`s, + - `epix_merge`, `$merge`: like `merge` for `epi_archive`s, but allowing for the last version of each observation to be carried forward to fill in gaps in `x` or `y`. - * Convenience function: `is_epi_archive`. -* Additional functions: - * `growth_rate`: estimates growth rate of a time series using one of a few + - Convenience function: `is_epi_archive`. +- Additional functions: + - `growth_rate`: estimates growth rate of a time series using one of a few built-in `method`s based on relative change, linear regression, smoothing splines, or trend filtering. - * `detect_outlr`: applies one or more outlier detection methods to a given + - `detect_outlr`: applies one or more outlier detection methods to a given signal variable, and optionally aggregates the outputs to create a consensus result. - * `detect_outlr_rm`: outlier detection function based on a + - `detect_outlr_rm`: outlier detection function based on a rolling-median-based outlier detection function; one of the methods included in `detect_outlr`. - * `detect_outlr_stl`: outlier detection function based on a seasonal-trend + - `detect_outlr_stl`: outlier detection function based on a seasonal-trend decomposition using LOESS (STL); one of the methods included in `detect_outlr`. diff --git a/R/archive.R b/R/archive.R index 08540955..428cce76 100644 --- a/R/archive.R +++ b/R/archive.R @@ -14,7 +14,7 @@ #' @param version_bound the version bound to validate #' @param x a data frame containing a version column with which to check #' compatibility -#' @param na_ok Boolean; is `NULL` an acceptable "bound"? (If so, `NULL` will +#' @param na_ok Boolean; is `NA` an acceptable "bound"? (If so, `NA` will #' have a special context-dependent meaning.) 
#' @param version_bound_arg optional string; what to call the version bound in #' error messages @@ -22,47 +22,31 @@ #' @section Side effects: raises an error if version bound appears invalid #' #' @noRd -validate_version_bound <- function(version_bound, x, na_ok, +validate_version_bound <- function(version_bound, x, na_ok = FALSE, version_bound_arg = rlang::caller_arg(version_bound), x_arg = rlang::caller_arg(version_bound)) { - # We might want some (optional?) validation here to detect internal bugs. - if (length(version_bound) != 1L) { - # Check for length-1-ness fairly early so we don't have to worry as much - # about our `if`s receiving non-length-1 "Boolean"s. - Abort( - sprintf( - "`version_bound` must have length 1, but instead was length %d", - length(version_bound) - ), - class = sprintf("epiprocess__%s_is_not_length_1", version_bound_arg) + if (is.null(version_bound)) { + cli_abort( + "{version_bound_arg} cannot be NULL" ) - } else if (is.na(version_bound)) { - # Check for NA before class&type, as any-class&type NA should be fine for - # our purposes, and some version classes&types might not have their own NA - # value to pass in. - if (na_ok) { - # Looks like a valid version bound; exit without error. 
- return(invisible(NULL)) - } else { - Abort(sprintf( - "`%s` must not satisfy `is.na` (NAs are not allowed for this kind of version bound)", - version_bound_arg - ), class = sprintf("epiprocess__%s_is_na", version_bound_arg)) - } - } else if (!identical(class(version_bound), class(x[["version"]])) || - !identical(typeof(version_bound), typeof(x[["version"]]))) { - Abort(sprintf( - "`class(%1$s)` must be identical to `class(%2$s)` and `typeof(%1$s)` must be identical to `typeof(%2$s)`", - version_bound_arg, - # '{x_arg}[["version"]]' except adding parentheses if needed: - rlang::expr_deparse(rlang::new_call( - quote(`[[`), rlang::pairlist2(rlang::parse_expr(x_arg), "version") - )) - ), class = sprintf("epiprocess__%s_has_invalid_class_or_typeof", version_bound_arg)) - } else { - # Looks like a valid version bound; exit without error. + } + if (na_ok && is.na(version_bound)) { return(invisible(NULL)) } + if (!test_set_equal(class(version_bound), class(x[["version"]]))) { + cli_abort( + "{version_bound_arg} must have the same classes as x$version, + which is {class(x$version)}", + ) + } + if (!test_set_equal(typeof(version_bound), typeof(x[["version"]]))) { + cli_abort( + "{version_bound_arg} must have the same types as x$version, + which is {typeof(x$version)}", + ) + } + + return(invisible(NULL)) } #' `max(x$version)`, with error if `x` has 0 rows @@ -77,13 +61,18 @@ validate_version_bound <- function(version_bound, x, na_ok, #' @export max_version_with_row_in <- function(x) { if (nrow(x) == 0L) { - Abort(sprintf("`nrow(x)==0L`, representing a data set history with no row up through the latest observed version, but we don't have a sensible guess at what version that is, or whether any of the empty versions might be clobbered in the future; if we use `x` to form an `epi_archive`, then `clobberable_versions_start` and `versions_end` must be manually specified."), + cli_abort( + "`nrow(x)==0L`, representing a data set history with no row up through the + latest 
observed version, but we don't have a sensible guess at what version + that is, or whether any of the empty versions might be clobbered in the + future; if we use `x` to form an `epi_archive`, then + `clobberable_versions_start` and `versions_end` must be manually specified.", class = "epiprocess__max_version_cannot_be_used" ) } else { version_col <- purrr::pluck(x, "version") # error not NULL if doesn't exist if (anyNA(version_col)) { - Abort("version values cannot be NA", + cli_abort("version values cannot be NA", class = "epiprocess__version_values_must_not_be_na" ) } else { @@ -278,26 +267,15 @@ epi_archive <- initialize = function(x, geo_type, time_type, other_keys, additional_metadata, compactify, clobberable_versions_start, versions_end) { - # Check that we have a data frame - if (!is.data.frame(x)) { - Abort("`x` must be a data frame.") - } - - # Check that we have geo_value, time_value, version columns - if (!("geo_value" %in% names(x))) { - Abort("`x` must contain a `geo_value` column.") - } - if (!("time_value" %in% names(x))) { - Abort("`x` must contain a `time_value` column.") - } - if (!("version" %in% names(x))) { - Abort("`x` must contain a `version` column.") - } - if (anyNA(x$version)) { - Abort("`x$version` must not contain `NA`s", - class = "epiprocess__version_values_must_not_be_na" + assert_data_frame(x) + if (!test_subset(c("geo_value", "time_value", "version"), names(x))) { + cli_abort( + "Columns `geo_value`, `time_value`, and `version` must be present in `x`." 
) } + if (anyMissing(x$version)) { + cli_abort("Column `version` must not contain missing values.") + } # If geo type is missing, then try to guess it if (missing(geo_type)) { @@ -312,24 +290,21 @@ epi_archive <- # Finish off with small checks on keys variables and metadata if (missing(other_keys)) other_keys <- NULL if (missing(additional_metadata)) additional_metadata <- list() - if (!all(other_keys %in% names(x))) { - Abort("`other_keys` must be contained in the column names of `x`.") + if (!test_subset(other_keys, names(x))) { + cli_abort("`other_keys` must be contained in the column names of `x`.") } if (any(c("geo_value", "time_value", "version") %in% other_keys)) { - Abort("`other_keys` cannot contain \"geo_value\", \"time_value\", or \"version\".") + cli_abort("`other_keys` cannot contain \"geo_value\", \"time_value\", or \"version\".") } - if (any(names(additional_metadata) %in% - c("geo_type", "time_type"))) { - Warn("`additional_metadata` names overlap with existing metadata fields \"geo_type\", \"time_type\".") + if (any(names(additional_metadata) %in% c("geo_type", "time_type"))) { + cli_warn("`additional_metadata` names overlap with existing metadata fields \"geo_type\", \"time_type\".") } # Conduct checks and apply defaults for `compactify` if (missing(compactify)) { compactify <- NULL - } else if (!rlang::is_bool(compactify) && - !rlang::is_null(compactify)) { - Abort("compactify must be boolean or null.") } + assert_logical(compactify, len = 1, null.ok = TRUE) # Apply defaults and conduct checks for # `clobberable_versions_start`, `versions_end`: @@ -342,7 +317,7 @@ epi_archive <- validate_version_bound(clobberable_versions_start, x, na_ok = TRUE) validate_version_bound(versions_end, x, na_ok = FALSE) if (nrow(x) > 0L && versions_end < max(x[["version"]])) { - Abort( + cli_abort( sprintf( "`versions_end` was %s, but `x` contained updates for a later version or versions, up through %s", @@ -352,7 +327,7 @@ epi_archive <- ) } if 
(!is.na(clobberable_versions_start) && clobberable_versions_start > versions_end) { - Abort( + cli_abort( sprintf( "`versions_end` was %s, but a `clobberable_versions_start` of %s indicated that there were later observed versions", @@ -373,7 +348,11 @@ epi_archive <- maybe_first_duplicate_key_row_index <- anyDuplicated(DT, by = key(DT)) if (maybe_first_duplicate_key_row_index != 0L) { - Abort("`x` must have one row per unique combination of the key variables. If you have additional key variables other than `geo_value`, `time_value`, and `version`, such as an age group column, please specify them in `other_keys`. Otherwise, check for duplicate rows and/or conflicting values for the same measurement.", + cli_abort("`x` must have one row per unique combination of the key variables. If you + have additional key variables other than `geo_value`, `time_value`, and + `version`, such as an age group column, please specify them in `other_keys`. + Otherwise, check for duplicate rows and/or conflicting values for the same + measurement.", class = "epiprocess__epi_archive_requires_unique_key" ) } @@ -410,24 +389,22 @@ epi_archive <- # Warns about redundant rows if (is.null(compactify) && nrow(elim) > 0) { - warning_intro <- break_str(paste( - "Found rows that appear redundant based on", - "last (version of each) observation carried forward;", - 'these rows have been removed to "compactify" and save space:' - )) - + warning_intro <- cli::format_inline( + "Found rows that appear redundant based on + last (version of each) observation carried forward; + these rows have been removed to 'compactify' and save space:", + keep_whitespace = FALSE + ) warning_data <- paste(collapse = "\n", capture.output(print(elim, topn = 3L, nrows = 7L))) - - warning_outro <- break_str(paste( - "Built-in `epi_archive` functionality should be unaffected,", - "but results may change if you work directly with its fields (such as `DT`).", - "See `?as_epi_archive` for details.", - "To silence this warning 
but keep compactification,", - "you can pass `compactify=TRUE` when constructing the archive." - )) - + warning_outro <- cli::format_inline( + "Built-in `epi_archive` functionality should be unaffected, + but results may change if you work directly with its fields (such as `DT`). + See `?as_epi_archive` for details. + To silence this warning but keep compactification, + you can pass `compactify=TRUE` when constructing the archive.", + keep_whitespace = FALSE + ) warning_message <- paste(sep = "\n", warning_intro, warning_data, warning_outro) - rlang::warn(warning_message, class = "epiprocess__compactify_default_removed_rows") } @@ -447,8 +424,8 @@ epi_archive <- print = function(class = TRUE, methods = TRUE) { cli_inform( c( - ">" = if (class) {"An `epi_archive` object, with metadata:"}, - "i" = if (length(setdiff(key(self$DT), c('geo_value', 'time_value', 'version'))) > 0) { + ">" = if (class) "An `epi_archive` object, with metadata:", + "i" = if (length(setdiff(key(self$DT), c("geo_value", "time_value", "version"))) > 0) { "Non-standard DT keys: {setdiff(key(self$DT), c('geo_value', 'time_value', 'version'))}" }, "i" = "Min/max time values: {min(self$DT$time_value)} / {max(self$DT$time_value)}", @@ -457,12 +434,12 @@ epi_archive <- "Clobberable versions start: {self$clobberable_versions_start}" }, "i" = "Versions end: {self$versions_end}", - "i" = if (methods) {"Public R6 methods: {names(epi_archive$public_methods)}"}, + "i" = if (methods) "Public R6 methods: {names(epi_archive$public_methods)}", "i" = "A preview of the table ({nrow(self$DT)} rows x {ncol(self$DT)} columns):" ) ) - return(invisible(self$DT %>% print)) + return(invisible(self$DT %>% print())) }, ##### #' @description Generates a snapshot in `epi_df` format as of a given version. 
@@ -493,24 +470,28 @@ epi_archive <- if (length(other_keys) == 0) other_keys <- NULL # Check a few things on max_version - if (!identical(class(max_version), class(self$DT$version)) || - !identical(typeof(max_version), typeof(self$DT$version))) { - Abort("`max_version` and `DT$version` must have same `class` and `typeof`.") - } - if (length(max_version) != 1) { - Abort("`max_version` cannot be a vector.") + if (!test_set_equal(class(max_version), class(self$DT$version))) { + cli_abort( + "`max_version` must have the same classes as `self$DT$version`." + ) } - if (is.na(max_version)) { - Abort("`max_version` must not be NA.") + if (!test_set_equal(typeof(max_version), typeof(self$DT$version))) { + cli_abort( + "`max_version` must have the same types as `self$DT$version`." + ) } + assert_scalar(max_version, na.ok = FALSE) if (max_version > self$versions_end) { - Abort("`max_version` must be at most `self$versions_end`.") - } - if (!rlang::is_bool(all_versions)) { - Abort("`all_versions` must be TRUE or FALSE.") + cli_abort("`max_version` must be at most `self$versions_end`.") } + assert_logical(all_versions, len = 1) if (!is.na(self$clobberable_versions_start) && max_version >= self$clobberable_versions_start) { - Warn('Getting data as of some recent version which could still be overwritten (under routine circumstances) without assigning a new version number (a.k.a. "clobbered"). Thus, the snapshot that we produce here should not be expected to be reproducible later. See `?epi_archive` for more info and `?epix_as_of` on how to muffle.', + cli_warn( + 'Getting data as of some recent version which could still be + overwritten (under routine circumstances) without assigning a new + version number (a.k.a. "clobbered"). Thus, the snapshot that we + produce here should not be expected to be reproducible later. 
See + `?epi_archive` for more info and `?epix_as_of` on how to muffle.', class = "epiprocess__snapshot_as_of_clobberable_version" ) } @@ -526,8 +507,7 @@ epi_archive <- return( # Make sure to use data.table ways of filtering and selecting - self$DT[time_value >= min_time_value & - version <= max_version, ] %>% + self$DT[time_value >= min_time_value & version <= max_version, ] %>% unique( by = c("geo_value", "time_value", other_keys), fromLast = TRUE @@ -573,7 +553,7 @@ epi_archive <- nonkey_cols <- setdiff(names(self$DT), key(self$DT)) next_version_tag <- next_after(self$versions_end) if (next_version_tag > fill_versions_end) { - Abort(sprintf(paste( + cli_abort(sprintf(paste( "Apparent problem with `next_after` method:", "archive contained observations through version %s", "and the next possible version was supposed to be %s,", @@ -621,25 +601,21 @@ epi_archive <- #' @param x as in [`epix_truncate_versions_after`] #' @param max_version as in [`epix_truncate_versions_after`] truncate_versions_after = function(max_version) { - if (length(max_version) != 1) { - Abort("`max_version` cannot be a vector.") - } - if (is.na(max_version)) { - Abort("`max_version` must not be NA.") + if (!test_set_equal(class(max_version), class(self$DT$version))) { + cli_abort("`max_version` must have the same classes as `self$DT$version`.") } - if (!identical(class(max_version), class(self$DT$version)) || - !identical(typeof(max_version), typeof(self$DT$version))) { - Abort("`max_version` and `DT$version` must have same `class` and `typeof`.") + if (!test_set_equal(typeof(max_version), typeof(self$DT$version))) { + cli_abort("`max_version` must have the same types as `self$DT$version`.") } + assert_scalar(max_version, na.ok = FALSE) if (max_version > self$versions_end) { - Abort("`max_version` must be at most `self$versions_end`.") + cli_abort("`max_version` must be at most `self$versions_end`.") } self$DT <- self$DT[self$DT$version <= max_version, colnames(self$DT), with = FALSE] # (^ 
this filter operation seems to always copy the DT, even if it # keeps every entry; we don't guarantee this behavior in # documentation, though, so we could change to alias in this case) - if (!is.na(self$clobberable_versions_start) && - self$clobberable_versions_start > max_version) { + if (!is.na(self$clobberable_versions_start) && self$clobberable_versions_start > max_version) { self$clobberable_versions_start <- NA } self$versions_end <- max_version @@ -662,7 +638,7 @@ epi_archive <- ) if (length(epi_archive$private_fields) != 0L) { - Abort("expected no private fields in epi_archive", + cli_abort("expected no private fields in epi_archive", internal = TRUE ) } diff --git a/R/correlation.R b/R/correlation.R index a4a56d1e..e4272fdd 100644 --- a/R/correlation.R +++ b/R/correlation.R @@ -78,12 +78,11 @@ epi_cor <- function(x, var1, var2, dt1 = 0, dt2 = 0, shift_by = geo_value, cor_by = geo_value, use = "na.or.complete", method = c("pearson", "kendall", "spearman")) { - # Check we have an `epi_df` object - if (!inherits(x, "epi_df")) Abort("`x` must be of class `epi_df`.") + assert_class(x, "epi_df") # Check that we have variables to do computations on - if (missing(var1)) Abort("`var1` must be specified.") - if (missing(var2)) Abort("`var2` must be specified.") + if (missing(var1)) cli_abort("`var1` must be specified.") + if (missing(var2)) cli_abort("`var2` must be specified.") var1 <- enquo(var1) var2 <- enquo(var2) diff --git a/R/data.R b/R/data.R index c528039c..2a5e5738 100644 --- a/R/data.R +++ b/R/data.R @@ -144,7 +144,7 @@ delayed_assign_with_unregister_awareness <- function(x, value, # all.) 
rlang::eval_bare(rlang::quo_get_expr(value_quosure), rlang::quo_get_env(value_quosure)), error = function(err) { - Abort( + cli_abort( paste( "An error was raised while attempting to evaluate a promise", "(prepared with `delayed_assign_with_unregister_awareness`)", diff --git a/R/epi_df.R b/R/epi_df.R index 91e6c9d9..1c648ff8 100644 --- a/R/epi_df.R +++ b/R/epi_df.R @@ -114,14 +114,9 @@ NULL #' @export new_epi_df <- function(x = tibble::tibble(), geo_type, time_type, as_of, additional_metadata = list(), ...) { - # Check that we have a data frame - if (!is.data.frame(x)) { - Abort("`x` must be a data frame.") - } + assert_data_frame(x) + assert_list(additional_metadata) - if (!is.list(additional_metadata)) { - Abort("`additional_metadata` must be a list type.") - } if (is.null(additional_metadata[["other_keys"]])) { additional_metadata[["other_keys"]] <- character(0L) } @@ -302,13 +297,9 @@ as_epi_df.epi_df <- function(x, ...) { #' @export as_epi_df.tbl_df <- function(x, geo_type, time_type, as_of, additional_metadata = list(), ...) { - # Check that we have geo_value and time_value columns - if (!("geo_value" %in% names(x))) { - Abort("`x` must contain a `geo_value` column.") - } - if (!("time_value" %in% names(x))) { - Abort("`x` must contain a `time_value` column.") - } + if (!test_subset(c("geo_value", "time_value"), names(x))) cli_abort( + "Columns `geo_value` and `time_value` must be present in `x`." + ) new_epi_df( x, geo_type, time_type, as_of, diff --git a/R/epiprocess.R b/R/epiprocess.R index 254ebd01..05737d58 100644 --- a/R/epiprocess.R +++ b/R/epiprocess.R @@ -4,7 +4,11 @@ #' measured over space and time, and offers associated utilities to perform #' basic signal processing tasks. 
#' -#' @importFrom checkmate assert assert_character assert_int anyInfinite +#' @importFrom checkmate assert assert_scalar assert_data_frame anyMissing +#' assert_logical assert_list assert_character assert_class +#' assert_int assert_numeric check_data_frame vname check_atomic +#' anyInfinite test_subset test_set_equal +#' @importFrom cli cli_abort cli_inform cli_warn #' @name epiprocess "_PACKAGE" utils::globalVariables(c(".x", ".group_key", ".ref_time_value")) diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R index f083cf93..9ddad684 100644 --- a/R/grouped_epi_archive.R +++ b/R/grouped_epi_archive.R @@ -53,41 +53,24 @@ grouped_epi_archive <- public = list( initialize = function(ungrouped, vars, drop) { if (inherits(ungrouped, "grouped_epi_archive")) { - Abort("`ungrouped` must not already be grouped (neither automatic regrouping nor nested grouping is supported). Either use `group_by` with `.add=TRUE`, or `ungroup` first.", + cli_abort("`ungrouped` must not already be grouped (neither automatic regrouping nor nested grouping is supported). 
Either use `group_by` with `.add=TRUE`, or `ungroup` first.", class = "epiprocess__grouped_epi_archive__ungrouped_arg_is_already_grouped", epiprocess__ungrouped_class = class(ungrouped), epiprocess__ungrouped_groups = groups(ungrouped) ) } - if (!inherits(ungrouped, "epi_archive")) { - Abort("`ungrouped` must be an epi_archive", - class = "epiprocess__grouped_epi_archive__ungrouped_arg_is_not_epi_archive", - epiprocess__ungrouped_class = class(ungrouped) - ) - } - if (!is.character(vars)) { - Abort("`vars` must be a character vector (any tidyselection should have already occurred in a helper method).", - class = "epiprocess__grouped_epi_archive__vars_is_not_chr", - epiprocess__vars_class = class(vars), - epiprocess__vars_type = typeof(vars) - ) - } - if (!all(vars %in% names(ungrouped$DT))) { - Abort("`vars` must be selected from the names of columns of `ungrouped$DT`", - class = "epiprocess__grouped_epi_archive__vars_contains_invalid_entries", - epiprocess__vars = vars, - epiprocess__DT_names = names(ungrouped$DT) + assert_class(ungrouped, "epi_archive") + assert_character(vars) + if (!test_subset(vars, names(ungrouped$DT))) { + cli_abort( + "All grouping variables `vars` must be present in the data.", ) } if ("version" %in% vars) { - Abort("`version` has a special interpretation and cannot be used by itself as a grouping variable") - } - if (!rlang::is_bool(drop)) { - Abort("`drop` must be a Boolean", - class = "epiprocess__grouped_epi_archive__drop_is_not_bool", - epiprocess__drop = drop - ) + cli_abort("`version` has a special interpretation and cannot be used by itself as a grouping variable") } + assert_logical(drop, len = 1) + # ----- private$ungrouped <- ungrouped private$vars <- vars @@ -136,11 +119,9 @@ grouped_epi_archive <- invisible(self) }, group_by = function(..., .add = FALSE, .drop = dplyr::group_by_drop_default(self)) { - if (!rlang::is_bool(.add)) { - Abort("`.add` must be a Boolean") - } + assert_logical(.add, len = 1) if (!.add) { - 
Abort('`group_by` on a `grouped_epi_archive` with `.add=FALSE` is forbidden + cli_abort('`group_by` on a `grouped_epi_archive` with `.add=FALSE` is forbidden (neither automatic regrouping nor nested grouping is supported). If you want to "regroup", replacing the existing grouping vars, `ungroup` first and then `group_by`. If you want to add to the existing grouping vars, call `group_by` specifying `.add=TRUE`. @@ -210,7 +191,7 @@ grouped_epi_archive <- # early development versions and much more likely to be clutter than # informative in the signature. if ("group_by" %in% nse_dots_names(...)) { - Abort(" + cli_abort(" The `group_by` argument to `slide` has been removed; please use the `group_by` S3 generic function or `$group_by` R6 method before the slide instead. (If you were instead trying to pass a @@ -221,7 +202,7 @@ grouped_epi_archive <- ", class = "epiprocess__epix_slide_group_by_parameter_deprecated") } if ("all_rows" %in% nse_dots_names(...)) { - Abort(" + cli_abort(" The `all_rows` argument has been removed from `epix_slide` (but is still supported in `epi_slide`). Add rows for excluded results with a manual join instead. 
@@ -230,15 +211,14 @@ grouped_epi_archive <- if (missing(ref_time_values)) { ref_time_values <- epix_slide_ref_time_values_default(private$ungrouped) - } else if (length(ref_time_values) == 0L) { - Abort("`ref_time_values` must have at least one element.") - } else if (any(is.na(ref_time_values))) { - Abort("`ref_time_values` must not include `NA`.") - } else if (anyDuplicated(ref_time_values) != 0L) { - Abort("`ref_time_values` must not contain any duplicates; use `unique` if appropriate.") - } else if (any(ref_time_values > private$ungrouped$versions_end)) { - Abort("All `ref_time_values` must be `<=` the `versions_end`.") } else { + assert_numeric(ref_time_values, min.len = 1L, null.ok = FALSE, any.missing = FALSE) + if (any(ref_time_values > private$ungrouped$versions_end)) { + cli_abort("Some `ref_time_values` are greater than the latest version in the archive.") + } + if (anyDuplicated(ref_time_values) != 0L) { + cli_abort("Some `ref_time_values` are duplicated.") + } # Sort, for consistency with `epi_slide`, although the current # implementation doesn't take advantage of it. 
ref_time_values <- sort(ref_time_values) @@ -246,16 +226,14 @@ grouped_epi_archive <- # Validate and pre-process `before`: if (missing(before)) { - Abort("`before` is required (and must be passed by name); + cli_abort("`before` is required (and must be passed by name); if you did not want to apply a sliding window but rather to map `as_of` and `f` across various `ref_time_values`, pass a large `before` value (e.g., if time steps are days, `before=365000`).") } before <- vctrs::vec_cast(before, integer()) - if (length(before) != 1L || is.na(before) || before < 0L) { - Abort("`before` must be length-1, non-NA, non-negative.") - } + assert_int(before, lower = 0L, null.ok = FALSE, na.ok = FALSE) # If a custom time step is specified, then redefine units @@ -265,15 +243,9 @@ grouped_epi_archive <- new_col <- sym(new_col_name) # Validate rest of parameters: - if (!rlang::is_bool(as_list_col)) { - Abort("`as_list_col` must be TRUE or FALSE.") - } - if (!(rlang::is_string(names_sep) || is.null(names_sep))) { - Abort("`names_sep` must be a (single) string or NULL.") - } - if (!rlang::is_bool(all_versions)) { - Abort("`all_versions` must be TRUE or FALSE.") - } + assert_logical(as_list_col, len = 1L) + assert_logical(all_versions, len = 1L) + assert_character(names_sep, len = 1L, null.ok = TRUE) # Computation for one group, one time value comp_one_grp <- function(.data_group, .group_key, @@ -290,9 +262,7 @@ grouped_epi_archive <- .data_group <- .data_group$DT } - if (!(is.atomic(comp_value) || is.data.frame(comp_value))) { - Abort("The slide computation must return an atomic vector or a data frame.") - } + assert(check_atomic(comp_value, any.missing = TRUE), check_data_frame(comp_value), combine = "or", .var.name = vname(comp_value)) # Label every result row with the `ref_time_value` res <- list(time_value = ref_time_value) @@ -312,10 +282,10 @@ grouped_epi_archive <- if (missing(f)) { quos <- enquos(...) 
if (length(quos) == 0) { - Abort("If `f` is missing then a computation must be specified via `...`.") + cli_abort("If `f` is missing then a computation must be specified via `...`.") } if (length(quos) > 1) { - Abort("If `f` is missing then only a single computation can be specified via `...`.") + cli_abort("If `f` is missing then only a single computation can be specified via `...`.") } f <- quos[[1]] diff --git a/R/growth_rate.R b/R/growth_rate.R index f54d1277..b584f7e3 100644 --- a/R/growth_rate.R +++ b/R/growth_rate.R @@ -118,10 +118,8 @@ growth_rate <- function(x = seq_along(y), y, x0 = x, h = 7, log_scale = FALSE, dup_rm = FALSE, na_rm = FALSE, ...) { # Check x, y, x0 - if (length(x) != length(y)) Abort("`x` and `y` must have the same length.") - if (!all(x0 %in% x)) Abort("`x0` must be a subset of `x`.") - - # Check the method + if (length(x) != length(y)) cli_abort("`x` and `y` must have the same length.") + if (!all(x0 %in% x)) cli_abort("`x0` must be a subset of `x`.") method <- match.arg(method) # Arrange in increasing order of x @@ -137,7 +135,7 @@ growth_rate <- function(x = seq_along(y), y, x0 = x, if (dup_rm) { o <- !duplicated(x) if (any(!o)) { - Warn("`x` contains duplicate values. (If being run on a column in an `epi_df`, did you group by relevant key variables?)") + cli_warn("`x` contains duplicate values. 
(If being run on a column in an `epi_df`, did you group by relevant key variables?)") } x <- x[o] y <- y[o] @@ -238,7 +236,7 @@ growth_rate <- function(x = seq_along(y), y, x0 = x, # Check cv and df combo if (is.numeric(df)) cv <- FALSE if (!cv && !(is.numeric(df) && df == round(df))) { - Abort("If `cv = FALSE`, then `df` must be an integer.") + cli_abort("If `cv = FALSE`, then `df` must be an integer.") } # Compute trend filtering path diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index 45db2855..43b816bc 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -77,7 +77,7 @@ #' #' @export epix_as_of <- function(x, max_version, min_time_value = -Inf, all_versions = FALSE) { - if (!inherits(x, "epi_archive")) Abort("`x` must be of class `epi_archive`.") + assert_class(x, "epi_archive") return(x$as_of(max_version, min_time_value, all_versions = all_versions)) } @@ -113,7 +113,7 @@ epix_as_of <- function(x, max_version, min_time_value = -Inf, all_versions = FAL #' @return An `epi_archive` epix_fill_through_version <- function(x, fill_versions_end, how = c("na", "locf")) { - if (!inherits(x, "epi_archive")) Abort("`x` must be of class `epi_archive`.") + assert_class(x, "epi_archive") # Enclosing parentheses drop the invisibility flag. See description above of # potential mutation and aliasing behavior. 
(x$clone()$fill_through_version(fill_versions_end, how = how)) @@ -179,31 +179,25 @@ epix_fill_through_version <- function(x, fill_versions_end, epix_merge <- function(x, y, sync = c("forbid", "na", "locf", "truncate"), compactify = TRUE) { - if (!inherits(x, "epi_archive")) { - Abort("`x` must be of class `epi_archive`.") - } - - if (!inherits(y, "epi_archive")) { - Abort("`y` must be of class `epi_archive`.") - } - + assert_class(x, "epi_archive") + assert_class(y, "epi_archive") sync <- rlang::arg_match(sync) if (!identical(x$geo_type, y$geo_type)) { - Abort("`x` and `y` must have the same `$geo_type`") + cli_abort("`x` and `y` must have the same `$geo_type`") } if (!identical(x$time_type, y$time_type)) { - Abort("`x` and `y` must have the same `$time_type`") + cli_abort("`x` and `y` must have the same `$time_type`") } if (length(x$additional_metadata) != 0L) { - Warn("x$additional_metadata won't appear in merge result", + cli_warn("x$additional_metadata won't appear in merge result", class = "epiprocess__epix_merge_ignores_additional_metadata" ) } if (length(y$additional_metadata) != 0L) { - Warn("y$additional_metadata won't appear in merge result", + cli_warn("y$additional_metadata won't appear in merge result", class = "epiprocess__epix_merge_ignores_additional_metadata" ) } @@ -222,7 +216,7 @@ epix_merge <- function(x, y, # partially-mutated `x` on failure. 
if (sync == "forbid") { if (!identical(x$versions_end, y$versions_end)) { - Abort(paste( + cli_abort(paste( "`x` and `y` were not equally up to date version-wise:", "`x$versions_end` was not identical to `y$versions_end`;", "either ensure that `x` and `y` are equally up to date before merging,", @@ -242,7 +236,7 @@ epix_merge <- function(x, y, x_DT <- x$DT[x[["DT"]][["version"]] <= new_versions_end, names(x$DT), with = FALSE] y_DT <- y$DT[y[["DT"]][["version"]] <= new_versions_end, names(y$DT), with = FALSE] } else { - Abort("unimplemented") + cli_abort("unimplemented") } # key(x_DT) should be the same as key(x$DT) and key(y_DT) should be the same @@ -257,7 +251,7 @@ epix_merge <- function(x, y, x_DT_key_as_expected <- identical(key(x$DT), key(x_DT)) y_DT_key_as_expected <- identical(key(y$DT), key(y_DT)) if (!x_DT_key_as_expected || !y_DT_key_as_expected) { - Warn(" + cli_warn(" `epiprocess` internal warning (please report): pre-processing for epix_merge unexpectedly resulted in an intermediate data table (or tables) with a different key than the corresponding input archive. @@ -272,7 +266,7 @@ epix_merge <- function(x, y, # sensible default treatment of count-type and rate-type value columns would # differ. 
if (!identical(sort(key(x_DT)), sort(key(y_DT)))) { - Abort(" + cli_abort(" The archives must have the same set of key column names; if the key columns represent the same things, just with different names, please retry after manually renaming to match; if they @@ -289,14 +283,14 @@ epix_merge <- function(x, y, # version carried forward via rolling joins by <- key(x_DT) # = some perm of key(y_DT) if (!all(c("geo_value", "time_value", "version") %in% key(x_DT))) { - Abort('Invalid `by`; `by` is currently set to the common `key` of + cli_abort('Invalid `by`; `by` is currently set to the common `key` of the two archives, and is expected to contain "geo_value", "time_value", and "version".', class = "epiprocess__epi_archive_must_have_required_key_cols" ) } if (length(by) < 1L || utils::tail(by, 1L) != "version") { - Abort('Invalid `by`; `by` is currently set to the common `key` of + cli_abort('Invalid `by`; `by` is currently set to the common `key` of the two archives, and is expected to have a "version" as the last key col.', class = "epiprocess__epi_archive_must_have_version_at_end_of_key" @@ -305,7 +299,7 @@ epix_merge <- function(x, y, x_nonby_colnames <- setdiff(names(x_DT), by) y_nonby_colnames <- setdiff(names(y_DT), by) if (length(intersect(x_nonby_colnames, y_nonby_colnames)) != 0L) { - Abort(" + cli_abort(" `x` and `y` DTs have overlapping non-by column names; this is currently not supported; please manually fix up first: any overlapping columns that can are key-like should be @@ -314,7 +308,7 @@ epix_merge <- function(x, y, } x_by_vals <- x_DT[, by, with = FALSE] if (anyDuplicated(x_by_vals) != 0L) { - Abort(" + cli_abort(" The `by` columns must uniquely determine rows of `x$DT`; the `by` is currently set to the common `key` of the two archives, so this can be resolved by adding key-like columns @@ -323,7 +317,7 @@ epix_merge <- function(x, y, } y_by_vals <- y_DT[, by, with = FALSE] if (anyDuplicated(y_by_vals) != 0L) { - Abort(" + cli_abort(" The `by` 
columns must uniquely determine rows of `y$DT`; the `by` is currently set to the common `key` of the two archives, so this can be resolved by adding key-like columns @@ -409,11 +403,7 @@ epix_merge <- function(x, y, #' #' @noRd new_col_modify_recorder_df <- function(parent_df) { - if (!inherits(parent_df, "data.frame")) { - Abort('`parent_df` must inherit class `"data.frame"`', - internal = TRUE - ) - } + assert_class(parent_df, "data.frame") `class<-`(parent_df, c("col_modify_recorder_df", class(parent_df))) } @@ -425,11 +415,7 @@ new_col_modify_recorder_df <- function(parent_df) { #' #' @noRd destructure_col_modify_recorder_df <- function(col_modify_recorder_df) { - if (!inherits(col_modify_recorder_df, "col_modify_recorder_df")) { - Abort('`col_modify_recorder_df` must inherit class `"col_modify_recorder_df"`', - internal = TRUE - ) - } + assert_class(col_modify_recorder_df, "col_modify_recorder_df") list( unchanged_parent_df = col_modify_recorder_df %>% `attr<-`("epiprocess::col_modify_recorder_df::cols", NULL) %>% @@ -451,7 +437,7 @@ destructure_col_modify_recorder_df <- function(col_modify_recorder_df) { #' @noRd dplyr_col_modify.col_modify_recorder_df <- function(data, cols) { if (!is.null(attr(data, "epiprocess::col_modify_recorder_df::cols", exact = TRUE))) { - Abort("`col_modify_recorder_df` can only record `cols` once", + cli_abort("`col_modify_recorder_df` can only record `cols` once", internal = TRUE ) } @@ -676,19 +662,17 @@ epix_detailed_restricted_mutate <- function(.data, ...) { group_by.epi_archive <- function(.data, ..., .add = FALSE, .drop = dplyr::group_by_drop_default(.data)) { # `add` makes no difference; this is an ungrouped `epi_archive`. detailed_mutate <- epix_detailed_restricted_mutate(.data, ...) 
- if (!rlang::is_bool(.drop)) { - Abort("`.drop` must be TRUE or FALSE") - } + assert_logical(.drop) if (!.drop) { grouping_cols <- as.list(detailed_mutate[["archive"]][["DT"]])[detailed_mutate[["request_names"]]] grouping_col_is_factor <- purrr::map_lgl(grouping_cols, is.factor) # ^ Use `as.list` to try to avoid any possibility of a deep copy. if (!any(grouping_col_is_factor)) { - Warn("`.drop=FALSE` but there are no factor grouping columns; did you mean to convert one of the columns to a factor beforehand?", + cli_warn("`.drop=FALSE` but there are no factor grouping columns; did you mean to convert one of the columns to a factor beforehand?", class = "epiprocess__group_by_epi_archive__drop_FALSE_no_factors" ) } else if (any(diff(grouping_col_is_factor) == -1L)) { - Warn("`.drop=FALSE` but there are one or more non-factor grouping columns listed after a factor grouping column; this may produce groups with `NA`s for these columns; see https://github.com/tidyverse/dplyr/issues/5369#issuecomment-683762553; depending on how you want completion to work, you might instead want to convert all grouping columns to factors beforehand, specify the non-factor grouping columns first, or use `.drop=TRUE` and add a call to `tidyr::complete`.", + cli_warn("`.drop=FALSE` but there are one or more non-factor grouping columns listed after a factor grouping column; this may produce groups with `NA`s for these columns; see https://github.com/tidyverse/dplyr/issues/5369#issuecomment-683762553; depending on how you want completion to work, you might instead want to convert all grouping columns to factors beforehand, specify the non-factor grouping columns first, or use `.drop=TRUE` and add a call to `tidyr::complete`.", class = "epiprocess__group_by_epi_archive__drop_FALSE_nonfactor_after_factor" ) } @@ -956,7 +940,7 @@ epix_slide <- function(x, f, ..., before, ref_time_values, as_list_col = FALSE, names_sep = "_", all_versions = FALSE) { if (!is_epi_archive(x, 
grouped_okay = TRUE)) { - Abort("`x` must be of class `epi_archive` or `grouped_epi_archive`.") + cli_abort("`x` must be of class `epi_archive` or `grouped_epi_archive`.") } return(x$slide(f, ..., before = before, diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R index 7e002320..3636d966 100644 --- a/R/methods-epi_df.R +++ b/R/methods-epi_df.R @@ -129,10 +129,10 @@ dplyr_reconstruct.epi_df <- function(data, template) { cn <- names(res) - # Duplicate columns, Abort + # Duplicate columns, cli_abort dup_col_names <- cn[duplicated(cn)] if (length(dup_col_names) != 0) { - Abort(paste0( + cli_abort(paste0( "Column name(s) ", paste(unique(dup_col_names), collapse = ", " diff --git a/R/outliers.R b/R/outliers.R index 1eb3ea01..ee59d64b 100644 --- a/R/outliers.R +++ b/R/outliers.R @@ -97,7 +97,7 @@ detect_outlr <- function(x = seq_along(y), y, # Validate that x contains all distinct values if (any(duplicated(x))) { - Abort("`x` cannot contain duplicate values. (If being run on a column in an `epi_df`, did you group by relevant key variables?)") + cli_abort("`x` cannot contain duplicate values. (If being run on a column in an `epi_df`, did you group by relevant key variables?)") } # Run all outlier detection methods @@ -108,10 +108,10 @@ detect_outlr <- function(x = seq_along(y), y, results <- do.call(method, args = c(list("x" = x, "y" = y), args)) # Validate the output - if (!is.data.frame(results) || - !all(c("lower", "upper", "replacement") %in% colnames(results))) { - Abort("Outlier detection method must return a data frame with columns `lower`, `upper`, and `replacement`.") - } + assert_data_frame(results) + if (!test_subset(c("lower", "upper", "replacement"), colnames(results))) cli_abort( + "Columns `lower`, `upper`, and `replacement` must be present in the output of the outlier detection method." 
+ ) # Update column names with model abbreviation colnames(results) <- paste(abbr, colnames(results), sep = "_") diff --git a/R/slide.R b/R/slide.R index e2c0bf55..9adabf9e 100644 --- a/R/slide.R +++ b/R/slide.R @@ -168,47 +168,37 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", all_rows = FALSE) { - # Check we have an `epi_df` object - if (!inherits(x, "epi_df")) Abort("`x` must be of class `epi_df`.") + assert_class(x, "epi_df") if (missing(ref_time_values)) { ref_time_values <- unique(x$time_value) - } - - # Some of these `ref_time_values` checks and processing steps also apply to - # the `ref_time_values` default; for simplicity, just apply all the steps - # regardless of whether we are working with a default or user-provided - # `ref_time_values`: - if (length(ref_time_values) == 0L) { - Abort("`ref_time_values` must have at least one element.") - } else if (any(is.na(ref_time_values))) { - Abort("`ref_time_values` must not include `NA`.") - } else if (anyDuplicated(ref_time_values) != 0L) { - Abort("`ref_time_values` must not contain any duplicates; use `unique` if appropriate.") - } else if (!all(ref_time_values %in% unique(x$time_value))) { - Abort("All `ref_time_values` must appear in `x$time_value`.") } else { - ref_time_values <- sort(ref_time_values) + assert_numeric(ref_time_values, min.len = 1L, null.ok = FALSE, any.missing = FALSE) + if (!test_subset(ref_time_values, unique(x$time_value))) { + cli_abort( + "`ref_time_values` must be a unique subset of the time values in `x`." 
+ ) + } + if (anyDuplicated(ref_time_values) != 0L) { + cli_abort("`ref_time_values` must not contain any duplicates; use `unique` if appropriate.") + } } + ref_time_values <- sort(ref_time_values) # Validate and pre-process `before`, `after`: if (!missing(before)) { before <- vctrs::vec_cast(before, integer()) - if (length(before) != 1L || is.na(before) || before < 0L) { - Abort("`before` must be length-1, non-NA, non-negative") - } + assert_int(before, lower = 0, null.ok = FALSE, na.ok = FALSE) } if (!missing(after)) { after <- vctrs::vec_cast(after, integer()) - if (length(after) != 1L || is.na(after) || after < 0L) { - Abort("`after` must be length-1, non-NA, non-negative") - } + assert_int(after, lower = 0, null.ok = FALSE, na.ok = FALSE) } if (missing(before)) { if (missing(after)) { - Abort("Either or both of `before`, `after` must be provided.") + cli_abort("Either or both of `before`, `after` must be provided.") } else if (after == 0L) { - Warn("`before` missing, `after==0`; maybe this was intended to be some + cli_warn("`before` missing, `after==0`; maybe this was intended to be some non-zero-width trailing window, but since `before` appears to be missing, it's interpreted as a zero-width window (`before=0, after=0`).") @@ -216,7 +206,7 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, before <- 0L } else if (missing(after)) { if (before == 0L) { - Warn("`before==0`, `after` missing; maybe this was intended to be some + cli_warn("`before==0`, `after` missing; maybe this was intended to be some non-zero-width leading window, but since `after` appears to be missing, it's interpreted as a zero-width window (`before=0, after=0`).") @@ -283,7 +273,7 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, if (!all(purrr::map_lgl(slide_values_list, is.atomic)) && !all(purrr::map_lgl(slide_values_list, is.data.frame))) { - Abort("The slide computations must return always atomic vectors or data frames (and not a mix of these two 
structures).") + cli_abort("The slide computations must return always atomic vectors or data frames (and not a mix of these two structures).") } # Unlist if appropriate: @@ -309,7 +299,7 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, )) } if (vctrs::vec_size(slide_values) != num_ref_rows) { - Abort("The slide computations must either (a) output a single element/row each, or (b) one element/row per appearance of the reference time value in the local window.") + cli_abort("The slide computations must either (a) output a single element/row each, or (b) one element/row per appearance of the reference time value in the local window.") } } @@ -330,10 +320,10 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, if (missing(f)) { quos <- enquos(...) if (length(quos) == 0) { - Abort("If `f` is missing then a computation must be specified via `...`.") + cli_abort("If `f` is missing then a computation must be specified via `...`.") } if (length(quos) > 1) { - Abort("If `f` is missing then only a single computation can be specified via `...`.") + cli_abort("If `f` is missing then only a single computation can be specified via `...`.") } f <- quos[[1]] diff --git a/R/utils.R b/R/utils.R index a8160159..6bef5e0a 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,9 +1,3 @@ -break_str <- function(str, nchar = 79, init = "") { - str <- paste(strwrap(str, nchar, initial = init), collapse = "\n") - str[1] <- substring(str, nchar(init) + 1) - return(str) -} - # Note: update `wrap_symbolics` and `wrap_varnames` (parameters, parameter # defaults, bodies) together. 
@@ -33,17 +27,13 @@ wrap_symbolics <- function(symbolics, initial = "", common_prefix = "", none_str = "", width = getOption("width", 80L)) { if (!all(purrr::map_lgl(symbolics, rlang::is_symbolic))) { - Abort("`symbolics` must be a list of symbolic objects") - } - if (!rlang::is_string(initial)) { - Abort("`initial` must be a string") - } - if (!rlang::is_string(common_prefix)) { - Abort("`common_prefix` must be a string") - } - if (!rlang::is_string(none_str)) { - Abort("`none_str` must be a string") + cli_abort("`symbolics` must be a list of symbolic objects") } + assert_character(initial, len = 1L) + assert_character(common_prefix, len = 1L) + assert_character(none_str, len = 1L) + assert_int(width, lower = 1L) + prefix <- strrep(" ", nchar(initial, type = "width")) full_initial <- paste0(common_prefix, initial) full_prefix <- paste0(common_prefix, prefix) @@ -85,9 +75,7 @@ wrap_varnames <- function(nms, width = getOption("width", 80L)) { # (Repeating parameter names and default args here for better autocomplete. # Using `...` instead would require less upkeep, but have worse autocomplete.) - if (!rlang::is_character(nms)) { - Abort("`nms` must be a character vector") - } + assert_character(nms) wrap_symbolics(rlang::syms(nms), initial = initial, common_prefix = common_prefix, none_str = none_str, width = width) } @@ -101,8 +89,6 @@ paste_lines <- function(lines) { paste(paste0(lines, "\n"), collapse = "") } -Abort <- function(msg, ...) rlang::abort(break_str(msg, init = "Error: "), ...) -Warn <- function(msg, ...) rlang::warn(break_str(msg, init = "Warning: "), ...) #' Assert that a sliding computation function takes enough args #' @@ -140,8 +126,12 @@ assert_sufficient_f_args <- function(f, ...) 
{ if (n_f_args_before_dots < n_mandatory_f_args) { mandatory_f_args_in_f_dots <- tail(mandatory_f_args_labels, n_mandatory_f_args - n_f_args_before_dots) + cli::cli_warn( - "`f` might not have enough positional arguments before its `...`; in the current `epi[x]_slide` call, the {mandatory_f_args_in_f_dots} will be included in `f`'s `...`; if `f` doesn't expect those arguments, it may produce confusing error messages", + "`f` might not have enough positional arguments before its `...`; in + the current `epi[x]_slide` call, the {mandatory_f_args_in_f_dots} will + be included in `f`'s `...`; if `f` doesn't expect those arguments, it + may produce confusing error messages", class = "epiprocess__assert_sufficient_f_args__mandatory_f_args_passed_to_f_dots", epiprocess__f = f, epiprocess__mandatory_f_args_in_f_dots = mandatory_f_args_in_f_dots @@ -152,13 +142,16 @@ assert_sufficient_f_args <- function(f, ...) { # `f` doesn't take enough args. if (rlang::dots_n(...) == 0L) { # common case; try for friendlier error message - Abort(sprintf("`f` must take at least %s arguments", n_mandatory_f_args), + cli_abort(sprintf("`f` must take at least %s arguments", n_mandatory_f_args), class = "epiprocess__assert_sufficient_f_args__f_needs_min_args", epiprocess__f = f ) } else { # less common; highlight that they are (accidentally?) using dots forwarding - Abort(sprintf("`f` must take at least %s arguments plus the %s arguments forwarded through `epi[x]_slide`'s `...`, or a named argument to `epi[x]_slide` was misspelled", n_mandatory_f_args, rlang::dots_n(...)), + cli_abort( + "`f` must take at least {n_mandatory_f_args} arguments plus the + {rlang::dots_n(...)} arguments forwarded through `epi[x]_slide`'s + `...`, or a named argument to `epi[x]_slide` was misspelled", class = "epiprocess__assert_sufficient_f_args__f_needs_min_args_plus_forwarded", epiprocess__f = f ) @@ -181,7 +174,13 @@ assert_sufficient_f_args <- function(f, ...) 
{ default_check_mandatory_args_labels[has_default_replaced_by_mandatory] args_with_default_replaced_by_mandatory <- rlang::syms(default_check_args_names[has_default_replaced_by_mandatory]) - cli::cli_abort("`epi[x]_slide` would pass the {mandatory_args_replacing_defaults} to `f`'s {args_with_default_replaced_by_mandatory} argument{?s}, which {?has a/have} default value{?s}; we suspect that `f` doesn't expect {?this arg/these args} at all and may produce confusing error messages. Please add additional arguments to `f` or remove defaults as appropriate.", + cli::cli_abort( + "`epi[x]_slide` would pass the {mandatory_args_replacing_defaults} to + `f`'s {args_with_default_replaced_by_mandatory} argument{?s}, which + {?has a/have} default value{?s}; we suspect that `f` doesn't expect + {?this arg/these args} at all and may produce confusing error messages. + Please add additional arguments to `f` or remove defaults as + appropriate.", class = "epiprocess__assert_sufficient_f_args__required_args_contain_defaults", epiprocess__f = f ) @@ -315,14 +314,16 @@ as_slide_computation <- function(f, ...) { if (is_formula(f)) { if (length(f) > 2) { - Abort(sprintf("%s must be a one-sided formula", arg), + cli_abort(sprintf("%s must be a one-sided formula", arg), class = "epiprocess__as_slide_computation__formula_is_twosided", epiprocess__f = f, call = call ) } if (rlang::dots_n(...) > 0L) { - Abort("No arguments can be passed via `...` when `f` is a formula, or there are unrecognized/misspelled parameter names.", + cli_abort( + "No arguments can be passed via `...` when `f` is a formula, or there + are unrecognized/misspelled parameter names.", class = "epiprocess__as_slide_computation__formula_with_dots", epiprocess__f = f, epiprocess__enquos_dots = enquos(...) @@ -331,7 +332,7 @@ as_slide_computation <- function(f, ...) 
{ env <- f_env(f) if (!is_environment(env)) { - Abort("Formula must carry an environment.", + cli_abort("Formula must carry an environment.", class = "epiprocess__as_slide_computation__formula_has_no_env", epiprocess__f = f, epiprocess__f_env = env, @@ -350,7 +351,8 @@ as_slide_computation <- function(f, ...) { return(fn) } - Abort(sprintf("Can't convert an object of class %s to a slide computation", paste(collapse = " ", deparse(class(f)))), + cli_abort( + sprintf("Can't convert an object of class %s to a slide computation", paste(collapse = " ", deparse(class(f)))), class = "epiprocess__as_slide_computation__cant_convert_catchall", epiprocess__f = f, epiprocess__f_class = class(f), @@ -546,7 +548,7 @@ list2var <- function(x) { #' @noRd deprecated_quo_is_present <- function(quo) { if (!rlang::is_quosure(quo)) { - Abort("`quo` must be a quosure; `enquo` the arg first", + cli_abort("`quo` must be a quosure; `enquo` the arg first", internal = TRUE ) } else if (rlang::quo_is_missing(quo)) { @@ -603,23 +605,13 @@ deprecated_quo_is_present <- function(quo) { #' #' @noRd gcd2num <- function(a, b, rrtol = 1e-6, pqlim = 1e6, irtol = 1e-6) { - if (!is.numeric(a) || length(a) != 1L) { - Abort("`a` must satisfy `is.numeric`, have `length` 1.") - } - if (!is.numeric(b) || length(b) != 1L) { - Abort("`b` must satisfy `is.numeric`, have `length` 1.") - } - if (!is.numeric(rrtol) || length(rrtol) != 1L || rrtol < 0) { - Abort("`rrtol` must satisfy `is.numeric`, have `length` 1, and be non-negative.") - } - if (!is.numeric(pqlim) || length(pqlim) != 1L || pqlim < 0) { - Abort("`pqlim` must satisfy `is.numeric`, have `length` 1, and be non-negative.") - } - if (!is.numeric(irtol) || length(irtol) != 1L || irtol < 0) { - Abort("`irtol` must satisfy `is.numeric`, have `length` 1, and be non-negative.") - } + assert_numeric(a, len = 1L) + assert_numeric(b, len = 1L) + assert_numeric(rrtol, len = 1L, lower = 0) + assert_numeric(pqlim, len = 1L, lower = 0) + assert_numeric(irtol, len = 
1L, lower = 0) if (is.na(a) || is.na(b) || a == 0 || b == 0 || abs(a / b) >= pqlim || abs(b / a) >= pqlim) { - Abort("`a` and/or `b` is either `NA` or exactly zero, or one is so much smaller than the other that it looks like it's supposed to be zero; see `pqlim` setting.") + cli_abort("`a` and/or `b` is either `NA` or exactly zero, or one is so much smaller than the other that it looks like it's supposed to be zero; see `pqlim` setting.") } iatol <- irtol * max(a, b) a_curr <- a @@ -627,7 +619,7 @@ gcd2num <- function(a, b, rrtol = 1e-6, pqlim = 1e6, irtol = 1e-6) { while (TRUE) { # `b_curr` is the candidate GCD / iterand; check first if it seems too small: if (abs(b_curr) <= iatol) { - Abort("No GCD found; remaining potential Gads are all too small relative to one/both of the original inputs; see `irtol` setting.") + cli_abort("No GCD found; remaining potential GCDs are all too small relative to one/both of the original inputs; see `irtol` setting.") } remainder <- a_curr - round(a_curr / b_curr) * b_curr if (abs(remainder / b_curr) <= rrtol) { @@ -652,10 +644,10 @@ gcd2num <- function(a, b, rrtol = 1e-6, pqlim = 1e6, irtol = 1e-6) { #' @noRd gcd_num <- function(dividends, ..., rrtol = 1e-6, pqlim = 1e6, irtol = 1e-6) { if (!is.numeric(dividends) || length(dividends) == 0L) { - Abort("`dividends` must satisfy `is.numeric`, and have `length` > 0") + cli_abort("`dividends` must satisfy `is.numeric`, and have `length` > 0") } if (rlang::dots_n(...) != 0L) { - Abort("`...` should be empty; all dividends should go in a single `dividends` vector, and all tolerance&limit settings should be passed by name.") + cli_abort("`...` should be empty; all dividends should go in a single `dividends` vector, and all tolerance&limit settings should be passed by name.") } # We expect a bunch of duplicate `dividends` for some applications. # De-duplicate to reduce work.
Sort by absolute value to attempt to reduce @@ -701,7 +693,7 @@ gcd_num <- function(dividends, ..., rrtol = 1e-6, pqlim = 1e6, irtol = 1e-6) { guess_period <- function(ref_time_values, ref_time_values_arg = rlang::caller_arg(ref_time_values)) { sorted_distinct_ref_time_values <- sort(unique(ref_time_values)) if (length(sorted_distinct_ref_time_values) < 2L) { - Abort(sprintf("Not enough distinct values in `%s` to guess the period.", ref_time_values_arg)) + cli_abort(sprintf("Not enough distinct values in `%s` to guess the period.", ref_time_values_arg)) } skips <- diff(sorted_distinct_ref_time_values) decayed_skips <- diff --git a/tests/testthat/test-archive-version-bounds.R b/tests/testthat/test-archive-version-bounds.R index 4000727a..720b33de 100644 --- a/tests/testthat/test-archive-version-bounds.R +++ b/tests/testthat/test-archive-version-bounds.R @@ -1,29 +1,18 @@ test_that("`validate_version_bound` allows/catches `NA` as requested", { my_version_bound <- NA - validate_version_bound(my_version_bound, na_ok = TRUE) - expect_error(validate_version_bound(my_version_bound, na_ok = FALSE), - class = "epiprocess__my_version_bound_is_na" - ) - # Note that if the error class name changes, this test may produce some - # confusing output along the following lines: - # - # > Error in `$<-.data.frame`(`*tmp*`, "call_text", value = c("testthat::expect_error(...)", : - # > replacement has 5 rows, data has 3 + x <- tibble::tibble(version = 5L) + validate_version_bound(my_version_bound, x, na_ok = TRUE) + expect_error(validate_version_bound(my_version_bound, x, na_ok = FALSE)) }) test_that("`validate_version_bound` catches bounds that are the wrong length", { + x <- tibble::tibble(version = 5L) my_version_bound1a <- NULL - expect_error(validate_version_bound(my_version_bound1a, na_ok = TRUE), - class = "epiprocess__my_version_bound1a_is_not_length_1" - ) + expect_error(validate_version_bound(my_version_bound1a, x, na_ok = TRUE)) my_version_bound1b <- integer(0L) - 
expect_error(validate_version_bound(my_version_bound1b, na_ok = TRUE), - class = "epiprocess__my_version_bound1b_is_not_length_1" - ) + expect_error(validate_version_bound(my_version_bound1b, x, na_ok = TRUE)) my_version_bound2 <- c(2, 10) - expect_error(validate_version_bound(my_version_bound2, na_ok = TRUE), - class = "epiprocess__my_version_bound2_is_not_length_1" - ) + expect_error(validate_version_bound(my_version_bound2, x, na_ok = TRUE)) }) test_that("`validate_version_bound` validate and class checks together allow and catch as intended", { @@ -52,29 +41,21 @@ test_that("`validate_version_bound` validate and class checks together allow and x_datetime <- tibble::tibble(version = my_datetime) # Custom classes matter (test vectors and non-vctrs-specialized lists separately): my_version_bound1 <- `class<-`(24, "c1") - expect_error(validate_version_bound(my_version_bound1, x_int, na_ok = FALSE), - class = "epiprocess__my_version_bound1_has_invalid_class_or_typeof" - ) + expect_error(validate_version_bound(my_version_bound1, x_int, na_ok = FALSE), regexp = "must have the same classes as") my_version_bound2 <- `class<-`(list(12), c("c2a", "c2b", "c2c")) - expect_error(validate_version_bound(my_version_bound2, x_list, na_ok = FALSE), - class = "epiprocess__my_version_bound2_has_invalid_class_or_typeof" - ) + expect_error(validate_version_bound(my_version_bound2, x_list, na_ok = FALSE), regexp = "must have the same classes") # Want no error matching date to date or datetime to datetime, but no interop due to tz issues: validate_version_bound(my_date, x_date, version_bound_arg = "vb") validate_version_bound(my_datetime, x_datetime, version_bound_arg = "vb") - expect_error(validate_version_bound(my_datetime, x_date, na_ok = TRUE, version_bound_arg = "vb"), - class = "epiprocess__vb_has_invalid_class_or_typeof" - ) - expect_error(validate_version_bound(my_date, x_datetime, na_ok = TRUE, version_bound_arg = "vb"), - class = "epiprocess__vb_has_invalid_class_or_typeof" - 
) + expect_error(validate_version_bound(my_datetime, x_date, na_ok = TRUE, version_bound_arg = "vb"), regexp = "must have the same classes") + expect_error(validate_version_bound(my_date, x_datetime, na_ok = TRUE, version_bound_arg = "vb"), regexp = "must have the same classes") # Bad: - expect_error(validate_version_bound(3.5, x_int, TRUE, "vb")) - expect_error(validate_version_bound(.Machine$integer.max, x_dbl, TRUE, "vb")) + expect_error(validate_version_bound(3.5, x_int, TRUE, "vb"), regexp = "must have the same classes") + expect_error(validate_version_bound(.Machine$integer.max, x_dbl, TRUE, "vb"), regexp = "must have the same classes") expect_error(validate_version_bound( `class<-`(list(2), "clazz"), tibble::tibble(version = `class<-`(5L, "clazz")), TRUE, "vb" - )) + ), regexp = "must have the same types") # Maybe questionable: expect_error(validate_version_bound(3, x_int, TRUE, "vb")) expect_error(validate_version_bound(3L, x_dbl, TRUE, "vb")) @@ -99,28 +80,27 @@ test_that("archive version bounds args work as intended", { clobberable_versions_start = 1241, versions_end = measurement_date ), - class = "epiprocess__clobberable_versions_start_has_invalid_class_or_typeof" + regexp = "must have the same classes" ) - expect_error(as_epi_archive(update_tbl[integer(0L), ]), - class = "epiprocess__max_version_cannot_be_used" + expect_error( + as_epi_archive(update_tbl[integer(0L), ]), + regexp = "don't have a sensible guess at what version that is" ) expect_error( as_epi_archive(update_tbl, clobberable_versions_start = NA, versions_end = measurement_date ), - class = "epiprocess__versions_end_earlier_than_updates" + regexp = "`x` contained updates for a later version" ) expect_error( as_epi_archive(update_tbl, clobberable_versions_start = measurement_date + 6L, versions_end = measurement_date + 5L ), - class = "epiprocess__versions_end_earlier_than_clobberable_versions_start" - ) - expect_error(as_epi_archive(update_tbl, versions_end = NA), - regexp = 
"versions_end.*must not satisfy.*is.na" + regexp = "`clobberable_versions_start`.*indicated that there were later observed versions" ) + expect_error(as_epi_archive(update_tbl, versions_end = NA), regexp = "must have the same classes") ea_default <- as_epi_archive(update_tbl) ea_default$as_of(measurement_date + 4L) expect_warning( @@ -128,7 +108,6 @@ test_that("archive version bounds args work as intended", { ea_default$as_of(measurement_date + 5L), class = "epiprocess__snapshot_as_of_clobberable_version" ) - expect_error(ea_default$as_of(measurement_date + 6L), - regexp = "max_version.*at most.*versions_end" - ) + ea_default$as_of(measurement_date + 5L) + expect_error(ea_default$as_of(measurement_date + 6L)) }) diff --git a/tests/testthat/test-archive.R b/tests/testthat/test-archive.R index 73f0e166..2eba383d 100644 --- a/tests/testthat/test-archive.R +++ b/tests/testthat/test-archive.R @@ -2,7 +2,7 @@ library(dplyr) test_that("first input must be a data.frame", { expect_error(as_epi_archive(c(1, 2, 3), compactify = FALSE), - regexp = "`x` must be a data frame." + regexp = "Must be of type 'data.frame'." ) }) @@ -10,13 +10,13 @@ dt <- archive_cases_dv_subset$DT test_that("data.frame must contain geo_value, time_value and version columns", { expect_error(as_epi_archive(select(dt, -geo_value), compactify = FALSE), - regexp = "`x` must contain a `geo_value` column." + regexp = "Columns `geo_value`, `time_value`, and `version` must be present in `x`." ) expect_error(as_epi_archive(select(dt, -time_value), compactify = FALSE), - regexp = "`x` must contain a `time_value` column." + regexp = "Columns `geo_value`, `time_value`, and `version` must be present in `x`." ) expect_error(as_epi_archive(select(dt, -version), compactify = FALSE), - regexp = "`x` must contain a `version` column." + regexp = "Columns `geo_value`, `time_value`, and `version` must be present in `x`." 
) }) @@ -41,10 +41,10 @@ test_that("other_keys cannot contain names geo_value, time_value or version", { test_that("Warning thrown when other_metadata contains overlapping names with geo_type or time_type fields", { expect_warning(as_epi_archive(dt, additional_metadata = list(geo_type = 1), compactify = FALSE), - regexp = "`additional_metadata` names overlap with existing metadata fields\n\"geo_type\", \"time_type\"." + regexp = "`additional_metadata` names overlap with existing metadata fields \"geo_type\", \"time_type\"." ) expect_warning(as_epi_archive(dt, additional_metadata = list(time_type = 1), compactify = FALSE), - regexp = "`additional_metadata` names overlap with existing metadata fields\n\"geo_type\", \"time_type\"." + regexp = "`additional_metadata` names overlap with existing metadata fields \"geo_type\", \"time_type\"." ) }) diff --git a/tests/testthat/test-data.R b/tests/testthat/test-data.R index 511cc8d7..bd9002a3 100644 --- a/tests/testthat/test-data.R +++ b/tests/testthat/test-data.R @@ -43,12 +43,12 @@ test_that("original `delayedAssign` works as expected on good promises", { }) test_that("`delayed_assign_with_unregister_awareness` doesn't wrap a buggy promise if not unregistering", { - delayed_assign_with_unregister_awareness("x", Abort("msg", class = "original_error_class")) + delayed_assign_with_unregister_awareness("x", cli_abort("msg", class = "original_error_class")) expect_error(force(x), class = "original_error_class") }) test_that("`delayed_assign_with_unregister_awareness` doesn't wrap a buggy promise if not unregistering", { - delayed_assign_with_unregister_awareness("x", Abort("msg", class = "original_error_class")) + delayed_assign_with_unregister_awareness("x", cli_abort("msg", class = "original_error_class")) # Take advantage of a false positive / hedge against package renaming: make # our own `unregister` function to trigger the special error message. 
unregister <- function(y) y diff --git a/tests/testthat/test-epi_df.R b/tests/testthat/test-epi_df.R index decd6fd7..38257282 100644 --- a/tests/testthat/test-epi_df.R +++ b/tests/testthat/test-epi_df.R @@ -42,7 +42,7 @@ test_that("as_epi_df errors when additional_metadata is not a list", { expect_error( as_epi_df(ex_input, additional_metadata = c(other_keys = "state", "pol")), - "`additional_metadata` must be a list type." + "Must be of type 'list', not 'character'." ) }) diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index 8137cf19..588ad933 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -32,11 +32,11 @@ toy_edf <- tibble::tribble( test_that("`before` and `after` are both vectors of length 1", { expect_error( epi_slide(grouped, f, before = c(0, 1), after = 0, ref_time_values = d + 3), - "`before`.*length-1" + "Assertion on 'before' failed: Must have length 1" ) expect_error( epi_slide(grouped, f, before = 1, after = c(0, 1), ref_time_values = d + 3), - "`after`.*length-1" + "Assertion on 'after' failed: Must have length 1" ) }) @@ -62,11 +62,11 @@ test_that("Test errors/warnings for discouraged features", { test_that("Both `before` and `after` must be non-NA, non-negative, integer-compatible", { expect_error( epi_slide(grouped, f, before = -1L, ref_time_values = d + 2L), - "`before`.*non-negative" + "Assertion on 'before' failed: Element 1 is not >= 0" ) expect_error( epi_slide(grouped, f, before = 2L, after = -1L, ref_time_values = d + 2L), - "`after`.*non-negative" + "Assertion on 'after' failed: Element 1 is not >= 0" ) expect_error(epi_slide(grouped, f, before = "a", ref_time_values = d + 2L), regexp = "before", class = "vctrs_error_incompatible_type" @@ -82,11 +82,11 @@ test_that("Both `before` and `after` must be non-NA, non-negative, integer-compa ) expect_error( epi_slide(grouped, f, before = NA, after = 1L, ref_time_values = d + 2L), - "`before`.*non-NA" + "Assertion on 'before' 
failed: May not be NA" ) expect_error( epi_slide(grouped, f, before = 1L, after = NA, ref_time_values = d + 2L), - "`after`.*non-NA" + "Assertion on 'after' failed: May not be NA" ) # Non-integer-class but integer-compatible values are allowed: expect_error(epi_slide(grouped, f, before = 1, after = 1, ref_time_values = d + 2L), NA) @@ -95,22 +95,22 @@ test_that("Both `before` and `after` must be non-NA, non-negative, integer-compa test_that("`ref_time_values` + `before` + `after` that result in no slide data, generate the error", { expect_error( epi_slide(grouped, f, before = 2L, ref_time_values = d), - "All `ref_time_values` must appear in `x\\$time_value`." + "`ref_time_values` must be a unique subset of the time values in `x`." ) # before the first, no data in the slide windows expect_error( epi_slide(grouped, f, before = 2L, ref_time_values = d + 207L), - "All `ref_time_values` must appear in `x\\$time_value`." + "`ref_time_values` must be a unique subset of the time values in `x`." ) # beyond the last, no data in window }) test_that("`ref_time_values` + `before` + `after` that have some slide data, but generate the error due to ref. time being out of time range (would also happen if they were in between `time_value`s)", { expect_error( epi_slide(grouped, f, before = 0L, after = 2L, ref_time_values = d), - "All `ref_time_values` must appear in `x\\$time_value`." + "`ref_time_values` must be a unique subset of the time values in `x`." ) # before the first, but we'd expect there to be data in the window expect_error( epi_slide(grouped, f, before = 2L, ref_time_values = d + 201L), - "All `ref_time_values` must appear in `x\\$time_value`." + "`ref_time_values` must be a unique subset of the time values in `x`." 
) # beyond the last, but still with data in window }) diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index b3fff13d..4af84254 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -187,11 +187,11 @@ test_that("epix_slide `before` validation works", { ) expect_error( xx$slide(f = ~ sum(.x$binary), before = NA), - "`before`.*NA" + "Assertion on 'before' failed: May not be NA" ) expect_error( xx$slide(f = ~ sum(.x$binary), before = -1), - "`before`.*negative" + "Assertion on 'before' failed: Element 1 is not >= 0" ) expect_error(xx$slide(f = ~ sum(.x$binary), before = 1.5), regexp = "before", diff --git a/tests/testthat/test-grouped_epi_archive.R b/tests/testthat/test-grouped_epi_archive.R index 68e7c76d..9fd15e10 100644 --- a/tests/testthat/test-grouped_epi_archive.R +++ b/tests/testthat/test-grouped_epi_archive.R @@ -39,7 +39,7 @@ test_that("Grouping, regrouping, and ungrouping archives works as intended", { # Test `.drop` behavior: expect_error(toy_archive %>% group_by(.drop = "bogus"), - regexp = "\\.drop.*TRUE or FALSE" + regexp = "Must be of type 'logical', not 'character'" ) expect_warning(toy_archive %>% group_by(.drop = FALSE), class = "epiprocess__group_by_epi_archive__drop_FALSE_no_factors" diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 4ad692a0..3454d257 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1,12 +1,3 @@ -test_that("break_string works properly", { - expect_equal(break_str("A dog is here", 6), "A dog\nis\nhere") -}) - -test_that("Abort and Warn work", { - expect_error(Abort("abort")) - expect_warning(Warn("warn")) -}) - test_that("new summarizing functions work", { x <- c(3, 4, 5, 9, NA) expect_equal(Min(x), 3)