From c818bc668a8d4a4df32008e4a174f14b2aadda03 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 9 Jul 2021 18:13:01 -0400 Subject: [PATCH 01/35] clarify old_item requirements for replacements --- facebook/qsf-tools/README.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/facebook/qsf-tools/README.md b/facebook/qsf-tools/README.md index e46a613bc..54eb4d28b 100644 --- a/facebook/qsf-tools/README.md +++ b/facebook/qsf-tools/README.md @@ -20,11 +20,12 @@ questions. These mapping files are created manually and need to be updated for every new survey wave. * `item_replacement_map.csv`: Lists in-survey name of an `new_item` and the - in-survey name of the `old_item` it replaces. `new_item` should be the name - of a single item and be unique, but the `old_item` column has no formatting - requirements. It can hold a list of items, if the corresponding new survey - item is replacing multiple old questions, and a given item name can appear - in multiple rows of the `old_item` field. + in-survey name(s) of the `old_item`(s) it replaces. `new_item` should be the + name of a single item and be unique; the `old_item` column should be a + string. However, `old_item` has no other formatting requirements. For + example, it can list several item names (e.g. "A1, A2"), if the + corresponding new survey item is replacing multiple old questions. A given + item name can also appear in multiple rows of the `old_item` field. * `item_shortquestion_map.csv`: Lists in-survey name of an `item` and a short description of the contents of the question. `item` should be the name of a single item and be unique, but the `description` column has no formatting @@ -83,4 +84,4 @@ which can contain any subset of the following fields: The meaning of "Answers" and "Choices" differs for matrix vs non-matrix items. "Choices" list the vertical components -- subquestions for matrix items and answer choices for non-matrix items. "Answers" list the answer -choices for matrix items and are missing for non-matrix items. \ No newline at end of file +choices for matrix items and are missing for non-matrix items. From 0ba761d20b8309f3ee183d997b2c142e7d63452f Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 9 Jul 2021 18:13:23 -0400 Subject: [PATCH 02/35] add timezone to date metadata field descriptions --- facebook/qsf-tools/static/static_microdata_fields.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/facebook/qsf-tools/static/static_microdata_fields.csv b/facebook/qsf-tools/static/static_microdata_fields.csv index 0ac7633cc..09cee071d 100644 --- a/facebook/qsf-tools/static/static_microdata_fields.csv +++ b/facebook/qsf-tools/static/static_microdata_fields.csv @@ -1,6 +1,6 @@ variable,replaces,description,question,matrix_subquestion,type,response_option_randomization -StartDatetime,NA,"survey start timestamp",NA,NA,NA,NA -EndDatetime,NA,"survey end timestamp",NA,NA,NA,NA +StartDatetime,NA,"survey start timestamp in Pacific time (UTC-7)",NA,NA,NA,NA +EndDatetime,NA,"survey end timestamp in Pacific time (UTC-7)",NA,NA,NA,NA wave,NA,"survey version",NA,NA,NA,NA UserLanguage,NA,"survey language",NA,NA,NA,NA fips,NA,"county FIPS code",NA,NA,NA,NA From e84c4bc139436e7d8da85e074b0197d55566904f Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 12 Jul 2021 10:36:51 -0400 Subject: [PATCH 03/35] update replacements mapping --- facebook/qsf-tools/static/item_replacement_map.csv | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/facebook/qsf-tools/static/item_replacement_map.csv b/facebook/qsf-tools/static/item_replacement_map.csv index d1a882f14..433a3ae7b 100644 --- a/facebook/qsf-tools/static/item_replacement_map.csv +++ b/facebook/qsf-tools/static/item_replacement_map.csv @@ -22,6 +22,7 @@ C15,Q36 C13b,C13 C13c,C13a C14a,C14 +C17,C2 C17a,C17 V2a,V2 V3a,V3 @@ -32,4 +33,5 @@ V4a_4,V4_4 V4a_5,V4_5 V11a,V11 V12a,V12 -C7a,C7 \ No newline at end of file +C7a,C7 +B10c,B10a From 7ac4439898e2decfa545e1fa6afc81cd62a5f4c3 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 13 Jul 2021 11:26:17 -0400 Subject: [PATCH 04/35] change write procedure to display correctly in Excel --- facebook/qsf-tools/generate-codebook.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/facebook/qsf-tools/generate-codebook.R b/facebook/qsf-tools/generate-codebook.R index b9044f6ee..c0b72e5c7 100644 --- a/facebook/qsf-tools/generate-codebook.R +++ b/facebook/qsf-tools/generate-codebook.R @@ -388,7 +388,7 @@ get_static_fields <- function(wave, add_qsf_to_codebook <- function(path_to_qsf, path_to_codebook) { qdf <- process_qsf(path_to_qsf) codebook <- add_qdf_to_codebook(qdf, path_to_codebook) - write_csv(codebook, path_to_codebook) + write_excel_csv(codebook, path_to_codebook) } From abc620060ca2801aefca77f84df178bf4caedf0c Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 13 Jul 2021 18:48:14 -0400 Subject: [PATCH 05/35] Revert "Revert "Add workflow to automate fb package testing"" This reverts commit 9141e93b20af1ae2ecbeb61387af5719f3fbd080. --- .github/workflows/python-ci.yml | 2 +- .github/workflows/r-ci.yml | 59 +++++++++++++++++++ .../unit-tests/testthat/test-variables.R | 5 ++ 3 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/r-ci.yml diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index 809a81f6c..d6dd7260e 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -5,7 +5,7 @@ name: Python package on: push: - branches: [ main, prod, 'release/*' ] + branches: [ main, prod ] pull_request: types: [ opened, synchronize, reopened, ready_for_review ] branches: [ main, prod ] diff --git a/.github/workflows/r-ci.yml b/.github/workflows/r-ci.yml new file mode 100644 index 000000000..c64664fd8 --- /dev/null +++ b/.github/workflows/r-ci.yml @@ -0,0 +1,59 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. +# +# See https://github.com/r-lib/actions/tree/master/examples#readme for +# additional example workflows available for the R community. + +name: R facebook survey + +on: + push: + branches: [ main, prod ] + pull_request: + types: [ opened, synchronize, reopened, ready_for_review ] + branches: [ main, prod ] + +jobs: + build: + runs-on: ubuntu-20.04 + if: github.event.pull_request.draft == false + strategy: + matrix: + r-version: [4.0] + defaults: + run: + working-directory: facebook/delphiFacebook + + steps: + - uses: actions/checkout@v2 + - name: Set up R ${{ matrix.r-version }} + uses: r-lib/actions/setup-r@v1 + with: + r-version: ${{ matrix.r-version }} + - name: Install linux dependencies + run: | + sudo apt-get install libcurl4-openssl-dev + - name: Get month + id: get-month + run: | + echo "::set-output name=month::$(/bin/date -u "+%Y%m")" + - name: Cache R packages + uses: actions/cache@v2 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-r-facebook-survey-${{ steps.get-month.outputs.month }} + restore-keys: | + ${{ runner.os }}-r-facebook-survey- + - name: Install R dependencies + run: | + install.packages("remotes") + remotes::update_packages(c("rcmdcheck", "mockr"), dependencies=TRUE, upgrade="always") + dependency_list <- remotes::dev_package_deps(dependencies=TRUE) + remotes::update_packages(dependency_list$package, dependencies=TRUE, upgrade="always") + shell: Rscript {0} + - name: Check + run: | + rcmdcheck::rcmdcheck(args = c("--no-manual", "--test-dir=unit-tests"), error_on = "error") + shell: Rscript {0} diff --git a/facebook/delphiFacebook/unit-tests/testthat/test-variables.R b/facebook/delphiFacebook/unit-tests/testthat/test-variables.R index 2851028e4..ec3989262 100644 --- a/facebook/delphiFacebook/unit-tests/testthat/test-variables.R +++ b/facebook/delphiFacebook/unit-tests/testthat/test-variables.R @@ -86,6 +86,7 @@ test_that("mask items correctly coded", { input_data$c_mask_often_7d <- NA input_data$c_others_masked <- c(TRUE, NA, NA, FALSE, TRUE, FALSE) input_data$c_others_masked_public <- NA + input_data$c_others_distanced_public <- NA input_data$c_work_outside_5d <- NA expect_equal(out, input_data) @@ -105,6 +106,7 @@ test_that("mask items correctly coded", { input_data$c_mask_often_7d <- c(NA, TRUE, FALSE, NA, TRUE, FALSE) input_data$c_others_masked <- c(TRUE, NA, NA, FALSE, TRUE, FALSE) input_data$c_others_masked_public <- NA + input_data$c_others_distanced_public <- NA input_data$c_work_outside_5d <- NA expect_equal(out, input_data) @@ -125,6 +127,7 @@ test_that("mask items correctly coded", { input_data$c_mask_often_7d <- NA input_data$c_others_masked <- c(TRUE, NA, NA, FALSE, TRUE, FALSE) input_data$c_others_masked_public <- NA + input_data$c_others_distanced_public <- NA input_data$c_work_outside_5d <- NA expect_equal(out, input_data) @@ -133,6 +136,7 @@ test_that("mask items correctly coded", { input_data <- data.frame( C14 = c(NA, 1, 3, 6, 2, 4), H2 = c(1, NA, 6, 3, 2, 5), + H1 = c(1, NA, 6, 3, 2, 5), C6a = 1 ) @@ -145,6 +149,7 @@ test_that("mask items correctly coded", { input_data$c_mask_often_7d <- NA input_data$c_others_masked <- NA input_data$c_others_masked_public <- c(FALSE, NA, NA, FALSE, FALSE, TRUE) + input_data$c_others_distanced_public <- c(FALSE, NA, NA, FALSE, FALSE, TRUE) input_data$c_work_outside_5d <- NA expect_equal(out, input_data) From ead5d602654a3c150ce688d919b5713d2f241788 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 12 Jul 2021 18:54:13 -0400 Subject: [PATCH 06/35] remove 'suggested' dependencies --- .github/workflows/r-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/r-ci.yml b/.github/workflows/r-ci.yml index c64664fd8..05a9c0596 100644 --- a/.github/workflows/r-ci.yml +++ b/.github/workflows/r-ci.yml @@ -49,9 +49,9 @@ jobs: - name: Install R dependencies run: | install.packages("remotes") - remotes::update_packages(c("rcmdcheck", "mockr"), dependencies=TRUE, upgrade="always") + remotes::update_packages(c("rcmdcheck", "mockr"), upgrade="always") dependency_list <- remotes::dev_package_deps(dependencies=TRUE) - remotes::update_packages(dependency_list$package, dependencies=TRUE, upgrade="always") + remotes::update_packages(dependency_list$package, upgrade="always") shell: Rscript {0} - name: Check run: | From 4ee90dd86adfdb49a001c3eacf9f9897c58af06a Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 15 Jul 2021 13:03:43 -0400 Subject: [PATCH 07/35] format choice coding as json string --- facebook/qsf-tools/generate-codebook.R | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/facebook/qsf-tools/generate-codebook.R b/facebook/qsf-tools/generate-codebook.R index c0b72e5c7..fb83bf46a 100644 --- a/facebook/qsf-tools/generate-codebook.R +++ b/facebook/qsf-tools/generate-codebook.R @@ -9,6 +9,7 @@ suppressPackageStartupMessages({ library(tidyverse) library(jsonlite) + library(rjson) library(stringr) library(gsubfn) source("qsf-utils.R") @@ -265,7 +266,24 @@ process_qsf <- function(path_to_qsf, NA_character_), wave = get_wave(path_to_qsf) ) %>% - select(wave, variable, replaces, description, question, matrix_subquestion, type, display_logic, response_option_randomization, group_of_respondents_item_was_shown_to) + select(wave, + variable, + replaces, + description, + question, + matrix_subquestion, + choices, + type, + display_logic, + response_option_randomization, + group_of_respondents_item_was_shown_to) + + # Format choices as json string + qdf$choices <- map(qdf$choices, function(x) { + if (is_empty(x)) { NA } + else { toJSON(x) } + }) %>% + unlist() # add free text response options other_text_items <- qdf %>% From 69fcb6a024665cef8b641fa66e50d2cbe2e00010 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 16 Jul 2021 13:47:02 -0400 Subject: [PATCH 08/35] remove choices for other_text items --- facebook/qsf-tools/generate-codebook.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/facebook/qsf-tools/generate-codebook.R b/facebook/qsf-tools/generate-codebook.R index fb83bf46a..a126162b1 100644 --- a/facebook/qsf-tools/generate-codebook.R +++ b/facebook/qsf-tools/generate-codebook.R @@ -277,7 +277,7 @@ process_qsf <- function(path_to_qsf, display_logic, response_option_randomization, group_of_respondents_item_was_shown_to) - + # Format choices as json string qdf$choices <- map(qdf$choices, function(x) { if (is_empty(x)) { NA } @@ -294,6 +294,7 @@ process_qsf <- function(path_to_qsf, description = paste0(description, " other text") ) qdf <- rbind(qdf, other_text_items) + qdf$choices[qdf$type == "Text"] <- NA # Quality checks stopifnot(length(qdf$variable) == length(unique(qdf$variable))) From 7667b5ea441e54dc5b4d12c7fe6a8d61ea79b0db Mon Sep 17 00:00:00 2001 From: Kathryn M Mazaitis Date: Mon, 19 Jul 2021 13:46:45 -0400 Subject: [PATCH 09/35] Add missing sources to sirCAL nchs-mortality, covid-act-now, and hhs (hospitalizations) --- .../templates/sir_complainsalot-params-prod.json.j2 | 12 ++++++++++++ sir_complainsalot/params.json.template | 12 ++++++++++++ 2 files changed, 24 insertions(+) diff --git a/ansible/templates/sir_complainsalot-params-prod.json.j2 b/ansible/templates/sir_complainsalot-params-prod.json.j2 index ae3182344..24a7ed1b6 100644 --- a/ansible/templates/sir_complainsalot-params-prod.json.j2 +++ b/ansible/templates/sir_complainsalot-params-prod.json.j2 @@ -46,6 +46,18 @@ "max_age":6, "maintainers": ["U01AP8GSWG3","U01069KCRS7"], "retired-signals": ["raw_pct_negative","smoothed_pct_negative","raw_tests_per_device","smoothed_tests_per_device"] + }, + "nchs-mortality": { + "max_age":13, + "maintainers": [] + }, + "covid-act-now": { + "max_age":5, + "maintainers": [] + }, + "hhs": { + "max_age":8, + "maintainers": [] } } } diff --git a/sir_complainsalot/params.json.template b/sir_complainsalot/params.json.template index 2f7354598..df9624ce4 100644 --- a/sir_complainsalot/params.json.template +++ b/sir_complainsalot/params.json.template @@ -47,6 +47,18 @@ "max_age":6, "maintainers": ["U01AP8GSWG3","U01069KCRS7"], "retired-signals": ["raw_pct_negative","smoothed_pct_negative","raw_tests_per_device","smoothed_tests_per_device"] + }, + "nchs-mortality": { + "max_age":13, + "maintainers": [] + }, + "covid-act-now": { + "max_age":5, + "maintainers": [] + }, + "hhs": { + "max_age":8, + "maintainers": [] } } } From d1702c2a2855ed3ce9b2be817bd324dfb0b42d5a Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 9 Apr 2021 14:24:12 -0400 Subject: [PATCH 10/35] create function to get every fourth date of input Because this is handled inside the table generation functions, the Makefile doesn't need any logic to choose input files. It will always just provide an empty list for input data. If params file provides a list of input for tables, package will remove any files falling completely outside the desired aggregate range. --- facebook/delphiFacebook/NAMESPACE | 2 + facebook/delphiFacebook/R/contingency_utils.R | 54 +++++++++++++++++-- .../man/get_sparse_filenames.Rd | 21 ++++++++ .../testthat/test-contingency-utils.R | 49 +++++++++++++++++ 4 files changed, 122 insertions(+), 4 deletions(-) create mode 100644 facebook/delphiFacebook/man/get_sparse_filenames.Rd diff --git a/facebook/delphiFacebook/NAMESPACE b/facebook/delphiFacebook/NAMESPACE index 1eac7fc8f..3fe733903 100644 --- a/facebook/delphiFacebook/NAMESPACE +++ b/facebook/delphiFacebook/NAMESPACE @@ -18,6 +18,7 @@ export(get_filenames_in_range) export(get_range_prev_full_month) export(get_range_prev_full_period) export(get_range_prev_full_week) +export(get_sparse_filenames) export(jeffreys_se) export(join_weights) export(load_archive) @@ -112,6 +113,7 @@ importFrom(stringi,stri_split) importFrom(stringi,stri_sub) importFrom(stringi,stri_trans_tolower) importFrom(stringi,stri_trim) +importFrom(stringr,str_remove_all) importFrom(tibble,add_column) importFrom(tibble,as_tibble) importFrom(tibble,tribble) diff --git a/facebook/delphiFacebook/R/contingency_utils.R b/facebook/delphiFacebook/R/contingency_utils.R index 8e2edbdde..39bd269e4 100644 --- a/facebook/delphiFacebook/R/contingency_utils.R +++ b/facebook/delphiFacebook/R/contingency_utils.R @@ -71,8 +71,15 @@ update_params <- function(params) { ) } - params$input <- get_filenames_in_range(date_range[[1]], date_range[[2]], params) - if ( length(params[["input"]]) == 0 || all(is.na(params[["input"]])) ) { + if ( is.null(params[["input"]]) || length(params$input) == 0 ) { + # If params$input empty or not provided, fetch filenames from input_dir. + params$input <- get_sparse_filenames(date_range[[1]], date_range[[2]], params) + } else { + # If input files provided, subset to those in desired date range. + params$input <- get_filenames_in_range(date_range[[1]], date_range[[2]], params) + } + + if ( length(params$input) == 0 || all(is.na(params$input)) ) { stop("no input files to read in") } @@ -80,7 +87,7 @@ update_params <- function(params) { params$end_time <- date_range[[2]] params$start_date <- as_date(date_range[[1]]) params$end_date <- as_date(date_range[[2]]) - + return(params) } @@ -101,7 +108,7 @@ get_filenames_in_range <- function(start_date, end_date, params) { start_date <- as_date(start_date) - days(params$backfill_days) end_date <- as_date(end_date) - if ( is.null(params$input) | length(params$input) == 0 ) { + if ( is.null(params[["input"]]) || length(params$input) == 0 ) { date_pattern <- "^[0-9]{4}-[0-9]{2}-[0-9]{2}.*[.]csv$" youtube_pattern <- ".*YouTube[.]csv$" @@ -123,6 +130,45 @@ get_filenames_in_range <- function(start_date, end_date, params) { return(filenames) } +#' Get sparse list of input data files from `input_dir`. +#' +#' Finds every fourth + last file by date. +#' +#' @param start_date Start of desired date range +#' @param end_date End of desired date range +#' @param params Params object produced by read_params +#' +#' @return Character vector of filenames +#' +#' @importFrom stringr str_remove_all +#' +#' @export +get_sparse_filenames <- function(start_date, end_date, params) { + if (params$use_input_asis) { return(params$input) } + + filenames <- get_filenames_in_range(start_date, end_date, params) + + file_end_dates <- as.integer(str_remove_all(substr(filenames, 1, 10), "-")) + unique_file_end_dates <- unique(file_end_dates) + + max_end_date <- max(unique_file_end_dates) + + # Use every fourth date. + stride <- 4L + curr_date <- min(unique_file_end_dates) + keep_dates <- c() + while ( curr_date < max_end_date ) { + keep_dates <- c(keep_dates, curr_date) + curr_date <- min(curr_date + stride, max_end_date) + } + + # Always add last date + keep_dates <- c(keep_dates, max_end_date) + + filenames <- filenames[file_end_dates %in% keep_dates] + return(filenames) +} + #' Check user-set aggregations for basic validity and add a few necessary cols. #' #' @param aggregations Data frame with columns `name`, `var_weight`, `metric`, diff --git a/facebook/delphiFacebook/man/get_sparse_filenames.Rd b/facebook/delphiFacebook/man/get_sparse_filenames.Rd new file mode 100644 index 000000000..6bb875bc8 --- /dev/null +++ b/facebook/delphiFacebook/man/get_sparse_filenames.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/contingency_utils.R +\name{get_sparse_filenames} +\alias{get_sparse_filenames} +\title{Get sparse list of input data files from `input_dir`.} +\usage{ +get_sparse_filenames(start_date, end_date, params) +} +\arguments{ +\item{start_date}{Start of desired date range} + +\item{end_date}{End of desired date range} + +\item{params}{Params object produced by read_params} +} +\value{ +Character vector of filenames +} +\description{ +Finds every fourth + last file by date. +} diff --git a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R index 165017dc9..aac4f6267 100644 --- a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R +++ b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R @@ -72,10 +72,59 @@ test_that("testing get_filenames_in_range command", { ) out <- get_filenames_in_range(date_range[[1]], date_range[[2]], params) + expect_equal(out, expected_output) +}) + + +test_that("testing get_sparse_filenames command", { + tdir <- tempfile() + files <- c( + "2020-01-01.2019-12-26_Wave_4.csv", + "2020-01-02.2019-12-27_Wave_4.csv", + "2020-01-03.2019-12-28_Wave_4.csv", + "2020-01-04.2019-12-29_Wave_4.csv", + "2020-01-05.2019-12-30_Wave_4.csv", + "2020-01-05.2019-12-30_Wave_5.csv", + "2020-01-06.2019-12-31_Wave_4.csv", + "2020-01-06.2019-12-31_Wave_5.csv", + "2020-01-07.2019-01-01_Wave_4.csv", + "2020-01-08.2019-01-02_Wave_4.csv", + "2020-01-09.2019-01-03_Wave_4.csv", + "2020-01-10.2019-01-04_Wave_4.csv", + + "2019-11-06.2019-10-30.2020-11-06.Survey_of_COVID-Like_Illness_-_TODEPLOY_......_-_US_Expansion.csv", + "2020-01-16.2020-01-09_YouTube.csv", + "2020-01-16.2020-01-09_Wave_4.csv", + "2020-02-06.2020-01-31_Wave_4.csv", + "2020-02-16.2020-02-09_Wave_3.csv" + ) + create_dir_not_exist(tdir) + for (filename in files) { + write_csv(data.frame(), path = file.path(tdir, filename)) + } + + params <- list( + input = c(), + use_input_asis = FALSE, + backfill_days = 4, + input_dir = tdir + ) + date_range <- list(ymd("2020-01-01"), ymd("2020-01-6")) + + expected_output <- c( + "2020-01-01.2019-12-26_Wave_4.csv", + "2020-01-05.2019-12-30_Wave_4.csv", + "2020-01-05.2019-12-30_Wave_5.csv", + "2020-01-09.2019-01-03_Wave_4.csv", + "2020-01-10.2019-01-04_Wave_4.csv" + ) + + out <- get_sparse_filenames(date_range[[1]], date_range[[2]], params) expect_equal(out, expected_output) }) + test_that("testing verify_aggs command", { # Duplicate rows input_aggs <- tribble( From aac1d7962997eea23f1c37d0e96111bfd6220eb1 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 16 Apr 2021 15:26:55 -0400 Subject: [PATCH 11/35] fetch filenames that meet active/dormant criteria When `input` is not defined in the params file, `get_filenames_in_range` will find all input files whose names match the active survey names as specified in the params file. If the `qualtrics` active/dormant survey names aren't available, hard-coded patterns will be used (for backwards compatibility). --- facebook/delphiFacebook/R/contingency_utils.R | 48 +++++++++++++------ .../man/get_filenames_in_range.Rd | 4 +- .../man/read_contingency_params.Rd | 2 +- 3 files changed, 37 insertions(+), 17 deletions(-) diff --git a/facebook/delphiFacebook/R/contingency_utils.R b/facebook/delphiFacebook/R/contingency_utils.R index 39bd269e4..eddc95e8d 100644 --- a/facebook/delphiFacebook/R/contingency_utils.R +++ b/facebook/delphiFacebook/R/contingency_utils.R @@ -1,7 +1,7 @@ #' Return params file as an R list #' #' Reads a parameters file. Copies global params to contingency params if not -#' already defined. +#' already defined. Uses current date as end_date if not provided. #' #' @param path path to the parameters file; if not present, will try to copy the file #' "params.json.template" @@ -17,15 +17,21 @@ read_contingency_params <- function(path = "params.json", template_path = "param contingency_params$start_time <- ymd_hms( sprintf("%s 00:00:00", contingency_params$start_date), tz = tz_to ) + + # Fill in end_date, if missing, with current date. + contingency_params$end_date <- if_else( + is.null(contingency_params$end_date), as.character(Sys.Date()), contingency_params$end_date + ) + contingency_params$end_time <- ymd_hms( sprintf("%s 23:59:59", contingency_params$end_date), tz = tz_to ) global_params <- c("archive_days", "backfill_days", "static_dir", "cache_dir", "archive_dir", "weights_in_dir", "input_dir", "debug", - "parallel") + "parallel", "qualtrics") for (param in global_params) { - if ( is.null(contingency_params[[param]]) ) { + if ( is.null(contingency_params[[param]]) & !is.null(params[[param]]) ) { contingency_params[[param]] <- params[[param]] } } @@ -55,19 +61,14 @@ read_contingency_params <- function(path = "params.json", template_path = "param #' #' @export update_params <- function(params) { - # Fill in end_time, if missing, with current time. - if (is.null(params$end_time)) { - params$end_time <- Sys.time() - } - # Construct aggregate date range. if ( !is.null(params$start_date) ) { + # If start_date is provided, use start/end dates exactly as given. date_range <- list(params$start_time, params$end_time) } else { # If start_date is not provided, assume want to use preceding full time period. date_range <- get_range_prev_full_period( - as_date(params$end_date) - , params$aggregate_range + as_date(params$end_date), params$aggregate_range ) } @@ -92,6 +93,10 @@ update_params <- function(params) { } #' Get relevant input data file names from `input_dir`. +#' +#' Only include files containing data that falls at least somewhat between start +#' and end dates, and is from an allowed ("active") survey and not a "dormant" +#' survey. #' #' @param start_date Start of desired date range #' @param end_date End of desired date range @@ -109,11 +114,24 @@ get_filenames_in_range <- function(start_date, end_date, params) { end_date <- as_date(end_date) if ( is.null(params[["input"]]) || length(params$input) == 0 ) { - date_pattern <- "^[0-9]{4}-[0-9]{2}-[0-9]{2}.*[.]csv$" - youtube_pattern <- ".*YouTube[.]csv$" + ## Only keep files from active surveys. + + if ( !is.null(params[["qualtrics"]]) ) { + include_patterns <- names(params$qualtrics$surveys$active) + include_patterns <- gsub(" ", "_", include_patterns, fixed=TRUE) + + exclude_patterns <- names(params$qualtrics$surveys$dormant) + exclude_patterns <- gsub(" ", "_", exclude_patterns, fixed=TRUE) + } else { + include_patterns <- c("^[0-9]{4}-[0-9]{2}-[0-9]{2}.*[.]csv$") + exclude_patterns <- c(".*YouTube[.]csv$") + } filenames <- list.files(path=params$input_dir) - filenames <- filenames[grepl(date_pattern, filenames) & !grepl(youtube_pattern, filenames)] + + include_map <- grepl(paste(include_patterns, collapse="|"), filenames) + exclude_map <- grepl(paste(exclude_patterns, collapse="|"), filenames) + filenames <- filenames[include_map & !exclude_map] } else { filenames <- params$input } @@ -121,8 +139,8 @@ get_filenames_in_range <- function(start_date, end_date, params) { file_end_dates <- as_date(substr(filenames, 1, 10)) file_start_dates <- as_date(substr(filenames, 12, 21)) - # Only keep files with data that falls at least somewhat between the desired - # start and end range dates. + ## Only keep files with data that falls at least somewhat between the desired + ## start and end range dates. filenames <- filenames[ !(( file_start_dates < start_date & file_end_dates < start_date ) | ( file_start_dates > end_date & file_end_dates > end_date ))] diff --git a/facebook/delphiFacebook/man/get_filenames_in_range.Rd b/facebook/delphiFacebook/man/get_filenames_in_range.Rd index ddc6f83d0..959cb8d15 100644 --- a/facebook/delphiFacebook/man/get_filenames_in_range.Rd +++ b/facebook/delphiFacebook/man/get_filenames_in_range.Rd @@ -17,5 +17,7 @@ get_filenames_in_range(start_date, end_date, params) Character vector of filenames } \description{ -Get relevant input data file names from `input_dir`. +Only include files containing data that falls at least somewhat between start +and end dates, and is from an allowed ("active") survey and not a "dormant" +survey. } diff --git a/facebook/delphiFacebook/man/read_contingency_params.Rd b/facebook/delphiFacebook/man/read_contingency_params.Rd index bbecc199d..71e35ffbb 100644 --- a/facebook/delphiFacebook/man/read_contingency_params.Rd +++ b/facebook/delphiFacebook/man/read_contingency_params.Rd @@ -20,5 +20,5 @@ a named list of parameters values } \description{ Reads a parameters file. Copies global params to contingency params if not -already defined. +already defined. Uses current date as end_date if not provided. } From 0517cb4b1022fc53b8760d313b2ad7a0e3cf2629 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 16 Apr 2021 15:52:21 -0400 Subject: [PATCH 12/35] update tests --- facebook/delphiFacebook/R/contingency_utils.R | 2 +- .../unit-tests/testthat/test-contingency-utils.R | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/facebook/delphiFacebook/R/contingency_utils.R b/facebook/delphiFacebook/R/contingency_utils.R index eddc95e8d..253e88210 100644 --- a/facebook/delphiFacebook/R/contingency_utils.R +++ b/facebook/delphiFacebook/R/contingency_utils.R @@ -80,7 +80,7 @@ update_params <- function(params) { params$input <- get_filenames_in_range(date_range[[1]], date_range[[2]], params) } - if ( length(params$input) == 0 || all(is.na(params$input)) ) { + if ( length(params[["input"]]) == 0 || all(is.na(params$input)) ) { stop("no input files to read in") } diff --git a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R index aac4f6267..96011778c 100644 --- a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R +++ b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R @@ -10,7 +10,7 @@ test_that("testing update_params command", { use_input_asis = TRUE, aggregate_range = "month", end_date = "2020-02-01", - input_dir = "./input" + input_dir = "./static" # Using a directory that doesn't contain any valid data files. ) expect_error(update_params(params), "no input files to read in") @@ -30,8 +30,8 @@ test_that("testing update_params command", { use_input_asis = TRUE, aggregate_range = "month", end_date = ymd("2020-01-31"), - end_time = ymd_hms("2020-01-31 23:59:59", tz=timezone), start_time = ymd_hms("2020-01-01 00:00:00", tz=timezone), + end_time = ymd_hms("2020-01-31 23:59:59", tz=timezone), start_date = ymd("2020-01-01") ) From 0292c1b38eb8d4132dbded640e8e3504848b083b Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 26 Apr 2021 18:25:31 -0400 Subject: [PATCH 13/35] output list of input files to txt --- facebook/delphiFacebook/R/contingency_utils.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/facebook/delphiFacebook/R/contingency_utils.R b/facebook/delphiFacebook/R/contingency_utils.R index 253e88210..649a9c0ad 100644 --- a/facebook/delphiFacebook/R/contingency_utils.R +++ b/facebook/delphiFacebook/R/contingency_utils.R @@ -80,6 +80,9 @@ update_params <- function(params) { params$input <- get_filenames_in_range(date_range[[1]], date_range[[2]], params) } + # Overwrites contents of file of the same name. + writeLines(params$input, "contingency_input.txt") + if ( length(params[["input"]]) == 0 || all(is.na(params$input)) ) { stop("no input files to read in") } From cfc5e91268643b969acf5292029674dd7b6e2523 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 26 Apr 2021 18:51:51 -0400 Subject: [PATCH 14/35] remove saved input txt file from testing --- .../delphiFacebook/integration-tests/testthat/teardown-run.R | 1 + 1 file changed, 1 insertion(+) diff --git a/facebook/delphiFacebook/integration-tests/testthat/teardown-run.R b/facebook/delphiFacebook/integration-tests/testthat/teardown-run.R index 63cec3450..52cf09c9b 100644 --- a/facebook/delphiFacebook/integration-tests/testthat/teardown-run.R +++ b/facebook/delphiFacebook/integration-tests/testthat/teardown-run.R @@ -18,6 +18,7 @@ file.remove(test_path("archive")) file.remove(test_path("receiving_full")) file.remove(test_path("individual_full")) file.remove(test_path("receiving_contingency_full")) +file.remove(test_path("contingency_input.txt")) if ( dir.exists(test_path("receiving_contingency_test")) ) { file.remove(test_path("receiving_contingency_test")) From 996f0d0a08c0763cc954a3a74a7b681ac794b4a1 Mon Sep 17 00:00:00 2001 From: Katie Mazaitis Date: Tue, 20 Jul 2021 10:31:22 -0400 Subject: [PATCH 15/35] Adjust nchs-mortality max_age in sirCAL --- ansible/templates/sir_complainsalot-params-prod.json.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/templates/sir_complainsalot-params-prod.json.j2 b/ansible/templates/sir_complainsalot-params-prod.json.j2 index 24a7ed1b6..a9c9775ad 100644 --- a/ansible/templates/sir_complainsalot-params-prod.json.j2 +++ b/ansible/templates/sir_complainsalot-params-prod.json.j2 @@ -48,7 +48,7 @@ "retired-signals": ["raw_pct_negative","smoothed_pct_negative","raw_tests_per_device","smoothed_tests_per_device"] }, "nchs-mortality": { - "max_age":13, + "max_age":16, "maintainers": [] }, "covid-act-now": { From e7dcded0b95a01958287d34fa84d40466c5cdf37 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 20 Jul 2021 11:44:05 -0400 Subject: [PATCH 16/35] replace deprecated arg in write_csv --- .../delphiFacebook/unit-tests/testthat/test-contingency-utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R index 165017dc9..e570dd142 100644 --- a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R +++ b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R @@ -53,7 +53,7 @@ test_that("testing get_filenames_in_range command", { create_dir_not_exist(tdir) for (filename in files) { - write_csv(data.frame(), path = file.path(tdir, filename)) + write_csv(data.frame(), file.path(tdir, filename)) } params <- list( From 24f0afc8dc49717cf09dcf724f4c00df527e6ad6 Mon Sep 17 00:00:00 2001 From: Katie Mazaitis Date: Tue, 20 Jul 2021 11:56:08 -0400 Subject: [PATCH 17/35] Fix nchs-mortality max age in both files Co-authored-by: QX Teo <37101453+qx-teo@users.noreply.github.com> --- sir_complainsalot/params.json.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sir_complainsalot/params.json.template b/sir_complainsalot/params.json.template index df9624ce4..5c1cdd891 100644 --- a/sir_complainsalot/params.json.template +++ b/sir_complainsalot/params.json.template @@ -49,7 +49,7 @@ "retired-signals": ["raw_pct_negative","smoothed_pct_negative","raw_tests_per_device","smoothed_tests_per_device"] }, "nchs-mortality": { - "max_age":13, + "max_age":16, "maintainers": [] }, "covid-act-now": { From 07f606d6f353869d988361e1910d73befb06d05f Mon Sep 17 00:00:00 2001 From: QX Teo <37101453+qx-teo@users.noreply.github.com> Date: Tue, 20 Jul 2021 12:58:29 -0400 Subject: [PATCH 18/35] Modify summary for alerts - Removed set_summary() function as it's obsolete - Added new summary messages based on whether validation run was successful -Added data source field in summary statement (some sources like HHS doesn't indicate the data source from the logger statement) - Removed obsolete tests --- .../delphi_utils/validator/report.py | 32 ++++++++++++------- .../delphi_utils/validator/validate.py | 3 +- .../tests/validator/test_report.py | 4 --- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/validator/report.py b/_delphi_utils_python/delphi_utils/validator/report.py index 97d483870..527b9c673 100644 --- a/_delphi_utils_python/delphi_utils/validator/report.py +++ b/_delphi_utils_python/delphi_utils/validator/report.py @@ -7,18 +7,22 @@ class ValidationReport: """Class for reporting the results of validation.""" - def __init__(self, errors_to_suppress: List[ValidationFailure]): + def __init__(self, errors_to_suppress: List[ValidationFailure], data_source: str = ""): """Initialize a ValidationReport. Parameters ---------- errors_to_suppress: List[ValidationFailure] List of ValidationFailures to ignore. + data_source: str + Name of data source as obtained from params Attributes ---------- errors_to_suppress: List[ValidationFailure] See above + data_source: str + See above num_suppressed: int Number of errors suppressed total_checks: int @@ -31,12 +35,12 @@ def __init__(self, errors_to_suppress: List[ValidationFailure]): Errors raised from validation failures not found in `self.errors_to_suppress` """ self.errors_to_suppress = errors_to_suppress + self.data_source = data_source self.num_suppressed = 0 self.total_checks = 0 self.raised_errors = [] self.raised_warnings = [] self.unsuppressed_errors = [] - self.summary = "" def add_raised_error(self, error): """Add an error to the report. @@ -74,21 +78,25 @@ def add_raised_warning(self, warning): """ self.raised_warnings.append(warning) - def set_summary(self): - """Represent summary of report as a string.""" - out_str = f"{self.total_checks} checks run\n" - out_str += f"{len(self.unsuppressed_errors)} checks failed\n" - out_str += f"{self.num_suppressed} checks suppressed\n" - out_str += f"{len(self.raised_warnings)} warnings\n" - self.summary = out_str - def log(self, logger=None): """Log errors and warnings.""" if logger is None: logger = get_structured_logger(__name__) - self.set_summary() - logger.info(self.summary) + if self.success(): + logger.info("Validation run successful", + data_source = self.data_source, + checks_run = self.total_checks, + checks_failed = len(self.unsuppressed_errors), + checks_suppressed = self.num_suppressed, + warnings = len(self.raised_warnings)) + else: + logger.info("Validation run unsuccessful", + data_source = self.data_source, + checks_run = self.total_checks, + checks_failed = len(self.unsuppressed_errors), + checks_suppressed = self.num_suppressed, + warnings = len(self.raised_warnings)) for error in self.unsuppressed_errors: logger.critical(str(error)) for warning in self.raised_warnings: diff --git a/_delphi_utils_python/delphi_utils/validator/validate.py b/_delphi_utils_python/delphi_utils/validator/validate.py index 0b78492ea..8b6b31671 100644 --- a/_delphi_utils_python/delphi_utils/validator/validate.py +++ b/_delphi_utils_python/delphi_utils/validator/validate.py @@ -37,6 +37,7 @@ def __init__(self, params): # Date/time settings self.time_window = TimeWindow.from_params(validation_params["common"]["end_date"], validation_params["common"]["span_length"]) + self.data_source = validation_params["common"].get("data_source", "") self.static_validation = StaticValidator(validation_params) self.dynamic_validation = DynamicValidator(validation_params) @@ -51,7 +52,7 @@ def validate(self): Returns: - ValidationReport collating the validation outcomes """ - report = ValidationReport(self.suppressed_errors) + report = ValidationReport(self.suppressed_errors, self.data_source) frames_list = load_all_files(self.export_dir, self.time_window.start_date, self.time_window.end_date) self.static_validation.validate(frames_list, report) diff --git a/_delphi_utils_python/tests/validator/test_report.py b/_delphi_utils_python/tests/validator/test_report.py index a46f243d0..7f7999983 100644 --- a/_delphi_utils_python/tests/validator/test_report.py +++ b/_delphi_utils_python/tests/validator/test_report.py @@ -40,10 +40,6 @@ def test_str(self): report.add_raised_warning(ImportWarning("right import")) report.add_raised_error(self.ERROR_1) report.add_raised_error(self.ERROR_2) - report.set_summary() - - assert report.summary ==\ - "3 checks run\n1 checks failed\n1 checks suppressed\n2 warnings\n" def test_log(self): """Test that the logs contain all failures and warnings.""" From 4ba546733a28cc31e752f174c0a179dbecc154a8 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 20 Jul 2021 15:51:10 -0400 Subject: [PATCH 19/35] prevent readr from being updated --- .github/workflows/r-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/r-ci.yml b/.github/workflows/r-ci.yml index 05a9c0596..df7a66b76 100644 --- a/.github/workflows/r-ci.yml +++ b/.github/workflows/r-ci.yml @@ -51,7 +51,7 @@ jobs: install.packages("remotes") remotes::update_packages(c("rcmdcheck", "mockr"), upgrade="always") dependency_list <- remotes::dev_package_deps(dependencies=TRUE) - remotes::update_packages(dependency_list$package, upgrade="always") + remotes::update_packages(dependency_list$package[dependency_list$package != "readr"], upgrade="always") shell: Rscript {0} - name: Check run: | From 9cce594788bbf560741e06507f00ab8aad62e4e7 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 20 Jul 2021 11:40:29 -0400 Subject: [PATCH 20/35] correct filename date reference to end date --- facebook/contingency-combine.R | 2 +- facebook/delphiFacebook/R/contingency_utils.R | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/facebook/contingency-combine.R b/facebook/contingency-combine.R index d4b730497..c6f2ad919 100644 --- a/facebook/contingency-combine.R +++ b/facebook/contingency-combine.R @@ -189,7 +189,7 @@ write_rollup <- function(newly_seen_files, seen_file, output_df, output_file) { args <- commandArgs(TRUE) -if (length(args) < 2) { +if (length(args) != 2) { stop("Usage: Rscript contingency-combine.R path/to/individual/files/ path/to/rollup/files/") } diff --git a/facebook/delphiFacebook/R/contingency_utils.R b/facebook/delphiFacebook/R/contingency_utils.R index 649a9c0ad..b36f2d223 100644 --- a/facebook/delphiFacebook/R/contingency_utils.R +++ b/facebook/delphiFacebook/R/contingency_utils.R @@ -117,7 +117,7 @@ get_filenames_in_range <- function(start_date, end_date, params) { end_date <- as_date(end_date) if ( is.null(params[["input"]]) || length(params$input) == 0 ) { - ## Only keep files from active surveys. + ## Keep all files from active surveys that appear in the input dir. if ( !is.null(params[["qualtrics"]]) ) { include_patterns <- names(params$qualtrics$surveys$active) @@ -126,6 +126,8 @@ get_filenames_in_range <- function(start_date, end_date, params) { exclude_patterns <- names(params$qualtrics$surveys$dormant) exclude_patterns <- gsub(" ", "_", exclude_patterns, fixed=TRUE) } else { + # If no active/dormant survey info provided, use basic patterns to + # include/exclude survey files. include_patterns <- c("^[0-9]{4}-[0-9]{2}-[0-9]{2}.*[.]csv$") exclude_patterns <- c(".*YouTube[.]csv$") } @@ -139,7 +141,8 @@ get_filenames_in_range <- function(start_date, end_date, params) { filenames <- params$input } - file_end_dates <- as_date(substr(filenames, 1, 10)) + # Filenames are formatted as "{generation date}.{start date}.{end date}.{survey name}_-_{survey version}.csv". + file_end_dates <- as_date(substr(filenames, 23, 32)) file_start_dates <- as_date(substr(filenames, 12, 21)) ## Only keep files with data that falls at least somewhat between the desired @@ -161,7 +164,7 @@ get_filenames_in_range <- function(start_date, end_date, params) { #' #' @return Character vector of filenames #' -#' @importFrom stringr str_remove_all +#' @importFrom lubridate as_date #' #' @export get_sparse_filenames <- function(start_date, end_date, params) { @@ -169,7 +172,7 @@ get_sparse_filenames <- function(start_date, end_date, params) { filenames <- get_filenames_in_range(start_date, end_date, params) - file_end_dates <- as.integer(str_remove_all(substr(filenames, 1, 10), "-")) + file_end_dates <- as_date(substr(filenames, 23, 32)) unique_file_end_dates <- unique(file_end_dates) max_end_date <- max(unique_file_end_dates) From cbff6b017da6d7509d7e18ec8a8a0a2bfb2fd9b9 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 20 Jul 2021 16:22:34 -0400 Subject: [PATCH 21/35] simplify func to get every fourth data file --- facebook/delphiFacebook/R/contingency_utils.R | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/facebook/delphiFacebook/R/contingency_utils.R b/facebook/delphiFacebook/R/contingency_utils.R index b36f2d223..1b4d7aa5d 100644 --- a/facebook/delphiFacebook/R/contingency_utils.R +++ b/facebook/delphiFacebook/R/contingency_utils.R @@ -173,23 +173,14 @@ get_sparse_filenames <- function(start_date, end_date, params) { filenames <- get_filenames_in_range(start_date, end_date, params) file_end_dates <- as_date(substr(filenames, 23, 32)) - unique_file_end_dates <- unique(file_end_dates) - - max_end_date <- max(unique_file_end_dates) - - # Use every fourth date. - stride <- 4L - curr_date <- min(unique_file_end_dates) - keep_dates <- c() - while ( curr_date < max_end_date ) { - keep_dates <- c(keep_dates, curr_date) - curr_date <- min(curr_date + stride, max_end_date) - } - - # Always add last date - keep_dates <- c(keep_dates, max_end_date) + unique_file_end_dates <- sort(unique(file_end_dates)) + # Use every fourth date. Always keep last date. + keep_dates <- c( + seq(1, length(unique_file_end_dates), 4L), + length(unique_file_end_dates)) filenames <- filenames[file_end_dates %in% keep_dates] + return(filenames) } From 472ff22bd43c56c608d995e64d8ab855322eda4d Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 20 Jul 2021 16:33:28 -0400 Subject: [PATCH 22/35] deduplicate keep_dates if a multiple of 4 --- facebook/delphiFacebook/R/contingency_utils.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/facebook/delphiFacebook/R/contingency_utils.R b/facebook/delphiFacebook/R/contingency_utils.R index 1b4d7aa5d..f9817a825 100644 --- a/facebook/delphiFacebook/R/contingency_utils.R +++ b/facebook/delphiFacebook/R/contingency_utils.R @@ -176,9 +176,10 @@ get_sparse_filenames <- function(start_date, end_date, params) { unique_file_end_dates <- sort(unique(file_end_dates)) # Use every fourth date. Always keep last date. - keep_dates <- c( + keep_inds <- unique(c( seq(1, length(unique_file_end_dates), 4L), - length(unique_file_end_dates)) + length(unique_file_end_dates))) + keep_dates <- unique_file_end_dates[keep_inds] filenames <- filenames[file_end_dates %in% keep_dates] return(filenames) From 8734663dfcc7917e5fd61fa77a07ac8a7073f529 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 20 Jul 2021 16:39:42 -0400 Subject: [PATCH 23/35] remove empty test file --- .../unit-tests/testthat/test-contingency-variables.R | 4 ---- facebook/delphiFacebook/unit-tests/testthat/test-utils.R | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) delete mode 100644 facebook/delphiFacebook/unit-tests/testthat/test-contingency-variables.R diff --git a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-variables.R b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-variables.R deleted file mode 100644 index 8561fe7c6..000000000 --- a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-variables.R +++ /dev/null @@ -1,4 +0,0 @@ -library(data.table) -library(tibble) - -context("Testing response recoding and renaming") diff --git a/facebook/delphiFacebook/unit-tests/testthat/test-utils.R b/facebook/delphiFacebook/unit-tests/testthat/test-utils.R index 9ae35ff00..082b8918d 100644 --- a/facebook/delphiFacebook/unit-tests/testthat/test-utils.R +++ b/facebook/delphiFacebook/unit-tests/testthat/test-utils.R @@ -24,7 +24,7 @@ test_that("testing create dir function", { test_that("testing read params when missing file", { - # expect error if missing file, since no template in test dir + # expect error if missing file, since no template in test dir tdir <- tempfile() expect_warning(expect_error(read_params(tdir))) }) From d2eb278908fbefbc47f06bf15405f6bd59621caf Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 20 Jul 2021 18:44:54 -0400 Subject: [PATCH 24/35] explicitly install desired readr version if missing --- .github/workflows/r-ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/r-ci.yml b/.github/workflows/r-ci.yml index df7a66b76..b9844f10c 100644 --- a/.github/workflows/r-ci.yml +++ b/.github/workflows/r-ci.yml @@ -48,6 +48,10 @@ jobs: ${{ runner.os }}-r-facebook-survey- - name: Install R dependencies run: | + if ( packageVersion("readr") != "1.4.0" ) { + install.packages("devtools") + devtools::install_version("readr", version = "1.4.0") + } install.packages("remotes") remotes::update_packages(c("rcmdcheck", "mockr"), upgrade="always") dependency_list <- remotes::dev_package_deps(dependencies=TRUE) From a561e94911cfe7d3d76d65cf693d998c90b76afb Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 20 Jul 2021 18:53:08 -0400 Subject: [PATCH 25/35] switch to daily caching --- .github/workflows/r-ci.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/r-ci.yml b/.github/workflows/r-ci.yml index 05a9c0596..0cf75e33a 100644 --- a/.github/workflows/r-ci.yml +++ b/.github/workflows/r-ci.yml @@ -35,15 +35,15 @@ jobs: - name: Install linux dependencies run: | sudo apt-get install libcurl4-openssl-dev - - name: Get month - id: get-month + - name: Get date + id: get-date run: | - echo "::set-output name=month::$(/bin/date -u "+%Y%m")" + echo "::set-output name=date::$(/bin/date -u "+%Y%m%d")" - name: Cache R packages uses: actions/cache@v2 with: path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-r-facebook-survey-${{ steps.get-month.outputs.month }} + key: ${{ runner.os }}-r-facebook-survey-${{ steps.get-date.outputs.date }} restore-keys: | ${{ runner.os }}-r-facebook-survey- - name: Install R dependencies From 5f5f0142fe6c378ffd1f69f07d91a025980294ee Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 21 Jul 2021 10:32:35 -0400 Subject: [PATCH 26/35] update test filename expectations --- facebook/delphiFacebook/NAMESPACE | 1 - .../testthat/test-contingency-utils.R | 72 +++++++++---------- 2 files changed, 36 insertions(+), 37 deletions(-) diff --git a/facebook/delphiFacebook/NAMESPACE b/facebook/delphiFacebook/NAMESPACE index 3fe733903..d659cb93f 100644 --- a/facebook/delphiFacebook/NAMESPACE +++ b/facebook/delphiFacebook/NAMESPACE @@ -113,7 +113,6 @@ importFrom(stringi,stri_split) importFrom(stringi,stri_sub) importFrom(stringi,stri_trans_tolower) importFrom(stringi,stri_trim) -importFrom(stringr,str_remove_all) importFrom(tibble,add_column) importFrom(tibble,as_tibble) importFrom(tibble,tribble) diff --git a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R index 96011778c..0a20d59bc 100644 --- a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R +++ b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R @@ -42,18 +42,18 @@ test_that("testing update_params command", { test_that("testing get_filenames_in_range command", { tdir <- tempfile() files <- c( - "2019-11-06.2019-10-30.2020-11-06.Survey_of_COVID-Like_Illness_-_TODEPLOY_......_-_US_Expansion.csv", - "2019-12-31.2019-12-24_With_Translations.csv", - "2020-01-06.2019-12-31_Wave_4.csv", - "2020-01-16.2020-01-09_YouTube.csv", - "2020-01-16.2020-01-09_Wave_4.csv", - "2020-02-06.2020-01-31_Wave_4.csv", - "2020-02-16.2020-02-09_Wave_3.csv" + "2029-01-01.2019-10-30.2019-11-06.Survey_of_COVID-Like_Illness_-_TODEPLOY_......_-_US_Expansion.csv", + "2029-01-01.2019-12-24.2019-12-31_With_Translations.csv", + "2029-01-01.2019-12-31.2020-01-06_Wave_4.csv", + "2029-01-01.2020-01-09.2020-01-16_YouTube.csv", + "2029-01-01.2020-01-09.2020-01-16_Wave_4.csv", + "2029-01-01.2020-01-31.2020-02-06_Wave_4.csv", + "2029-01-01.2020-02-09.2020-02-16_Wave_3.csv" ) create_dir_not_exist(tdir) for (filename in files) { - write_csv(data.frame(), path = file.path(tdir, filename)) + write_csv(data.frame(), file.path(tdir, filename)) } params <- list( @@ -65,10 +65,10 @@ test_that("testing get_filenames_in_range command", { date_range <- list(ymd("2020-01-01"), ymd("2020-01-31")) expected_output <- c( - "2019-12-31.2019-12-24_With_Translations.csv", - "2020-01-06.2019-12-31_Wave_4.csv", - "2020-01-16.2020-01-09_Wave_4.csv", - "2020-02-06.2020-01-31_Wave_4.csv" + "2029-01-01.2019-12-24.2019-12-31_With_Translations.csv", + "2029-01-01.2019-12-31.2020-01-06_Wave_4.csv", + "2029-01-01.2020-01-09.2020-01-16_Wave_4.csv", + "2029-01-01.2020-01-31.2020-02-06_Wave_4.csv" ) out <- get_filenames_in_range(date_range[[1]], date_range[[2]], params) @@ -79,24 +79,24 @@ test_that("testing get_filenames_in_range command", { test_that("testing get_sparse_filenames command", { tdir <- tempfile() files <- c( - "2020-01-01.2019-12-26_Wave_4.csv", - "2020-01-02.2019-12-27_Wave_4.csv", - "2020-01-03.2019-12-28_Wave_4.csv", - "2020-01-04.2019-12-29_Wave_4.csv", - "2020-01-05.2019-12-30_Wave_4.csv", - "2020-01-05.2019-12-30_Wave_5.csv", - "2020-01-06.2019-12-31_Wave_4.csv", - "2020-01-06.2019-12-31_Wave_5.csv", - "2020-01-07.2019-01-01_Wave_4.csv", - "2020-01-08.2019-01-02_Wave_4.csv", - "2020-01-09.2019-01-03_Wave_4.csv", - "2020-01-10.2019-01-04_Wave_4.csv", + "2021-12-11.2019-12-26.2020-01-01_Wave_4.csv", + "2021-12-11.2019-12-27.2020-01-02_Wave_4.csv", + "2021-12-11.2019-12-28.2020-01-03_Wave_4.csv", + "2021-12-11.2019-12-29.2020-01-04_Wave_4.csv", + "2021-12-11.2019-12-30.2020-01-05_Wave_4.csv", + "2021-12-11.2019-12-30.2020-01-05_Wave_5.csv", + "2021-12-11.2019-12-31.2020-01-06_Wave_4.csv", + "2021-12-11.2019-12-31.2020-01-06_Wave_5.csv", + "2021-12-11.2019-01-01.2020-01-07_Wave_4.csv", + "2021-12-11.2019-01-02.2020-01-08_Wave_4.csv", + "2021-12-11.2019-01-03.2020-01-09_Wave_4.csv", + "2021-12-11.2019-01-04.2020-01-10_Wave_4.csv", - "2019-11-06.2019-10-30.2020-11-06.Survey_of_COVID-Like_Illness_-_TODEPLOY_......_-_US_Expansion.csv", - "2020-01-16.2020-01-09_YouTube.csv", - "2020-01-16.2020-01-09_Wave_4.csv", - "2020-02-06.2020-01-31_Wave_4.csv", - "2020-02-16.2020-02-09_Wave_3.csv" + "2011-12-11.2019-10-30.2019-11-06.2020-11-06.Survey_of_COVID-Like_Illness_-_TODEPLOY_......_-_US_Expansion.csv", + "2021-12-11.2020-01-09.2020-01-16_YouTube.csv", + "2021-12-11.2020-01-09.2020-01-16_Wave_4.csv", + "2021-12-11.2020-01-31.2020-02-06_Wave_4.csv", + "2021-12-11.2020-02-09.2020-02-16_Wave_3.csv" ) create_dir_not_exist(tdir) @@ -112,13 +112,13 @@ test_that("testing get_sparse_filenames command", { ) date_range <- list(ymd("2020-01-01"), ymd("2020-01-6")) - expected_output <- c( - "2020-01-01.2019-12-26_Wave_4.csv", - "2020-01-05.2019-12-30_Wave_4.csv", - "2020-01-05.2019-12-30_Wave_5.csv", - "2020-01-09.2019-01-03_Wave_4.csv", - "2020-01-10.2019-01-04_Wave_4.csv" - ) + expected_output <- sort(c( + "2021-12-11.2019-12-26.2020-01-01_Wave_4.csv", + "2021-12-11.2019-12-30.2020-01-05_Wave_4.csv", + "2021-12-11.2019-12-30.2020-01-05_Wave_5.csv", + "2021-12-11.2019-01-03.2020-01-09_Wave_4.csv", + "2021-12-11.2019-01-04.2020-01-10_Wave_4.csv" + )) out <- get_sparse_filenames(date_range[[1]], date_range[[2]], params) expect_equal(out, expected_output) From 1568b888fbd21516f3b91b99ab3642095d394d4c Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 21 Jul 2021 10:47:53 -0400 Subject: [PATCH 27/35] set expectation for skipped filename test --- facebook/delphiFacebook/R/contingency_write.R | 2 +- .../unit-tests/testthat/test-contingency-write.R | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/facebook/delphiFacebook/R/contingency_write.R b/facebook/delphiFacebook/R/contingency_write.R index 7b8d25573..f02cf95ef 100644 --- a/facebook/delphiFacebook/R/contingency_write.R +++ b/facebook/delphiFacebook/R/contingency_write.R @@ -148,7 +148,7 @@ add_metadata_vars <- function(data, params, geo_type, groupby_vars) { #' @noRd get_file_name <- function(params, geo_type, groupby_vars) { - aggregation_type <- setdiff(groupby_vars, "geo_id") + aggregation_type <- sort(setdiff(groupby_vars, "geo_id")) if (length(aggregation_type) == 0) aggregation_type <- "overall" file_name <- paste( diff --git a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-write.R b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-write.R index 029fd16e6..0d28520f5 100644 --- a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-write.R +++ b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-write.R @@ -61,7 +61,11 @@ test_that("testing command to create output filenames", { out <- get_file_name(params, "nation", c("gender")) expected <- "DebugOn-DoNotShare_20210101_20210102_monthly_nation_gender.csv" + expect_equal(out, expected) + params$debug <- FALSE out <- get_file_name(params, "nation", c("gender", "race", "ethnicity")) expected <- "20210101_20210102_monthly_nation_ethnicity_gender_race.csv" + + expect_equal(out, expected) }) From b32a4dc11ca39c69dd0a9923500665e3a1e7e80d Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 21 Jul 2021 11:35:20 -0400 Subject: [PATCH 28/35] compare sets instead of sorting for robustness --- .../unit-tests/testthat/test-contingency-utils.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R index 0a20d59bc..5738cce71 100644 --- a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R +++ b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R @@ -112,16 +112,16 @@ test_that("testing get_sparse_filenames command", { ) date_range <- list(ymd("2020-01-01"), ymd("2020-01-6")) - expected_output <- sort(c( + expected_output <- c( "2021-12-11.2019-12-26.2020-01-01_Wave_4.csv", "2021-12-11.2019-12-30.2020-01-05_Wave_4.csv", "2021-12-11.2019-12-30.2020-01-05_Wave_5.csv", "2021-12-11.2019-01-03.2020-01-09_Wave_4.csv", "2021-12-11.2019-01-04.2020-01-10_Wave_4.csv" - )) + ) out <- get_sparse_filenames(date_range[[1]], date_range[[2]], params) - expect_equal(out, expected_output) + expect_setequal(out, expected_output) }) From c18b666af8034a4295476aa86c11656c951822bb Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 21 Jul 2021 11:54:48 -0400 Subject: [PATCH 29/35] comments --- facebook/delphiFacebook/R/contingency_utils.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/facebook/delphiFacebook/R/contingency_utils.R b/facebook/delphiFacebook/R/contingency_utils.R index f9817a825..b0f3368ec 100644 --- a/facebook/delphiFacebook/R/contingency_utils.R +++ b/facebook/delphiFacebook/R/contingency_utils.R @@ -145,8 +145,8 @@ get_filenames_in_range <- function(start_date, end_date, params) { file_end_dates <- as_date(substr(filenames, 23, 32)) file_start_dates <- as_date(substr(filenames, 12, 21)) - ## Only keep files with data that falls at least somewhat between the desired - ## start and end range dates. + # Only keep files with data that falls at least somewhat between the desired + # start and end range dates. filenames <- filenames[ !(( file_start_dates < start_date & file_end_dates < start_date ) | ( file_start_dates > end_date & file_end_dates > end_date ))] From b18366112f7cd97fa1de0aa7b98c575f660d0fc5 Mon Sep 17 00:00:00 2001 From: chinandrew Date: Fri, 23 Jul 2021 19:53:28 -0400 Subject: [PATCH 30/35] Remove smoothed version of cumulative signals --- combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py | 1 + jhu/delphi_jhu/run.py | 3 ++- jhu/tests/test_run.py | 2 ++ usafacts/delphi_usafacts/run.py | 2 ++ usafacts/tests/test_run.py | 3 ++- 5 files changed, 9 insertions(+), 2 deletions(-) diff --git a/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py b/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py index b89de2c40..65f5ebdb7 100755 --- a/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py +++ b/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py @@ -322,6 +322,7 @@ def run_module(params): variants = [tuple((metric, geo_res)+sensor_signal(metric, sensor, smoother)) for (metric, geo_res, sensor, smoother) in product(METRICS, GEO_RESOLUTIONS, SENSORS, SMOOTH_TYPES)] + variants = [i for i in variants if "7dav" not in i[2] and "cumulative" not in i[2]] params = configure(variants, params) logger = get_structured_logger( __name__, filename=params["common"].get("log_filename"), diff --git a/jhu/delphi_jhu/run.py b/jhu/delphi_jhu/run.py index 79e041370..ed13dfb67 100644 --- a/jhu/delphi_jhu/run.py +++ b/jhu/delphi_jhu/run.py @@ -106,7 +106,8 @@ def run_module(params: Dict[str, Any]): for metric, geo_res, sensor, smoother in product( METRICS, GEO_RESOLUTIONS, SENSORS, SMOOTHERS ): - print(metric, geo_res, sensor, smoother) + if "cumulative" in sensor and "seven_day_average" in smoother: + continue logger.info( event="generating signal and exporting to CSV", metric=metric, diff --git a/jhu/tests/test_run.py b/jhu/tests/test_run.py index e434b7058..1ff1cc1dd 100644 --- a/jhu/tests/test_run.py +++ b/jhu/tests/test_run.py @@ -31,6 +31,8 @@ def test_output_files_exist(self, run_as_module): for date in dates: for geo in geos: for metric in metrics: + if "7dav" in metric and "cumulative" in metric: + continue # Can't compute 7dav for first few days of data because of NAs if date > "20200305" or "7dav" not in metric: expected_files += [date + "_" + geo + "_" + metric + ".csv"] diff --git a/usafacts/delphi_usafacts/run.py b/usafacts/delphi_usafacts/run.py index 58ddadde5..08a666caa 100644 --- a/usafacts/delphi_usafacts/run.py +++ b/usafacts/delphi_usafacts/run.py @@ -103,6 +103,8 @@ def run_module(params: Dict[str, Dict[str, Any]]): dfs = {metric: pull_usafacts_data(base_url, metric, logger) for metric in METRICS} for metric, geo_res, sensor, smoother in product( METRICS, GEO_RESOLUTIONS, SENSORS, SMOOTHERS): + if "cumulative" in sensor and "seven_day_average" in smoother: + continue logger.info("generating signal and exporting to CSV", geo_res = geo_res, metric = metric, diff --git a/usafacts/tests/test_run.py b/usafacts/tests/test_run.py index d22a514ca..44afd957d 100644 --- a/usafacts/tests/test_run.py +++ b/usafacts/tests/test_run.py @@ -54,8 +54,9 @@ def test_output_files_exist(self): for metric in metrics: if "7dav" in metric and date in dates[:6]: continue # there are no 7dav signals for first 6 days + if "7dav" in metric and "cumulative" in metric: + continue expected_files += [date + "_" + geo + "_" + metric + ".csv"] - assert set(csv_files) == set(expected_files) def test_output_file_format(self): From efde789d9d2218ec77d0a580ef645f76b8cb6ecd Mon Sep 17 00:00:00 2001 From: chinandrew Date: Fri, 23 Jul 2021 20:10:54 -0400 Subject: [PATCH 31/35] fix logic --- combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py b/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py index 65f5ebdb7..f46567616 100755 --- a/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py +++ b/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py @@ -322,7 +322,7 @@ def run_module(params): variants = [tuple((metric, geo_res)+sensor_signal(metric, sensor, smoother)) for (metric, geo_res, sensor, smoother) in product(METRICS, GEO_RESOLUTIONS, SENSORS, SMOOTH_TYPES)] - variants = [i for i in variants if "7dav" not in i[2] and "cumulative" not in i[2]] + variants = [i for i in variants if not ("7dav" in i[2] and "cumulative" in i[2])] params = configure(variants, params) logger = get_structured_logger( __name__, filename=params["common"].get("log_filename"), From 8f1e70fb2189e51cd0c665274d37ddb5c39207b4 Mon Sep 17 00:00:00 2001 From: chinandrew Date: Fri, 23 Jul 2021 23:52:22 -0400 Subject: [PATCH 32/35] Add combo test --- .../tests/receiving/.gitkeep | 0 combo_cases_and_deaths/tests/test_run.py | 49 ++++++++++++++++++- 2 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 combo_cases_and_deaths/tests/receiving/.gitkeep diff --git a/combo_cases_and_deaths/tests/receiving/.gitkeep b/combo_cases_and_deaths/tests/receiving/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/combo_cases_and_deaths/tests/test_run.py b/combo_cases_and_deaths/tests/test_run.py index adcde30ea..7d3b59bba 100644 --- a/combo_cases_and_deaths/tests/test_run.py +++ b/combo_cases_and_deaths/tests/test_run.py @@ -1,13 +1,16 @@ """Tests for running combo cases and deaths indicator.""" from datetime import date from itertools import product +import os import unittest from unittest.mock import patch, call import pandas as pd import numpy as np from delphi_combo_cases_and_deaths.run import ( - extend_raw_date_range, get_updated_dates, + run_module, + extend_raw_date_range, + get_updated_dates, sensor_signal, combine_usafacts_and_jhu, compute_special_geo_dfs, @@ -244,6 +247,50 @@ def test_no_nation_jhu(mock_covidcast_signal): "sample_size": [None]},) ) +@patch("delphi_combo_cases_and_deaths.run.combine_usafacts_and_jhu") +def test_output_files(mock_combine): + params = { + "common": { + "export_dir": "./receiving" + }, + "indicator": { + "export_start_date": [2020, 4, 1], + "source":"indicator-combination", + "wip_signal": "" + } + } + mock_combine.return_value = pd.DataFrame( + { + "geo_id": ["01000"], + "val": [10], + "timestamp": [pd.to_datetime("2021-01-04")], + "issue": [pd.to_datetime("2021-01-04")], + "se": 0, + "sample_size": 0 + }, + index=[1] + ) + run_module(params) + csv_files = [f for f in os.listdir("receiving") if f.endswith(".csv")] + dates = ["20210104"] + geos = ["county", "hrr", "msa", "state", "hhs", "nation"] + + # enumerate metric names. + metrics = [] + for event, span, stat in product(["deaths", "confirmed"], + ["cumulative", "incidence"], + ["num", "prop"]): + metrics.append("_".join([event, span, stat])) + metrics.append("_".join([event, "7dav", span, stat])) + + expected_files = [] + for date in dates: + for geo in geos: + for metric in metrics: + if "7dav" in metric and "cumulative" in metric: + continue + expected_files += [date + "_" + geo + "_" + metric + ".csv"] + assert set(csv_files) == set(expected_files) if __name__ == '__main__': unittest.main() From 3bca88e2c5da23fe4e36430d8b91bcbd9750cb0e Mon Sep 17 00:00:00 2001 From: chinandrew Date: Fri, 23 Jul 2021 23:53:44 -0400 Subject: [PATCH 33/35] fix indent --- combo_cases_and_deaths/tests/test_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/combo_cases_and_deaths/tests/test_run.py b/combo_cases_and_deaths/tests/test_run.py index 7d3b59bba..8d03627d4 100644 --- a/combo_cases_and_deaths/tests/test_run.py +++ b/combo_cases_and_deaths/tests/test_run.py @@ -290,7 +290,7 @@ def test_output_files(mock_combine): if "7dav" in metric and "cumulative" in metric: continue expected_files += [date + "_" + geo + "_" + metric + ".csv"] - assert set(csv_files) == set(expected_files) + assert set(csv_files) == set(expected_files) if __name__ == '__main__': unittest.main() From 488a9d16ff53fc608bd21914ac1c0867f6cdc4db Mon Sep 17 00:00:00 2001 From: chinandrew Date: Sat, 24 Jul 2021 12:15:52 -0400 Subject: [PATCH 34/35] fix test --- jhu/tests/test_smooth.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jhu/tests/test_smooth.py b/jhu/tests/test_smooth.py index 16d49e511..739fcf537 100644 --- a/jhu/tests/test_smooth.py +++ b/jhu/tests/test_smooth.py @@ -9,14 +9,14 @@ def test_output_files_smoothed(self, run_as_module): dates = [str(x) for x in range(20200303, 20200310)] smoothed = pd.read_csv( - join("./receiving", f"{dates[-1]}_state_confirmed_7dav_cumulative_num.csv") + join("./receiving", f"{dates[-1]}_state_confirmed_7dav_incidence_num.csv") ) # Build a dataframe out of the individual day files raw = pd.concat( [ pd.read_csv( - join("./receiving", f"{date}_state_confirmed_cumulative_num.csv") + join("./receiving", f"{date}_state_confirmed_incidence_num.csv") ) for date in dates ] From ac9660d0f64291f58a7fd38e16035c3dac3318d8 Mon Sep 17 00:00:00 2001 From: krivard Date: Mon, 26 Jul 2021 17:01:48 +0000 Subject: [PATCH 35/35] chore: release 0.1.4 --- _delphi_utils_python/.bumpversion.cfg | 2 +- _delphi_utils_python/delphi_utils/__init__.py | 2 +- _delphi_utils_python/setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/_delphi_utils_python/.bumpversion.cfg b/_delphi_utils_python/.bumpversion.cfg index 54c2bd86f..7a5275cec 100644 --- a/_delphi_utils_python/.bumpversion.cfg +++ b/_delphi_utils_python/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.1.3 +current_version = 0.1.4 commit = False tag = False tag_name = delphi-utils/v{new_version} diff --git a/_delphi_utils_python/delphi_utils/__init__.py b/_delphi_utils_python/delphi_utils/__init__.py index 14959f9b1..6682b367a 100644 --- a/_delphi_utils_python/delphi_utils/__init__.py +++ b/_delphi_utils_python/delphi_utils/__init__.py @@ -14,4 +14,4 @@ from .signal import add_prefix from .nancodes import Nans -__version__ = "0.1.3" +__version__ = "0.1.4" diff --git a/_delphi_utils_python/setup.py b/_delphi_utils_python/setup.py index 6d87f2776..6548c9985 100644 --- a/_delphi_utils_python/setup.py +++ b/_delphi_utils_python/setup.py @@ -24,7 +24,7 @@ setup( name="delphi_utils", - version="0.1.3", + version="0.1.4", description="Shared Utility Functions for Indicators", long_description=long_description, long_description_content_type="text/markdown",