diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index 809a81f6c..d6dd7260e 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -5,7 +5,7 @@ name: Python package
 on:
   push:
-    branches: [ main, prod, 'release/*' ]
+    branches: [ main, prod ]
   pull_request:
     types: [ opened, synchronize, reopened, ready_for_review ]
     branches: [ main, prod ]
diff --git a/.github/workflows/r-ci.yml b/.github/workflows/r-ci.yml
new file mode 100644
index 000000000..116537194
--- /dev/null
+++ b/.github/workflows/r-ci.yml
@@ -0,0 +1,63 @@
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+#
+# See https://github.com/r-lib/actions/tree/master/examples#readme for
+# additional example workflows available for the R community.
+
+name: R facebook survey
+
+on:
+  push:
+    branches: [ main, prod ]
+  pull_request:
+    types: [ opened, synchronize, reopened, ready_for_review ]
+    branches: [ main, prod ]
+
+jobs:
+  build:
+    runs-on: ubuntu-20.04
+    if: github.event.pull_request.draft == false
+    strategy:
+      matrix:
+        r-version: [4.0]
+    defaults:
+      run:
+        working-directory: facebook/delphiFacebook
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up R ${{ matrix.r-version }}
+        uses: r-lib/actions/setup-r@v1
+        with:
+          r-version: ${{ matrix.r-version }}
+      - name: Install linux dependencies
+        run: |
+          sudo apt-get install libcurl4-openssl-dev
+      - name: Get date
+        id: get-date
+        run: |
+          echo "::set-output name=date::$(/bin/date -u "+%Y%m%d")"
+      - name: Cache R packages
+        uses: actions/cache@v2
+        with:
+          path: ${{ env.R_LIBS_USER }}
+          key: ${{ runner.os }}-r-facebook-survey-${{ steps.get-date.outputs.date }}
+          restore-keys: |
+            ${{ runner.os }}-r-facebook-survey-
+      - name: Install R dependencies
+        run: |
+          if ( packageVersion("readr") != "1.4.0" ) {
+            install.packages("devtools")
+            devtools::install_version("readr", version = "1.4.0")
+          }
+          install.packages("remotes")
+          remotes::update_packages(c("rcmdcheck", "mockr"), upgrade="always")
+          dependency_list <- remotes::dev_package_deps(dependencies=TRUE)
+          remotes::update_packages(dependency_list$package[dependency_list$package != "readr"], upgrade="always")
+        shell: Rscript {0}
+      - name: Check
+        run: |
+          rcmdcheck::rcmdcheck(args = c("--no-manual", "--test-dir=unit-tests"), error_on = "error")
+        shell: Rscript {0}
diff --git a/_delphi_utils_python/.bumpversion.cfg b/_delphi_utils_python/.bumpversion.cfg
index 54c2bd86f..7a5275cec 100644
--- a/_delphi_utils_python/.bumpversion.cfg
+++ b/_delphi_utils_python/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.1.3
+current_version = 0.1.4
 commit = False
 tag = False
 tag_name = delphi-utils/v{new_version}
diff --git a/_delphi_utils_python/delphi_utils/__init__.py b/_delphi_utils_python/delphi_utils/__init__.py
index 14959f9b1..6682b367a 100644
--- a/_delphi_utils_python/delphi_utils/__init__.py
+++ b/_delphi_utils_python/delphi_utils/__init__.py
@@ -14,4 +14,4 @@
 from .signal import add_prefix
 from .nancodes import Nans
 
-__version__ = "0.1.3"
+__version__ = "0.1.4"
diff --git a/_delphi_utils_python/delphi_utils/validator/report.py b/_delphi_utils_python/delphi_utils/validator/report.py
index 97d483870..527b9c673 100644
--- a/_delphi_utils_python/delphi_utils/validator/report.py
+++ b/_delphi_utils_python/delphi_utils/validator/report.py
@@ -7,18 +7,22 @@ class ValidationReport:
     """Class for reporting the results of validation."""
 
-    def __init__(self, errors_to_suppress: List[ValidationFailure]):
+    def __init__(self, errors_to_suppress: List[ValidationFailure], data_source: str = ""):
         """Initialize a ValidationReport.
 
         Parameters
         ----------
         errors_to_suppress: List[ValidationFailure]
             List of ValidationFailures to ignore.
+        data_source: str
+            Name of data source as obtained from params
 
         Attributes
         ----------
         errors_to_suppress: List[ValidationFailure]
             See above
+        data_source: str
+            See above
         num_suppressed: int
             Number of errors suppressed
         total_checks: int
@@ -31,12 +35,12 @@ def __init__(self, errors_to_suppress: List[ValidationFailure]):
             Errors raised from validation failures not found in `self.errors_to_suppress`
         """
         self.errors_to_suppress = errors_to_suppress
+        self.data_source = data_source
         self.num_suppressed = 0
         self.total_checks = 0
         self.raised_errors = []
         self.raised_warnings = []
         self.unsuppressed_errors = []
-        self.summary = ""
 
     def add_raised_error(self, error):
         """Add an error to the report.
@@ -74,21 +78,25 @@ def add_raised_warning(self, warning):
         """
         self.raised_warnings.append(warning)
 
-    def set_summary(self):
-        """Represent summary of report as a string."""
-        out_str = f"{self.total_checks} checks run\n"
-        out_str += f"{len(self.unsuppressed_errors)} checks failed\n"
-        out_str += f"{self.num_suppressed} checks suppressed\n"
-        out_str += f"{len(self.raised_warnings)} warnings\n"
-        self.summary = out_str
-
     def log(self, logger=None):
         """Log errors and warnings."""
         if logger is None:
             logger = get_structured_logger(__name__)
-        self.set_summary()
-        logger.info(self.summary)
+        if self.success():
+            logger.info("Validation run successful",
+                        data_source = self.data_source,
+                        checks_run = self.total_checks,
+                        checks_failed = len(self.unsuppressed_errors),
+                        checks_suppressed = self.num_suppressed,
+                        warnings = len(self.raised_warnings))
+        else:
+            logger.info("Validation run unsuccessful",
+                        data_source = self.data_source,
+                        checks_run = self.total_checks,
+                        checks_failed = len(self.unsuppressed_errors),
+                        checks_suppressed = self.num_suppressed,
+                        warnings = len(self.raised_warnings))
         for error in self.unsuppressed_errors:
             logger.critical(str(error))
         for warning in self.raised_warnings:
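
Note on the ValidationReport change above: the free-text `set_summary()` string is replaced with a single structured log event, so log tooling can filter on `data_source` or alert on `checks_failed` instead of parsing prose. (The two branches differ only in the event string and could be collapsed by computing the message first.) A minimal before/after sketch, assuming a structlog-style logger like the one `get_structured_logger` returns — the direct `structlog` import and the field values below are illustrative assumptions:

    import structlog  # assumption: get_structured_logger wraps a structlog-style API

    logger = structlog.get_logger()

    # Before: one pre-formatted, human-only string.
    logger.info("3 checks run\n1 checks failed\n1 checks suppressed\n2 warnings\n")

    # After: an event name plus discrete fields, so aggregators can filter on
    # data_source or alert whenever checks_failed > 0.
    logger.info("Validation run unsuccessful",
                data_source="jhu",   # illustrative value
                checks_run=3,
                checks_failed=1,
                checks_suppressed=1,
                warnings=2)
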
diff --git a/_delphi_utils_python/delphi_utils/validator/validate.py b/_delphi_utils_python/delphi_utils/validator/validate.py
index 0b78492ea..8b6b31671 100644
--- a/_delphi_utils_python/delphi_utils/validator/validate.py
+++ b/_delphi_utils_python/delphi_utils/validator/validate.py
@@ -37,6 +37,7 @@ def __init__(self, params):
         # Date/time settings
         self.time_window = TimeWindow.from_params(validation_params["common"]["end_date"],
                                                   validation_params["common"]["span_length"])
+        self.data_source = validation_params["common"].get("data_source", "")
 
         self.static_validation = StaticValidator(validation_params)
         self.dynamic_validation = DynamicValidator(validation_params)
@@ -51,7 +52,7 @@ def validate(self):
         Returns:
             - ValidationReport collating the validation outcomes
         """
-        report = ValidationReport(self.suppressed_errors)
+        report = ValidationReport(self.suppressed_errors, self.data_source)
         frames_list = load_all_files(self.export_dir, self.time_window.start_date,
                                      self.time_window.end_date)
         self.static_validation.validate(frames_list, report)
diff --git a/_delphi_utils_python/setup.py b/_delphi_utils_python/setup.py
index 6d87f2776..6548c9985 100644
--- a/_delphi_utils_python/setup.py
+++ b/_delphi_utils_python/setup.py
@@ -24,7 +24,7 @@
 setup(
     name="delphi_utils",
-    version="0.1.3",
+    version="0.1.4",
     description="Shared Utility Functions for Indicators",
     long_description=long_description,
     long_description_content_type="text/markdown",
diff --git a/_delphi_utils_python/tests/validator/test_report.py b/_delphi_utils_python/tests/validator/test_report.py
index a46f243d0..7f7999983 100644
--- a/_delphi_utils_python/tests/validator/test_report.py
+++ b/_delphi_utils_python/tests/validator/test_report.py
@@ -40,10 +40,6 @@ def test_str(self):
         report.add_raised_warning(ImportWarning("right import"))
         report.add_raised_error(self.ERROR_1)
         report.add_raised_error(self.ERROR_2)
-        report.set_summary()
-
-        assert report.summary ==\
-            "3 checks run\n1 checks failed\n1 checks suppressed\n2 warnings\n"
 
     def test_log(self):
         """Test that the logs contain all failures and warnings."""
diff --git a/ansible/templates/sir_complainsalot-params-prod.json.j2 b/ansible/templates/sir_complainsalot-params-prod.json.j2
index ae3182344..a9c9775ad 100644
--- a/ansible/templates/sir_complainsalot-params-prod.json.j2
+++ b/ansible/templates/sir_complainsalot-params-prod.json.j2
@@ -46,6 +46,18 @@
       "max_age":6,
       "maintainers": ["U01AP8GSWG3","U01069KCRS7"],
       "retired-signals": ["raw_pct_negative","smoothed_pct_negative","raw_tests_per_device","smoothed_tests_per_device"]
+    },
+    "nchs-mortality": {
+      "max_age":16,
+      "maintainers": []
+    },
+    "covid-act-now": {
+      "max_age":5,
+      "maintainers": []
+    },
+    "hhs": {
+      "max_age":8,
+      "maintainers": []
     }
   }
 }
diff --git a/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py b/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py
index b89de2c40..f46567616 100755
--- a/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py
+++ b/combo_cases_and_deaths/delphi_combo_cases_and_deaths/run.py
@@ -322,6 +322,7 @@ def run_module(params):
     variants = [tuple((metric, geo_res)+sensor_signal(metric, sensor, smoother))
                 for (metric, geo_res, sensor, smoother) in
                 product(METRICS, GEO_RESOLUTIONS, SENSORS, SMOOTH_TYPES)]
+    variants = [i for i in variants if not ("7dav" in i[2] and "cumulative" in i[2])]
     params = configure(variants, params)
     logger = get_structured_logger(
        __name__, filename=params["common"].get("log_filename"),
diff --git a/combo_cases_and_deaths/tests/receiving/.gitkeep b/combo_cases_and_deaths/tests/receiving/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/combo_cases_and_deaths/tests/test_run.py b/combo_cases_and_deaths/tests/test_run.py
index adcde30ea..8d03627d4 100644
--- a/combo_cases_and_deaths/tests/test_run.py
+++ b/combo_cases_and_deaths/tests/test_run.py
@@ -1,13 +1,16 @@
 """Tests for running combo cases and deaths indicator."""
 from datetime import date
 from itertools import product
+import os
 import unittest
 from unittest.mock import patch, call
 import pandas as pd
 import numpy as np
 
 from delphi_combo_cases_and_deaths.run import (
-    extend_raw_date_range, get_updated_dates,
+    run_module,
+    extend_raw_date_range,
+    get_updated_dates,
     sensor_signal,
     combine_usafacts_and_jhu,
     compute_special_geo_dfs,
@@ -244,6 +247,50 @@ def test_no_nation_jhu(mock_covidcast_signal):
          "sample_size": [None]},)
     )
 
+@patch("delphi_combo_cases_and_deaths.run.combine_usafacts_and_jhu")
+def test_output_files(mock_combine):
+    params = {
+        "common": {
+            "export_dir": "./receiving"
+        },
+        "indicator": {
+            "export_start_date": [2020, 4, 1],
+            "source":"indicator-combination",
+            "wip_signal": ""
+        }
+    }
+    mock_combine.return_value = pd.DataFrame(
+        {
+            "geo_id": ["01000"],
+            "val": [10],
+            "timestamp": [pd.to_datetime("2021-01-04")],
+            "issue": [pd.to_datetime("2021-01-04")],
+            "se": 0,
+            "sample_size": 0
+        },
+        index=[1]
+    )
+    run_module(params)
+    csv_files = [f for f in os.listdir("receiving") if f.endswith(".csv")]
+    dates = ["20210104"]
+    geos = ["county", "hrr", "msa", "state", "hhs", "nation"]
+
+    # enumerate metric names.
+    metrics = []
+    for event, span, stat in product(["deaths", "confirmed"],
+                                     ["cumulative", "incidence"],
+                                     ["num", "prop"]):
+        metrics.append("_".join([event, span, stat]))
+        metrics.append("_".join([event, "7dav", span, stat]))
+
+    expected_files = []
+    for date in dates:
+        for geo in geos:
+            for metric in metrics:
+                if "7dav" in metric and "cumulative" in metric:
+                    continue
+                expected_files += [date + "_" + geo + "_" + metric + ".csv"]
+    assert set(csv_files) == set(expected_files)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/facebook/contingency-combine.R b/facebook/contingency-combine.R
index d4b730497..c6f2ad919 100644
--- a/facebook/contingency-combine.R
+++ b/facebook/contingency-combine.R
@@ -189,7 +189,7 @@ write_rollup <- function(newly_seen_files, seen_file, output_df, output_file) {
 
 args <- commandArgs(TRUE)
 
-if (length(args) < 2) {
+if (length(args) != 2) {
   stop("Usage: Rscript contingency-combine.R path/to/individual/files/ path/to/rollup/files/")
 }
diff --git a/facebook/delphiFacebook/NAMESPACE b/facebook/delphiFacebook/NAMESPACE
index 1eac7fc8f..d659cb93f 100644
--- a/facebook/delphiFacebook/NAMESPACE
+++ b/facebook/delphiFacebook/NAMESPACE
@@ -18,6 +18,7 @@ export(get_filenames_in_range)
 export(get_range_prev_full_month)
 export(get_range_prev_full_period)
 export(get_range_prev_full_week)
+export(get_sparse_filenames)
 export(jeffreys_se)
 export(join_weights)
 export(load_archive)
diff --git a/facebook/delphiFacebook/R/contingency_utils.R b/facebook/delphiFacebook/R/contingency_utils.R
index 8e2edbdde..b0f3368ec 100644
--- a/facebook/delphiFacebook/R/contingency_utils.R
+++ b/facebook/delphiFacebook/R/contingency_utils.R
@@ -1,7 +1,7 @@
 #' Return params file as an R list
 #'
 #' Reads a parameters file. Copies global params to contingency params if not
-#' already defined.
+#' already defined. Uses current date as end_date if not provided.
 #'
 #' @param path path to the parameters file; if not present, will try to copy the file
 #'   "params.json.template"
@@ -17,15 +17,21 @@ read_contingency_params <- function(path = "params.json", template_path = "param
   contingency_params$start_time <- ymd_hms(
     sprintf("%s 00:00:00", contingency_params$start_date), tz = tz_to
   )
+
+  # Fill in end_date, if missing, with current date.
+  contingency_params$end_date <- if_else(
+    is.null(contingency_params$end_date), as.character(Sys.Date()), contingency_params$end_date
+  )
+
   contingency_params$end_time <- ymd_hms(
     sprintf("%s 23:59:59", contingency_params$end_date), tz = tz_to
   )
 
   global_params <- c("archive_days", "backfill_days", "static_dir", "cache_dir",
                      "archive_dir", "weights_in_dir", "input_dir", "debug",
-                     "parallel")
+                     "parallel", "qualtrics")
   for (param in global_params) {
-    if ( is.null(contingency_params[[param]]) ) {
+    if ( is.null(contingency_params[[param]]) & !is.null(params[[param]]) ) {
       contingency_params[[param]] <- params[[param]]
     }
   }
@@ -55,24 +61,29 @@ read_contingency_params <- function(path = "params.json", template_path = "param
 #'
 #' @export
 update_params <- function(params) {
-  # Fill in end_time, if missing, with current time.
-  if (is.null(params$end_time)) {
-    params$end_time <- Sys.time()
-  }
-
   # Construct aggregate date range.
   if ( !is.null(params$start_date) ) {
+    # If start_date is provided, use start/end dates exactly as given.
     date_range <- list(params$start_time, params$end_time)
   } else {
     # If start_date is not provided, assume want to use preceding full time period.
    date_range <- get_range_prev_full_period(
-      as_date(params$end_date)
-      , params$aggregate_range
+      as_date(params$end_date), params$aggregate_range
    )
   }
 
-  params$input <- get_filenames_in_range(date_range[[1]], date_range[[2]], params)
-
-  if ( length(params[["input"]]) == 0 || all(is.na(params[["input"]])) ) {
+  if ( is.null(params[["input"]]) || length(params$input) == 0 ) {
+    # If params$input empty or not provided, fetch filenames from input_dir.
+    params$input <- get_sparse_filenames(date_range[[1]], date_range[[2]], params)
+  } else {
+    # If input files provided, subset to those in desired date range.
+    params$input <- get_filenames_in_range(date_range[[1]], date_range[[2]], params)
+  }
+
+  # Overwrites contents of file of the same name.
+  writeLines(params$input, "contingency_input.txt")
+
+  if ( length(params[["input"]]) == 0 || all(is.na(params$input)) ) {
     stop("no input files to read in")
   }
 
@@ -80,11 +91,15 @@ update_params <- function(params) {
   params$end_time <- date_range[[2]]
   params$start_date <- as_date(date_range[[1]])
   params$end_date <- as_date(date_range[[2]])
-
+
   return(params)
 }
 
 #' Get relevant input data file names from `input_dir`.
+#'
+#' Only include files containing data that falls at least somewhat between start
+#' and end dates, and is from an allowed ("active") survey and not a "dormant"
+#' survey.
 #'
 #' @param start_date Start of desired date range
 #' @param end_date End of desired date range
@@ -101,17 +116,33 @@ get_filenames_in_range <- function(start_date, end_date, params) {
   start_date <- as_date(start_date) - days(params$backfill_days)
   end_date <- as_date(end_date)
 
-  if ( is.null(params$input) | length(params$input) == 0 ) {
-    date_pattern <- "^[0-9]{4}-[0-9]{2}-[0-9]{2}.*[.]csv$"
-    youtube_pattern <- ".*YouTube[.]csv$"
+  if ( is.null(params[["input"]]) || length(params$input) == 0 ) {
+    ## Keep all files from active surveys that appear in the input dir.
+
+    if ( !is.null(params[["qualtrics"]]) ) {
+      include_patterns <- names(params$qualtrics$surveys$active)
+      include_patterns <- gsub(" ", "_", include_patterns, fixed=TRUE)
+
+      exclude_patterns <- names(params$qualtrics$surveys$dormant)
+      exclude_patterns <- gsub(" ", "_", exclude_patterns, fixed=TRUE)
+    } else {
+      # If no active/dormant survey info provided, use basic patterns to
+      # include/exclude survey files.
+      include_patterns <- c("^[0-9]{4}-[0-9]{2}-[0-9]{2}.*[.]csv$")
+      exclude_patterns <- c(".*YouTube[.]csv$")
+    }
 
     filenames <- list.files(path=params$input_dir)
-    filenames <- filenames[grepl(date_pattern, filenames) & !grepl(youtube_pattern, filenames)]
+
+    include_map <- grepl(paste(include_patterns, collapse="|"), filenames)
+    exclude_map <- grepl(paste(exclude_patterns, collapse="|"), filenames)
+    filenames <- filenames[include_map & !exclude_map]
   } else {
     filenames <- params$input
   }
 
-  file_end_dates <- as_date(substr(filenames, 1, 10))
+  # Filenames are formatted as "{generation date}.{start date}.{end date}.{survey name}_-_{survey version}.csv".
+  file_end_dates <- as_date(substr(filenames, 23, 32))
   file_start_dates <- as_date(substr(filenames, 12, 21))
 
   # Only keep files with data that falls at least somewhat between the desired
@@ -123,6 +154,37 @@ get_filenames_in_range <- function(start_date, end_date, params) {
   return(filenames)
 }
 
+#' Get sparse list of input data files from `input_dir`.
+#'
+#' Finds every fourth + last file by date.
+#'
+#' @param start_date Start of desired date range
+#' @param end_date End of desired date range
+#' @param params Params object produced by read_params
+#'
+#' @return Character vector of filenames
+#'
+#' @importFrom lubridate as_date
+#'
+#' @export
+get_sparse_filenames <- function(start_date, end_date, params) {
+  if (params$use_input_asis) { return(params$input) }
+
+  filenames <- get_filenames_in_range(start_date, end_date, params)
+
+  file_end_dates <- as_date(substr(filenames, 23, 32))
+  unique_file_end_dates <- sort(unique(file_end_dates))
+
+  # Use every fourth date. Always keep last date.
+  keep_inds <- unique(c(
+    seq(1, length(unique_file_end_dates), 4L),
+    length(unique_file_end_dates)))
+  keep_dates <- unique_file_end_dates[keep_inds]
+  filenames <- filenames[file_end_dates %in% keep_dates]
+
+  return(filenames)
+}
+
 #' Check user-set aggregations for basic validity and add a few necessary cols.
 #'
 #' @param aggregations Data frame with columns `name`, `var_weight`, `metric`,
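
The new `get_sparse_filenames` above is the performance-relevant piece of this file: when `params$input` is empty, the run now reads only files for every fourth unique end date, always including the last date so the most recent data is never dropped. A Python rendering of the same selection, assuming the `{generation date}.{start date}.{end date}.{survey}` filename layout documented above:

    from datetime import date

    def sparse_subset(filenames):
        """Keep files whose end date is every 4th unique date, plus the last date."""
        if not filenames:
            return []
        # R's substr(filenames, 23, 32) is 1-indexed and inclusive -> Python [22:32].
        end_dates = [date.fromisoformat(f[22:32]) for f in filenames]
        unique_dates = sorted(set(end_dates))
        keep = set(unique_dates[::4]) | {unique_dates[-1]}
        return [f for f, d in zip(filenames, end_dates) if d in keep]
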
diff --git a/facebook/delphiFacebook/R/contingency_write.R b/facebook/delphiFacebook/R/contingency_write.R
index 7b8d25573..f02cf95ef 100644
--- a/facebook/delphiFacebook/R/contingency_write.R
+++ b/facebook/delphiFacebook/R/contingency_write.R
@@ -148,7 +148,7 @@ add_metadata_vars <- function(data, params, geo_type, groupby_vars) {
 #'
 #' @noRd
 get_file_name <- function(params, geo_type, groupby_vars) {
-  aggregation_type <- setdiff(groupby_vars, "geo_id")
+  aggregation_type <- sort(setdiff(groupby_vars, "geo_id"))
   if (length(aggregation_type) == 0) aggregation_type <- "overall"
 
   file_name <- paste(
diff --git a/facebook/delphiFacebook/integration-tests/testthat/teardown-run.R b/facebook/delphiFacebook/integration-tests/testthat/teardown-run.R
index 63cec3450..52cf09c9b 100644
--- a/facebook/delphiFacebook/integration-tests/testthat/teardown-run.R
+++ b/facebook/delphiFacebook/integration-tests/testthat/teardown-run.R
@@ -18,6 +18,7 @@ file.remove(test_path("archive"))
 file.remove(test_path("receiving_full"))
 file.remove(test_path("individual_full"))
 file.remove(test_path("receiving_contingency_full"))
+file.remove(test_path("contingency_input.txt"))
 
 if ( dir.exists(test_path("receiving_contingency_test")) ) {
   file.remove(test_path("receiving_contingency_test"))
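
The `sort()` added to `get_file_name` above makes output names canonical: the same aggregation requested as `c("race", "gender")` or `c("gender", "race")` now maps to a single filename, which is exactly what the updated test-contingency-write.R test later in this patch asserts. A hypothetical Python equivalent of the naming rule:

    def file_suffix(groupby_vars):
        """Canonical aggregation suffix: sorted, geo_id excluded, 'overall' if empty."""
        aggregation_type = sorted(set(groupby_vars) - {"geo_id"})
        return "_".join(aggregation_type) if aggregation_type else "overall"

    assert file_suffix(["race", "geo_id", "gender"]) == "gender_race"
    assert file_suffix(["gender", "race"]) == "gender_race"
    assert file_suffix(["geo_id"]) == "overall"
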
diff --git a/facebook/delphiFacebook/man/get_filenames_in_range.Rd b/facebook/delphiFacebook/man/get_filenames_in_range.Rd
index ddc6f83d0..959cb8d15 100644
--- a/facebook/delphiFacebook/man/get_filenames_in_range.Rd
+++ b/facebook/delphiFacebook/man/get_filenames_in_range.Rd
@@ -17,5 +17,7 @@ get_filenames_in_range(start_date, end_date, params)
 Character vector of filenames
 }
 \description{
-Get relevant input data file names from `input_dir`.
+Only include files containing data that falls at least somewhat between start
+and end dates, and is from an allowed ("active") survey and not a "dormant"
+survey.
 }
diff --git a/facebook/delphiFacebook/man/get_sparse_filenames.Rd b/facebook/delphiFacebook/man/get_sparse_filenames.Rd
new file mode 100644
index 000000000..6bb875bc8
--- /dev/null
+++ b/facebook/delphiFacebook/man/get_sparse_filenames.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/contingency_utils.R
+\name{get_sparse_filenames}
+\alias{get_sparse_filenames}
+\title{Get sparse list of input data files from `input_dir`.}
+\usage{
+get_sparse_filenames(start_date, end_date, params)
+}
+\arguments{
+\item{start_date}{Start of desired date range}
+
+\item{end_date}{End of desired date range}
+
+\item{params}{Params object produced by read_params}
+}
+\value{
+Character vector of filenames
+}
+\description{
+Finds every fourth + last file by date.
+}
diff --git a/facebook/delphiFacebook/man/read_contingency_params.Rd b/facebook/delphiFacebook/man/read_contingency_params.Rd
index bbecc199d..71e35ffbb 100644
--- a/facebook/delphiFacebook/man/read_contingency_params.Rd
+++ b/facebook/delphiFacebook/man/read_contingency_params.Rd
@@ -20,5 +20,5 @@ a named list of parameters values
 }
 \description{
 Reads a parameters file. Copies global params to contingency params if not
-already defined.
+already defined. Uses current date as end_date if not provided.
 }
diff --git a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R
index 165017dc9..5738cce71 100644
--- a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R
+++ b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-utils.R
@@ -10,7 +10,7 @@ test_that("testing update_params command", {
     use_input_asis = TRUE,
     aggregate_range = "month",
     end_date = "2020-02-01",
-    input_dir = "./input"
+    input_dir = "./static" # Using a directory that doesn't contain any valid data files.
   )
 
   expect_error(update_params(params), "no input files to read in")
@@ -30,8 +30,8 @@ test_that("testing update_params command", {
     use_input_asis = TRUE,
     aggregate_range = "month",
     end_date = ymd("2020-01-31"),
-    end_time = ymd_hms("2020-01-31 23:59:59", tz=timezone),
     start_time = ymd_hms("2020-01-01 00:00:00", tz=timezone),
+    end_time = ymd_hms("2020-01-31 23:59:59", tz=timezone),
     start_date = ymd("2020-01-01")
   )
 
@@ -42,18 +42,18 @@ test_that("testing get_filenames_in_range command", {
   tdir <- tempfile()
   files <- c(
-    "2019-11-06.2019-10-30.2020-11-06.Survey_of_COVID-Like_Illness_-_TODEPLOY_......_-_US_Expansion.csv",
-    "2019-12-31.2019-12-24_With_Translations.csv",
-    "2020-01-06.2019-12-31_Wave_4.csv",
-    "2020-01-16.2020-01-09_YouTube.csv",
-    "2020-01-16.2020-01-09_Wave_4.csv",
-    "2020-02-06.2020-01-31_Wave_4.csv",
-    "2020-02-16.2020-02-09_Wave_3.csv"
+    "2029-01-01.2019-10-30.2019-11-06.Survey_of_COVID-Like_Illness_-_TODEPLOY_......_-_US_Expansion.csv",
+    "2029-01-01.2019-12-24.2019-12-31_With_Translations.csv",
+    "2029-01-01.2019-12-31.2020-01-06_Wave_4.csv",
+    "2029-01-01.2020-01-09.2020-01-16_YouTube.csv",
+    "2029-01-01.2020-01-09.2020-01-16_Wave_4.csv",
+    "2029-01-01.2020-01-31.2020-02-06_Wave_4.csv",
+    "2029-01-01.2020-02-09.2020-02-16_Wave_3.csv"
   )
 
   create_dir_not_exist(tdir)
   for (filename in files) {
-    write_csv(data.frame(), path = file.path(tdir, filename))
+    write_csv(data.frame(), file.path(tdir, filename))
   }
 
   params <- list(
@@ -65,17 +65,66 @@ test_that("testing get_filenames_in_range command", {
   date_range <- list(ymd("2020-01-01"), ymd("2020-01-31"))
 
   expected_output <- c(
-    "2019-12-31.2019-12-24_With_Translations.csv",
-    "2020-01-06.2019-12-31_Wave_4.csv",
-    "2020-01-16.2020-01-09_Wave_4.csv",
-    "2020-02-06.2020-01-31_Wave_4.csv"
+    "2029-01-01.2019-12-24.2019-12-31_With_Translations.csv",
+    "2029-01-01.2019-12-31.2020-01-06_Wave_4.csv",
+    "2029-01-01.2020-01-09.2020-01-16_Wave_4.csv",
+    "2029-01-01.2020-01-31.2020-02-06_Wave_4.csv"
   )
 
   out <- get_filenames_in_range(date_range[[1]], date_range[[2]], params)
-
   expect_equal(out, expected_output)
 })
+
+test_that("testing get_sparse_filenames command", {
+  tdir <- tempfile()
+  files <- c(
+    "2021-12-11.2019-12-26.2020-01-01_Wave_4.csv",
+    "2021-12-11.2019-12-27.2020-01-02_Wave_4.csv",
+    "2021-12-11.2019-12-28.2020-01-03_Wave_4.csv",
+    "2021-12-11.2019-12-29.2020-01-04_Wave_4.csv",
+    "2021-12-11.2019-12-30.2020-01-05_Wave_4.csv",
+    "2021-12-11.2019-12-30.2020-01-05_Wave_5.csv",
+    "2021-12-11.2019-12-31.2020-01-06_Wave_4.csv",
+    "2021-12-11.2019-12-31.2020-01-06_Wave_5.csv",
+    "2021-12-11.2019-01-01.2020-01-07_Wave_4.csv",
+    "2021-12-11.2019-01-02.2020-01-08_Wave_4.csv",
+    "2021-12-11.2019-01-03.2020-01-09_Wave_4.csv",
+    "2021-12-11.2019-01-04.2020-01-10_Wave_4.csv",
+
+    "2011-12-11.2019-10-30.2019-11-06.2020-11-06.Survey_of_COVID-Like_Illness_-_TODEPLOY_......_-_US_Expansion.csv",
+    "2021-12-11.2020-01-09.2020-01-16_YouTube.csv",
+    "2021-12-11.2020-01-09.2020-01-16_Wave_4.csv",
+    "2021-12-11.2020-01-31.2020-02-06_Wave_4.csv",
+    "2021-12-11.2020-02-09.2020-02-16_Wave_3.csv"
+  )
+
+  create_dir_not_exist(tdir)
+  for (filename in files) {
+    write_csv(data.frame(), path = file.path(tdir, filename))
+  }
+
+  params <- list(
+    input = c(),
+    use_input_asis = FALSE,
+    backfill_days = 4,
+    input_dir = tdir
+  )
+  date_range <- list(ymd("2020-01-01"), ymd("2020-01-6"))
+
+  expected_output <- c(
+    "2021-12-11.2019-12-26.2020-01-01_Wave_4.csv",
+    "2021-12-11.2019-12-30.2020-01-05_Wave_4.csv",
+    "2021-12-11.2019-12-30.2020-01-05_Wave_5.csv",
+    "2021-12-11.2019-01-03.2020-01-09_Wave_4.csv",
+    "2021-12-11.2019-01-04.2020-01-10_Wave_4.csv"
+  )
+
+  out <- get_sparse_filenames(date_range[[1]], date_range[[2]], params)
+  expect_setequal(out, expected_output)
+})
+
+
 test_that("testing verify_aggs command", {
   # Duplicate rows
   input_aggs <- tribble(
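
The rewritten fixtures above encode the three-date filename layout (`{generation date}.{start date}.{end date}_{survey}.csv`) and the selection rule: a file is kept when its own date span overlaps the requested window widened backwards by `backfill_days`. A sketch of that filter in Python (`in_range` is a hypothetical helper; the real logic lives in `get_filenames_in_range`):

    from datetime import date, timedelta

    def in_range(filename, window_start, window_end, backfill_days=4):
        """Keep a file iff its [start, end] span overlaps the widened window."""
        file_start = date.fromisoformat(filename[11:21])  # R substr 12-21
        file_end = date.fromisoformat(filename[22:32])    # R substr 23-32
        window_start = window_start - timedelta(days=backfill_days)
        return file_start <= window_end and file_end >= window_start

    # Matches the first entry of expected_output in the test above.
    assert in_range("2029-01-01.2019-12-24.2019-12-31_With_Translations.csv",
                    date(2020, 1, 1), date(2020, 1, 31))
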
diff --git a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-variables.R b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-variables.R
deleted file mode 100644
index 8561fe7c6..000000000
--- a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-variables.R
+++ /dev/null
@@ -1,4 +0,0 @@
-library(data.table)
-library(tibble)
-
-context("Testing response recoding and renaming")
diff --git a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-write.R b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-write.R
index 029fd16e6..0d28520f5 100644
--- a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-write.R
+++ b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-write.R
@@ -61,7 +61,11 @@ test_that("testing command to create output filenames", {
   out <- get_file_name(params, "nation", c("gender"))
   expected <- "DebugOn-DoNotShare_20210101_20210102_monthly_nation_gender.csv"
+  expect_equal(out, expected)
 
+  params$debug <- FALSE
   out <- get_file_name(params, "nation", c("gender", "race", "ethnicity"))
   expected <- "20210101_20210102_monthly_nation_ethnicity_gender_race.csv"
+
+  expect_equal(out, expected)
 })
diff --git a/facebook/delphiFacebook/unit-tests/testthat/test-utils.R b/facebook/delphiFacebook/unit-tests/testthat/test-utils.R
index 9ae35ff00..082b8918d 100644
--- a/facebook/delphiFacebook/unit-tests/testthat/test-utils.R
+++ b/facebook/delphiFacebook/unit-tests/testthat/test-utils.R
@@ -24,7 +24,7 @@ test_that("testing create dir function", {
 
 test_that("testing read params when missing file", {
-  # expect error if missing file, since no template in test dir 
+  # expect error if missing file, since no template in test dir
   tdir <- tempfile()
   expect_warning(expect_error(read_params(tdir)))
 })
"2021-12-11.2019-12-30.2020-01-05_Wave_5.csv", + "2021-12-11.2019-01-03.2020-01-09_Wave_4.csv", + "2021-12-11.2019-01-04.2020-01-10_Wave_4.csv" + ) + + out <- get_sparse_filenames(date_range[[1]], date_range[[2]], params) + expect_setequal(out, expected_output) +}) + + test_that("testing verify_aggs command", { # Duplicate rows input_aggs <- tribble( diff --git a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-variables.R b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-variables.R deleted file mode 100644 index 8561fe7c6..000000000 --- a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-variables.R +++ /dev/null @@ -1,4 +0,0 @@ -library(data.table) -library(tibble) - -context("Testing response recoding and renaming") diff --git a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-write.R b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-write.R index 029fd16e6..0d28520f5 100644 --- a/facebook/delphiFacebook/unit-tests/testthat/test-contingency-write.R +++ b/facebook/delphiFacebook/unit-tests/testthat/test-contingency-write.R @@ -61,7 +61,11 @@ test_that("testing command to create output filenames", { out <- get_file_name(params, "nation", c("gender")) expected <- "DebugOn-DoNotShare_20210101_20210102_monthly_nation_gender.csv" + expect_equal(out, expected) + params$debug <- FALSE out <- get_file_name(params, "nation", c("gender", "race", "ethnicity")) expected <- "20210101_20210102_monthly_nation_ethnicity_gender_race.csv" + + expect_equal(out, expected) }) diff --git a/facebook/delphiFacebook/unit-tests/testthat/test-utils.R b/facebook/delphiFacebook/unit-tests/testthat/test-utils.R index 9ae35ff00..082b8918d 100644 --- a/facebook/delphiFacebook/unit-tests/testthat/test-utils.R +++ b/facebook/delphiFacebook/unit-tests/testthat/test-utils.R @@ -24,7 +24,7 @@ test_that("testing create dir function", { test_that("testing read params when missing file", { - # expect error if missing file, since no template in test dir + # expect error if missing file, since no template in test dir tdir <- tempfile() expect_warning(expect_error(read_params(tdir))) }) diff --git a/facebook/delphiFacebook/unit-tests/testthat/test-variables.R b/facebook/delphiFacebook/unit-tests/testthat/test-variables.R index 2851028e4..ec3989262 100644 --- a/facebook/delphiFacebook/unit-tests/testthat/test-variables.R +++ b/facebook/delphiFacebook/unit-tests/testthat/test-variables.R @@ -86,6 +86,7 @@ test_that("mask items correctly coded", { input_data$c_mask_often_7d <- NA input_data$c_others_masked <- c(TRUE, NA, NA, FALSE, TRUE, FALSE) input_data$c_others_masked_public <- NA + input_data$c_others_distanced_public <- NA input_data$c_work_outside_5d <- NA expect_equal(out, input_data) @@ -105,6 +106,7 @@ test_that("mask items correctly coded", { input_data$c_mask_often_7d <- c(NA, TRUE, FALSE, NA, TRUE, FALSE) input_data$c_others_masked <- c(TRUE, NA, NA, FALSE, TRUE, FALSE) input_data$c_others_masked_public <- NA + input_data$c_others_distanced_public <- NA input_data$c_work_outside_5d <- NA expect_equal(out, input_data) @@ -125,6 +127,7 @@ test_that("mask items correctly coded", { input_data$c_mask_often_7d <- NA input_data$c_others_masked <- c(TRUE, NA, NA, FALSE, TRUE, FALSE) input_data$c_others_masked_public <- NA + input_data$c_others_distanced_public <- NA input_data$c_work_outside_5d <- NA expect_equal(out, input_data) @@ -133,6 +136,7 @@ test_that("mask items correctly coded", { input_data <- data.frame( C14 = c(NA, 1, 3, 6, 2, 4), H2 = c(1, NA, 6, 3, 2, 
diff --git a/facebook/qsf-tools/README.md b/facebook/qsf-tools/README.md
index e46a613bc..54eb4d28b 100644
--- a/facebook/qsf-tools/README.md
+++ b/facebook/qsf-tools/README.md
@@ -20,11 +20,12 @@ questions. These mapping files are created manually and need to be updated for
 every new survey wave.
 
 * `item_replacement_map.csv`: Lists in-survey name of an `new_item` and the
-  in-survey name of the `old_item` it replaces. `new_item` should be the name
-  of a single item and be unique, but the `old_item` column has no formatting
-  requirements. It can hold a list of items, if the corresponding new survey
-  item is replacing multiple old questions, and a given item name can appear
-  in multiple rows of the `old_item` field.
+  in-survey name(s) of the `old_item`(s) it replaces. `new_item` should be the
+  name of a single item and be unique; the `old_item` column should be a
+  string. However, `old_item` has no other formatting requirements. For
+  example, it can list several item names (e.g. "A1, A2"), if the
+  corresponding new survey item is replacing multiple old questions. A given
+  item name can also appear in multiple rows of the `old_item` field.
 * `item_shortquestion_map.csv`: Lists in-survey name of an `item` and a short
   description of the contents of the question. `item` should be the name of a
   single item and be unique, but the `description` column has no formatting
@@ -83,4 +84,4 @@ which can contain any subset of the following fields:
 The meaning of "Answers" and "Choices" differs for matrix vs non-matrix
 items. "Choices" list the vertical components -- subquestions for matrix
 items and answer choices for non-matrix items. "Answers" list the answer
-choices for matrix items and are missing for non-matrix items.
\ No newline at end of file
+choices for matrix items and are missing for non-matrix items.
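
With the reworded README description above, a concrete excerpt makes the format easier to verify. The first two rows below are real mappings added elsewhere in this patch; the third row is hypothetical and only illustrates one `new_item` replacing two old items via a quoted, comma-separated `old_item`:

    new_item,old_item
    C17,C2
    B10c,B10a
    A3,"A1, A2"
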
diff --git a/facebook/qsf-tools/generate-codebook.R b/facebook/qsf-tools/generate-codebook.R
index b9044f6ee..a126162b1 100644
--- a/facebook/qsf-tools/generate-codebook.R
+++ b/facebook/qsf-tools/generate-codebook.R
@@ -9,6 +9,7 @@
 suppressPackageStartupMessages({
   library(tidyverse)
   library(jsonlite)
+  library(rjson)
   library(stringr)
   library(gsubfn)
   source("qsf-utils.R")
@@ -265,7 +266,24 @@ process_qsf <- function(path_to_qsf,
                         NA_character_),
       wave = get_wave(path_to_qsf)
     ) %>%
-    select(wave, variable, replaces, description, question, matrix_subquestion, type, display_logic, response_option_randomization, group_of_respondents_item_was_shown_to)
+    select(wave,
+           variable,
+           replaces,
+           description,
+           question,
+           matrix_subquestion,
+           choices,
+           type,
+           display_logic,
+           response_option_randomization,
+           group_of_respondents_item_was_shown_to)
+
+  # Format choices as json string
+  qdf$choices <- map(qdf$choices, function(x) {
+    if (is_empty(x)) { NA }
+    else { toJSON(x) }
+  }) %>%
+    unlist()
 
   # add free text response options
   other_text_items <- qdf %>%
@@ -276,6 +294,7 @@ process_qsf <- function(path_to_qsf,
       description = paste0(description, " other text")
     )
   qdf <- rbind(qdf, other_text_items)
+  qdf$choices[qdf$type == "Text"] <- NA
 
   # Quality checks
   stopifnot(length(qdf$variable) == length(unique(qdf$variable)))
@@ -388,7 +407,7 @@ get_static_fields <- function(wave,
 add_qsf_to_codebook <- function(path_to_qsf, path_to_codebook) {
   qdf <- process_qsf(path_to_qsf)
   codebook <- add_qdf_to_codebook(qdf, path_to_codebook)
-  write_csv(codebook, path_to_codebook)
+  write_excel_csv(codebook, path_to_codebook)
 }
diff --git a/facebook/qsf-tools/static/item_replacement_map.csv b/facebook/qsf-tools/static/item_replacement_map.csv
index d1a882f14..433a3ae7b 100644
--- a/facebook/qsf-tools/static/item_replacement_map.csv
+++ b/facebook/qsf-tools/static/item_replacement_map.csv
@@ -22,6 +22,7 @@ C15,Q36
 C13b,C13
 C13c,C13a
 C14a,C14
+C17,C2
 C17a,C17
 V2a,V2
 V3a,V3
@@ -32,4 +33,5 @@ V4a_4,V4_4
 V4a_5,V4_5
 V11a,V11
 V12a,V12
-C7a,C7
\ No newline at end of file
+C7a,C7
+B10c,B10a
diff --git a/facebook/qsf-tools/static/static_microdata_fields.csv b/facebook/qsf-tools/static/static_microdata_fields.csv
index 0ac7633cc..09cee071d 100644
--- a/facebook/qsf-tools/static/static_microdata_fields.csv
+++ b/facebook/qsf-tools/static/static_microdata_fields.csv
@@ -1,6 +1,6 @@
 variable,replaces,description,question,matrix_subquestion,type,response_option_randomization
-StartDatetime,NA,"survey start timestamp",NA,NA,NA,NA
-EndDatetime,NA,"survey end timestamp",NA,NA,NA,NA
+StartDatetime,NA,"survey start timestamp in Pacific time (UTC-7)",NA,NA,NA,NA
+EndDatetime,NA,"survey end timestamp in Pacific time (UTC-7)",NA,NA,NA,NA
 wave,NA,"survey version",NA,NA,NA,NA
 UserLanguage,NA,"survey language",NA,NA,NA,NA
 fips,NA,"county FIPS code",NA,NA,NA,NA
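
The generate-codebook.R change above adds a `choices` column holding each item's response options serialized as a JSON string (free-text items stay NA), and switches to `write_excel_csv` so the CSV carries a UTF-8 BOM that Excel honors when rendering non-ASCII choice text. A compact Python sketch of the same serialization rule — the R version uses `rjson::toJSON`, and the row contents here are illustrative:

    import json

    rows = [
        {"variable": "C17", "type": "MC", "choices": {"1": "Yes", "2": "No"}},
        {"variable": "B10c_t", "type": "Text", "choices": None},
    ]
    for row in rows:
        if row["type"] == "Text" or not row["choices"]:
            row["choices"] = None  # free-text items carry no choice set
        else:
            # JSON string survives round-tripping through a CSV cell
            row["choices"] = json.dumps(row["choices"])
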
diff --git a/jhu/delphi_jhu/run.py b/jhu/delphi_jhu/run.py
index 79e041370..ed13dfb67 100644
--- a/jhu/delphi_jhu/run.py
+++ b/jhu/delphi_jhu/run.py
@@ -106,7 +106,8 @@ def run_module(params: Dict[str, Any]):
     for metric, geo_res, sensor, smoother in product(
         METRICS, GEO_RESOLUTIONS, SENSORS, SMOOTHERS
     ):
-        print(metric, geo_res, sensor, smoother)
+        if "cumulative" in sensor and "seven_day_average" in smoother:
+            continue
         logger.info(
             event="generating signal and exporting to CSV",
             metric=metric,
diff --git a/jhu/tests/test_run.py b/jhu/tests/test_run.py
index e434b7058..1ff1cc1dd 100644
--- a/jhu/tests/test_run.py
+++ b/jhu/tests/test_run.py
@@ -31,6 +31,8 @@ def test_output_files_exist(self, run_as_module):
         for date in dates:
             for geo in geos:
                 for metric in metrics:
+                    if "7dav" in metric and "cumulative" in metric:
+                        continue
                     # Can't compute 7dav for first few days of data because of NAs
                     if date > "20200305" or "7dav" not in metric:
                         expected_files += [date + "_" + geo + "_" + metric + ".csv"]
diff --git a/jhu/tests/test_smooth.py b/jhu/tests/test_smooth.py
index 16d49e511..739fcf537 100644
--- a/jhu/tests/test_smooth.py
+++ b/jhu/tests/test_smooth.py
@@ -9,14 +9,14 @@ def test_output_files_smoothed(self, run_as_module):
     dates = [str(x) for x in range(20200303, 20200310)]
 
     smoothed = pd.read_csv(
-        join("./receiving", f"{dates[-1]}_state_confirmed_7dav_cumulative_num.csv")
+        join("./receiving", f"{dates[-1]}_state_confirmed_7dav_incidence_num.csv")
    )
 
     # Build a dataframe out of the individual day files
     raw = pd.concat(
         [
             pd.read_csv(
-                join("./receiving", f"{date}_state_confirmed_cumulative_num.csv")
+                join("./receiving", f"{date}_state_confirmed_incidence_num.csv")
             )
             for date in dates
         ]
diff --git a/sir_complainsalot/params.json.template b/sir_complainsalot/params.json.template
index 2f7354598..5c1cdd891 100644
--- a/sir_complainsalot/params.json.template
+++ b/sir_complainsalot/params.json.template
@@ -47,6 +47,18 @@
       "max_age":6,
       "maintainers": ["U01AP8GSWG3","U01069KCRS7"],
       "retired-signals": ["raw_pct_negative","smoothed_pct_negative","raw_tests_per_device","smoothed_tests_per_device"]
+    },
+    "nchs-mortality": {
+      "max_age":16,
+      "maintainers": []
+    },
+    "covid-act-now": {
+      "max_age":5,
+      "maintainers": []
+    },
+    "hhs": {
+      "max_age":8,
+      "maintainers": []
     }
   }
 }
diff --git a/usafacts/delphi_usafacts/run.py b/usafacts/delphi_usafacts/run.py
index 58ddadde5..08a666caa 100644
--- a/usafacts/delphi_usafacts/run.py
+++ b/usafacts/delphi_usafacts/run.py
@@ -103,6 +103,8 @@ def run_module(params: Dict[str, Dict[str, Any]]):
     dfs = {metric: pull_usafacts_data(base_url, metric, logger) for metric in METRICS}
     for metric, geo_res, sensor, smoother in product(
             METRICS, GEO_RESOLUTIONS, SENSORS, SMOOTHERS):
+        if "cumulative" in sensor and "seven_day_average" in smoother:
+            continue
         logger.info("generating signal and exporting to CSV",
                     geo_res = geo_res,
                     metric = metric,
diff --git a/usafacts/tests/test_run.py b/usafacts/tests/test_run.py
index d22a514ca..44afd957d 100644
--- a/usafacts/tests/test_run.py
+++ b/usafacts/tests/test_run.py
@@ -54,8 +54,9 @@ def test_output_files_exist(self):
                 for metric in metrics:
                     if "7dav" in metric and date in dates[:6]:
                         continue  # there are no 7dav signals for first 6 days
+                    if "7dav" in metric and "cumulative" in metric:
+                        continue
                     expected_files += [date + "_" + geo + "_" + metric + ".csv"]
-
                 assert set(csv_files) == set(expected_files)
 
     def test_output_file_format(self):
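
Closing note: both sir_complainsalot configs in this patch (the prod Ansible template and params.json.template above) register three new sources with per-source `max_age` freshness thresholds and empty maintainer lists. A hedged sketch of how such a threshold could be evaluated — the function and argument names below are illustrative, not sir_complainsalot's actual API:

    from datetime import date

    def is_stale(most_recent_issue: date, today: date, max_age_days: int) -> bool:
        """Complain when a source's newest data is older than its max_age."""
        return (today - most_recent_issue).days > max_age_days

    # Thresholds from the config above: nchs-mortality allows 16 days of lag,
    # covid-act-now only 5, hhs 8.
    assert is_stale(date(2021, 3, 1), date(2021, 3, 20), 16)      # 19 days: stale
    assert not is_stale(date(2021, 3, 18), date(2021, 3, 20), 5)  # 2 days: fresh
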