
Commit c678a7f

Merge pull request #978 from cmu-delphi/main
Deploy validation fixes to production
2 parents 30b90fb + f2cbec3

20 files changed: +212 -56 lines changed


_delphi_utils_python/delphi_utils/runner.py

Lines changed: 10 additions & 2 deletions

@@ -17,6 +17,12 @@ def run_indicator_pipeline(indicator_fn: Callable[[Params], None],
                            archiver_fn: Callable[[Params], Optional[ArchiveDiffer]] = NULL_FN):
     """Run an indicator with its optional validation and archiving.
 
+    Each argument to this function should itself be a function that will be passed a common set of
+    parameters (see details below). This parameter dictionary should have four subdictionaries
+    keyed as "indicator", "validation", "archive", and "common" corresponding to parameters to be
+    used in `indicator_fn`, `validator_fn`, `archiver_fn`, and shared across functions,
+    respectively.
+
     Arguments
     ---------
     indicator_fn: Callable[[Params], None]
@@ -43,7 +49,9 @@ def run_indicator_pipeline(indicator_fn: Callable[[Params], None],
     parser.add_argument("indicator_name",
                         type=str,
                         help="Name of the Python package containing the indicator. This package "
-                             "must export a `run_module(params)` function.")
+                             "must export a `run.run_module(params)` function.")
     args = parser.parse_args()
     indicator_module = importlib.import_module(args.indicator_name)
-    run_indicator_pipeline(indicator_module.run_module, validator_from_params, archiver_from_params)
+    run_indicator_pipeline(indicator_module.run.run_module,
+                           validator_from_params,
+                           archiver_from_params)

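A params dictionary with the layout this docstring describes looks roughly like the following (a minimal sketch; the nested keys are illustrative placeholders, not an exhaustive list):

# Minimal sketch of the shared params dictionary: run_indicator_pipeline hands
# the same dictionary to indicator_fn, validator_fn, and archiver_fn.
params = {
    "common": {"export_dir": "./receiving"},   # shared across all steps
    "indicator": {"wip_signal": False},        # consumed by indicator_fn
    "validation": {},                          # consumed by validator_fn
    "archive": {},                             # consumed by archiver_fn
}
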
_delphi_utils_python/delphi_utils/validator/datafetcher.py

Lines changed: 5 additions & 5 deletions

@@ -5,6 +5,7 @@
 import threading
 from os import listdir
 from os.path import isfile, join
+import warnings
 import pandas as pd
 import numpy as np
 
@@ -114,9 +115,6 @@ def get_geo_signal_combos(data_source):
     geo_signal_combos = list(map(tuple,
                                  source_meta[["geo_type", "signal"]].to_records(index=False)))
 
-    print("Number of expected geo region-signal combinations:",
-          len(geo_signal_combos))
-
     return geo_signal_combos
 
 
@@ -126,8 +124,10 @@ def fetch_api_reference(data_source, start_date, end_date, geo_type, signal_type
 
     Formatting is changed to match that of source data CSVs.
     """
-    api_df = covidcast.signal(
-        data_source, signal_type, start_date, end_date, geo_type)
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        api_df = covidcast.signal(
+            data_source, signal_type, start_date, end_date, geo_type)
 
     if not isinstance(api_df, pd.DataFrame):
         custom_msg = "Error fetching data from " + str(start_date) + \

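For reference, `covidcast.signal` is the API client call being wrapped here; a minimal standalone sketch of the same suppression pattern (the source, signal, dates, and geography are placeholder values):

# Illustrative only: silence client warnings around a covidcast fetch, as the
# validator now does. When no data is available the client does not return a
# DataFrame, which is what the isinstance check above guards against.
import warnings
from datetime import date
import covidcast

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    api_df = covidcast.signal("fb-survey", "smoothed_cli",
                              date(2021, 1, 1), date(2021, 1, 7), "county")
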
_delphi_utils_python/delphi_utils/validator/report.py

Lines changed: 9 additions & 3 deletions

@@ -90,13 +90,19 @@ def log(self):
         for warning in self.raised_warnings:
             logger.warning(str(warning))
 
-    def print_and_exit(self):
-        """Print results and, if any unsuppressed exceptions were raised, exit with non-0 status."""
+    def print_and_exit(self, die_on_failures=True):
+        """Print results and exit.
+
+        Arguments
+        ---------
+        die_on_failures: bool
+            Whether to return non-zero status if any failures were encountered.
+        """
         print(self.summary())
         self.log()
         if self.success():
             sys.exit(0)
-        else:
+        elif die_on_failures:
             sys.exit(1)
 
     def success(self):

_delphi_utils_python/delphi_utils/validator/run.py

Lines changed: 7 additions & 2 deletions

@@ -2,16 +2,21 @@
 """Functions to call when running the tool.
 
 This module should contain a function called `run_module`, that is executed
-when the module is run with `python -m delphi_validator`.
+when the module is run with `python -m delphi_utils.validator`.
 """
+import argparse as ap
 from .. import read_params
 from .validate import Validator
 
 
 def run_module():
     """Run the validator as a module."""
+    parser = ap.ArgumentParser()
+    parser.add_argument("--dry_run", action="store_true", help="When provided, return zero exit"
+                        " status irrespective of the number of failures")
+    args = parser.parse_args()
     validator = Validator(read_params())
-    validator.validate().print_and_exit()
+    validator.validate().print_and_exit(not args.dry_run)
 
 
 def validator_from_params(params):

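Together with the `die_on_failures` parameter added to `print_and_exit` above, this means `python -m delphi_utils.validator --dry_run` reports failures but still exits 0. A minimal sketch of the same dry-run behaviour invoked programmatically, assuming a readable params.json in the working directory:

# Sketch only: a programmatic dry run. Failures are still printed and logged,
# but the process is not terminated with a non-zero exit status.
from delphi_utils import read_params
from delphi_utils.validator.validate import Validator

report = Validator(read_params()).validate()
report.print_and_exit(die_on_failures=False)
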
_template_python/delphi_NAME/__main__.py

Lines changed: 2 additions & 1 deletion

@@ -6,6 +6,7 @@
 no need to change this template.
 """
 
+from delphi_utils import read_params
 from .run import run_module  # pragma: no cover
 
-run_module()  # pragma: no cover
+run_module(read_params())  # pragma: no cover

_template_python/delphi_NAME/constants.py

Lines changed: 2 additions & 2 deletions

@@ -3,11 +3,11 @@
 """
 
 
-## example: 
+## example:
 # FULL_TIME = "full_time_work_prop"
 # PART_TIME = "part_time_work_prop"
 # COVIDNET = "covidnet"
-# 
+#
 # SIGNALS = [
 #     FULL_TIME,
 #     PART_TIME,

_template_python/delphi_NAME/run.py

Lines changed: 21 additions & 7 deletions

@@ -2,20 +2,34 @@
 """Functions to call when running the function.
 
 This module should contain a function called `run_module`, that is executed
-when the module is run with `python -m MODULE_NAME`.
+when the module is run with `python -m MODULE_NAME`. `run_module`'s lone argument should be a
+nested dictionary of parameters loaded from the params.json file. We expect the `params` to have
+the following structure:
+    - "common":
+        - "export_dir": str, directory to which the results are exported
+        - "log_filename": (optional) str, path to log file
+    - "indicator": (optional)
+        - "wip_signal": (optional) Any[str, bool], list of signals that are works in progress, or
+          True if all signals in the registry are works in progress, or False if only
+          unpublished signals are. See `delphi_utils.add_prefix()`
+        - Any other indicator-specific settings
 """
-from delphi_utils import read_params
-from .handle_wip_signal import add_prefix
+from delphi_utils import add_prefix
 from .constants import SIGNALS
 
-def run_module():
+def run_module(params):
     """
-    Calls the method for handling the wip signals
+    Runs the indicator
+
+    Arguments
+    --------
+    params: Dict[str, Any]
+        Nested dictionary of parameters.
+
     Returns
     -------
     prints the updated signal names
     """
-    params = read_params()
-    wip_signal = params["wip_signal"]
+    wip_signal = params["indicator"]["wip_signal"]
     signal_names = add_prefix(SIGNALS, wip_signal, prefix="wip_")
     print(signal_names)

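Per the structure documented in run.py above (and loaded by the template's `__main__.py` through `read_params()`), a toy params dictionary and the resulting behaviour would look roughly like this; the signal names reuse the commented examples from constants.py and the expected output follows the `add_prefix` semantics described in the docstring:

# Sketch only: what the template's run_module does with a toy params dict.
# With wip_signal=True, every signal is treated as work-in-progress and gets
# the "wip_" prefix.
from delphi_utils import add_prefix

SIGNALS = ["full_time_work_prop", "part_time_work_prop"]
params = {"common": {"export_dir": "./receiving"},
          "indicator": {"wip_signal": True}}

wip_signal = params["indicator"]["wip_signal"]
print(add_prefix(SIGNALS, wip_signal, prefix="wip_"))
# expected: ['wip_full_time_work_prop', 'wip_part_time_work_prop']
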
facebook/delphiFacebook/R/binary.R

Lines changed: 34 additions & 8 deletions

@@ -167,6 +167,12 @@ get_binary_indicators <- function() {
     "smoothed_wdontneed_reason_not_beneficial", "weight", "v_dontneed_reason_not_beneficial", 6, compute_binary_response, jeffreys_binary,
     "smoothed_dontneed_reason_other", "weight_unif", "v_dontneed_reason_other", 6, compute_binary_response, jeffreys_binary,
     "smoothed_wdontneed_reason_other", "weight", "v_dontneed_reason_other", 6, compute_binary_response, jeffreys_binary,
+
+    # schooling
+    "smoothed_inperson_school_fulltime", "weight_unif", "s_inperson_school_fulltime", 6, compute_binary_response, jeffreys_binary,
+    "smoothed_winperson_school_fulltime", "weight", "s_inperson_school_fulltime", 6, compute_binary_response, jeffreys_binary,
+    "smoothed_inperson_school_parttime", "weight_unif", "s_inperson_school_parttime", 6, compute_binary_response, jeffreys_binary,
+    "smoothed_winperson_school_parttime", "weight", "s_inperson_school_parttime", 6, compute_binary_response, jeffreys_binary,
   )
 
 
@@ -210,22 +216,42 @@ compute_binary_response <- function(response, weight, sample_size)
 #' @return Updated data frame.
 #' @importFrom dplyr mutate
 jeffreys_binary <- function(df) {
-  return(mutate(df,
-                val = jeffreys_percentage(.data$val, .data$sample_size),
-                se = binary_se(.data$val, .data$sample_size)))
+  return( jeffreys_multinomial_factory(2)(df) )
+}
+
+#' Generate function that applies Jeffreys correction to multinomial estimates.
+#'
+#' @param k Number of groups.
+#'
+#' @return Function to apply multinomial Jeffreys correction.
+#' @importFrom dplyr mutate
+jeffreys_multinomial_factory <- function(k) {
+  # Apply a Jeffreys correction to multinomial estimates and their standard errors.
+  #
+  # Param df: Data frame
+  # Returns: Updated data frame.
+  jeffreys_multinomial <- function(df) {
+    return(mutate(df,
+                  val = jeffreys_percentage(.data$val, .data$sample_size, k),
+                  se = binary_se(.data$val, .data$sample_size)))
+  }
+
+  return(jeffreys_multinomial)
 }
 
-#' Adjust a percentage estimate to use the Jeffreys method.
+#' Adjust a multinomial percentage estimate using the Jeffreys method.
 #'
-#' Takes a previously estimated percentage (calculated with num_yes / total *
+#' Takes a previously estimated percentage (calculated with num_group1 / total *
 #' 100) and replaces it with the Jeffreys version, where one pseudo-observation
-#' with 50% yes is inserted.
+#' with 1/k mass in each group is inserted.
 #'
 #' @param percentage Vector of percentages to adjust.
 #' @param sample_size Vector of corresponding sample sizes.
+#' @param k Number of groups.
+#'
 #' @return Vector of adjusted percentages.
-jeffreys_percentage <- function(percentage, sample_size) {
-  return((percentage * sample_size + 50) / (sample_size + 1))
+jeffreys_percentage <- function(percentage, sample_size, k) {
+  return((percentage * sample_size + 100/k) / (sample_size + 1))
 }
 
 #' Calculate the standard error for a binary proportion (as a percentage)

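The effect of the generalized correction is easiest to see with numbers; the following is an illustrative sketch of the same arithmetic in Python (the pipeline itself uses the R `jeffreys_percentage` above):

# Same arithmetic as the R jeffreys_percentage above: one pseudo-observation
# with 1/k of its mass (100/k percentage points) in each group is added.
def jeffreys_percentage(percentage, sample_size, k):
    return (percentage * sample_size + 100 / k) / (sample_size + 1)

print(jeffreys_percentage(0.0, 10, 2))    # binary case: 0% of 10 -> ~4.55%
print(jeffreys_percentage(100.0, 10, 4))  # 4 groups: 100% of 10 -> ~93.18%

In particular, adjusted proportions never sit at exactly 0% or 100% for finite sample sizes.
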
facebook/delphiFacebook/R/contingency_aggregate.R

Lines changed: 7 additions & 7 deletions

@@ -254,11 +254,11 @@ summarize_aggs <- function(df, crosswalk_data, aggregations, geo_level, params)
 
   group_vars <- aggregations$group_by[[1]]
 
-  if ( !all(groupby_vars %in% names(df)) ) {
+  if ( !all(group_vars %in% names(df)) ) {
     msg_plain(
       sprintf(
         "not all of grouping columns %s available in data; skipping aggregation",
-        paste(groupby_vars, collapse=", ")
+        paste(group_vars, collapse=", ")
     ))
     return( list( ))
   }
@@ -267,11 +267,11 @@ summarize_aggs <- function(df, crosswalk_data, aggregations, geo_level, params)
   # Keep rows with missing values initially so that we get the correct column
   # names. Explicitly drop groups with missing values in second step.
   unique_groups_counts <- as.data.frame(
-    table(df[, groupby_vars, with=FALSE], exclude=NULL, dnn=groupby_vars),
+    table(df[, group_vars, with=FALSE], exclude=NULL, dnn=group_vars),
     stringsAsFactors=FALSE
   )
   unique_groups_counts <- unique_groups_counts[
-    complete.cases(unique_groups_counts[, groupby_vars]),
+    complete.cases(unique_groups_counts[, group_vars]),
   ]
 
   # Drop groups with less than threshold sample size.
@@ -283,7 +283,7 @@ summarize_aggs <- function(df, crosswalk_data, aggregations, geo_level, params)
   ## Convert col type in unique_groups to match that in data.
   # Filter on data.table in `calculate_group` requires that columns and filter
   # values are of the same type.
-  for (col_var in groupby_vars) {
+  for (col_var in group_vars) {
     if ( class(df[[col_var]]) != class(unique_groups_counts[[col_var]]) ) {
       class(unique_groups_counts[[col_var]]) <- class(df[[col_var]])
     }
@@ -295,10 +295,10 @@ summarize_aggs <- function(df, crosswalk_data, aggregations, geo_level, params)
   setindexv(df, group_vars)
 
   calculate_group <- function(ii) {
-    target_group <- unique_groups_counts[ii, groupby_vars, drop=FALSE]
+    target_group <- unique_groups_counts[ii, group_vars, drop=FALSE]
     # Use data.table's index to make this filter efficient
     out <- summarize_aggregations_group(
-      df[as.list(target_group), on=groupby_vars],
+      df[as.list(target_group), on=group_vars],
       aggregations,
       target_group,
       geo_level,

facebook/delphiFacebook/R/responses.R

Lines changed: 1 addition & 0 deletions

@@ -159,6 +159,7 @@ load_response_one <- function(input_filename, params) {
   input_data <- code_testing(input_data)
   input_data <- code_activities(input_data)
   input_data <- code_vaccines(input_data)
+  input_data <- code_schooling(input_data)
 
   # create testing variables

facebook/delphiFacebook/R/variables.R

Lines changed: 29 additions & 0 deletions

@@ -471,3 +471,32 @@ code_vaccines <- function(input_data) {
 
   return(input_data)
 }
+
+#' Schooling
+#'
+#' @param input_data input data frame of raw survey data
+#' @return data frame augmented with `s_inperson_school_fulltime` and `s_inperson_school_parttime`
+code_schooling <- function(input_data) {
+  if ("E2_1" %in% names(input_data)) {
+    # Coded as 2 = "Yes", 3 = "No", 4 = "I don't know"
+    input_data$s_inperson_school_fulltime <- case_when(
+      input_data$E2_1 == 2 ~ 1,
+      input_data$E2_1 == 3 ~ 0,
+      TRUE ~ NA_real_
+    )
+  } else {
+    input_data$s_inperson_school_fulltime <- NA_real_
+  }
+
+  if ("E2_2" %in% names(input_data)) {
+    # Coded as 2 = "Yes", 3 = "No", 4 = "I don't know"
+    input_data$s_inperson_school_parttime <- case_when(
+      input_data$E2_2 == 2 ~ 1,
+      input_data$E2_2 == 3 ~ 0,
+      TRUE ~ NA_real_
+    )
+  } else {
+    input_data$s_inperson_school_parttime <- NA_real_
+  }
+  return(input_data)
+}

facebook/delphiFacebook/man/code_schooling.Rd

Lines changed: 17 additions & 0 deletions
(Generated documentation file; diff not rendered.)

facebook/delphiFacebook/man/jeffreys_multinomial_factory.Rd

Lines changed: 17 additions & 0 deletions
(Generated documentation file; diff not rendered.)

facebook/delphiFacebook/man/jeffreys_percentage.Rd

Lines changed: 6 additions & 4 deletions
(Generated documentation file; diff not rendered.)
