Skip to content

Commit 3a7db25

Browse files
authored
Merge pull request #1420 from cmu-delphi/release/indicators_v0.2.14_utils_v0.2.7
Release covidcast-indicators 0.2.14
2 parents a2adcc6 + 63c93eb commit 3a7db25

27 files changed

+324
-104
lines changed

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 0.2.13
2+
current_version = 0.2.14
33
commit = True
44
message = chore: bump covidcast-indicators to {new_version}
55
tag = False

_delphi_utils_python/.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 0.2.6
2+
current_version = 0.2.7
33
commit = True
44
message = chore: bump delphi_utils to {new_version}
55
tag = False

_delphi_utils_python/delphi_utils/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,4 @@
1515
from .nancodes import Nans
1616
from .weekday import Weekday
1717

18-
__version__ = "0.2.6"
18+
__version__ = "0.2.7"

_delphi_utils_python/delphi_utils/validator/dynamic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ def pad_reference_api_df(self, reference_api_df, geo_sig_df, reference_end_date)
309309
Returns:
310310
- reference_api_df: Supplemented version of original
311311
"""
312-
reference_api_df_max_date = reference_api_df.time_value.max()
312+
reference_api_df_max_date = reference_api_df.time_value.max().date()
313313
if reference_api_df_max_date < reference_end_date:
314314
# Querying geo_sig_df, only taking relevant rows
315315
geo_sig_df_supplement = geo_sig_df.query(

_delphi_utils_python/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
setup(
2828
name="delphi_utils",
29-
version="0.2.6",
29+
version="0.2.7",
3030
description="Shared Utility Functions for Indicators",
3131
long_description=long_description,
3232
long_description_content_type="text/markdown",

ansible/templates/covid_act_now-params-prod.json.j2

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
"data_source": "covid-act-now",
2121
"span_length": 14,
2222
"min_expected_lag": {"all": "3"},
23-
"max_expected_lag": {"all": "6"},
23+
"max_expected_lag": {"all": "9"},
2424
"dry_run": true,
2525
"suppressed_errors": [
2626
{"check_name": "check_se_many_missing",

claims_hosp/delphi_claims_hosp/config.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
44
Author: Maria Jahja
55
Created: 2020-06-01
6-
Modified: 2020-09-27
6+
Modified: 2021-12-11
77
88
"""
99

@@ -26,8 +26,8 @@ class Config:
2626
# (one day needed for smoother to produce values)
2727
BURN_IN_PERIOD = timedelta(days=1)
2828

29-
# shift dates forward for labeling purposes
30-
DAY_SHIFT = timedelta(days=1)
29+
# if desired, shift dates forward for labeling purposes
30+
DAY_SHIFT = timedelta(days=0)
3131

3232
# data columns
3333
CLAIMS_COUNT_COLS = ["Denominator", "Covid_like"]

claims_hosp/tests/test_update_indicator.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ def test_write_to_csv_results(self):
141141
updater.write_to_csv(res0, td.name)
142142

143143
# check outputs
144-
expected_name = f"20200502_geography_{Config.signal_name}.csv"
144+
expected_name = f"20200501_geography_{Config.signal_name}.csv"
145145
assert exists(join(td.name, expected_name))
146146
output_data = pd.read_csv(join(td.name, expected_name))
147147
assert (
@@ -155,7 +155,7 @@ def test_write_to_csv_results(self):
155155
assert np.isnan(output_data.direction.values).all()
156156
assert np.isnan(output_data.sample_size.values).all()
157157

158-
expected_name = f"20200503_geography_{Config.signal_name}.csv"
158+
expected_name = f"20200502_geography_{Config.signal_name}.csv"
159159
assert exists(join(td.name, expected_name))
160160
output_data = pd.read_csv(join(td.name, expected_name))
161161
assert (
@@ -167,7 +167,7 @@ def test_write_to_csv_results(self):
167167
assert np.isnan(output_data.direction.values).all()
168168
assert np.isnan(output_data.sample_size.values).all()
169169

170-
expected_name = f"20200505_geography_{Config.signal_name}.csv"
170+
expected_name = f"20200504_geography_{Config.signal_name}.csv"
171171
assert exists(join(td.name, expected_name))
172172
output_data = pd.read_csv(join(td.name, expected_name))
173173
assert (
@@ -221,7 +221,7 @@ def test_write_to_csv_with_se_results(self):
221221
updater.write_to_csv(res0, td.name)
222222

223223
# check outputs
224-
expected_name = f"20200502_geography_{signal_name}.csv"
224+
expected_name = f"20200501_geography_{signal_name}.csv"
225225
assert exists(join(td.name, expected_name))
226226
output_data = pd.read_csv(join(td.name, expected_name))
227227
assert (

covid_act_now/params.json.template

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
"data_source": "covid-act-now",
2121
"span_length": 14,
2222
"min_expected_lag": {"all": "3"},
23-
"max_expected_lag": {"all": "6"},
23+
"max_expected_lag": {"all": "9"},
2424
"dry_run": true,
2525
"suppressed_errors": [
2626
{"check_name": "check_se_many_missing",

facebook/Makefile

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ PYTHON:=env/bin/python
1212
QUALTRICS=$(shell $(PYTHON) -m delphi_utils get input_dir)
1313
WEIGHTS=$(shell $(PYTHON) -m delphi_utils get weights_in_dir)
1414
CIDS=$(shell $(PYTHON) -m delphi_utils get weights_out_dir)
15+
CIDS_EXP=$(shell $(PYTHON) -m delphi_utils get experimental_weights_out_dir)
1516
INDIVIDUAL=$(shell $(PYTHON) -m delphi_utils get individual_dir)
1617
INDIVIDUAL_RACEETH=$(shell $(PYTHON) -m delphi_utils get individual_raceeth_dir)
1718
ARCHIVE=$(shell $(PYTHON) -m delphi_utils get archive_dir)
@@ -25,7 +26,9 @@ SFTP_OPTIONS=$(shell $(PYTHON) -m delphi_utils get sftp_options)
2526
MAX_WEIGHTED=ls -1 $(WEIGHTS) | grep dap | tail -1 | sed 's/_.*//;s/-//g;'
2627

2728
ANTIJOIN:="antijoin.cids.sorted.txt"
29+
ANTIJOIN_EXP:="antijoin.experimental.cids.sorted.txt"
2830
CIDS_DEST:="fb-interchange/cmu_respondent_ids"
31+
CIDS_EXP_DEST:="fb-interchange/cmu_respondent_ww_ids"
2932
INDIVID_DEST:="fb-public-results/"
3033
INDIVID_RACEETH_DEST:="protected-race-ethnicity-data/"
3134
RAW_DEST:="raw"
@@ -59,7 +62,7 @@ tidy: receiving
5962
mv scratch/*.tgz tidy/
6063

6164
clean:
62-
rm -f $(RECEIVING)/*.csv $(INDIVIDUAL)/*.csv $(INDIVIDUAL_RACEETH)/*.csv $(CIDS)/*.csv
65+
rm -f $(RECEIVING)/*.csv $(INDIVIDUAL)/*.csv $(INDIVIDUAL_RACEETH)/*.csv $(CIDS)/*.csv $(CIDS_EXP)/*.csv
6366

6467
clean-archive:
6568
rm -f $(ARCHIVE)/*.Rds
@@ -78,6 +81,9 @@ install: install-python install-R
7881
$(CIDS):
7982
[ -f $(CIDS) ] || mkdir -p $(CIDS)
8083

84+
$(CIDS_EXP):
85+
[ -f $(CIDS_EXP) ] || mkdir -p $(CIDS_EXP)
86+
8187
init-qualtrics:
8288
grep '"token": "..*"' params.json
8389

@@ -133,14 +139,14 @@ dev: delphiFacebook_1.0.tar.gz
133139
lib:
134140
R -e 'roxygen2::roxygenise("delphiFacebook")'
135141

136-
run-R: $(CIDS)
142+
run-R: $(CIDS) $(CIDS_EXP)
137143
rm -rf tmp
138144
time Rscript run.R 2>&1 |tee tmp
139145
grep "run_facebook completed successfully" tmp
140146
grep "scheduled core" tmp ; \
141147
[ "$$?" -eq 1 ]
142148

143-
pipeline: scratch init-qualtrics params.json $(WEIGHTS) run-R post-cids post-individual post-individual-raceeth post-done tidy
149+
pipeline: scratch init-qualtrics params.json $(WEIGHTS) run-R post-cids post-experimental-cids post-individual post-individual-raceeth post-done tidy
144150
grep $(TODAY) params.json
145151
[ -f $(YESTERDAY) ] && rm $(YESTERDAY) || true
146152
touch $@
@@ -184,6 +190,28 @@ post-cids: $(TODAY) $(CIDS)
184190
echo "SUCCESS: $(DRY_MESSAGE)Posted `echo $${POST} | wc -w` cid files" >> $(MESSAGES)
185191
touch $@
186192

193+
post-experimental-cids: $(TODAY) $(CIDS_EXP)
194+
rm -rf tmp
195+
touch $(ANTIJOIN_EXP)
196+
POST=`find $(CIDS_EXP) -maxdepth 1 -newer $(TODAY) -name "cvid_cids_*.csv"`; \
197+
[ -n "$${POST}" ]; \
198+
LC_ALL=C find $(CIDS_EXP) -maxdepth 1 -daystart -mtime +0 -name "cvid_cids*.csv" -exec sort -u -o ${ANTIJOIN_EXP} {} +; \
199+
BATCH=""; \
200+
for f in $${POST}; do \
201+
LC_ALL=C comm -23 <(LC_ALL=C sort $$f) ${ANTIJOIN_EXP} >tmp; \
202+
diff -q tmp $$f || mv $$f $$f.bak; \
203+
mv tmp $$f; \
204+
ncids=`wc -l $$f | awk '{print $$1}'`; \
205+
if [[ $$ncids == "0" ]]; then \
206+
echo "ERROR: 0 CIDs reported for $$f"; \
207+
exit 73; \
208+
fi; \
209+
BATCH="$${BATCH}put $$f ${CIDS_EXP_DEST}\n"; \
210+
done; \
211+
$(SFTP_POST); \
212+
echo "SUCCESS: $(DRY_MESSAGE)Posted `echo $${POST} | wc -w` experimental cid files" >> $(MESSAGES)
213+
touch $@
214+
187215
post-individual: $(TODAY) $(INDIVIDUAL)
188216
POST=`find $(INDIVIDUAL) -maxdepth 1 -newer $(TODAY) -name "cvid_responses_*.csv"`; \
189217
[ -n "$${POST}" ]; \
@@ -210,10 +238,12 @@ post-individual-raceeth: $(TODAY) $(INDIVIDUAL_RACEETH)
210238
echo "SUCCESS: $(DRY_MESSAGE)Posted `echo $${POST} | wc -w` race-ethnicity microresponse files" >> $(MESSAGES)
211239
touch $@
212240

213-
post-done: post-cids
241+
post-done: post-cids post-experimental-cids
214242
touch $(YESTERDAY).done
215243
BATCH="put $(YESTERDAY).done $(CIDS_DEST)\n"; \
216244
$(SFTP_POST)
245+
BATCH="put $(YESTERDAY).done $(CIDS_EXP_DEST)\n"; \
246+
$(SFTP_POST)
217247
echo "SUCCESS: $(DRY_MESSAGE)Posted $(YESTERDAY).done" >> $(MESSAGES)
218248

219249
validate-covidcast:

facebook/delphiFacebook/NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ export(end_of_prev_full_month)
1212
export(end_of_prev_full_week)
1313
export(filter_complete_responses)
1414
export(filter_data_for_aggregation)
15+
export(filter_module_complete_responses)
1516
export(filter_responses)
1617
export(floor_epiweek)
1718
export(get_filenames_in_range)
@@ -51,6 +52,7 @@ export(update_archive)
5152
export(update_params)
5253
export(verify_aggs)
5354
export(write_cid)
55+
export(write_cid_experimental_wrapper)
5456
export(write_contingency_tables)
5557
export(write_data_api)
5658
export(write_individual)
@@ -121,4 +123,5 @@ importFrom(stringi,stri_trans_tolower)
121123
importFrom(stringi,stri_trim)
122124
importFrom(tibble,add_column)
123125
importFrom(tibble,tribble)
126+
importFrom(utils,tail)
124127
useDynLib(delphiFacebook, .registration = TRUE)

facebook/delphiFacebook/R/contingency_variables.R

Lines changed: 0 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -4,70 +4,6 @@
44
## input data is always from only one wave of the survey -- they do not deal
55
## with inputs that have multiple waves mingled in one data frame.
66

7-
#' Gender
8-
#'
9-
#' @param input_data input data frame of raw survey data
10-
#' @param wave integer indicating survey version
11-
#'
12-
#' @return augmented data frame
13-
code_gender <- function(input_data, wave) {
14-
if ("D1" %in% names(input_data)) {
15-
input_data$gender <- case_when(
16-
input_data$D1 == 1 ~ "Male",
17-
input_data$D1 == 2 ~ "Female",
18-
input_data$D1 == 3 ~ "Other",
19-
input_data$D1 == 4 ~ "Other",
20-
input_data$D1 == 5 ~ NA_character_,
21-
TRUE ~ NA_character_
22-
)
23-
} else {
24-
input_data$gender <- NA_character_
25-
}
26-
27-
return(input_data)
28-
}
29-
30-
#' Age-related fields
31-
#'
32-
#' @param input_data input data frame of raw survey data
33-
#' @param wave integer indicating survey version
34-
#'
35-
#' @return augmented data frame
36-
code_age <- function(input_data, wave) {
37-
if ("D2" %in% names(input_data)) {
38-
input_data$agefull <- case_when(
39-
input_data$D2 == 1 ~ "18-24",
40-
input_data$D2 == 2 ~ "25-34",
41-
input_data$D2 == 3 ~ "35-44",
42-
input_data$D2 == 4 ~ "45-54",
43-
input_data$D2 == 5 ~ "55-64",
44-
input_data$D2 == 6 ~ "65-74",
45-
input_data$D2 == 7 ~ "75plus",
46-
TRUE ~ NA_character_
47-
)
48-
49-
# Condensed age categories
50-
input_data$age <- case_when(
51-
input_data$D2 == 1 ~ "18-24",
52-
input_data$D2 == 2 ~ "25-44",
53-
input_data$D2 == 3 ~ "25-44",
54-
input_data$D2 == 4 ~ "45-64",
55-
input_data$D2 == 5 ~ "45-64",
56-
input_data$D2 == 6 ~ "65plus",
57-
input_data$D2 == 7 ~ "65plus",
58-
TRUE ~ NA_character_
59-
)
60-
61-
input_data$age65plus <- input_data$age == "65plus"
62-
} else {
63-
input_data$agefull <- NA_character_
64-
input_data$age <- NA_character_
65-
input_data$age65plus <- NA
66-
}
67-
68-
return(input_data)
69-
}
70-
717
#' Occupation
728
#'
739
#' @param input_data input data frame of raw survey data

facebook/delphiFacebook/R/responses.R

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,8 @@ load_response_one <- function(input_filename, params, contingency_run) {
194194
input_data <- code_schooling(input_data, wave)
195195
input_data <- code_beliefs(input_data, wave)
196196
input_data <- code_news_and_info(input_data, wave)
197+
input_data <- code_gender(input_data, wave)
198+
input_data <- code_age(input_data, wave)
197199

198200
if (!is.null(params$produce_individual_raceeth) && params$produce_individual_raceeth) {
199201
input_data <- code_race_ethnicity(input_data, wave)
@@ -227,8 +229,6 @@ load_response_one <- function(input_filename, params, contingency_run) {
227229
if (contingency_run) {
228230
## Create additional fields for aggregations.
229231
# Demographic grouping variables
230-
input_data <- code_gender(input_data, wave)
231-
input_data <- code_age(input_data, wave)
232232
input_data <- code_race_ethnicity(input_data, wave)
233233
input_data <- code_occupation(input_data, wave)
234234
input_data <- code_education(input_data, wave)
@@ -546,6 +546,8 @@ module_assignment <- function(input_data, wave) {
546546
input_data$FL_23_DO == "ModuleB" ~ "B",
547547
TRUE ~ NA_character_
548548
)
549+
} else {
550+
input_data$module <- NA_character_
549551
}
550552

551553
return(input_data)
@@ -752,3 +754,48 @@ filter_complete_responses <- function(data_full, params)
752754

753755
return(data_full)
754756
}
757+
758+
#' Filter responses to those that are "module-complete", split by module
#'
#' Inclusion criteria:
#'
#' * answered age consent
#' * CID/token IS NOT missing
#' * distribution source (ie previews) IS NOT irregular
#' * start date IS IN range, pacific time
#' * Date is in [`params$start_date - params$backfill_days`, `end_date`],
#'   inclusive.
#' * answered minimum of 2 additional questions, where to "answer" a numeric
#'   open-ended question (A2, A2b, B2b, Q40, C10_1_1, C10_2_1, C10_3_1, C10_4_1,
#'   D3, D4, D5) means to provide any number (floats okay) and to "answer" a
#'   radio button question is to provide a selection.
#' * reached the end of the survey (i.e. sees the "Thank you" message)
#' * answered age and gender questions
#'
#' Most of these criteria are handled by `filter_responses()` and
#' `filter_complete_responses()` above; this function need only handle the last
#' two criteria.
#'
#' @param data_full data frame of responses. This function reads the columns
#'   `date`, `age`, `gender`, `Finished`, `token` and `module`, plus `day` when
#'   it is present.
#' @param params named list of configuration options from `read_params()`,
#'   containing `start_date`, `backfill_days`, and `end_date`
#'
#' @return named list with elements `a` and `b`: the retained responses with
#'   `module` equal to "A" and "B" respectively. Each data frame has three
#'   columns, in this order: the date column (`day` if `data_full` had a `day`
#'   column, otherwise `Date`), `token`, and `module`. Responses whose `module`
#'   is missing appear in neither element.
#'
#' @importFrom dplyr filter rename select
#' @importFrom rlang .data
#' @export
filter_module_complete_responses <- function(data_full, params)
{
  # Date column to keep in the output: prefer `day` when the input has it.
  date_col <- if ("day" %in% names(data_full)) { "day" } else { "Date" }

  # NOTE(review): the rename source is the literal column `date`, not
  # `date_col`; confirm that every caller supplies a `date` column.
  data_full <- rename(data_full, Date = "date") %>%
    filter_complete_responses(params) %>%
    filter(
      !is.na(.data$age),
      !is.na(.data$gender),
      .data$Finished == 1
    ) %>%
    # `all_of()` makes it explicit that `date_col` holds a column *name*; a
    # bare external vector in `select()` is ambiguous and deprecated, as is the
    # `.data` pronoun inside selections. Column order is unchanged.
    select(dplyr::all_of(c(date_col, "token", "module")))

  data_a <- filter(data_full, .data$module == "A")
  data_b <- filter(data_full, .data$module == "B")

  return(list(a = data_a, b = data_b))
}
801+

0 commit comments

Comments
 (0)