cmu-delphi
diff --git a/‎.bumpversion.cfg
Lines changed: 1 addition & 1 deletion b/‎.bumpversion.cfg
Lines changed: 1 addition & 1 deletion
diff --git a/‎_delphi_utils_python/.bumpversion.cfg
Lines changed: 1 addition & 1 deletion b/‎_delphi_utils_python/.bumpversion.cfg
Lines changed: 1 addition & 1 deletion
diff --git a/‎_delphi_utils_python/delphi_utils/__init__.py
Lines changed: 1 addition & 1 deletion b/‎_delphi_utils_python/delphi_utils/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_delphi_utils_python/delphi_utils/validator/dynamic.py
Lines changed: 1 addition & 1 deletion b/‎_delphi_utils_python/delphi_utils/validator/dynamic.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_delphi_utils_python/setup.py
Lines changed: 1 addition & 1 deletion b/‎_delphi_utils_python/setup.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎ansible/templates/covid_act_now-params-prod.json.j2
Lines changed: 1 addition & 1 deletion b/‎ansible/templates/covid_act_now-params-prod.json.j2
Lines changed: 1 addition & 1 deletion
diff --git a/‎claims_hosp/delphi_claims_hosp/config.py
Lines changed: 3 additions & 3 deletions b/‎claims_hosp/delphi_claims_hosp/config.py
Lines changed: 3 additions & 3 deletions
diff --git a/‎claims_hosp/tests/test_update_indicator.py
Lines changed: 4 additions & 4 deletions b/‎claims_hosp/tests/test_update_indicator.py
Lines changed: 4 additions & 4 deletions
diff --git a/‎covid_act_now/params.json.template
Lines changed: 1 addition & 1 deletion b/‎covid_act_now/params.json.template
Lines changed: 1 addition & 1 deletion
diff --git a/‎facebook/Makefile
Lines changed: 34 additions & 4 deletions b/‎facebook/Makefile
Lines changed: 34 additions & 4 deletions
diff --git a/‎facebook/delphiFacebook/NAMESPACE
Lines changed: 3 additions & 0 deletions b/‎facebook/delphiFacebook/NAMESPACE
Lines changed: 3 additions & 0 deletions
diff --git a/‎facebook/delphiFacebook/R/contingency_variables.R
Lines changed: 0 additions & 64 deletions b/‎facebook/delphiFacebook/R/contingency_variables.R
Lines changed: 0 additions & 64 deletions
diff --git a/‎facebook/delphiFacebook/R/responses.R
Lines changed: 49 additions & 2 deletions b/‎facebook/delphiFacebook/R/responses.R
Lines changed: 49 additions & 2 deletions
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.2.13
+current_version = 0.2.14
 commit = True
 message = chore: bump covidcast-indicators to {new_version}
 tag = False
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.2.6
+current_version = 0.2.7
 commit = True
 message = chore: bump delphi_utils to {new_version}
 tag = False
 
@@ -15,4 +15,4 @@
 from .nancodes import Nans
 from .weekday import Weekday
 
-__version__ = "0.2.6"
+__version__ = "0.2.7"
@@ -309,7 +309,7 @@ def pad_reference_api_df(self, reference_api_df, geo_sig_df, reference_end_date)
         Returns:
             - reference_api_df: Supplemented version of original
         """
-        reference_api_df_max_date = reference_api_df.time_value.max()
+        reference_api_df_max_date = reference_api_df.time_value.max().date()
         if reference_api_df_max_date < reference_end_date:
             # Querying geo_sig_df, only taking relevant rows
             geo_sig_df_supplement = geo_sig_df.query(
 
@@ -26,7 +26,7 @@
 
 setup(
     name="delphi_utils",
-    version="0.2.6",
+    version="0.2.7",
     description="Shared Utility Functions for Indicators",
     long_description=long_description,
     long_description_content_type="text/markdown",
 
@@ -20,7 +20,7 @@
       "data_source": "covid-act-now",
       "span_length": 14,
       "min_expected_lag": {"all": "3"},
-      "max_expected_lag": {"all": "6"},
+      "max_expected_lag": {"all": "9"},
       "dry_run": true,
       "suppressed_errors": [
         {"check_name": "check_se_many_missing",
 
@@ -3,7 +3,7 @@
 
 Author: Maria Jahja
 Created: 2020-06-01
-Modified: 2020-09-27
+Modified: 2021-12-11
 
 """
 
@@ -26,8 +26,8 @@ class Config:
     # (one day needed for smoother to produce values)
     BURN_IN_PERIOD = timedelta(days=1)
 
-    # shift dates forward for labeling purposes
-    DAY_SHIFT = timedelta(days=1)
+    # if desired, shift dates forward for labeling purposes
+    DAY_SHIFT = timedelta(days=0)
 
     # data columns
     CLAIMS_COUNT_COLS = ["Denominator", "Covid_like"]
 
@@ -141,7 +141,7 @@ def test_write_to_csv_results(self):
         updater.write_to_csv(res0, td.name)
 
         # check outputs
-        expected_name = f"20200502_geography_{Config.signal_name}.csv"
+        expected_name = f"20200501_geography_{Config.signal_name}.csv"
         assert exists(join(td.name, expected_name))
         output_data = pd.read_csv(join(td.name, expected_name))
         assert (
@@ -155,7 +155,7 @@ def test_write_to_csv_results(self):
         assert np.isnan(output_data.direction.values).all()
         assert np.isnan(output_data.sample_size.values).all()
 
-        expected_name = f"20200503_geography_{Config.signal_name}.csv"
+        expected_name = f"20200502_geography_{Config.signal_name}.csv"
         assert exists(join(td.name, expected_name))
         output_data = pd.read_csv(join(td.name, expected_name))
         assert (
@@ -167,7 +167,7 @@ def test_write_to_csv_results(self):
         assert np.isnan(output_data.direction.values).all()
         assert np.isnan(output_data.sample_size.values).all()
 
-        expected_name = f"20200505_geography_{Config.signal_name}.csv"
+        expected_name = f"20200504_geography_{Config.signal_name}.csv"
         assert exists(join(td.name, expected_name))
         output_data = pd.read_csv(join(td.name, expected_name))
         assert (
@@ -221,7 +221,7 @@ def test_write_to_csv_with_se_results(self):
         updater.write_to_csv(res0, td.name)
 
         # check outputs
-        expected_name = f"20200502_geography_{signal_name}.csv"
+        expected_name = f"20200501_geography_{signal_name}.csv"
         assert exists(join(td.name, expected_name))
         output_data = pd.read_csv(join(td.name, expected_name))
         assert (
 
@@ -20,7 +20,7 @@
       "data_source": "covid-act-now",
       "span_length": 14,
       "min_expected_lag": {"all": "3"},
-      "max_expected_lag": {"all": "6"},
+      "max_expected_lag": {"all": "9"},
       "dry_run": true,
       "suppressed_errors": [
         {"check_name": "check_se_many_missing",
 
@@ -12,6 +12,7 @@ PYTHON:=env/bin/python
 QUALTRICS=$(shell $(PYTHON) -m delphi_utils get input_dir)
 WEIGHTS=$(shell $(PYTHON) -m delphi_utils get weights_in_dir)
 CIDS=$(shell $(PYTHON) -m delphi_utils get weights_out_dir)
+CIDS_EXP=$(shell $(PYTHON) -m delphi_utils get experimental_weights_out_dir)
 INDIVIDUAL=$(shell $(PYTHON) -m delphi_utils get individual_dir)
 INDIVIDUAL_RACEETH=$(shell $(PYTHON) -m delphi_utils get individual_raceeth_dir)
 ARCHIVE=$(shell $(PYTHON) -m delphi_utils get archive_dir)
@@ -25,7 +26,9 @@ SFTP_OPTIONS=$(shell $(PYTHON) -m delphi_utils get sftp_options)
 MAX_WEIGHTED=ls -1 $(WEIGHTS) | grep dap | tail -1 | sed 's/_.*//;s/-//g;'
 
 ANTIJOIN:="antijoin.cids.sorted.txt"
+ANTIJOIN_EXP:="antijoin.experimental.cids.sorted.txt"
 CIDS_DEST:="fb-interchange/cmu_respondent_ids"
+CIDS_EXP_DEST:="fb-interchange/cmu_respondent_ww_ids"
 INDIVID_DEST:="fb-public-results/"
 INDIVID_RACEETH_DEST:="protected-race-ethnicity-data/"
 RAW_DEST:="raw"
@@ -59,7 +62,7 @@ tidy: receiving
 	mv scratch/*.tgz tidy/
 
 clean:
-	rm -f $(RECEIVING)/*.csv $(INDIVIDUAL)/*.csv $(INDIVIDUAL_RACEETH)/*.csv  $(CIDS)/*.csv
+	rm -f $(RECEIVING)/*.csv $(INDIVIDUAL)/*.csv $(INDIVIDUAL_RACEETH)/*.csv $(CIDS)/*.csv $(CIDS_EXP)/*.csv
 
 clean-archive:
 	rm -f $(ARCHIVE)/*.Rds
@@ -78,6 +81,9 @@ install: install-python install-R
 $(CIDS):
 	[ -f $(CIDS) ] || mkdir -p $(CIDS)
 
+$(CIDS_EXP):
+	[ -f $(CIDS_EXP) ] || mkdir -p $(CIDS_EXP)
+
 init-qualtrics:
 	grep '"token": "..*"' params.json
 
@@ -133,14 +139,14 @@ dev: delphiFacebook_1.0.tar.gz
 lib:
 	R -e 'roxygen2::roxygenise("delphiFacebook")'
 
-run-R: $(CIDS)
+run-R: $(CIDS) $(CIDS_EXP)
 	rm -rf tmp
 	time Rscript run.R 2>&1 |tee tmp
 	grep "run_facebook completed successfully" tmp
 	grep "scheduled core" tmp ; \
 	[ "$$?" -eq 1 ]
 
-pipeline: scratch init-qualtrics params.json $(WEIGHTS) run-R post-cids post-individual post-individual-raceeth post-done tidy
+pipeline: scratch init-qualtrics params.json $(WEIGHTS) run-R post-cids post-experimental-cids post-individual post-individual-raceeth post-done tidy
 	grep $(TODAY) params.json
 	[ -f $(YESTERDAY) ] && rm $(YESTERDAY) || true
 	touch $@
@@ -184,6 +190,28 @@ post-cids: $(TODAY) $(CIDS)
 	echo "SUCCESS: $(DRY_MESSAGE)Posted `echo $${POST} | wc -w` cid files" >> $(MESSAGES)
 	touch $@
 
+post-experimental-cids: $(TODAY) $(CIDS_EXP)
+	rm -rf tmp
+	touch $(ANTIJOIN_EXP)
+	POST=`find $(CIDS_EXP) -maxdepth 1 -newer $(TODAY) -name "cvid_cids_*.csv"`; \
+	[ -n "$${POST}" ]; \
+	LC_ALL=C find $(CIDS_EXP) -maxdepth 1 -daystart -mtime +0 -name "cvid_cids*.csv" -exec sort -u -o ${ANTIJOIN_EXP} {} +; \
+	BATCH=""; \
+	for f in $${POST}; do \
+	    LC_ALL=C comm -23 <(LC_ALL=C sort $$f) ${ANTIJOIN_EXP} >tmp; \
+	    diff -q tmp $$f || mv $$f $$f.bak; \
+	    mv tmp $$f; \
+	    ncids=`wc -l $$f | awk '{print $$1}'`; \
+	    if [[ $$ncids == "0" ]]; then \
+		echo "ERROR: 0 CIDs reported for $$f"; \
+		exit 73; \
+	    fi; \
+	    BATCH="$${BATCH}put $$f ${CIDS_EXP_DEST}\n"; \
+	done; \
+	$(SFTP_POST); \
+	echo "SUCCESS: $(DRY_MESSAGE)Posted `echo $${POST} | wc -w` experimental cid files" >> $(MESSAGES)
+	touch $@
+
 post-individual: $(TODAY) $(INDIVIDUAL)
 	POST=`find $(INDIVIDUAL) -maxdepth 1 -newer $(TODAY) -name "cvid_responses_*.csv"`; \
 	[ -n "$${POST}" ]; \
@@ -210,10 +238,12 @@ post-individual-raceeth: $(TODAY) $(INDIVIDUAL_RACEETH)
 	echo "SUCCESS: $(DRY_MESSAGE)Posted `echo $${POST} | wc -w` race-ethnicity microresponse files" >> $(MESSAGES)
 	touch $@
 
-post-done: post-cids
+post-done: post-cids post-experimental-cids
 	touch $(YESTERDAY).done
 	BATCH="put $(YESTERDAY).done $(CIDS_DEST)\n"; \
 	$(SFTP_POST)
+	BATCH="put $(YESTERDAY).done $(CIDS_EXP_DEST)\n"; \
+	$(SFTP_POST)
 	echo "SUCCESS: $(DRY_MESSAGE)Posted $(YESTERDAY).done" >> $(MESSAGES)
 
 validate-covidcast:
 
@@ -12,6 +12,7 @@ export(end_of_prev_full_month)
 export(end_of_prev_full_week)
 export(filter_complete_responses)
 export(filter_data_for_aggregation)
+export(filter_module_complete_responses)
 export(filter_responses)
 export(floor_epiweek)
 export(get_filenames_in_range)
@@ -51,6 +52,7 @@ export(update_archive)
 export(update_params)
 export(verify_aggs)
 export(write_cid)
+export(write_cid_experimental_wrapper)
 export(write_contingency_tables)
 export(write_data_api)
 export(write_individual)
@@ -121,4 +123,5 @@ importFrom(stringi,stri_trans_tolower)
 importFrom(stringi,stri_trim)
 importFrom(tibble,add_column)
 importFrom(tibble,tribble)
+importFrom(utils,tail)
 useDynLib(delphiFacebook, .registration = TRUE)
@@ -4,70 +4,6 @@
 ## input data is always from only one wave of the survey -- they do not deal
 ## with inputs that have multiple waves mingled in one data frame.
 
-#' Gender
-#'
-#' @param input_data input data frame of raw survey data
-#' @param wave integer indicating survey version
-#' 
-#' @return augmented data frame
-code_gender <- function(input_data, wave) {
-  if ("D1" %in% names(input_data)) {
-    input_data$gender <- case_when(
-      input_data$D1 == 1 ~ "Male",
-      input_data$D1 == 2 ~ "Female",
-      input_data$D1 == 3 ~ "Other",
-      input_data$D1 == 4 ~ "Other",
-      input_data$D1 == 5 ~ NA_character_,
-      TRUE ~ NA_character_
-    )
-  } else {
-    input_data$gender <- NA_character_
-  }
-  
-  return(input_data)
-}
-
-#' Age-related fields
-#'
-#' @param input_data input data frame of raw survey data
-#' @param wave integer indicating survey version
-#' 
-#' @return augmented data frame
-code_age <- function(input_data, wave) {
-  if ("D2" %in% names(input_data)) {
-    input_data$agefull <- case_when(
-      input_data$D2 == 1 ~ "18-24",
-      input_data$D2 == 2 ~ "25-34",
-      input_data$D2 == 3 ~ "35-44",
-      input_data$D2 == 4 ~ "45-54",
-      input_data$D2 == 5 ~ "55-64",
-      input_data$D2 == 6 ~ "65-74",
-      input_data$D2 == 7 ~ "75plus",
-      TRUE ~ NA_character_
-    )
-    
-    # Condensed age categories
-    input_data$age <- case_when(
-      input_data$D2 == 1 ~ "18-24",
-      input_data$D2 == 2 ~ "25-44",
-      input_data$D2 == 3 ~ "25-44",
-      input_data$D2 == 4 ~ "45-64",
-      input_data$D2 == 5 ~ "45-64",
-      input_data$D2 == 6 ~ "65plus",
-      input_data$D2 == 7 ~ "65plus",
-      TRUE ~ NA_character_
-    )
-    
-    input_data$age65plus <- input_data$age == "65plus"
-  } else {
-    input_data$agefull <- NA_character_
-    input_data$age <- NA_character_
-    input_data$age65plus <- NA
-  }
-  
-  return(input_data)
-}
-
 #' Occupation
 #'
 #' @param input_data input data frame of raw survey data
 
@@ -194,6 +194,8 @@ load_response_one <- function(input_filename, params, contingency_run) {
   input_data <- code_schooling(input_data, wave)
   input_data <- code_beliefs(input_data, wave)
   input_data <- code_news_and_info(input_data, wave)
+  input_data <- code_gender(input_data, wave)
+  input_data <- code_age(input_data, wave)
 
   if (!is.null(params$produce_individual_raceeth) && params$produce_individual_raceeth) {
     input_data <- code_race_ethnicity(input_data, wave)
@@ -227,8 +229,6 @@ load_response_one <- function(input_filename, params, contingency_run) {
   if (contingency_run) {
     ## Create additional fields for aggregations.
     # Demographic grouping variables
-    input_data <- code_gender(input_data, wave)
-    input_data <- code_age(input_data, wave)
     input_data <- code_race_ethnicity(input_data, wave)
     input_data <- code_occupation(input_data, wave)
     input_data <- code_education(input_data, wave)
@@ -546,6 +546,8 @@ module_assignment <- function(input_data, wave) {
       input_data$FL_23_DO == "ModuleB" ~ "B",
       TRUE ~ NA_character_
     )
+  } else {
+    input_data$module <- NA_character_
   }
 
   return(input_data)
@@ -752,3 +754,48 @@ filter_complete_responses <- function(data_full, params)
 
   return(data_full)
 }
+
+#' Filter responses to those that are "module-complete", split by module
+#'
+#' Inclusion criteria:
+#'
+#' * answered age consent
+#' * CID/token IS NOT missing
+#' * distribution source (ie previews) IS NOT irregular
+#' * start date IS IN range, pacific time
+#' * Date is in [`params$start_date - params$backfill_days`, `end_date`],
+#' inclusive.
+#' * answered minimum of 2 additional questions, where to "answer" a numeric
+#' open-ended question (A2, A2b, B2b, Q40, C10_1_1, C10_2_1, C10_3_1, C10_4_1,
+#' D3, D4, D5) means to provide any number (floats okay) and to "answer" a radio
+#' button question is to provide a selection.
+#' * reached the end of the survey (i.e. sees the "Thank you" message)
+#' * answered age and gender questions
+#'
+#' Most of these criteria are handled by `filter_responses()` and
+#' `filter_complete_responses()` above; this function need only handle the last
+#' two criteria.
+#'
+#' @param data_full data frame of responses
+#' @param params named list of configuration options from `read_params()`,
+#'   containing `start_date`, `backfill_days`, and `end_date`
+#' @return named list of data frames `a` and `b`, holding the date, `token`,
+#'   and `module` columns of module-complete responses for modules A and B
+#'
+#' @importFrom dplyr filter rename select
+#' @importFrom rlang .data
+#' @export
+filter_module_complete_responses <- function(data_full, params) {
+  # Decide which date column to report before `date` is renamed below.
+  date_col <- if ("day" %in% names(data_full)) "day" else "Date"
+  data_full <- rename(data_full, Date = "date") %>%
+    filter_complete_responses(params) %>%
+    filter(!is.na(.data$age),
+           !is.na(.data$gender),
+           .data$Finished == 1) %>%
+    select(dplyr::all_of(c(date_col, "token", "module")))
+
+  list(a = filter(data_full, .data$module == "A"),
+       b = filter(data_full, .data$module == "B"))
+}
+