diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 269a05c95..6f940f68f 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.1.22
+current_version = 0.1.23
 commit = True
 message = chore: bump covidcast-indicators to {new_version}
 tag = False
diff --git a/facebook/Dockerfile b/facebook/Dockerfile
index d78599349..47969ca58 100644
--- a/facebook/Dockerfile
+++ b/facebook/Dockerfile
@@ -27,6 +27,8 @@ ADD ./monthly-files.R /facebook/monthly-files.R
 ADD ./contingency_tables.R /facebook/contingency_tables.R
 ADD ./contingency-combine.R /facebook/contingency-combine.R
 ADD ./ssmtp.conf /etc/ssmtp/ssmtp.conf
+# Absolute destination, like the ADD lines above, so the result does not depend on the WORKDIR inherited from the base image.
+ADD ./GITREF /facebook/GITREF
 WORKDIR /facebook/
 RUN make lib
 RUN make install
diff --git a/facebook/GITREF b/facebook/GITREF
new file mode 100644
index 000000000..e69de29bb
diff --git a/facebook/Makefile b/facebook/Makefile
index 3ed8824d8..93305ed6c 100644
--- a/facebook/Makefile
+++ b/facebook/Makefile
@@ -13,6 +13,7 @@ QUALTRICS=$(shell $(PYTHON) -m delphi_utils get input_dir)
 WEIGHTS=$(shell $(PYTHON) -m delphi_utils get weights_in_dir)
 CIDS=$(shell $(PYTHON) -m delphi_utils get weights_out_dir)
 INDIVIDUAL=$(shell $(PYTHON) -m delphi_utils get individual_dir)
+INDIVIDUAL_RACEETH=$(shell $(PYTHON) -m delphi_utils get individual_raceeth_dir)
 ARCHIVE=$(shell $(PYTHON) -m delphi_utils get archive_dir)
 RECEIVING=$(shell $(PYTHON) -m delphi_utils get export_dir)
 FB_CC=$(shell $(PYTHON) -m delphi_utils get qualtrics.notify-bad-weights)
@@ -26,16 +27,15 @@ MAX_WEIGHTED=ls -1 $(WEIGHTS) | grep dap | tail -1 | sed 's/_.*//;s/-//g;'
 ANTIJOIN:="antijoin.cids.sorted.txt"
 CIDS_DEST:="fb-interchange/cmu_respondent_ids"
 INDIVID_DEST:="fb-public-results/"
+INDIVID_RACEETH_DEST:="protected-race-ethnicity-data/"
 RAW_DEST:="raw"
 
 # dry-run mode: generate all files, but do not post them anywhere, and disable all emails to outside parties.
DRY:=yes ifeq ($(DRY),yes) - EMAIL_SEND:=echo -e "Would send mail: echo -e \"Subject: $${SUBJECT}\n\n$${MSG}\" | sendmail $(DELPHI_SURVEY_EMAIL_USER)" SFTP_POST:=echo -e "Would run: sshpass -p $(DELPHI_SURVEY_SFTP_PASSWORD) sftp $(SFTP_OPTIONS) -b <(echo -e \"\$${BATCH}\") -P 2222 $(DELPHI_SURVEY_SFTP_USER)\n$${BATCH}" DRY_MESSAGE:="[DRY-RUN] " else - EMAIL_SEND:=echo -e "Subject: $${SUBJECT}\n\n$${MSG}" | sendmail $(DELPHI_SURVEY_EMAIL_USER) SFTP_POST:=sshpass -p $(DELPHI_SURVEY_SFTP_PASSWORD) sftp $(SFTP_OPTIONS) -b <(echo -e "$${BATCH}") -P 2222 $(DELPHI_SURVEY_SFTP_USER) endif @@ -54,11 +54,12 @@ tidy: receiving cp params.json tidy/ mv $(RECEIVING)/*.csv tidy/$(RECEIVING) mv $(INDIVIDUAL)/*.csv* tidy/$(INDIVIDUAL) + mv $(INDIVIDUAL_RACEETH)/*.csv* tidy/$(INDIVIDUAL_RACEETH) tar -czf scratch/tidy-`date +"%Y-%m-%d-%H%M%S"`.tgz --exclude='tidy-*.tgz' tidy mv scratch/*.tgz tidy/ clean: - rm -f $(RECEIVING)/*.csv $(INDIVIDUAL)/*.csv $(CIDS)/*.csv + rm -f $(RECEIVING)/*.csv $(INDIVIDUAL)/*.csv $(INDIVIDUAL_RACEETH)/*.csv $(CIDS)/*.csv clean-archive: rm -f $(ARCHIVE)/*.Rds @@ -100,6 +101,7 @@ params.json: $(TODAY) PAT=`grep fb-survey params.json | awk 'BEGIN{FS="\""}{print $$2}' | sed 's/ /_/g;s/^/-e /'`; \ $(PYTHON) -m delphi_utils set \ debug false \ + produce_individual_raceeth true \ end_date $(YESTERDAY) \ input <(find $(QUALTRICS) -maxdepth 1 -newer $< -type f -name "*.csv" | sort | grep $${PAT} | tr '\n' ',' | sed 's_$(QUALTRICS)/__g;s/,$$//' ) \ parallel true \ @@ -121,10 +123,8 @@ $(WEIGHTS): $(TODAY) MAX_WEIGHTED=`$(MAX_WEIGHTED)`; \ EXPECTED_MAX_WEIGHTED=`date --date='$(TODAY) -3 day' +'%Y%m%d'`; \ if [[ $$EXPECTED_MAX_WEIGHTED -gt $$MAX_WEIGHTED ]]; then \ - MSG="Expected most recent file: $$EXPECTED_MAX_WEIGHTED\nActual most recent file: $$MAX_WEIGHTED"; \ - echo "WARNING: $${MSG}" | tr "\n" ";" >> $(MESSAGES); \ - SUBJECT="[fb-cmu-cvid] Weights are stale"; \ - $(EMAIL_SEND) ;\ + MSG="Expected most recent file: $$EXPECTED_MAX_WEIGHTED; Actual most recent 
file: $$MAX_WEIGHTED"; \ + echo "WARNING: $${MSG}" >> $(MESSAGES); \ fi dev: delphiFacebook_1.0.tar.gz @@ -140,7 +140,7 @@ run-R: $(CIDS) grep "scheduled core" tmp ; \ [ "$$?" -eq 1 ] -pipeline: scratch init-qualtrics params.json $(WEIGHTS) run-R post-cids post-individual post-done tidy +pipeline: scratch init-qualtrics params.json $(WEIGHTS) run-R post-cids post-individual post-individual-raceeth post-done tidy grep $(TODAY) params.json [ -f $(YESTERDAY) ] && rm $(YESTERDAY) || true touch $@ @@ -197,6 +197,19 @@ post-individual: $(TODAY) $(INDIVIDUAL) echo "SUCCESS: $(DRY_MESSAGE)Posted `echo $${POST} | wc -w` microresponse files" >> $(MESSAGES) touch $@ +post-individual-raceeth: $(TODAY) $(INDIVIDUAL_RACEETH) + POST=`find $(INDIVIDUAL_RACEETH) -maxdepth 1 -newer $(TODAY) -name "cvid_responses_*.csv"`; \ + [ -n "$${POST}" ]; \ + BATCH=""; \ + for f in $${POST}; do \ + (grep token $$f; [[ $$? -eq 1 ]]); \ + gzip -f $$f; \ + BATCH="$${BATCH}put $${f}.gz ${INDIVID_RACEETH_DEST}\n"; \ + done; \ + $(SFTP_POST); \ + echo "SUCCESS: $(DRY_MESSAGE)Posted `echo $${POST} | wc -w` race-ethnicity microresponse files" >> $(MESSAGES) + touch $@ + post-done: post-cids touch $(YESTERDAY).done BATCH="put $(YESTERDAY).done $(CIDS_DEST)\n"; \ diff --git a/facebook/delphiFacebook/NAMESPACE b/facebook/delphiFacebook/NAMESPACE index 4495d210f..9613e1c02 100644 --- a/facebook/delphiFacebook/NAMESPACE +++ b/facebook/delphiFacebook/NAMESPACE @@ -55,6 +55,7 @@ export(write_contingency_tables) export(write_data_api) export(write_individual) import(data.table) +importFrom(Rcpp,evalCpp) importFrom(data.table,fread) importFrom(dplyr,"%>%") importFrom(dplyr,across) diff --git a/facebook/delphiFacebook/R/variables.R b/facebook/delphiFacebook/R/variables.R index 888e94059..8610317d2 100644 --- a/facebook/delphiFacebook/R/variables.R +++ b/facebook/delphiFacebook/R/variables.R @@ -30,6 +30,7 @@ split_options <- function(column) { #' @return a logical vector; for each list entry, whether selection 
is contained
 #' in the character vector.
 #'
+#' @importFrom Rcpp evalCpp
 #' @useDynLib delphiFacebook, .registration = TRUE
 is_selected <- function(vec, selection, use_cpp=TRUE) {
   select_fn <- ifelse(use_cpp, is_selected_cpp, is_selected_r)
diff --git a/facebook/micro/monthly-archive.sh b/facebook/micro/monthly-archive.sh
new file mode 100644
index 000000000..15a50890e
--- /dev/null
+++ b/facebook/micro/monthly-archive.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+#
+# Build and upload the monthly archive of individual response files.
+#
+# Usage: monthly-archive.sh [YYYY_MM]
+#   Without an argument the previous calendar month is processed.
+#
+# Steps: fetch cvid_responses_<YYYY_MM>*.gz from fb-public-results on the SFTP
+# server into the current directory, run ../monthly-files.R with the year, the
+# month number and "." as arguments, gzip its output and upload it to
+# fb-public-results as <YYYY-MM>.csv.gz.
+
+# Abort on the first failing step so that a failed download or R run is not
+# followed by the upload of an incomplete archive.
+set -e
+
+rm -f *.gz
+if [ -z "${1:-}" ]; then
+  # Anchor on the 15th: "-1 month" relative to the 29th-31st can overflow back
+  # into the current month.
+  MONTH=$(date --date "$(date +%Y-%m-15) -1 month" +"%Y_%m")
+else
+  MONTH=$1
+fi
+echo "${MONTH}"
+# Month number without the year prefix or a leading zero (e.g. 2021_05 -> 5).
+R_MONTH=${MONTH#*_}; R_MONTH=${R_MONTH#0}
+BATCH="cd fb-public-results\nls -1 cvid_responses_${MONTH}*.gz"
+# List the month's files and turn the listing into an sftp batch of "get"
+# commands. The awk step keeps only the last listed name of each run of
+# consecutive names whose "_"-separated fields 3-5 are equal.
+sftp -b <(echo -e "${BATCH}") -P 2222 fb-automation@ftp.delphi.cmu.edu 2>/dev/null | \
+  grep "^cvid" | \
+  awk -F_ 'BEGIN{print "cd fb-public-results"} {key=$3 $4 $5; if (key!=last && last!="") {print record} last=key; record=$0} END{print record}' | \
+  sed '/^cvid/ s/^/get /' >fetch.sftp
+sftp -b fetch.sftp -P 2222 fb-automation@ftp.delphi.cmu.edu
+OUT=${MONTH/_/-}
+Rscript ../monthly-files.R "${MONTH%_*}" "${R_MONTH}" . >"${OUT}.csv"
+gzip "${OUT}.csv"
+sftp -b <(echo -e "cd fb-public-results\nput ${OUT}.csv.gz") -P 2222 fb-automation@ftp.delphi.cmu.edu
diff --git a/facebook/params.json.production.template b/facebook/params.json.production.template
index 2756d80dc..79dc1aaab 100644
--- a/facebook/params.json.production.template
+++ b/facebook/params.json.production.template
@@ -7,6 +7,8 @@
   "end_date": "2020-08-28",
   "export_dir": "./receiving",
   "individual_dir": "./individual",
+  "individual_raceeth_dir": "./individual_raceeth",
+  "produce_individual_raceeth": false,
   "input": [
     "2020-08-29.2020-08-22.2020-08-29.Survey_of_COVID-Like_Illness_-_TODEPLOY_......_-_US_Expansion.csv",
     "2020-08-29.2020-08-22.2020-08-29.Survey_of_COVID-Like_Illness_-_TODEPLOY-_US_Expansion_-_With_Translations.csv"