Skip to content

Commit c7bf03e

Browse files
authored
Merge pull request #2015 from cmu-delphi/fix_new_date_for_google_symptoms
Fix new date for google symptoms
2 parents 4168db5 + d5be1bd commit c7bf03e

File tree

14 files changed: 42,234 additions and 34 deletions.

google_symptoms/delphi_google_symptoms/date_utils.py

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -73,24 +73,29 @@ def generate_num_export_days(params: Dict, logger) -> [int]:
7373
"_".join([metric, smoother, "search"]) for metric, smoother in product(COMBINED_METRIC, SMOOTHERS)
7474
)
7575

76-
# Fetch metadata to check how recent each signal is
77-
covidcast.use_api_key(params["indicator"]["api_credentials"])
78-
metadata = covidcast.metadata()
79-
# Filter to only those signals we currently want to produce for `google-symptoms`
80-
gs_metadata = metadata[(metadata.data_source == "google-symptoms") & (metadata.signal.isin(sensor_names))]
81-
8276
num_export_days = params["indicator"]["num_export_days"]
8377
custom_run = False if not params["common"].get("custom_run") else params["common"].get("custom_run", False)
8478

8579
if num_export_days is None and not custom_run:
80+
# Fetch metadata to check how recent each signal is
81+
covidcast.use_api_key(params["indicator"]["api_credentials"])
82+
metadata = covidcast.metadata()
83+
# Filter to only those signals we currently want to produce for `google-symptoms`
84+
gs_metadata = metadata[(metadata.data_source == "google-symptoms") & (metadata.signal.isin(sensor_names))]
85+
8686
if sensor_names.difference(set(gs_metadata.signal)):
8787
# If any signal not in metadata yet, we need to backfill its full history.
8888
logger.warning("Signals missing in the epidata; backfilling full history")
8989
num_export_days = (export_end_date - FULL_BKFILL_START_DATE).days + 1
9090
else:
9191
latest_date_diff = (datetime.today() - to_datetime(min(gs_metadata.max_time))).days + 1
92-
global_max_expected_lag = get_max_lag(params)
93-
expected_date_diff = params["validation"]["common"].get("span_length", 14) + global_max_expected_lag
92+
93+
expected_date_diff = params["validation"]["common"].get("span_length", 14)
94+
95+
# there's an expected lag of 4 days behind if running from today
96+
if export_end_date.date() == datetime.today().date():
97+
global_max_expected_lag = get_max_lag(params)
98+
expected_date_diff += global_max_expected_lag
9499

95100
if latest_date_diff > expected_date_diff:
96101
logger.info(f"Missing dates from: {to_datetime(min(gs_metadata.max_time)).date()}")

google_symptoms/delphi_google_symptoms/patch.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ def patch(params):
7171

7272
# Output dir setup
7373
current_issue_yyyymmdd = issue_date.strftime("%Y%m%d")
74-
current_issue_dir = f"""{params["patch"]["patch_dir"]}/issue_{current_issue_yyyymmdd}/google-symptom"""
74+
current_issue_dir = f"""{params["patch"]["patch_dir"]}/issue_{current_issue_yyyymmdd}/google-symptoms"""
7575
makedirs(f"{current_issue_dir}", exist_ok=True)
7676

7777
params["common"]["export_dir"] = f"""{current_issue_dir}"""

google_symptoms/delphi_google_symptoms/pull.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -248,10 +248,10 @@ def pull_gs_data(credentials, export_start_date, export_end_date, num_export_day
248248

249249
# For state level data
250250
dfs["state"] = pull_gs_data_one_geolevel("state", retrieve_dates)
251-
252251
# For county level data
253252
dfs["county"] = pull_gs_data_one_geolevel("county", retrieve_dates)
254253

254+
255255
# Add District of Columbia as county
256256
try:
257257
df_dc_county = dfs["state"][dfs["state"]["geo_id"] == "dc"].drop(
@@ -260,5 +260,4 @@ def pull_gs_data(credentials, export_start_date, export_end_date, num_export_day
260260
dfs["county"] = pd.concat([dfs["county"], df_dc_county])
261261
except KeyError:
262262
pass
263-
264263
return dfs

google_symptoms/delphi_google_symptoms/run.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,13 @@
55
when the module is run with `python -m delphi_google_symptoms`.
66
"""
77
import time
8-
from datetime import datetime, date
8+
from datetime import date, datetime
99
from itertools import product
1010

1111
import numpy as np
1212
from delphi_utils import create_export_csv, get_structured_logger
1313

14-
from .constants import COMBINED_METRIC, GEO_RESOLUTIONS, SMOOTHERS, SMOOTHERS_MAP
14+
from .constants import COMBINED_METRIC, FULL_BKFILL_START_DATE, GEO_RESOLUTIONS, SMOOTHERS, SMOOTHERS_MAP
1515
from .date_utils import generate_num_export_days
1616
from .geo import geo_map
1717
from .pull import pull_gs_data
@@ -47,7 +47,9 @@ def run_module(params, logger=None):
4747
log_exceptions=params["common"].get("log_exceptions", True),
4848
)
4949

50-
export_start_date = datetime.strptime(params["indicator"]["export_start_date"], "%Y-%m-%d")
50+
export_start_date = datetime.strptime(
51+
params["indicator"].get("export_start_date", datetime.strftime(FULL_BKFILL_START_DATE, "%Y-%m-%d")), "%Y-%m-%d"
52+
)
5153
# If end_date not specified, use current date.
5254
export_end_date = datetime.strptime(
5355
params["indicator"].get("export_end_date", datetime.strftime(date.today(), "%Y-%m-%d")), "%Y-%m-%d"

google_symptoms/setup.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
"pylint==2.8.3",
1515
"pytest-cov",
1616
"pytest",
17+
"pytest-freezegun~=0.4.2"
1718
]
1819

1920
setup(

google_symptoms/tests/conftest.py

Lines changed: 24 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
# -*- coding: utf-8 -*-
22
import logging
33
from pathlib import Path
4+
import re
45

56
import copy
67
import pytest
@@ -27,7 +28,7 @@
2728
# end as open_covid_region_code,
2829
# *
2930
# from `bigquery-public-data.covid19_symptom_search.states_daily_2020` # States by day
30-
# where timestamp(date) between timestamp("2020-07-26") and timestamp("2020-08-11")
31+
# where timestamp(date) between timestamp("2020-07-15") and timestamp("2020-08-22")
3132

3233
# County data is created by running the following query in the BigQuery
3334
# browser console:
@@ -38,17 +39,16 @@
3839
# end as open_covid_region_code,
3940
# *
4041
# from `bigquery-public-data.covid19_symptom_search.counties_daily_2020` # Counties by day; includes state and county name, + FIPS code
41-
# where timestamp(date) between timestamp("2020-07-26") and timestamp("2020-08-11")
42+
# where timestamp(date) between timestamp("2020-07-15") and timestamp("2020-08-22")
43+
4244

4345
good_input = {
44-
"state": f"{TEST_DIR}/test_data/small_states_daily.csv",
45-
"county": f"{TEST_DIR}/test_data/small_counties_daily.csv"
46+
"state": f"{TEST_DIR}/test_data/small_states_2020_07_15_2020_08_22.csv",
47+
"county": f"{TEST_DIR}/test_data/small_counties_2020_07_15_2020_08_22.csv"
4648
}
4749

4850
patch_input = {
4951
"state": f"{TEST_DIR}/test_data/state_2024-05-16_2024-07-18.csv",
50-
"county": f"{TEST_DIR}/test_data/county_2024-05-16_2024-07-18.csv"
51-
5252
}
5353

5454
symptom_names = ["symptom_" +
@@ -79,9 +79,9 @@ def params():
7979
"log_filename": f"{TEST_DIR}/test.log",
8080
},
8181
"indicator": {
82-
"export_start_date": "2020-02-20",
8382
"bigquery_credentials": {},
8483
"num_export_days": 14,
84+
"custom_run": False,
8585
"static_file_dir": "../static",
8686
"api_credentials": "fakesecret"
8787
},
@@ -124,7 +124,22 @@ def run_as_module(params):
124124

125125
with mock.patch("delphi_google_symptoms.pull.initialize_credentials",
126126
return_value=None), \
127-
mock.patch("pandas_gbq.read_gbq", side_effect=[state_data, county_data]), \
127+
mock.patch("pandas_gbq.read_gbq") as mock_read_gbq, \
128128
mock.patch("delphi_google_symptoms.pull.initialize_credentials", return_value=None), \
129129
mock.patch("delphi_google_symptoms.date_utils.covidcast.metadata", return_value=covidcast_metadata):
130-
delphi_google_symptoms.run.run_module(params)
130+
def side_effect(*args, **kwargs):
131+
if "symptom_search_sub_region_1_daily" in args[0]:
132+
df = state_data
133+
pattern = re.compile(r'\d{4}-\d{2}-\d{2}')
134+
start_date, end_date = re.findall(pattern, args[0])
135+
return df[(df["date"] >= start_date) & (df["date"] <= end_date)]
136+
elif "symptom_search_sub_region_2_daily" in args[0]:
137+
df = county_data
138+
pattern = re.compile(r'\d{4}-\d{2}-\d{2}')
139+
start_date, end_date = re.findall(pattern, args[0])
140+
return df[(df["date"] >= start_date) & (df["date"] <= end_date)]
141+
else:
142+
return pd.DataFrame()
143+
144+
mock_read_gbq.side_effect = side_effect
145+
delphi_google_symptoms.run.run_module(params)

0 commit comments

Comments
 (0)