Skip to content

Fix new date for google symptoms #2015

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions google_symptoms/delphi_google_symptoms/date_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,24 +73,29 @@ def generate_num_export_days(params: Dict, logger) -> [int]:
"_".join([metric, smoother, "search"]) for metric, smoother in product(COMBINED_METRIC, SMOOTHERS)
)

# Fetch metadata to check how recent each signal is
covidcast.use_api_key(params["indicator"]["api_credentials"])
metadata = covidcast.metadata()
# Filter to only those signals we currently want to produce for `google-symptoms`
gs_metadata = metadata[(metadata.data_source == "google-symptoms") & (metadata.signal.isin(sensor_names))]

num_export_days = params["indicator"]["num_export_days"]
custom_run = False if not params["common"].get("custom_run") else params["common"].get("custom_run", False)

if num_export_days is None and not custom_run:
# Fetch metadata to check how recent each signal is
covidcast.use_api_key(params["indicator"]["api_credentials"])
metadata = covidcast.metadata()
# Filter to only those signals we currently want to produce for `google-symptoms`
gs_metadata = metadata[(metadata.data_source == "google-symptoms") & (metadata.signal.isin(sensor_names))]

if sensor_names.difference(set(gs_metadata.signal)):
# If any signal not in metadata yet, we need to backfill its full history.
logger.warning("Signals missing in the epidata; backfilling full history")
num_export_days = (export_end_date - FULL_BKFILL_START_DATE).days + 1
else:
latest_date_diff = (datetime.today() - to_datetime(min(gs_metadata.max_time))).days + 1
global_max_expected_lag = get_max_lag(params)
expected_date_diff = params["validation"]["common"].get("span_length", 14) + global_max_expected_lag

expected_date_diff = params["validation"]["common"].get("span_length", 14)

# When the export ends today, the source data is expected to lag about 4 days behind, so allow for that lag
if export_end_date.date() == datetime.today().date():
global_max_expected_lag = get_max_lag(params)
expected_date_diff += global_max_expected_lag

if latest_date_diff > expected_date_diff:
logger.info(f"Missing dates from: {to_datetime(min(gs_metadata.max_time)).date()}")
Expand Down
2 changes: 1 addition & 1 deletion google_symptoms/delphi_google_symptoms/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def patch(params):

# Output dir setup
current_issue_yyyymmdd = issue_date.strftime("%Y%m%d")
current_issue_dir = f"""{params["patch"]["patch_dir"]}/issue_{current_issue_yyyymmdd}/google-symptom"""
current_issue_dir = f"""{params["patch"]["patch_dir"]}/issue_{current_issue_yyyymmdd}/google-symptoms"""
makedirs(f"{current_issue_dir}", exist_ok=True)

params["common"]["export_dir"] = f"""{current_issue_dir}"""
Expand Down
3 changes: 1 addition & 2 deletions google_symptoms/delphi_google_symptoms/pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,10 +248,10 @@ def pull_gs_data(credentials, export_start_date, export_end_date, num_export_day

# For state level data
dfs["state"] = pull_gs_data_one_geolevel("state", retrieve_dates)

# For county level data
dfs["county"] = pull_gs_data_one_geolevel("county", retrieve_dates)


# Add District of Columbia as county
try:
df_dc_county = dfs["state"][dfs["state"]["geo_id"] == "dc"].drop(
Expand All @@ -260,5 +260,4 @@ def pull_gs_data(credentials, export_start_date, export_end_date, num_export_day
dfs["county"] = pd.concat([dfs["county"], df_dc_county])
except KeyError:
pass

return dfs
8 changes: 5 additions & 3 deletions google_symptoms/delphi_google_symptoms/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
when the module is run with `python -m delphi_google_symptoms`.
"""
import time
from datetime import datetime, date
from datetime import date, datetime
from itertools import product

import numpy as np
from delphi_utils import create_export_csv, get_structured_logger

from .constants import COMBINED_METRIC, GEO_RESOLUTIONS, SMOOTHERS, SMOOTHERS_MAP
from .constants import COMBINED_METRIC, FULL_BKFILL_START_DATE, GEO_RESOLUTIONS, SMOOTHERS, SMOOTHERS_MAP
from .date_utils import generate_num_export_days
from .geo import geo_map
from .pull import pull_gs_data
Expand Down Expand Up @@ -47,7 +47,9 @@ def run_module(params, logger=None):
log_exceptions=params["common"].get("log_exceptions", True),
)

export_start_date = datetime.strptime(params["indicator"]["export_start_date"], "%Y-%m-%d")
export_start_date = datetime.strptime(
params["indicator"].get("export_start_date", datetime.strftime(FULL_BKFILL_START_DATE, "%Y-%m-%d")), "%Y-%m-%d"
)
# If end_date not specified, use current date.
export_end_date = datetime.strptime(
params["indicator"].get("export_end_date", datetime.strftime(date.today(), "%Y-%m-%d")), "%Y-%m-%d"
Expand Down
1 change: 1 addition & 0 deletions google_symptoms/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"pylint==2.8.3",
"pytest-cov",
"pytest",
"pytest-freezegun~=0.4.2"
]

setup(
Expand Down
39 changes: 30 additions & 9 deletions google_symptoms/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# -*- coding: utf-8 -*-
import logging
from pathlib import Path
import re

import copy
import pytest
import mock
import pandas as pd
from freezegun import freeze_time

from os import listdir, remove, makedirs
from os.path import join, exists
Expand All @@ -27,7 +29,7 @@
# end as open_covid_region_code,
# *
# from `bigquery-public-data.covid19_symptom_search.states_daily_2020` # States by day
# where timestamp(date) between timestamp("2020-07-26") and timestamp("2020-08-11")
# where timestamp(date) between timestamp("2020-07-15") and timestamp("2020-08-22")

# County data is created by running the following query in the BigQuery
# browser console:
Expand All @@ -38,17 +40,21 @@
# end as open_covid_region_code,
# *
# from `bigquery-public-data.covid19_symptom_search.counties_daily_2020` # Counties by day; includes state and county name, + FIPS code
# where timestamp(date) between timestamp("2020-07-26") and timestamp("2020-08-11")
# where timestamp(date) between timestamp("2020-07-15") and timestamp("2020-08-22")

# good_input = {
# "state": f"{TEST_DIR}/test_data/small_states_daily.csv",
# "county": f"{TEST_DIR}/test_data/small_counties_daily.csv"
# }


good_input = {
"state": f"{TEST_DIR}/test_data/small_states_daily.csv",
"county": f"{TEST_DIR}/test_data/small_counties_daily.csv"
"state": f"{TEST_DIR}/test_data/small_states_2020_07_15_2020_08_22.csv",
"county": f"{TEST_DIR}/test_data/small_counties_2020_07_15_2020_08_22.csv"
}

# Paths to the state- and county-level fixture CSVs under test_data; the file
# names indicate a 2024-05-16 to 2024-07-18 window.
# NOTE(review): presumably consumed by the patch tests — confirm against callers.
patch_input = {
"state": f"{TEST_DIR}/test_data/state_2024-05-16_2024-07-18.csv",
"county": f"{TEST_DIR}/test_data/county_2024-05-16_2024-07-18.csv"

}

symptom_names = ["symptom_" +
Expand Down Expand Up @@ -79,9 +85,9 @@ def params():
"log_filename": f"{TEST_DIR}/test.log",
},
"indicator": {
"export_start_date": "2020-02-20",
"bigquery_credentials": {},
"num_export_days": 14,
"custom_run": False,
"static_file_dir": "../static",
"api_credentials": "fakesecret"
},
Expand Down Expand Up @@ -124,7 +130,22 @@ def run_as_module(params):

with mock.patch("delphi_google_symptoms.pull.initialize_credentials",
return_value=None), \
mock.patch("pandas_gbq.read_gbq", side_effect=[state_data, county_data]), \
mock.patch("pandas_gbq.read_gbq") as mock_read_gbq, \
mock.patch("delphi_google_symptoms.pull.initialize_credentials", return_value=None), \
mock.patch("delphi_google_symptoms.date_utils.covidcast.metadata", return_value=covidcast_metadata):
delphi_google_symptoms.run.run_module(params)
def side_effect(*args, **kwargs):
    """Serve fixture rows in place of a real ``pandas_gbq.read_gbq`` call.

    The first positional argument is treated as the query text. The table
    name found in it selects the fixture frame (``state_data`` for
    ``symptom_search_sub_region_1_daily``, ``county_data`` for
    ``symptom_search_sub_region_2_daily``), and the two ``YYYY-MM-DD`` dates
    found in it bound the rows returned (both ends inclusive).

    Returns:
        The matching slice of the selected fixture frame, or an empty
        ``DataFrame`` when the query names neither table.

    Raises:
        IndexError: if no positional argument is supplied.
        ValueError: if a recognised query does not contain exactly two dates.
    """
    query = args[0]

    # Pick the fixture by table name; the state table is checked first.
    if "symptom_search_sub_region_1_daily" in query:
        frame = state_data
    elif "symptom_search_sub_region_2_daily" in query:
        frame = county_data
    else:
        return pd.DataFrame()

    # Both tables share the same date-window filter, so it lives in one place.
    start_date, end_date = re.findall(r'\d{4}-\d{2}-\d{2}', query)
    in_window = (frame["date"] >= start_date) & (frame["date"] <= end_date)
    return frame[in_window]

mock_read_gbq.side_effect = side_effect
delphi_google_symptoms.run.run_module(params)
Loading
Loading