Skip to content

Commit d76cd40

Browse files
committed
sircomplainslot needs more filtering
1 parent bf21d33 commit d76cd40

File tree

3 files changed

+51
-14
lines changed

3 files changed

+51
-14
lines changed

sir_complainsalot/delphi_sir_complainsalot/check_source.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
import pandas as pd
99
from delphi_epidata import Epidata
1010

11-
covidcast.covidcast._ASYNC_CALL = True # pylint: disable=protected-access
11+
from .date_utils import _date_to_api_string, _parse_datetimes
12+
1213

1314
@dataclass
1415
class Complaint:
@@ -34,6 +35,7 @@ def to_md(self):
3435
message=self.message, updated=self.last_updated.strftime("%Y-%m-%d"))
3536

3637

38+
3739
def check_source(data_source, meta, params, grace, logger): # pylint: disable=too-many-locals
3840
"""Iterate over all signals from a source and check for problems.
3941
@@ -74,30 +76,28 @@ def check_source(data_source, meta, params, grace, logger): # pylint: disable=t
7476
signal=row["signal"],
7577
start_day=start_date.strftime("%Y-%m-%d"),
7678
end_day=end_date.strftime("%Y-%m-%d"),
77-
geo_type=row["geo_type"])
79+
geo_type=row["geo_type"],
80+
time_type=row["time_type"])
7881

7982
response = Epidata.covidcast(
8083
data_source,
8184
row["signal"],
8285
time_type=row["time_type"],
8386
geo_type=row["geo_type"],
84-
time_values=Epidata.range(start_date.strftime("%Y%m%d"), end_date.strftime("%Y%m%d")),
87+
time_values=Epidata.range(_date_to_api_string(start_date), _date_to_api_string(end_date)),
8588
geo_value="*",
8689
)
8790

88-
if response["result"] != 1:
89-
# Something failed in the API and we did not get real metadata
90-
raise RuntimeError("Error when fetching signal data from the API", response["message"])
91-
92-
latest_data = pd.DataFrame.from_dict(response["epidata"])
93-
latest_data["issue"] = pd.to_datetime(latest_data["issue"], format="%Y%m%d")
94-
latest_data["time_value"] = pd.to_datetime(latest_data["time_value"], format="%Y%m%d")
95-
latest_data.drop("direction", axis=1, inplace=True)
96-
97-
current_lag_in_days = (now - datetime.strptime(str(row["max_time"]), "%Y%m%d")).days
91+
current_lag_in_days = (now - row["max_time"]).days
9892
lag_calculated_from_api = False
93+
latest_data = None
94+
95+
if response["result"] == 1:
96+
latest_data = pd.DataFrame.from_dict(response["epidata"])
97+
latest_data["issue"] = latest_data.apply(lambda x: _parse_datetimes(x.issue, x.time_type), axis=1)
98+
latest_data["time_value"] = latest_data.apply(lambda x: _parse_datetimes(x.time_value, x.time_type), axis=1)
99+
latest_data.drop("direction", axis=1, inplace=True)
99100

100-
if latest_data is not None:
101101
unique_dates = [pd.to_datetime(val).date()
102102
for val in latest_data["time_value"].unique()]
103103
current_lag_in_days = (datetime.now().date() - max(unique_dates)).days
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
from datetime import datetime
2+
from typing import Union
3+
4+
from epiweeks import Week
5+
import pandas as pd
6+
def _date_to_api_string(date: datetime.date, time_type: str = "day") -> str:  # pylint: disable=W0621
    """Convert a date object to the string form expected by the API.

    date: Date (or datetime) object to convert.
    time_type: Either "day" or "week". "day" formats the date as YYYYMMDD;
        "week" returns the ``cdcformat()`` string of the epiweek that
        contains ``date``.
    :returns: The formatted date string.
    :raises ValueError: If ``time_type`` is neither "day" nor "week".
    """
    if time_type == "day":
        return date.strftime("%Y%m%d")
    if time_type == "week":
        return Week.fromdate(date).cdcformat()
    # Fail loudly here: without this, an unknown time_type would reach a
    # return of a never-assigned local and raise an opaque UnboundLocalError.
    raise ValueError(
        f"Unsupported time_type {time_type!r}; expected 'day' or 'week'."
    )
13+
14+
def _parse_datetimes(date_int: Union[int, str], time_type: str, date_format: str = "%Y%m%d") -> Union[pd.Timestamp, None]:
    """Convert an API date or epiweek value into a pandas Timestamp.

    The value is first converted to a string. How it is parsed depends on
    ``time_type`` (not on the length of the value):

    - "day": the string is parsed with ``date_format``.
    - "week": the first four characters are read as the year and the last
      two as the week number of an epiweek; the result is the start date of
      that week.

    Any other ``time_type`` yields None.

    date_int: Int or string representation of the date or epiweek.
    time_type: Temporal resolution of the value, "day" or "week".
    date_format: Format string used to parse "day" values.
    :returns: Timestamp, or None when ``time_type`` is not recognised.
    """
    date_str = str(date_int)
    if time_type == "day":
        return pd.to_datetime(date_str, format=date_format)
    if time_type == "week":
        # Year comes from the leading four characters, week number from the
        # trailing two.
        epiwk = Week(int(date_str[:4]), int(date_str[-2:]))
        return pd.to_datetime(epiwk.startdate())
    return None

sir_complainsalot/delphi_sir_complainsalot/run.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from delphi_utils import SlackNotifier, get_structured_logger, read_params
1414

1515
from .check_source import check_source
16+
from .date_utils import _parse_datetimes
1617

1718

1819
def get_logger():
@@ -30,6 +31,7 @@ def run_module():
3031
params = read_params()
3132
Epidata.auth = ("epidata", params["api_credentials"])
3233
meta = pd.DataFrame.from_dict(Epidata.covidcast_meta().get("epidata", dict()))
34+
meta["max_time"] = meta.apply(lambda x: _parse_datetimes(x.max_time, x.time_type), axis=1)
3335
slack_notifier = None
3436
if "channel" in params and "slack_token" in params:
3537
slack_notifier = SlackNotifier(params["channel"], params["slack_token"])

0 commit comments

Comments
 (0)