Skip to content

Commit 9c79289

Browse files
committed
Review fixes
1 parent 540f445 commit 9c79289

File tree

5 files changed

+94
-63
lines changed

5 files changed

+94
-63
lines changed

epidatpy/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@
66

77

88
from ._constants import __version__
9-
from .request import CovidcastEpidata, Epidata, EpiRange
9+
from .request import CovidcastEpidata, EpiDataContext, EpiRange

epidatpy/_model.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ def __init__(
157157
meta: Optional[Sequence[EpidataFieldInfo]] = None,
158158
only_supports_classic: bool = False,
159159
use_cache: Optional[bool] = None,
160+
cache_max_age_days: Optional[int] = None,
160161
) -> None:
161162
self._base_url = base_url
162163
self._endpoint = endpoint
@@ -166,8 +167,17 @@ def __init__(
166167
self.meta_by_name = {k.name: k for k in self.meta}
167168
# Set the use_cache value from the constructor if present.
168169
# Otherwise check the USE_EPIDATPY_CACHE variable, accepting various "truthy" values.
169-
self.use_cache = use_cache \
170-
or (environ.get("USE_EPIDATPY_CACHE", "").lower() in ['true', 't', '1'])
170+
self.use_cache = use_cache if use_cache is not None \
171+
else (environ.get("USE_EPIDATPY_CACHE", "").lower() in ['true', 't', '1'])
172+
# Set cache_max_age_days from the constructor, fall back to environment variable.
173+
if cache_max_age_days:
174+
self.cache_max_age_days = cache_max_age_days
175+
else:
176+
env_days = environ.get("EPIDATPY_CACHE_MAX_AGE_DAYS", "7")
177+
if env_days.isdigit():
178+
self.cache_max_age_days = int(env_days)
179+
else: # handle string / negative / invalid environment variable
180+
self.cache_max_age_days = 7
171181

172182
def _verify_parameters(self) -> None:
173183
# hook for verifying parameters before sending

epidatpy/request.py

+41-21
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from appdirs import user_cache_dir
1414
from diskcache import Cache
1515
from pandas import CategoricalDtype, DataFrame, Series, to_datetime
16+
from os import environ
1617
from requests import Response, Session
1718
from requests.auth import HTTPBasicAuth
1819
from tenacity import retry, stop_after_attempt
@@ -37,6 +38,11 @@
3738
__all__ = ["Epidata", "EpiDataCall", "EpiDataContext", "EpiRange", "CovidcastEpidata"]
3839
CACHE_DIRECTORY = user_cache_dir(appname="epidatpy", appauthor="delphi")
3940

41+
if environ.get("USE_EPIDATPY_CACHE", None):
42+
print(f"diskcache is being used (unset USE_EPIDATPY_CACHE if not intended). "
43+
f"The cache directory is {CACHE_DIRECTORY}. "
44+
f"The TTL is set to {environ.get("EPIDATPY_CACHE_MAX_AGE_DAYS", "7")} days.")
45+
4046
@retry(reraise=True, stop=stop_after_attempt(2))
4147
def _request_with_retry(
4248
url: str,
@@ -75,9 +81,10 @@ def __init__(
7581
params: Mapping[str, Optional[EpiRangeParam]],
7682
meta: Optional[Sequence[EpidataFieldInfo]] = None,
7783
only_supports_classic: bool = False,
78-
use_cache = None,
84+
use_cache: Optional[bool] = None,
85+
cache_max_age_days: Optional[int] = None,
7986
) -> None:
80-
super().__init__(base_url, endpoint, params, meta, only_supports_classic, use_cache)
87+
super().__init__(base_url, endpoint, params, meta, only_supports_classic, use_cache, cache_max_age_days)
8188
self._session = session
8289

8390
def with_base_url(self, base_url: str) -> "EpiDataCall":
@@ -94,6 +101,12 @@ def _call(
94101
url, params = self.request_arguments(fields)
95102
return _request_with_retry(url, params, self._session, stream)
96103

104+
def _get_cache_key(self, method) -> str:
105+
cache_key = f"{self._endpoint} | {method}"
106+
if self._params:
107+
cache_key += f" | {str(dict(sorted(self._params.items())))}"
108+
return cache_key
109+
97110
def classic(
98111
self,
99112
fields: Optional[Sequence[str]] = None,
@@ -105,7 +118,7 @@ def classic(
105118
try:
106119
if self.use_cache:
107120
with Cache(CACHE_DIRECTORY) as cache:
108-
cache_key = str(self._endpoint) + str(self._params)
121+
cache_key = self._get_cache_key("classic")
109122
if cache_key in cache:
110123
return cache[cache_key]
111124
response = self._call(fields)
@@ -117,9 +130,8 @@ def classic(
117130
r["epidata"] = [self._parse_row(row, disable_date_parsing=disable_date_parsing) for row in epidata]
118131
if self.use_cache:
119132
with Cache(CACHE_DIRECTORY) as cache:
120-
cache_key = str(self._endpoint) + str(self._params)
121-
# Set TTL to 7 days (TODO: configurable?)
122-
cache.set(cache_key, r, expire=7*24*60*60)
133+
cache_key = self._get_cache_key("classic")
134+
cache.set(cache_key, r, expire=self.cache_max_age_days*24*60*60)
123135
return r
124136
except Exception as e: # pylint: disable=broad-except
125137
return {"result": 0, "message": f"error: {e}", "epidata": []}
@@ -146,7 +158,7 @@ def df(
146158

147159
if self.use_cache:
148160
with Cache(CACHE_DIRECTORY) as cache:
149-
cache_key = str(self._endpoint) + str(self._params)
161+
cache_key = self._get_cache_key("df")
150162
if cache_key in cache:
151163
return cache[cache_key]
152164

@@ -184,7 +196,7 @@ def df(
184196
df = df.astype(data_types)
185197
if not disable_date_parsing:
186198
for info in time_fields:
187-
if info.type == EpidataFieldType.epiweek:
199+
if info.type == EpidataFieldType.epiweek or info.type == EpidataFieldType.date_or_epiweek:
188200
continue
189201
try:
190202
df[info.name] = to_datetime(df[info.name], format="%Y-%m-%d")
@@ -198,9 +210,8 @@ def df(
198210

199211
if self.use_cache:
200212
with Cache(CACHE_DIRECTORY) as cache:
201-
cache_key = str(self._endpoint) + str(self._params)
202-
# Set TTL to 7 days (TODO: configurable?)
203-
cache.set(cache_key, df, expire=7*24*60*60)
213+
cache_key = self._get_cache_key("df")
214+
cache.set(cache_key, df, expire=self.cache_max_age_days*24*60*60)
204215

205216
return df
206217

@@ -213,10 +224,18 @@ class EpiDataContext(AEpiDataEndpoints[EpiDataCall]):
213224
_base_url: Final[str]
214225
_session: Final[Optional[Session]]
215226

216-
def __init__(self, base_url: str = BASE_URL, session: Optional[Session] = None) -> None:
227+
def __init__(
228+
self,
229+
base_url: str = BASE_URL,
230+
session: Optional[Session] = None,
231+
use_cache: Optional[bool] = None,
232+
cache_max_age_days: Optional[int] = None,
233+
) -> None:
217234
super().__init__()
218235
self._base_url = base_url
219236
self._session = session
237+
self.use_cache = use_cache
238+
self.cache_max_age_days = cache_max_age_days
220239

221240
def with_base_url(self, base_url: str) -> "EpiDataContext":
222241
return EpiDataContext(base_url, self._session)
@@ -230,15 +249,16 @@ def _create_call(
230249
params: Mapping[str, Optional[EpiRangeParam]],
231250
meta: Optional[Sequence[EpidataFieldInfo]] = None,
232251
only_supports_classic: bool = False,
233-
use_cache: bool = False,
234-
) -> EpiDataCall:
235-
return EpiDataCall(self._base_url, self._session, endpoint, params, meta, only_supports_classic, use_cache)
236-
237-
238-
Epidata = EpiDataContext()
239252

240-
241-
def CovidcastEpidata(base_url: str = BASE_URL, session: Optional[Session] = None) -> CovidcastDataSources[EpiDataCall]:
253+
) -> EpiDataCall:
254+
return EpiDataCall(self._base_url, self._session, endpoint, params, meta, only_supports_classic, self.use_cache, self.cache_max_age_days)
255+
256+
def CovidcastEpidata(
257+
base_url: str = BASE_URL,
258+
session: Optional[Session] = None,
259+
use_cache: Optional[bool] = None,
260+
cache_max_age_days: Optional[int] = None,
261+
) -> CovidcastDataSources[EpiDataCall]:
242262
url = add_endpoint_to_url(base_url, "covidcast/meta")
243263
meta_data_res = _request_with_retry(url, {}, session, False)
244264
meta_data_res.raise_for_status()
@@ -247,6 +267,6 @@ def CovidcastEpidata(base_url: str = BASE_URL, session: Optional[Session] = None
247267
def create_call(
248268
params: Mapping[str, Optional[EpiRangeParam]],
249269
) -> EpiDataCall:
250-
return EpiDataCall(base_url, session, "covidcast", params, define_covidcast_fields())
270+
return EpiDataCall(base_url, session, "covidcast", params, define_covidcast_fields(), use_cache=use_cache, cache_max_age_days=cache_max_age_days)
251271

252272
return CovidcastDataSources.create(meta_data, create_call)

smoke_test.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
from datetime import date
22

3-
from epidatpy import CovidcastEpidata, Epidata, EpiRange
3+
from epidatpy import CovidcastEpidata, EpiDataContext, EpiRange
44

55
print("Epidata Test")
6-
apicall = Epidata.pub_covidcast("fb-survey", "smoothed_cli", "nation", "day", "us", EpiRange(20210405, 20210410))
6+
epidata = EpiDataContext(use_cache=True, cache_max_age_days=1)
7+
apicall = epidata.pub_covidcast("fb-survey", "smoothed_cli", "nation", "day", "us", EpiRange(20210405, 20210410))
78

89
# Call info
910
print(apicall)
@@ -27,17 +28,17 @@
2728
print(df.iloc[0])
2829

2930

30-
StagingEpidata = Epidata.with_base_url("https://staging.delphi.cmu.edu/epidata/")
31+
staging_epidata = epidata.with_base_url("https://staging.delphi.cmu.edu/epidata/")
3132

32-
epicall = StagingEpidata.pub_covidcast(
33+
epicall = staging_epidata.pub_covidcast(
3334
"fb-survey", "smoothed_cli", "nation", "day", "*", EpiRange(date(2021, 4, 5), date(2021, 4, 10))
3435
)
3536
print(epicall._base_url)
3637

3738

3839
# Covidcast test
3940
print("Covidcast Test")
40-
epidata = CovidcastEpidata()
41+
epidata = CovidcastEpidata(use_cache=True, cache_max_age_days=1)
4142
print(epidata.source_names())
4243
print(epidata.signal_names("fb-survey"))
4344
epidata["fb-survey"].signal_df

0 commit comments

Comments
 (0)