Skip to content

Commit ac66aaa

Browse files
authored
Merge pull request #992 from cmu-delphi/nowcast-ground-truth
Nowcasting ground truth and refactor
2 parents 9aa5a10 + 25eb0dd commit ac66aaa

File tree

6 files changed

+147
-102
lines changed

6 files changed

+147
-102
lines changed
Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
"""Deconvolution functions."""
2+
3+
4+
def deconvolve_double_smooth_tf_cv(x, y, z):
    """Stand-in for the deconvolution routine; not implemented yet.

    The three arguments are accepted and ignored, and ``None`` is returned.
    """
    del x, y, z  # inputs are deliberately unused by this stub
    return None
Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,47 @@
1+
"""Functions to construct the ground truth for a given location."""
2+
from datetime import date
3+
from typing import List, Dict, Tuple
4+
5+
import numpy as np
6+
7+
from .deconvolution import deconvolve_double_smooth_tf_cv
8+
from ..constants import Default
9+
from ..data_containers import LocationSeries, SensorConfig
10+
from ..epidata import get_historical_sensor_data, get_indicator_data, export_to_csv
11+
12+
13+
def construct_truths(start_date: date,
                     end_date: date,
                     as_of: date,  # most likely today
                     truth: SensorConfig,
                     locations: List[LocationSeries],
                     export_dir: str = "") -> Dict[Tuple, LocationSeries]:
    """
    Construct the ground truth series for each location.

    Previously stored truth values are retrieved with `get_historical_sensor_data`. Days
    missing from the stored values are filled in by `fill_missing_days` from the raw
    indicator data, and the newly filled values are optionally exported to csv.

    Parameters
    ----------
    start_date
        First day of truth values to retrieve.
    end_date
        Last day of truth values to retrieve.
    as_of
        Date passed to `get_indicator_data` and `export_to_csv` as the as_of date.
    truth
        SensorConfig specifying the ground truth signal.
    locations
        List of LocationSeries, one per location to construct a truth series for.
    export_dir
        Directory passed to `export_to_csv`. Nothing is exported when it is empty.

    Returns
    -------
        Dictionary mapping (source, signal, geo_type, geo_value) tuples to the LocationSeries
        holding the truth values for that location.
    """
    raw_indicator = get_indicator_data([truth], locations, as_of)
    output = {}
    for location in locations:
        indicator_key = (truth.source, truth.signal, location.geo_type, location.geo_value)
        # Argument order is (sensor, location, start_date, end_date); the dates were
        # previously passed in reverse order.
        stored_vals, missing_dates = get_historical_sensor_data(
            truth, location, start_date, end_date
        )
        # NOTE(review): indexing raises KeyError if get_indicator_data returned no entry
        # for this location -- confirm that is the intended failure mode.
        filled_vals, export = fill_missing_days(
            stored_vals, raw_indicator[indicator_key], missing_dates
        )
        output[indicator_key] = filled_vals
        # Only newly computed values are exported, and only when a directory was given.
        if export_dir and export.values:
            export_to_csv(export, truth, as_of, export_dir)
    return output
29+
30+
31+
def fill_missing_days(stored_vals: LocationSeries,
                      indicator_data: LocationSeries,
                      missing_dates: List[date]):
    """
    Compute values for the missing days and add them to the stored values.

    For every missing day, the indicator data from its earliest date through that day is
    passed to `deconvolve_double_smooth_tf_cv`, and the last element of the result is used
    as the value for the day. Days for which building the input raises ValueError are
    skipped.

    Parameters
    ----------
    stored_vals
        LocationSeries of already stored values. It is updated in place.
    indicator_data
        LocationSeries of raw indicator values for the same location.
    missing_dates
        Days that have no stored value.

    Returns
    -------
        Tuple of (stored_vals including the newly added days, LocationSeries holding only
        the newly added days).
    """
    newly_filled = LocationSeries(stored_vals.geo_value, stored_vals.geo_type)
    for missing_day in missing_dates:
        try:
            first_day = min(indicator_data.dates)
            # "linear" is presumably the fill method for gaps in the range -- TODO confirm
            observed = indicator_data.get_data_range(first_day, missing_day, "linear")
            y = np.array(observed)
            x = np.arange(1, len(y) + 1)
        except ValueError:
            # No usable indicator data for this day; leave it unfilled.
            continue
        deconvolved = deconvolve_double_smooth_tf_cv(y, x, Default.DELAY_DISTRIBUTION)
        estimate = deconvolved[-1]
        stored_vals.add_data(missing_day, estimate)
        newly_filled.add_data(missing_day, estimate)  # holds only data to get exported
    return stored_vals, newly_filled

nowcast/delphi_nowcast/get_epidata.py renamed to nowcast/delphi_nowcast/epidata.py

Lines changed: 59 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
1-
"""Retrieve data from Epidata API."""
1+
"""Functions for interfacing with Epidata."""
2+
import os
23
from datetime import datetime, date
34
from itertools import product
45
from typing import Tuple, List, Dict
@@ -67,8 +68,7 @@ def get_indicator_data(sensors: List[SensorConfig],
6768

6869

6970
def get_historical_sensor_data(sensor: SensorConfig,
70-
geo_value: str,
71-
geo_type: str,
71+
location: LocationSeries,
7272
start_date: date,
7373
end_date: date) -> Tuple[LocationSeries, list]:
7474
"""
@@ -81,10 +81,8 @@ def get_historical_sensor_data(sensor: SensorConfig,
8181
----------
8282
sensor
8383
SensorConfig specifying which sensor to retrieve.
84-
geo_type
85-
Geo type to retrieve.
86-
geo_value
87-
Geo value to retrieve.
84+
location
85+
LocationSeries for the location to get.
8886
start_date
8987
First day to retrieve (inclusive).
9088
end_date
@@ -98,23 +96,65 @@ def get_historical_sensor_data(sensor: SensorConfig,
9896
data_source=sensor.source,
9997
signals=sensor.signal,
10098
time_type="day",
101-
geo_type=geo_type,
99+
geo_type=location.geo_type,
102100
time_values=Epidata.range(start_date.strftime("%Y%m%d"), end_date.strftime("%Y%m%d")),
103-
geo_value=geo_value,
101+
geo_value=location.geo_value,
104102
sensor_names=sensor.name,
105103
lag=sensor.lag)
106104
all_dates = [i.date() for i in date_range(start_date, end_date)]
107105
if response["result"] == 1:
108-
output = LocationSeries(
109-
geo_value=geo_value,
110-
geo_type=geo_type,
111-
data={datetime.strptime(str(i["time_value"]), "%Y%m%d").date(): i["value"]
112-
for i in response.get("epidata", []) if not isnan(i["value"])}
113-
)
114-
missing_dates = [i for i in all_dates if i not in output.dates]
115-
return output, missing_dates
106+
location.data = {datetime.strptime(str(i["time_value"]), "%Y%m%d").date(): i["value"]
107+
for i in response.get("epidata", []) if not isnan(i["value"])}
108+
missing_dates = [i for i in all_dates if i not in location.dates]
109+
return location, missing_dates
116110
if response["result"] == -2: # no results
117111
print("No historical results found")
118-
output = LocationSeries(geo_value=geo_value, geo_type=geo_type)
119-
return output, all_dates
112+
return location, all_dates
120113
raise Exception(f"Bad result from Epidata: {response['message']}")
114+
115+
116+
def export_to_csv(value: LocationSeries,
                  sensor: SensorConfig,
                  as_of_date: date,
                  receiving_dir: str
                  ) -> List[str]:
    """
    Save value to csv for upload to Epidata database.

    One file is written per date in `value`, located at
    `<receiving_dir>/issue_<as_of_date>/<sensor.source>/<date>_<geo_type>_<signal>.csv` with
    dates formatted as YYYYMMDD. If the file already exists a row is appended to it;
    otherwise the file is created with a header line first.

    Parameters
    ----------
    value
        LocationSeries containing data.
    sensor
        SensorConfig corresponding to value.
    as_of_date
        As_of date for the indicator data used to train the sensor.
    receiving_dir
        Export directory for Epidata acquisition.

    Returns
    -------
        Filepath of exported files
    """
    export_dir = os.path.join(
        receiving_dir,
        f"issue_{as_of_date.strftime('%Y%m%d')}",
        sensor.source
    )
    os.makedirs(export_dir, exist_ok=True)
    exported_files = []
    for time_value in value.dates:
        export_file = os.path.join(
            export_dir,
            f"{time_value.strftime('%Y%m%d')}_{value.geo_type}_{sensor.signal}.csv"
        )
        # Decide before opening: append mode creates the file, so the check must come first.
        needs_header = not os.path.exists(export_file)
        with open(export_file, "a") as f:
            if needs_header:
                f.write("sensor_name,geo_value,value\n")
            f.write(
                f"{sensor.name},{value.geo_value},{value.data.get(time_value, '')}\n")
        exported_files.append(export_file)
    return exported_files

nowcast/delphi_nowcast/sensorization/sensor.py

Lines changed: 3 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,9 @@
88

99
from .ar_model import compute_ar_sensor
1010
from .regression_model import compute_regression_sensor
11-
from ..get_epidata import get_indicator_data, get_historical_sensor_data
1211
from ..data_containers import LocationSeries, SensorConfig
1312
from ..constants import AR_ORDER, AR_LAMBDA, REG_INTERCEPT
13+
from ..epidata import get_indicator_data, get_historical_sensor_data, export_to_csv
1414

1515

1616
def compute_sensors(as_of_date: date,
@@ -65,7 +65,7 @@ def compute_sensors(as_of_date: date,
6565
if export_dir:
6666
for sensor, locations in output.items():
6767
for loc in locations:
68-
print(_export_to_csv(loc, sensor, as_of_date, export_dir))
68+
print(export_to_csv(loc, sensor, as_of_date, export_dir))
6969
return output
7070

7171

@@ -96,54 +96,8 @@ def historical_sensors(start_date: date,
9696
for location in ground_truths:
9797
for sensor in sensors:
9898
sensor_vals, missing_dates = get_historical_sensor_data(
99-
sensor, location.geo_value, location.geo_type, start_date, end_date
99+
sensor, location, start_date, end_date
100100
)
101101
if sensor_vals.data:
102102
output[sensor].append(sensor_vals)
103103
return output
104-
105-
106-
def _export_to_csv(value: LocationSeries,
107-
sensor: SensorConfig,
108-
as_of_date: date,
109-
receiving_dir: str
110-
) -> List[str]:
111-
"""
112-
Save value to csv for upload to Epidata database.
113-
Parameters
114-
----------
115-
value
116-
LocationSeries containing data.
117-
sensor
118-
SensorConfig corresponding to value.
119-
as_of_date
120-
As_of date for the indicator data used to train the sensor.
121-
receiving_dir
122-
Export directory for Epidata acquisition.
123-
Returns
124-
-------
125-
Filepath of exported files
126-
"""
127-
export_dir = os.path.join(
128-
receiving_dir,
129-
f"issue_{as_of_date.strftime('%Y%m%d')}",
130-
sensor.source
131-
)
132-
os.makedirs(export_dir, exist_ok=True)
133-
exported_files = []
134-
for time_value in value.dates:
135-
export_file = os.path.join(
136-
export_dir,
137-
f"{time_value.strftime('%Y%m%d')}_{value.geo_type}_{sensor.signal}.csv"
138-
)
139-
if os.path.exists(export_file):
140-
with open(export_file, "a") as f:
141-
f.write(
142-
f"{sensor.name},{value.geo_value},{value.data.get(time_value, '')}\n")
143-
else:
144-
with open(export_file, "a") as f:
145-
f.write("sensor_name,geo_value,value\n")
146-
f.write(
147-
f"{sensor.name},{value.geo_value},{value.data.get(time_value, '')}\n")
148-
exported_files.append(export_file)
149-
return exported_files

nowcast/tests/sensorization/test_sensor.py

Lines changed: 1 addition & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
import pandas as pd
99

1010
from delphi_nowcast.data_containers import LocationSeries, SensorConfig
11-
from delphi_nowcast.sensorization.sensor import compute_sensors, historical_sensors, _export_to_csv
11+
from delphi_nowcast.sensorization.sensor import compute_sensors, historical_sensors
1212

1313

1414
class TestComputeSensors:
@@ -72,25 +72,3 @@ def test_historical_sensors_no_data(self, mock_historical):
7272
test_ground_truth = [LocationSeries("ca", "state")]
7373
assert historical_sensors(
7474
None, None, test_sensors, test_ground_truth) == {}
75-
76-
77-
class TestExportToCSV:
78-
79-
def test__export_to_csv(self):
80-
"""Test export creates the right file and right contents."""
81-
test_sensor = SensorConfig(source="src",
82-
signal="sig",
83-
name="test",
84-
lag=4)
85-
test_value = LocationSeries("ca", "state", {date(2020, 1, 1): 1.5})
86-
with tempfile.TemporaryDirectory() as tmpdir:
87-
out_files = _export_to_csv(test_value, test_sensor, date(2020, 1, 5), receiving_dir=tmpdir)
88-
assert len(out_files) == 1
89-
out_file = out_files[0]
90-
assert os.path.isfile(out_file)
91-
assert out_file.endswith("issue_20200105/src/20200101_state_sig.csv")
92-
out_file_df = pd.read_csv(out_file)
93-
pd.testing.assert_frame_equal(out_file_df,
94-
pd.DataFrame({"sensor_name": ["test"],
95-
"geo_value": ["ca"],
96-
"value": [1.5]}))

nowcast/tests/test_get_epidata.py renamed to nowcast/tests/test_epidata.py

Lines changed: 31 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,13 @@
1-
from unittest.mock import patch
1+
import os
2+
import tempfile
23
from datetime import date
4+
from unittest.mock import patch
35

4-
import pytest
56
import numpy as np
6-
7-
from delphi_nowcast.get_epidata import get_indicator_data, get_historical_sensor_data, \
8-
EPIDATA_START_DATE
7+
import pandas as pd
8+
import pytest
99
from delphi_nowcast.data_containers import LocationSeries, SensorConfig
10+
from delphi_nowcast.epidata import export_to_csv, get_indicator_data, get_historical_sensor_data, EPIDATA_START_DATE
1011

1112

1213
class TestGetIndicatorData:
@@ -130,8 +131,7 @@ def test_results(self, mock_epidata):
130131
{"time_value": 20200102, "value": np.nan}]
131132
}
132133
test_output = get_historical_sensor_data(SensorConfig(None, None, None, None),
133-
None,
134-
None,
134+
LocationSeries(None, None),
135135
date(2020, 1, 1),
136136
date(2020, 1, 4))
137137

@@ -144,8 +144,7 @@ def test_results(self, mock_epidata):
144144
def test_no_results(self, mock_epidata):
145145
mock_epidata.return_value = {"result": -2}
146146
test_output = get_historical_sensor_data(SensorConfig(None, None, None, None),
147-
None,
148-
None,
147+
LocationSeries(None, None),
149148
date(2020, 1, 1),
150149
date(2020, 1, 4))
151150

@@ -157,7 +156,28 @@ def test_error(self, mock_epidata):
157156
mock_epidata.return_value = {"result": -3, "message": "test failure"}
158157
with pytest.raises(Exception, match="Bad result from Epidata: test failure"):
159158
get_historical_sensor_data(SensorConfig(None, None, None, None),
160-
None,
161-
None,
159+
LocationSeries(None, None),
162160
date(2020, 1, 1),
163161
date(2020, 1, 4))
162+
163+
164+
class TestExportToCSV:

    def test_export_to_csv(self):
        """Check that one csv is written at the expected path with the expected rows."""
        issue_date = date(2020, 1, 5)
        sensor_config = SensorConfig(source="src",
                                     signal="sig",
                                     name="test",
                                     lag=4)
        series = LocationSeries("ca", "state", {date(2020, 1, 1): 1.5})
        expected_df = pd.DataFrame({"sensor_name": ["test"],
                                    "geo_value": ["ca"],
                                    "value": [1.5]})
        # All checks run inside the context manager, before the directory is removed.
        with tempfile.TemporaryDirectory() as tmpdir:
            out_files = export_to_csv(series, sensor_config, issue_date, receiving_dir=tmpdir)
            assert len(out_files) == 1
            out_file = out_files[0]
            assert os.path.isfile(out_file)
            assert out_file.endswith("issue_20200105/src/20200101_state_sig.csv")
            written_df = pd.read_csv(out_file)
            pd.testing.assert_frame_equal(written_df, expected_df)

0 commit comments

Comments
 (0)