Skip to content

Commit e919fda

Browse files
committed
added/updated tests
1 parent e07c697 commit e919fda

9 files changed

+39
-39
lines changed

doctor_visits/delphi_doctor_visits/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,12 @@ class Config:
1919
# data columns
2020
CLI_COLS = ["Covid_like", "Flu_like", "Mixed"]
2121
FLU1_COL = ["Flu1"]
22-
COUNT_COLS = ["Denominator"] + FLU1_COL + CLI_COLS
22+
COUNT_COLS = CLI_COLS + FLU1_COL + ["Denominator"]
2323
DATE_COL = "ServiceDate"
2424
GEO_COL = "PatCountyFIPS"
2525
AGE_COL = "PatAgeGroup"
2626
HRR_COLS = ["Pat HRR Name", "Pat HRR ID"]
27-
ID_COLS = [DATE_COL] + [GEO_COL] + HRR_COLS + [AGE_COL]
27+
ID_COLS = [DATE_COL] + [GEO_COL] + [AGE_COL]
2828
FILT_COLS = ID_COLS + COUNT_COLS
2929
# as of 2020-05-11, input file expected to have 10 columns
3030
# id cols: ServiceDate, PatCountyFIPS, PatAgeGroup, Pat HRR ID/Pat HRR Name

doctor_visits/delphi_doctor_visits/process_data.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,9 @@ def csv_to_df(filepath: str, startdate: datetime, enddate: datetime, dropdate: d
6565
6666
-------
6767
'''
68-
filename = Path(filepath).name
69-
logger.info(f"Processing {filename}")
68+
filepath = Path(filepath)
69+
logger.info(f"Processing {filepath}")
70+
7071
ddata = dd.read_csv(
7172
filepath,
7273
compression="gzip",
@@ -89,5 +90,11 @@ def csv_to_df(filepath: str, startdate: datetime, enddate: datetime, dropdate: d
8990
date_filter = ((ddata[Config.DATE_COL] >= Config.FIRST_DATA_DATE) & (ddata[Config.DATE_COL] < dropdate))
9091

9192
df = ddata[date_filter].compute()
92-
logger.info(f"Done processing {filename}")
93+
94+
# aggregate age groups (so data is unique by service date and FIPS)
95+
df = df.groupby([Config.DATE_COL, Config.GEO_COL]).sum(numeric_only=True).reset_index()
96+
assert np.sum(df.duplicated()) == 0, "Duplicates after age group aggregation"
97+
assert (df[Config.COUNT_COLS] >= 0).all().all(), "Counts must be nonnegative"
98+
99+
logger.info(f"Done processing {filepath}")
93100
return df

doctor_visits/delphi_doctor_visits/update_sensor.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@
1515
# third party
1616
import numpy as np
1717
import pandas as pd
18-
import dask.dataframe as dd
19-
2018

2119
# first party
2220
from delphi_utils import Weekday
@@ -42,10 +40,6 @@ def update_sensor(
4240
se: boolean to write out standard errors, if true, use an obfuscated name
4341
logger: the structured logger
4442
"""
45-
# aggregate age groups (so data is unique by service date and FIPS)
46-
data = data.groupby([Config.DATE_COL, Config.GEO_COL]).sum(numeric_only=True).reset_index()
47-
assert np.sum(data.duplicated()) == 0, "Duplicates after age group aggregation"
48-
assert (data[Config.COUNT_COLS] >= 0).all().all(), "Counts must be nonnegative"
4943

5044
drange = lambda s, e: np.array([s + timedelta(days=x) for x in range((e - s).days)])
5145
fit_dates = drange(Config.FIRST_DATA_DATE, dropdate)

doctor_visits/tests/teset_process_data.py

Lines changed: 0 additions & 24 deletions
This file was deleted.
Binary file not shown.
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
"""Tests for process_data.py."""
2+
from datetime import datetime
3+
import logging
4+
import pandas as pd
5+
6+
from delphi_doctor_visits.process_data import csv_to_df
7+
8+
TEST_LOGGER = logging.getLogger()
9+
10+
class TestProcessData:
11+
def test_csv_to_df(self):
12+
actual = csv_to_df(
13+
filepath="./test_data/SYNEDI_AGG_OUTPATIENT_07022020_1455CDT.csv.gz",
14+
startdate=datetime(2020, 2, 4),
15+
enddate=datetime(2020, 2, 5),
16+
dropdate=datetime(2020, 2,6),
17+
logger=TEST_LOGGER,
18+
)
19+
20+
comparison = pd.read_pickle("./comparison/process_data/main_after_date_SYNEDI_AGG_OUTPATIENT_07022020_1455CDT.pkl")
21+
pd.testing.assert_frame_equal(actual.reset_index(drop=True), comparison)

doctor_visits/tests/test_update_sensor.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Tests for update_sensor.py."""
2+
from datetime import datetime
23
import logging
34
import pandas as pd
45

@@ -8,11 +9,12 @@
89

910
class TestUpdateSensor:
1011
def test_update_sensor(self):
12+
df = pd.read_pickle("./test_data/SYNEDI_AGG_OUTPATIENT_07022020_1455CDT.pkl")
1113
actual = update_sensor(
12-
filepath="./test_data/SYNEDI_AGG_OUTPATIENT_07022020_1455CDT.csv.gz",
13-
startdate="2020-02-04",
14-
enddate="2020-02-05",
15-
dropdate="2020-02-06",
14+
data=df,
15+
startdate=datetime(2020, 2, 4),
16+
enddate=datetime(2020, 2, 5),
17+
dropdate=datetime(2020, 2,6),
1618
geo="state",
1719
parallel=False,
1820
weekday=False,

0 commit comments

Comments
 (0)