
Commit 54a6458

Jingjing Tang authored and committed
fix the bug in load chng data, use prefix as issue date instead of dropdate
1 parent 64b962f commit 54a6458

2 files changed: +32 −25 lines changed

changehc/delphi_changehc/load_data.py

Lines changed: 29 additions & 22 deletions
@@ -4,6 +4,7 @@
 Author: Aaron Rumack
 Created: 2020-10-14
 """
+from datetime import datetime
 # third party
 import pandas as pd
 
@@ -58,7 +59,7 @@ def load_chng_data(filepath, dropdate, base_geo,
     # restrict to start and end date
     data = data[
         (data[Config.DATE_COL] >= Config.FIRST_DATA_DATE) &
-        (data[Config.DATE_COL] < dropdate)
+        (data[Config.DATE_COL] <= dropdate)
     ]
 
     # counts between 1 and 3 are coded as "3 or less", we convert to 1
@@ -76,25 +77,27 @@ def load_chng_data(filepath, dropdate, base_geo,
     return data
 
 
-def load_combined_data(denom_filepath, covid_filepath, dropdate, base_geo,
+def load_combined_data(denom_filepath, covid_filepath, base_geo,
                        backfill_dir, geo, weekday, numtype, backfill_merge_day):
     """Load in denominator and covid data, and combine them.
 
     Args:
         denom_filepath: path to the aggregated denominator data
         covid_filepath: path to the aggregated covid data
-        dropdate: data drop date (datetime object)
         base_geo: base geographic unit before aggregation ('fips')
 
     Returns:
         combined multiindexed dataframe, index 0 is geo_base, index 1 is date
     """
     assert base_geo == "fips", "base unit must be 'fips'"
 
+    # Get issue_date from the filename
+    issue_date = datetime.strptime(covid_filepath.split("/")[-1][:8], "%Y%m%d")
+
     # load each data stream
-    denom_data = load_chng_data(denom_filepath, dropdate, base_geo,
+    denom_data = load_chng_data(denom_filepath, issue_date, base_geo,
                                 Config.DENOM_COLS, Config.DENOM_DTYPES, Config.DENOM_COL)
-    covid_data = load_chng_data(covid_filepath, dropdate, base_geo,
+    covid_data = load_chng_data(covid_filepath, issue_date, base_geo,
                                 Config.COVID_COLS, Config.COVID_DTYPES, Config.COVID_COL)
 
     # merge data
@@ -109,13 +112,13 @@ def load_combined_data(denom_filepath, covid_filepath, dropdate, base_geo,
 
     # Store for backfill
     merge_backfill_file(backfill_dir, numtype, geo, weekday, backfill_merge_day,
-                        dropdate, test_mode=False, check_nd=25)
-    store_backfill_file(data, dropdate, backfill_dir, numtype, geo, weekday)
+                        issue_date, test_mode=False, check_nd=25)
+    store_backfill_file(data, issue_date, backfill_dir, numtype, geo, weekday)
     return data
 
 
 def load_cli_data(denom_filepath, flu_filepath, mixed_filepath, flu_like_filepath,
-                  covid_like_filepath, dropdate, base_geo,
+                  covid_like_filepath, base_geo,
                   backfill_dir, geo, weekday, numtype, backfill_merge_day):
     """Load in denominator and covid-like data, and combine them.
 
@@ -125,24 +128,26 @@ def load_cli_data(denom_filepath, flu_filepath, mixed_filepath, flu_like_filepath,
         mixed_filepath: path to the aggregated mixed data
         flu_like_filepath: path to the aggregated flu-like data
         covid_like_filepath: path to the aggregated covid-like data
-        dropdate: data drop date (datetime object)
         base_geo: base geographic unit before aggregation ('fips')
 
     Returns:
         combined multiindexed dataframe, index 0 is geo_base, index 1 is date
     """
     assert base_geo == "fips", "base unit must be 'fips'"
 
+    # Get issue_date from the filename
+    issue_date = datetime.strptime(flu_filepath.split("/")[-1][:8], "%Y%m%d")
+
     # load each data stream
-    denom_data = load_chng_data(denom_filepath, dropdate, base_geo,
+    denom_data = load_chng_data(denom_filepath, issue_date, base_geo,
                                 Config.DENOM_COLS, Config.DENOM_DTYPES, Config.DENOM_COL)
-    flu_data = load_chng_data(flu_filepath, dropdate, base_geo,
+    flu_data = load_chng_data(flu_filepath, issue_date, base_geo,
                               Config.FLU_COLS, Config.FLU_DTYPES, Config.FLU_COL)
-    mixed_data = load_chng_data(mixed_filepath, dropdate, base_geo,
+    mixed_data = load_chng_data(mixed_filepath, issue_date, base_geo,
                                 Config.MIXED_COLS, Config.MIXED_DTYPES, Config.MIXED_COL)
-    flu_like_data = load_chng_data(flu_like_filepath, dropdate, base_geo,
+    flu_like_data = load_chng_data(flu_like_filepath, issue_date, base_geo,
                                    Config.FLU_LIKE_COLS, Config.FLU_LIKE_DTYPES, Config.FLU_LIKE_COL)
-    covid_like_data = load_chng_data(covid_like_filepath, dropdate, base_geo,
+    covid_like_data = load_chng_data(covid_like_filepath, issue_date, base_geo,
                                      Config.COVID_LIKE_COLS, Config.COVID_LIKE_DTYPES, Config.COVID_LIKE_COL)
 
     # merge data
@@ -162,30 +167,32 @@ def load_cli_data(denom_filepath, flu_filepath, mixed_filepath, flu_like_filepath,
 
     # Store for backfill
     merge_backfill_file(backfill_dir, numtype, geo, weekday, backfill_merge_day,
-                        dropdate, test_mode=False, check_nd=25)
-    store_backfill_file(data, dropdate, backfill_dir, numtype, geo, weekday)
+                        issue_date, test_mode=False, check_nd=25)
+    store_backfill_file(data, issue_date, backfill_dir, numtype, geo, weekday)
     return data
 
 
-def load_flu_data(denom_filepath, flu_filepath, dropdate, base_geo,
+def load_flu_data(denom_filepath, flu_filepath, base_geo,
                   backfill_dir, geo, weekday, numtype, backfill_merge_day):
     """Load in denominator and flu data, and combine them.
 
     Args:
         denom_filepath: path to the aggregated denominator data
        flu_filepath: path to the aggregated flu data
-        dropdate: data drop date (datetime object)
        base_geo: base geographic unit before aggregation ('fips')
 
     Returns:
        combined multiindexed dataframe, index 0 is geo_base, index 1 is date
    """
    assert base_geo == "fips", "base unit must be 'fips'"
 
+    # Get issue_date from the filename
+    issue_date = datetime.strptime(flu_filepath.split("/")[-1][:8], "%Y%m%d")
+
     # load each data stream
-    denom_data = load_chng_data(denom_filepath, dropdate, base_geo,
+    denom_data = load_chng_data(denom_filepath, issue_date, base_geo,
                                 Config.DENOM_COLS, Config.DENOM_DTYPES, Config.DENOM_COL)
-    flu_data = load_chng_data(flu_filepath, dropdate, base_geo,
+    flu_data = load_chng_data(flu_filepath, issue_date, base_geo,
                               Config.FLU_COLS, Config.FLU_DTYPES, Config.FLU_COL)
 
     # merge data
@@ -200,6 +207,6 @@ def load_flu_data(denom_filepath, flu_filepath, dropdate, base_geo,
 
     # Store for backfill
     merge_backfill_file(backfill_dir, numtype, geo, weekday, backfill_merge_day,
-                        dropdate, test_mode=False, check_nd=25)
-    store_backfill_file(data, dropdate, backfill_dir, numtype, geo, weekday)
+                        issue_date, test_mode=False, check_nd=25)
+    store_backfill_file(data, issue_date, backfill_dir, numtype, geo, weekday)
     return data
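
Taken together, the load_data.py changes mean the cutoff date is no longer the pipeline's drop date but the YYYYMMDD prefix of the numerator file's name, and the date filter now keeps rows dated on that day itself (<= rather than <). Below is a minimal sketch of that behavior, using a made-up file name and a toy frame in place of Config.DATE_COL and Config.FIRST_DATA_DATE; all names and dates in it are illustrative assumptions, not values from the repository.

from datetime import datetime
import pandas as pd

# Hypothetical drop file; only the 8-character YYYYMMDD prefix matters to the loader.
covid_filepath = "receiving/20201014_Counts_Products_Covid.dat.gz"
issue_date = datetime.strptime(covid_filepath.split("/")[-1][:8], "%Y%m%d")

# Toy stand-ins for the loaded CHC frame and the Config constants.
first_data_date = pd.Timestamp("2020-02-01")
data = pd.DataFrame({
    "date": pd.to_datetime(["2020-10-13", "2020-10-14", "2020-10-15"]),
    "count": [3, 5, 2],
})

# With the fix, rows dated on the issue date itself survive the filter (<=, not <).
data = data[(data["date"] >= first_data_date) & (data["date"] <= issue_date)]
print(data)  # keeps 2020-10-13 and 2020-10-14, drops 2020-10-15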

changehc/delphi_changehc/run.py

Lines changed: 3 additions & 3 deletions
@@ -183,16 +183,16 @@ def run_module(params: Dict[str, Dict[str, Any]]):
         )
         if numtype == "covid":
             data = load_combined_data(file_dict["denom"],
-                     file_dict["covid"],dropdate_dt,"fips",
+                     file_dict["covid"], "fips",
                      backfill_dir, geo, weekday, numtype,
                      backfill_merge_day)
         elif numtype == "cli":
             data = load_cli_data(file_dict["denom"],file_dict["flu"],file_dict["mixed"],
-                     file_dict["flu_like"],file_dict["covid_like"],dropdate_dt,"fips",
+                     file_dict["flu_like"],file_dict["covid_like"], "fips",
                      backfill_dir, geo, weekday, numtype, backfill_merge_day)
         elif numtype == "flu":
             data = load_flu_data(file_dict["denom"],file_dict["flu"],
-                     dropdate_dt,"fips",backfill_dir, geo, weekday,
+                     "fips",backfill_dir, geo, weekday,
                      numtype, backfill_merge_day)
         more_stats = su_inst.update_sensor(
             data,
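
In run.py the only change is that callers stop passing dropdate_dt: each loader now derives the date from the file it is handed, and for the combined, cli, and flu loaders only the numerator file's prefix is parsed and then applied to every stream, denominator included. A small hypothetical check of that convention follows; the file names and the prefix_date helper are assumptions for illustration, not code from the repository.

from datetime import datetime

# Hypothetical drop files for a single issue; both should share the YYYYMMDD prefix
# because load_combined_data() parses only the covid file's name.
file_dict = {
    "denom": "receiving/20201014_Counts_Products_Denom.dat.gz",
    "covid": "receiving/20201014_Counts_Products_Covid.dat.gz",
}

def prefix_date(path):
    """Return the date encoded in a drop file's YYYYMMDD name prefix."""
    return datetime.strptime(path.split("/")[-1][:8], "%Y%m%d")

# Sanity check (not in the source): mismatched drops would silently get the
# numerator's date applied to the denominator, so catching that early helps.
assert prefix_date(file_dict["denom"]) == prefix_date(file_dict["covid"])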
