Author: Aaron Rumack
Created: 2020-10-14
"""
+from datetime import datetime
# third party
import pandas as pd

@@ -58,7 +59,7 @@ def load_chng_data(filepath, dropdate, base_geo,
    # restrict to start and end date
    data = data[
        (data[Config.DATE_COL] >= Config.FIRST_DATA_DATE) &
-        (data[Config.DATE_COL] < dropdate)
+        (data[Config.DATE_COL] <= dropdate)
    ]

    # counts between 1 and 3 are coded as "3 or less", we convert to 1
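A minimal standalone sketch of the inclusive upper bound this hunk switches to, using a hypothetical column name and dates (the real code uses Config.DATE_COL, Config.FIRST_DATA_DATE, and the dropdate argument):

from datetime import datetime

import pandas as pd

# hypothetical frame standing in for the loaded claims data
data = pd.DataFrame({"timestamp": pd.to_datetime(["2020-10-12", "2020-10-13", "2020-10-14"])})
first_data_date = datetime(2020, 2, 1)   # stand-in for Config.FIRST_DATA_DATE
dropdate = datetime(2020, 10, 14)        # drop date, now an inclusive upper bound

# with `<=`, claims dated exactly on the drop date are kept;
# with the previous `<`, the 2020-10-14 row would have been filtered out
data = data[
    (data["timestamp"] >= first_data_date) &
    (data["timestamp"] <= dropdate)
]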
@@ -76,25 +77,30 @@ def load_chng_data(filepath, dropdate, base_geo,
    return data


-def load_combined_data(denom_filepath, covid_filepath, dropdate, base_geo,
+def load_combined_data(denom_filepath, covid_filepath, base_geo,
                       backfill_dir, geo, weekday, numtype, backfill_merge_day):
    """Load in denominator and covid data, and combine them.

    Args:
        denom_filepath: path to the aggregated denominator data
        covid_filepath: path to the aggregated covid data
-        dropdate: data drop date (datetime object)
        base_geo: base geographic unit before aggregation ('fips')

    Returns:
        combined multiindexed dataframe, index 0 is geo_base, index 1 is date
    """
    assert base_geo == "fips", "base unit must be 'fips'"

+    # Get issue_date from the filename
+    issue_date = datetime.strptime(covid_filepath.split("/")[-1][:8], "%Y%m%d")
+    assert (
+        issue_date == datetime.strptime(denom_filepath.split("/")[-1][:8], "%Y%m%d")
+    ), "The aggregated files used for Covid Claims and Total Claims should have the same drop date."
+
    # load each data stream
-    denom_data = load_chng_data(denom_filepath, dropdate, base_geo,
+    denom_data = load_chng_data(denom_filepath, issue_date, base_geo,
                                Config.DENOM_COLS, Config.DENOM_DTYPES, Config.DENOM_COL)
-    covid_data = load_chng_data(covid_filepath, dropdate, base_geo,
+    covid_data = load_chng_data(covid_filepath, issue_date, base_geo,
                                Config.COVID_COLS, Config.COVID_DTYPES, Config.COVID_COL)

    # merge data
@@ -109,13 +115,13 @@ def load_combined_data(denom_filepath, covid_filepath, dropdate, base_geo,

    # Store for backfill
    merge_backfill_file(backfill_dir, numtype, geo, weekday, backfill_merge_day,
-                        dropdate, test_mode=False, check_nd=25)
-    store_backfill_file(data, dropdate, backfill_dir, numtype, geo, weekday)
+                        issue_date, test_mode=False, check_nd=25)
+    store_backfill_file(data, issue_date, backfill_dir, numtype, geo, weekday)
    return data


def load_cli_data(denom_filepath, flu_filepath, mixed_filepath, flu_like_filepath,
-                  covid_like_filepath, dropdate, base_geo,
+                  covid_like_filepath, base_geo,
                  backfill_dir, geo, weekday, numtype, backfill_merge_day):
    """Load in denominator and covid-like data, and combine them.

@@ -125,24 +131,29 @@ def load_cli_data(denom_filepath, flu_filepath, mixed_filepath, flu_like_filepat
        mixed_filepath: path to the aggregated mixed data
        flu_like_filepath: path to the aggregated flu-like data
        covid_like_filepath: path to the aggregated covid-like data
-        dropdate: data drop date (datetime object)
        base_geo: base geographic unit before aggregation ('fips')

    Returns:
        combined multiindexed dataframe, index 0 is geo_base, index 1 is date
    """
    assert base_geo == "fips", "base unit must be 'fips'"

+    # Get issue_date from the filename
+    issue_date = datetime.strptime(flu_filepath.split("/")[-1][:8], "%Y%m%d")
+    assert (
+        issue_date == datetime.strptime(denom_filepath.split("/")[-1][:8], "%Y%m%d")
+    ), "The aggregated files used for CLI Claims and Total Claims should have the same drop date."
+
    # load each data stream
-    denom_data = load_chng_data(denom_filepath, dropdate, base_geo,
+    denom_data = load_chng_data(denom_filepath, issue_date, base_geo,
                                Config.DENOM_COLS, Config.DENOM_DTYPES, Config.DENOM_COL)
-    flu_data = load_chng_data(flu_filepath, dropdate, base_geo,
+    flu_data = load_chng_data(flu_filepath, issue_date, base_geo,
                              Config.FLU_COLS, Config.FLU_DTYPES, Config.FLU_COL)
-    mixed_data = load_chng_data(mixed_filepath, dropdate, base_geo,
+    mixed_data = load_chng_data(mixed_filepath, issue_date, base_geo,
                                Config.MIXED_COLS, Config.MIXED_DTYPES, Config.MIXED_COL)
-    flu_like_data = load_chng_data(flu_like_filepath, dropdate, base_geo,
+    flu_like_data = load_chng_data(flu_like_filepath, issue_date, base_geo,
                                   Config.FLU_LIKE_COLS, Config.FLU_LIKE_DTYPES, Config.FLU_LIKE_COL)
-    covid_like_data = load_chng_data(covid_like_filepath, dropdate, base_geo,
+    covid_like_data = load_chng_data(covid_like_filepath, issue_date, base_geo,
                                     Config.COVID_LIKE_COLS, Config.COVID_LIKE_DTYPES, Config.COVID_LIKE_COL)

    # merge data
@@ -162,30 +173,35 @@ def load_cli_data(denom_filepath, flu_filepath, mixed_filepath, flu_like_filepat

    # Store for backfill
    merge_backfill_file(backfill_dir, numtype, geo, weekday, backfill_merge_day,
-                        dropdate, test_mode=False, check_nd=25)
-    store_backfill_file(data, dropdate, backfill_dir, numtype, geo, weekday)
+                        issue_date, test_mode=False, check_nd=25)
+    store_backfill_file(data, issue_date, backfill_dir, numtype, geo, weekday)
    return data


-def load_flu_data(denom_filepath, flu_filepath, dropdate, base_geo,
+def load_flu_data(denom_filepath, flu_filepath, base_geo,
                  backfill_dir, geo, weekday, numtype, backfill_merge_day):
    """Load in denominator and flu data, and combine them.

    Args:
        denom_filepath: path to the aggregated denominator data
        flu_filepath: path to the aggregated flu data
-        dropdate: data drop date (datetime object)
        base_geo: base geographic unit before aggregation ('fips')

    Returns:
        combined multiindexed dataframe, index 0 is geo_base, index 1 is date
    """
    assert base_geo == "fips", "base unit must be 'fips'"

+    # Get issue_date from the filename
+    issue_date = datetime.strptime(flu_filepath.split("/")[-1][:8], "%Y%m%d")
+    assert (
+        issue_date == datetime.strptime(denom_filepath.split("/")[-1][:8], "%Y%m%d")
+    ), "The aggregated files used for Flu Claims and Total Claims should have the same drop date."
+
    # load each data stream
-    denom_data = load_chng_data(denom_filepath, dropdate, base_geo,
+    denom_data = load_chng_data(denom_filepath, issue_date, base_geo,
                                Config.DENOM_COLS, Config.DENOM_DTYPES, Config.DENOM_COL)
-    flu_data = load_chng_data(flu_filepath, dropdate, base_geo,
+    flu_data = load_chng_data(flu_filepath, issue_date, base_geo,
                              Config.FLU_COLS, Config.FLU_DTYPES, Config.FLU_COL)

    # merge data
@@ -200,6 +216,6 @@ def load_flu_data(denom_filepath, flu_filepath, dropdate, base_geo,

    # Store for backfill
    merge_backfill_file(backfill_dir, numtype, geo, weekday, backfill_merge_day,
-                        dropdate, test_mode=False, check_nd=25)
-    store_backfill_file(data, dropdate, backfill_dir, numtype, geo, weekday)
+                        issue_date, test_mode=False, check_nd=25)
+    store_backfill_file(data, issue_date, backfill_dir, numtype, geo, weekday)
    return data
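A small sketch of the filename-based drop-date handling these changes introduce: the loaders no longer take a dropdate argument; instead the issue date is parsed from the first eight characters (YYYYMMDD) of each file's basename, and the numerator and denominator files are required to agree. The paths below are hypothetical; only the leading date prefix matters:

from datetime import datetime

# hypothetical aggregated-file paths; only the leading YYYYMMDD prefix of the basename is used
covid_filepath = "/data/drops/20201014_Covid_Counts.csv.gz"
denom_filepath = "/data/drops/20201014_Denominator_Counts.csv.gz"

issue_date = datetime.strptime(covid_filepath.split("/")[-1][:8], "%Y%m%d")
assert (
    issue_date == datetime.strptime(denom_filepath.split("/")[-1][:8], "%Y%m%d")
), "numerator and denominator files should share the same drop date"

print(issue_date)  # 2020-10-14 00:00:00

Downstream, this issue_date replaces the old dropdate wherever it was used: as the (now inclusive) upper bound of the date filter in load_chng_data, and as the date passed to merge_backfill_file and store_backfill_file.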