4
4
Author: Aaron Rumack
5
5
Created: 2020-10-14
6
6
"""
7
+ from datetime import datetime
7
8
# third party
8
9
import pandas as pd
9
10
@@ -58,7 +59,7 @@ def load_chng_data(filepath, dropdate, base_geo,
58
59
# restrict to start and end date
59
60
data = data [
60
61
(data [Config .DATE_COL ] >= Config .FIRST_DATA_DATE ) &
61
- (data [Config .DATE_COL ] < dropdate )
62
+ (data [Config .DATE_COL ] <= dropdate )
62
63
]
63
64
64
65
# counts between 1 and 3 are coded as "3 or less", we convert to 1
@@ -76,25 +77,27 @@ def load_chng_data(filepath, dropdate, base_geo,
76
77
return data
77
78
78
79
79
- def load_combined_data (denom_filepath , covid_filepath , dropdate , base_geo ,
80
+ def load_combined_data (denom_filepath , covid_filepath , base_geo ,
80
81
backfill_dir , geo , weekday , numtype , backfill_merge_day ):
81
82
"""Load in denominator and covid data, and combine them.
82
83
83
84
Args:
84
85
denom_filepath: path to the aggregated denominator data
85
86
covid_filepath: path to the aggregated covid data
86
- dropdate: data drop date (datetime object)
87
87
base_geo: base geographic unit before aggregation ('fips')
88
88
89
89
Returns:
90
90
combined multiindexed dataframe, index 0 is geo_base, index 1 is date
91
91
"""
92
92
assert base_geo == "fips" , "base unit must be 'fips'"
93
93
94
+ # Get issue_date from the filename
95
+ issue_date = datetime .strptime (covid_filepath .split ("/" )[- 1 ][:8 ], "%Y%m%d" )
96
+
94
97
# load each data stream
95
- denom_data = load_chng_data (denom_filepath , dropdate , base_geo ,
98
+ denom_data = load_chng_data (denom_filepath , issue_date , base_geo ,
96
99
Config .DENOM_COLS , Config .DENOM_DTYPES , Config .DENOM_COL )
97
- covid_data = load_chng_data (covid_filepath , dropdate , base_geo ,
100
+ covid_data = load_chng_data (covid_filepath , issue_date , base_geo ,
98
101
Config .COVID_COLS , Config .COVID_DTYPES , Config .COVID_COL )
99
102
100
103
# merge data
@@ -109,13 +112,13 @@ def load_combined_data(denom_filepath, covid_filepath, dropdate, base_geo,
109
112
110
113
# Store for backfill
111
114
merge_backfill_file (backfill_dir , numtype , geo , weekday , backfill_merge_day ,
112
- dropdate , test_mode = False , check_nd = 25 )
113
- store_backfill_file (data , dropdate , backfill_dir , numtype , geo , weekday )
115
+ issue_date , test_mode = False , check_nd = 25 )
116
+ store_backfill_file (data , issue_date , backfill_dir , numtype , geo , weekday )
114
117
return data
115
118
116
119
117
120
def load_cli_data (denom_filepath , flu_filepath , mixed_filepath , flu_like_filepath ,
118
- covid_like_filepath , dropdate , base_geo ,
121
+ covid_like_filepath , base_geo ,
119
122
backfill_dir , geo , weekday , numtype , backfill_merge_day ):
120
123
"""Load in denominator and covid-like data, and combine them.
121
124
@@ -125,24 +128,26 @@ def load_cli_data(denom_filepath, flu_filepath, mixed_filepath, flu_like_filepat
125
128
mixed_filepath: path to the aggregated mixed data
126
129
flu_like_filepath: path to the aggregated flu-like data
127
130
covid_like_filepath: path to the aggregated covid-like data
128
- dropdate: data drop date (datetime object)
129
131
base_geo: base geographic unit before aggregation ('fips')
130
132
131
133
Returns:
132
134
combined multiindexed dataframe, index 0 is geo_base, index 1 is date
133
135
"""
134
136
assert base_geo == "fips" , "base unit must be 'fips'"
135
137
138
+ # Get issue_date from the filename
139
+ issue_date = datetime .strptime (flu_filepath .split ("/" )[- 1 ][:8 ], "%Y%m%d" )
140
+
136
141
# load each data stream
137
- denom_data = load_chng_data (denom_filepath , dropdate , base_geo ,
142
+ denom_data = load_chng_data (denom_filepath , issue_date , base_geo ,
138
143
Config .DENOM_COLS , Config .DENOM_DTYPES , Config .DENOM_COL )
139
- flu_data = load_chng_data (flu_filepath , dropdate , base_geo ,
144
+ flu_data = load_chng_data (flu_filepath , issue_date , base_geo ,
140
145
Config .FLU_COLS , Config .FLU_DTYPES , Config .FLU_COL )
141
- mixed_data = load_chng_data (mixed_filepath , dropdate , base_geo ,
146
+ mixed_data = load_chng_data (mixed_filepath , issue_date , base_geo ,
142
147
Config .MIXED_COLS , Config .MIXED_DTYPES , Config .MIXED_COL )
143
- flu_like_data = load_chng_data (flu_like_filepath , dropdate , base_geo ,
148
+ flu_like_data = load_chng_data (flu_like_filepath , issue_date , base_geo ,
144
149
Config .FLU_LIKE_COLS , Config .FLU_LIKE_DTYPES , Config .FLU_LIKE_COL )
145
- covid_like_data = load_chng_data (covid_like_filepath , dropdate , base_geo ,
150
+ covid_like_data = load_chng_data (covid_like_filepath , issue_date , base_geo ,
146
151
Config .COVID_LIKE_COLS , Config .COVID_LIKE_DTYPES , Config .COVID_LIKE_COL )
147
152
148
153
# merge data
@@ -162,30 +167,32 @@ def load_cli_data(denom_filepath, flu_filepath, mixed_filepath, flu_like_filepat
162
167
163
168
# Store for backfill
164
169
merge_backfill_file (backfill_dir , numtype , geo , weekday , backfill_merge_day ,
165
- dropdate , test_mode = False , check_nd = 25 )
166
- store_backfill_file (data , dropdate , backfill_dir , numtype , geo , weekday )
170
+ issue_date , test_mode = False , check_nd = 25 )
171
+ store_backfill_file (data , issue_date , backfill_dir , numtype , geo , weekday )
167
172
return data
168
173
169
174
170
- def load_flu_data (denom_filepath , flu_filepath , dropdate , base_geo ,
175
+ def load_flu_data (denom_filepath , flu_filepath , base_geo ,
171
176
backfill_dir , geo , weekday , numtype , backfill_merge_day ):
172
177
"""Load in denominator and flu data, and combine them.
173
178
174
179
Args:
175
180
denom_filepath: path to the aggregated denominator data
176
181
flu_filepath: path to the aggregated flu data
177
- dropdate: data drop date (datetime object)
178
182
base_geo: base geographic unit before aggregation ('fips')
179
183
180
184
Returns:
181
185
combined multiindexed dataframe, index 0 is geo_base, index 1 is date
182
186
"""
183
187
assert base_geo == "fips" , "base unit must be 'fips'"
184
188
189
+ # Get issue_date from the filename
190
+ issue_date = datetime .strptime (flu_filepath .split ("/" )[- 1 ][:8 ], "%Y%m%d" )
191
+
185
192
# load each data stream
186
- denom_data = load_chng_data (denom_filepath , dropdate , base_geo ,
193
+ denom_data = load_chng_data (denom_filepath , issue_date , base_geo ,
187
194
Config .DENOM_COLS , Config .DENOM_DTYPES , Config .DENOM_COL )
188
- flu_data = load_chng_data (flu_filepath , dropdate , base_geo ,
195
+ flu_data = load_chng_data (flu_filepath , issue_date , base_geo ,
189
196
Config .FLU_COLS , Config .FLU_DTYPES , Config .FLU_COL )
190
197
191
198
# merge data
@@ -200,6 +207,6 @@ def load_flu_data(denom_filepath, flu_filepath, dropdate, base_geo,
200
207
201
208
# Store for backfill
202
209
merge_backfill_file (backfill_dir , numtype , geo , weekday , backfill_merge_day ,
203
- dropdate , test_mode = False , check_nd = 25 )
204
- store_backfill_file (data , dropdate , backfill_dir , numtype , geo , weekday )
210
+ issue_date , test_mode = False , check_nd = 25 )
211
+ store_backfill_file (data , issue_date , backfill_dir , numtype , geo , weekday )
205
212
return data
0 commit comments