@@ -123,53 +123,20 @@ def county_to_hrr(self, data):
123
123
return data .groupby ("hrr" ), "hrr"
124
124
125
125
def county_to_megacounty(self, data, threshold_visits, threshold_len):
    """Convert to megacounty and groupby FIPS using GeoMapper package.

    The thresholding itself is delegated to ``self.gmpr.fips_to_megacounty``;
    this method only forwards the thresholds and column names, then restores
    the original FIPS column name on the result.

    Args:
        data: dataframe aggregated to the daily-county resolution. It must
            carry the "PatCountyFIPS", "ServiceDate" and "Denominator"
            columns, which are the ones named in the mapper call.
        threshold_visits: count threshold to determine when to convert to
            megacounty.
        threshold_len: number of days to use when thresholding.

    Returns: tuple of the mapped data grouped by "PatCountyFIPS", and the
        geo_id column name ("PatCountyFIPS").

    """
    data = self.gmpr.fips_to_megacounty(
        data,
        threshold_visits,
        threshold_len,
        fips_col="PatCountyFIPS",
        # Threshold on the raw denominator column; the name must match the
        # dataframe column exactly (no padding inside the literal).
        thr_col="Denominator",
        date_col="ServiceDate",
    )
    # The mapper's output column is "megafips" (presumably the megacounty
    # code -- confirm against GeoMapper); rename it back so downstream code
    # keeps using the same geo_id column name.
    data = data.rename(columns={"megafips": "PatCountyFIPS"})
    return data.groupby("PatCountyFIPS"), "PatCountyFIPS"
0 commit comments