diff --git a/doctor_visits/delphi_doctor_visits/geo_maps.py b/doctor_visits/delphi_doctor_visits/geo_maps.py
index 9ba935683..716e8899d 100644
--- a/doctor_visits/delphi_doctor_visits/geo_maps.py
+++ b/doctor_visits/delphi_doctor_visits/geo_maps.py
@@ -7,6 +7,7 @@
 Created: 2020-04-18
 Last modified: 2020-04-30 by Aaron Rumack (add megacounty code)
 """
+from functools import partial
 import pandas as pd
 from delphi_utils.geomap import GeoMapper
 
@@ -20,6 +21,14 @@ class GeoMaps:
     def __init__(self):
         """Create the underlying GeoMapper."""
         self.gmpr = GeoMapper()
+        self.geo_func = {"county": partial(self.county_to_megacounty,
+                                           threshold_visits=Config.MIN_RECENT_VISITS,
+                                           threshold_len=Config.RECENT_LENGTH),
+                         "state": self.county_to_state,
+                         "msa": self.county_to_msa,
+                         "hrr": self.county_to_hrr,
+                         "hhs": self.county_to_hhs,
+                         "nation": self.county_to_nation}
 
     @staticmethod
     def convert_fips(x):
@@ -61,6 +70,40 @@ def county_to_state(self, data):
 
         return data.groupby("state_id"), "state_id"
 
+    def county_to_hhs(self, data):
+        """Aggregate county data to the HHS region resolution.
+
+        Args:
+            data: dataframe aggregated to the daily-county resolution (all 7 cols expected)
+
+        Returns: tuple of the daily-HHS data grouped by "hhs" (a groupby object), and geo_id column name
+        """
+        data = self.gmpr.add_geocode(data,
+                                     "fips",
+                                     "hhs",
+                                     from_col="PatCountyFIPS")
+        data.drop(columns="PatCountyFIPS", inplace=True)
+        data = data.groupby(["ServiceDate", "hhs"]).sum().reset_index()
+
+        return data.groupby("hhs"), "hhs"
+
+    def county_to_nation(self, data):
+        """Aggregate county data to the nation resolution.
+
+        Args:
+            data: dataframe aggregated to the daily-county resolution (all 7 cols expected)
+
+        Returns: tuple of the daily-nation data grouped by "nation" (a groupby object), and geo_id column name
+        """
+        data = self.gmpr.add_geocode(data,
+                                     "fips",
+                                     "nation",
+                                     from_col="PatCountyFIPS")
+        data.drop(columns="PatCountyFIPS", inplace=True)
+        data = data.groupby(["ServiceDate", "nation"]).sum().reset_index()
+
+        return data.groupby("nation"), "nation"
+
     def county_to_hrr(self, data):
         """Aggregate county data to the HRR resolution.
 
diff --git a/doctor_visits/delphi_doctor_visits/run.py b/doctor_visits/delphi_doctor_visits/run.py
index d85e0b632..1feb6c945 100644
--- a/doctor_visits/delphi_doctor_visits/run.py
+++ b/doctor_visits/delphi_doctor_visits/run.py
@@ -68,7 +68,7 @@ def run_module(params):
     logging.info("n_waiting_days:\t{n_waiting_days}")
 
     ## geographies
-    geos = ["state", "msa", "hrr", "county"]
+    geos = ["state", "msa", "hrr", "county", "hhs", "nation"]
 
 
     ## print out other vars
diff --git a/doctor_visits/delphi_doctor_visits/update_sensor.py b/doctor_visits/delphi_doctor_visits/update_sensor.py
index 725d4ca4f..01e1647fe 100644
--- a/doctor_visits/delphi_doctor_visits/update_sensor.py
+++ b/doctor_visits/delphi_doctor_visits/update_sensor.py
@@ -78,7 +78,7 @@ def update_sensor(
         startdate: first sensor date (YYYY-mm-dd)
         enddate: last sensor date (YYYY-mm-dd)
         dropdate: data drop date (YYYY-mm-dd)
-        geo: geographic resolution, one of ["county", "state", "msa", "hrr"]
+        geo: geographic resolution, one of ["county", "state", "msa", "hrr", "nation", "hhs"]
         parallel: boolean to run the sensor update in parallel
         weekday: boolean to adjust for weekday effects
         se: boolean to write out standard errors, if true, use an obfuscated name
@@ -132,19 +132,12 @@ def update_sensor(
 
     # get right geography
     geo_map = GeoMaps()
-    if geo.lower() == "county":
-        data_groups, _ = geo_map.county_to_megacounty(
-            data, Config.MIN_RECENT_VISITS, Config.RECENT_LENGTH
-        )
-    elif geo.lower() == "state":
-        data_groups, _ = geo_map.county_to_state(data)
-    elif geo.lower() == "msa":
-        data_groups, _ = geo_map.county_to_msa(data)
-    elif geo.lower() == "hrr":
-        data_groups, _ = geo_map.county_to_hrr(data)
-    else:
-        logging.error(f"{geo} is invalid, pick one of 'county', 'state', 'msa', 'hrr'")
-        return {}
+    mapping_func = geo_map.geo_func.get(geo.lower())
+    if mapping_func is None:
+        # Keep the previous contract: log and return {} rather than raising KeyError.
+        logging.error(f"{geo} is invalid, pick one of {list(geo_map.geo_func)}")
+        return {}
+    data_groups, _ = mapping_func(data)
     unique_geo_ids = list(data_groups.groups.keys())
 
     # run sensor fitting code (maybe in parallel)