diff --git a/changehc/delphi_changehc/run.py b/changehc/delphi_changehc/run.py
index 8d4d25261..92a03e6c5 100644
--- a/changehc/delphi_changehc/run.py
+++ b/changehc/delphi_changehc/run.py
@@ -78,6 +78,16 @@ def make_asserts(params):
         assert (files["denom"] is None) == (files["flu"] is None), \
             "exactly one of denom and flu files are provided"
 
+def process_dates(params, startdate_dt, enddate_dt):
+    """Process the start and end dates for indicator."""
+    enddate = params["indicator"].get("end_date")
+    if enddate is None:
+        enddate = str(enddate_dt.date())
+    startdate = params["indicator"].get("start_date", str(startdate_dt.date()))
+    if startdate is None:
+        startdate = str(startdate_dt.date())
+    return startdate, enddate
+
 
 def run_module(params: Dict[str, Dict[str, Any]]):
     """
@@ -144,8 +154,8 @@ def run_module(params: Dict[str, Dict[str, Any]]):
     enddate_dt = dropdate_dt - timedelta(days=n_waiting_days)
     startdate_dt = enddate_dt - timedelta(days=n_backfill_days)
     # now allow manual overrides
-    enddate = enddate = params["indicator"].get("end_date",str(enddate_dt.date()))
-    startdate = params["indicator"].get("start_date", str(startdate_dt.date()))
+
+    startdate, enddate = process_dates(params, startdate_dt, enddate_dt)
 
     logger.info("generating signal and exporting to CSV",
         first_sensor_date = startdate,
diff --git a/changehc/delphi_changehc/update_sensor.py b/changehc/delphi_changehc/update_sensor.py
index 52a1af47f..11faffa3d 100644
--- a/changehc/delphi_changehc/update_sensor.py
+++ b/changehc/delphi_changehc/update_sensor.py
@@ -160,6 +160,8 @@ def geo_reindex(self, data):
                                                  thr_col="den",
                                                  mega_col=geo,
                                                  date_col=Config.DATE_COL)
+            # this line should be removed once the fix is implemented for megacounties
+            data_frame = data_frame[~((data_frame['county'].str.len() > 5) | (data_frame['county'].str.contains('_')))]
         elif geo == "state":
             data_frame = gmpr.replace_geocode(data, "fips", "state_id", new_col="state",
                                               date_col=Config.DATE_COL)
@@ -172,7 +174,8 @@ def geo_reindex(self, data):
         multiindex = pd.MultiIndex.from_product((unique_geo_ids, self.fit_dates),
                                                 names=[geo, Config.DATE_COL])
         assert (len(multiindex) <= (len(gmpr.get_geo_values(gmpr.as_mapper_name(geo))) * len(self.fit_dates))
-                ), "more loc-date pairs than maximum number of geographies x number of dates"
+                ), f"more loc-date pairs than maximum number of geographies x number of dates, length of multiindex is {len(multiindex)}, geo level is {geo}"
+
         # fill dataframe with missing dates using 0
         data_frame = data_frame.reindex(multiindex, fill_value=0)
         data_frame.fillna(0, inplace=True)