diff --git a/_delphi_utils_python/data_proc/geomap/geo_data_proc.py b/_delphi_utils_python/data_proc/geomap/geo_data_proc.py index 893c5199d..d42de3261 100644 --- a/_delphi_utils_python/data_proc/geomap/geo_data_proc.py +++ b/_delphi_utils_python/data_proc/geomap/geo_data_proc.py @@ -217,6 +217,12 @@ def create_jhu_uid_fips_crosswalk(): {"jhu_uid": "63072999", "fips": "72000", "weight": 1.0}, ] ) + cruise_ships = pd.DataFrame( + [ + {"jhu_uid": "84088888", "fips": "88888", "weight": 1.0}, + {"jhu_uid": "84099999", "fips": "99999", "weight": 1.0}, + ] + ) jhu_df = ( pd.read_csv(JHU_FIPS_URL, dtype={"UID": str, "FIPS": str}) @@ -234,7 +240,7 @@ def create_jhu_uid_fips_crosswalk(): # Drop the JHU UIDs that were hand-modified dup_ind = jhu_df["jhu_uid"].isin( pd.concat( - [hand_additions, unassigned_states, out_of_state, puerto_rico_unassigned] + [hand_additions, unassigned_states, out_of_state, puerto_rico_unassigned, cruise_ships] )["jhu_uid"].values ) jhu_df.drop(jhu_df.index[dup_ind], inplace=True) diff --git a/_delphi_utils_python/delphi_utils/data/jhu_uid_fips_table.csv b/_delphi_utils_python/delphi_utils/data/jhu_uid_fips_table.csv index 427459b98..d69e014bc 100644 --- a/_delphi_utils_python/delphi_utils/data/jhu_uid_fips_table.csv +++ b/_delphi_utils_python/delphi_utils/data/jhu_uid_fips_table.csv @@ -82,8 +82,6 @@ jhu_uid,fips,weight 63072149,72149,1.0 63072151,72151,1.0 63072153,72153,1.0 -84088888,88888,1.0 -84099999,99999,1.0 84000001,01000,1.0 84000002,02000,1.0 84000004,04000,1.0 diff --git a/jhu/delphi_jhu/pull.py b/jhu/delphi_jhu/pull.py index 18015a23c..3b5ddbe6f 100644 --- a/jhu/delphi_jhu/pull.py +++ b/jhu/delphi_jhu/pull.py @@ -14,7 +14,7 @@ def download_data(base_url: str, metric: str) -> pd.DataFrame: df = pd.read_csv(base_url.format(metric=metric)) # Keep the UID and the time series columns only # The regex filters for columns with the date format MM-DD-YY or M-D-YY - df = df.filter(regex="\d{1,2}\/\d{1,2}\/\d{2}|UID").melt( + df = df.filter(regex=r"\d{1,2}\/\d{1,2}\/\d{2}|UID").melt( id_vars=["UID"], var_name="timestamp", value_name="cumulative_counts" ) df["timestamp"] = pd.to_datetime(df["timestamp"]) diff --git a/jhu/delphi_jhu/run.py b/jhu/delphi_jhu/run.py index c402a516f..ee9d1c85b 100644 --- a/jhu/delphi_jhu/run.py +++ b/jhu/delphi_jhu/run.py @@ -7,10 +7,8 @@ from datetime import datetime from itertools import product from functools import partial -from os.path import join import numpy as np -import pandas as pd from delphi_utils import ( read_params, create_export_csv,