
Commit fb446a6

Merge pull request #1825 from cmu-delphi/ndefries/pandasv2-fix-tests
Update all indicators for pandas v2
2 parents 1647ea2 + 513a39b commit fb446a6

21 files changed: +104 -66 lines changed

_delphi_utils_python/delphi_utils/flash_eval/eval_day.py

Lines changed: 1 addition & 1 deletion
@@ -147,7 +147,7 @@ def output(evd_ranking, day, lag, signal, logger):
     """
     starter_link = f"{HTML_LINK}{(day+pd.Timedelta(f'{lag}d')).strftime('%Y-%m_%d')}"
     p_text = ""
-    for j, (index, value) in enumerate(evd_ranking.sort_values(ascending=False).iteritems()):
+    for j, (index, value) in enumerate(evd_ranking.sort_values(ascending=False).items()):
         if j < 30:
             start_link = f"{starter_link},{day.strftime('%Y-%m_%d')},{index}"
             p_text += f"\t{start_link}|*{index}*, {'{:.2f}'.format(value)}>\n"

_delphi_utils_python/delphi_utils/geomap.py

Lines changed: 3 additions & 3 deletions
@@ -401,9 +401,9 @@ def replace_geocode(
         df.drop("weight", axis=1, inplace=True)

         if not date_col is None:
-            df = df.groupby([date_col, new_col]).sum().reset_index()
+            df = df.groupby([date_col, new_col]).sum(numeric_only=True).reset_index()
         else:
-            df = df.groupby([new_col]).sum().reset_index()
+            df = df.groupby([new_col]).sum(numeric_only=True).reset_index()
         return df

     def add_population_column(self, data, geocode_type, geocode_col=None, dropna=True):
@@ -501,7 +501,7 @@ def fips_to_megacounty(
         )
         data.set_index([fips_col, date_col], inplace=True)
         data = data.join(mega_data)
-        data = data.reset_index().groupby([date_col, mega_col]).sum()
+        data = data.reset_index().groupby([date_col, mega_col]).sum(numeric_only=True)
         return data.reset_index()

     def as_mapper_name(self, geo_type, state="state_id"):
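In pandas 2.0, groupby(...).sum() no longer silently drops non-numeric columns: the old implicit numeric_only fallback is gone, so string columns get concatenated and non-summable dtypes raise a TypeError. Passing numeric_only=True, as above, restores the previous result. A small sketch with a hypothetical frame:

import pandas as pd

df = pd.DataFrame({
    "geo": ["pa", "pa", "wv"],   # grouping key
    "name": ["x", "y", "z"],     # non-numeric nuisance column
    "count": [1, 2, 3],
})

# pandas < 2.0: the "name" column was silently dropped from the sum.
# pandas >= 2.0: aggregate only the numeric columns explicitly.
print(df.groupby("geo").sum(numeric_only=True))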

_delphi_utils_python/delphi_utils/validator/dynamic.py

Lines changed: 1 addition & 1 deletion
@@ -195,7 +195,7 @@ def replace_first_six(df, start_date):
             start_date = self.params.time_window.start_date)

         if not error_df.empty:
-            for index, value in error_df.iteritems():
+            for index, value in error_df.items():
                 report.add_raised_error(
                     ValidationFailure("check_val_missing",
                                       geo_type=geo_type,

_delphi_utils_python/setup.py

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@
     "mock",
     "moto",
     "numpy",
-    "pandas>=1.1.0,<2",
+    "pandas>=1.1.0",
     "pydocstyle",
     "pylint==2.8.3",
     "pytest",

_delphi_utils_python/tests/test_export.py

Lines changed: 8 additions & 8 deletions
@@ -250,15 +250,15 @@ def test_export_with_null_removal(self):
         """Test that `remove_null_samples = True` removes entries with null samples."""
         _clean_directory(self.TEST_DIR)

-        df_with_nulls = self.DF.copy().append(
-            {
+        df_with_nulls = pd.concat(
+            [self.DF.copy(),
+             pd.DataFrame({
                 "geo_id": "66666",
                 "timestamp": datetime(2020, 6, 6),
                 "val": 10,
                 "se": 0.2,
                 "sample_size": pd.NA,
-            },
-            ignore_index=True,
+             }, index = [0])]
         )

         create_export_csv(
@@ -283,15 +283,15 @@ def test_export_without_null_removal(self):
         """Test that `remove_null_samples = False` does not remove entries with null samples."""
         _clean_directory(self.TEST_DIR)

-        df_with_nulls = self.DF.copy().append(
-            {
+        df_with_nulls = pd.concat(
+            [self.DF.copy(),
+             pd.DataFrame({
                 "geo_id": "66666",
                 "timestamp": datetime(2020, 6, 6),
                 "val": 10,
                 "se": 0.2,
                 "sample_size": pd.NA,
-            },
-            ignore_index=True,
+             }, index = [0])]
         )

         create_export_csv(
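DataFrame.append() is removed in pandas 2.0 (deprecated since 1.4); the replacement is pd.concat over a list of frames, with dict-style rows first wrapped in a one-row DataFrame. A minimal sketch of the pattern, using a hypothetical base frame:

from datetime import datetime
import pandas as pd

base = pd.DataFrame({"geo_id": ["11111"],
                     "timestamp": [datetime(2020, 6, 5)],
                     "val": [5]})

# pandas < 2.0: base.append({...}, ignore_index=True)
# pandas >= 2.0: wrap the row in a DataFrame, then concat.
new_row = pd.DataFrame({"geo_id": "66666",
                        "timestamp": datetime(2020, 6, 6),
                        "val": 10}, index=[0])
df_with_extra = pd.concat([base, new_row], ignore_index=True)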

_delphi_utils_python/tests/test_geomap.py

Lines changed: 1 addition & 1 deletion
@@ -196,7 +196,7 @@ def test_load_fips_chngfips_table(self, geomapper):

     def test_load_jhu_uid_fips_table(self, geomapper):
         jhu_data = geomapper.get_crosswalk(from_code="jhu_uid", to_code="fips")
-        assert np.allclose(jhu_data.groupby("jhu_uid").sum(), 1.0)
+        assert np.allclose(jhu_data.groupby("jhu_uid").sum(numeric_only=True), 1.0)

     def test_load_zip_hrr_table(self, geomapper):
         zip_data = geomapper.get_crosswalk(from_code="zip", to_code="hrr")

_delphi_utils_python/tests/validator/test_dynamic.py

Lines changed: 2 additions & 2 deletions
@@ -48,7 +48,7 @@ def test_half_padding(self):
             ref_df, test_df, ref_date, ref_date)

         # Check it only takes missing dates - so the last 5 dates
-        assert new_ref_df.time_value.max() == datetime.strptime("2021-01-11",
+        assert new_ref_df.time_value.max().date() == datetime.strptime("2021-01-11",
                                                                 "%Y-%m-%d").date()
         assert new_ref_df.shape[0] == 11
         assert new_ref_df["val"].iloc[5] == 2
@@ -71,7 +71,7 @@ def test_full_padding(self):
             ref_df, test_df, ref_date, ref_date)

         # Check it only takes missing dates up to the day before the reference
-        assert new_ref_df.time_value.max() == datetime.strptime("2021-01-15",
+        assert new_ref_df.time_value.max().date() == datetime.strptime("2021-01-15",
                                                                 "%Y-%m-%d").date()
         assert new_ref_df.shape[0] == 15
         assert new_ref_df["val"].iloc[5] == 2

changehc/delphi_changehc/load_data.py

Lines changed: 1 addition & 1 deletion
@@ -71,7 +71,7 @@ def load_chng_data(filepath, dropdate, base_geo,
     ), "Counts must be nonnegative"

     # aggregate age groups (so data is unique by date and base geography)
-    data = data.groupby([base_geo, Config.DATE_COL]).sum()
+    data = data.groupby([base_geo, Config.DATE_COL]).sum(numeric_only=True)
     data.dropna(inplace=True) # drop rows with any missing entries

     return data

changehc/tests/test_update_sensor.py

Lines changed: 1 addition & 1 deletion
@@ -91,7 +91,7 @@ def test_geo_reindex(self):
             "timestamp": [pd.Timestamp(f'03-{i}-2020') for i in range(1, 14)]})
         data_frame = su_inst.geo_reindex(test_data)
         assert data_frame.shape[0] == multiple*len(su_inst.fit_dates)
-        assert (data_frame.sum() == (4200,19000)).all()
+        assert (data_frame.sum(numeric_only=True) == (4200,19000)).all()

     def test_update_sensor(self):
         """Tests that the sensors are properly updated."""

claims_hosp/delphi_claims_hosp/load_data.py

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ def load_claims_data(claims_filepath, dropdate, base_geo):
     ), "Claims counts must be nonnegative"

     # aggregate age groups (so data is unique by date and base geography)
-    claims_data = claims_data.groupby([base_geo, Config.DATE_COL]).sum()
+    claims_data = claims_data.groupby([base_geo, Config.DATE_COL]).sum(numeric_only=True)
     claims_data.dropna(inplace=True) # drop rows with any missing entries

     return claims_data

doctor_visits/delphi_doctor_visits/geo_maps.py

Lines changed: 4 additions & 4 deletions
@@ -49,7 +49,7 @@ def county_to_msa(self, data):
                                        from_col="PatCountyFIPS",
                                        new_col="cbsa_id")
         data.drop(columns="PatCountyFIPS", inplace=True)
-        data = data.groupby(["ServiceDate", "cbsa_id"]).sum().reset_index()
+        data = data.groupby(["ServiceDate", "cbsa_id"]).sum(numeric_only=True).reset_index()

         return data.groupby("cbsa_id"), "cbsa_id"

@@ -66,7 +66,7 @@ def county_to_state(self, data):
                                        "state_id",
                                        from_col="PatCountyFIPS")
         data.drop(columns="PatCountyFIPS", inplace=True)
-        data = data.groupby(["ServiceDate", "state_id"]).sum().reset_index()
+        data = data.groupby(["ServiceDate", "state_id"]).sum(numeric_only=True).reset_index()

         return data.groupby("state_id"), "state_id"

@@ -83,7 +83,7 @@ def county_to_hhs(self, data):
                                        "hhs",
                                        from_col="PatCountyFIPS")
         data.drop(columns="PatCountyFIPS", inplace=True)
-        data = data.groupby(["ServiceDate", "hhs"]).sum().reset_index()
+        data = data.groupby(["ServiceDate", "hhs"]).sum(numeric_only=True).reset_index()

         return data.groupby("hhs"), "hhs"

@@ -100,7 +100,7 @@ def county_to_nation(self, data):
                                        "nation",
                                        from_col="PatCountyFIPS")
         data.drop(columns="PatCountyFIPS", inplace=True)
-        data = data.groupby(["ServiceDate", "nation"]).sum().reset_index()
+        data = data.groupby(["ServiceDate", "nation"]).sum(numeric_only=True).reset_index()

         return data.groupby("nation"), "nation"

doctor_visits/delphi_doctor_visits/sensor.py

Lines changed: 4 additions & 3 deletions
@@ -60,16 +60,17 @@ def fill_dates(y_data, dates):
     last_date = dates[-1]
     cols = y_data.columns

+    df_list = [y_data]
     if first_date not in y_data.index:
-        y_data = y_data.append(
+        df_list.append(
             pd.DataFrame(dict.fromkeys(cols, 0.0), columns=cols, index=[first_date])
         )
     if last_date not in y_data.index:
-        y_data = y_data.append(
+        df_list.append(
             pd.DataFrame(dict.fromkeys(cols, 0.0), columns=cols, index=[last_date])
         )

-    y_data.sort_index(inplace=True)
+    y_data = pd.concat(df_list).sort_index()
     y_data = y_data.asfreq("D", fill_value=0)
     return y_data
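With append gone, the loop-and-append idiom becomes collect-then-concat: gather the pieces in a Python list and call pd.concat once at the end, which also avoids copying the accumulated frame on every iteration. A small sketch of the idiom with hypothetical pieces:

import pandas as pd

pieces = [pd.DataFrame({"val": [float(i)]}, index=[i]) for i in (2, 0, 1)]

# pandas < 2.0 anti-pattern: out = out.append(piece) inside a loop.
# pandas >= 2.0: one concat at the end, then sort the index.
out = pd.concat(pieces).sort_index()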

doctor_visits/delphi_doctor_visits/update_sensor.py

Lines changed: 1 addition & 1 deletion
@@ -101,7 +101,7 @@ def update_sensor(
     data.dropna(inplace=True) # drop rows with any missing entries

     # aggregate age groups (so data is unique by service date and FIPS)
-    data = data.groupby([Config.DATE_COL, Config.GEO_COL]).sum().reset_index()
+    data = data.groupby([Config.DATE_COL, Config.GEO_COL]).sum(numeric_only=True).reset_index()
     assert np.sum(data.duplicated()) == 0, "Duplicates after age group aggregation"
     assert (data[Config.COUNT_COLS] >= 0).all().all(), "Counts must be nonnegative"

dsew_community_profile/delphi_dsew_community_profile/pull.py

Lines changed: 1 addition & 0 deletions
@@ -701,6 +701,7 @@ def generate_prop_signal(df, geo, geo_mapper):
     ).groupby(
         geo
     ).sum(
+        numeric_only=True
     ).reset_index(
     )
     df = pd.merge(df, map_df, left_on="geo_id", right_on=geo, how="inner")

dsew_community_profile/tests/test_pull.py

Lines changed: 14 additions & 7 deletions
@@ -240,8 +240,8 @@ def test_nation_from_state(self):
             'sample_size': [None, None],
             'publish_date': [datetime(year=2020, month=1, day=1)]*2,})

-        pa_pop = int(state_pop.loc[state_pop.state_id == "pa", "pop"])
-        wv_pop = int(state_pop.loc[state_pop.state_id == "wv", "pop"])
+        pa_pop = int(state_pop.loc[state_pop.state_id == "pa", "pop"].iloc[0])
+        wv_pop = int(state_pop.loc[state_pop.state_id == "wv", "pop"].iloc[0])
         tot_pop = pa_pop + wv_pop

         assert True, nation_from_state(
@@ -285,7 +285,14 @@ def test_generate_prop_signal_msa(self):
         geomapper = GeoMapper()
         county_pop = geomapper.get_crosswalk("fips", "pop")
         county_msa = geomapper.get_crosswalk("fips", "msa")
-        msa_pop = county_pop.merge(county_msa, on="fips", how="inner").groupby("msa").sum().reset_index()
+        msa_pop = county_pop.merge(
+            county_msa, on="fips", how="inner"
+        ).groupby(
+            "msa"
+        ).sum(
+            numeric_only=True
+        ).reset_index(
+        )

         test_df = pd.DataFrame({
             'geo_id': ['35620', '31080'],
@@ -294,8 +301,8 @@ def test_generate_prop_signal_msa(self):
             'se': [None, None],
             'sample_size': [None, None],})

-        nyc_pop = int(msa_pop.loc[msa_pop.msa == "35620", "pop"])
-        la_pop = int(msa_pop.loc[msa_pop.msa == "31080", "pop"])
+        nyc_pop = int(msa_pop.loc[msa_pop.msa == "35620", "pop"].iloc[0])
+        la_pop = int(msa_pop.loc[msa_pop.msa == "31080", "pop"].iloc[0])

         expected_df = pd.DataFrame({
             'geo_id': ['35620', '31080'],
@@ -342,8 +349,8 @@ def test_generate_prop_signal_non_msa(self):
             'se': [None, None],
             'sample_size': [None, None],})

-        pop1 = int(geo_pop.loc[geo_pop[settings["code_name"]] == settings["geo_names"][0], "pop"])
-        pop2 = int(geo_pop.loc[geo_pop[settings["code_name"]] == settings["geo_names"][1], "pop"])
+        pop1 = int(geo_pop.loc[geo_pop[settings["code_name"]] == settings["geo_names"][0], "pop"].iloc[0])
+        pop2 = int(geo_pop.loc[geo_pop[settings["code_name"]] == settings["geo_names"][1], "pop"].iloc[0])

         expected_df = pd.DataFrame({
             'geo_id': settings["geo_names"],
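Converting a single-element Series to a scalar with int(...) is deprecated in pandas 2.x; selecting the scalar first with .iloc[0] (or .item()) is the forward-compatible spelling used throughout this commit. A minimal sketch with made-up population values:

import pandas as pd

state_pop = pd.DataFrame({"state_id": ["pa", "wv"],
                          "pop": [13000000, 1800000]})   # hypothetical values

# Deprecated: int(state_pop.loc[state_pop.state_id == "pa", "pop"])
# Select the single element explicitly before converting:
pa_pop = int(state_pop.loc[state_pop.state_id == "pa", "pop"].iloc[0])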

hhs_hosp/tests/test_run.py

Lines changed: 2 additions & 2 deletions
@@ -100,8 +100,8 @@ def test_transform_signal_pop():
         'timestamp': [datetime(year=2020, month=1, day=1)]*2,
         'val': [15., 150.],})

-    pa_pop = int(state_pop.loc[state_pop.state_id == "pa", "pop"])
-    wv_pop = int(state_pop.loc[state_pop.state_id == "wv", "pop"])
+    pa_pop = int(state_pop[state_pop.state_id == "pa"]["pop"].iloc[0])
+    wv_pop = int(state_pop[state_pop.state_id == "wv"]["pop"].iloc[0])
     pd.testing.assert_frame_equal(
         transform_signal(
             CONFIRMED_PROP,

nchs_mortality/delphi_nchs_mortality/pull.py

Lines changed: 10 additions & 2 deletions
@@ -108,11 +108,19 @@ def pull_nchs_mortality_data(token: str, test_file: Optional[str]=None):
     # Get mask df to ignore cells where both of them have NAN values
     mask = (df_ny[keep_columns].isnull().values \
             & df_nyc[keep_columns].isnull().values)
-    df_ny = df_ny.append(df_nyc).groupby("timestamp").sum().where(~mask, np.nan)
+    df_ny = pd.concat(
+        [df_ny, df_nyc]
+    ).groupby(
+        "timestamp"
+    ).sum(
+        numeric_only=True
+    ).where(
+        ~mask, np.nan
+    )
     df_ny["state"] = "New York"
     # Drop NYC and NY in the full dataset
     df = df.loc[~df["state"].isin(["New York", "New York City"]), :]
-    df = df.append(df_ny).reset_index().sort_values(["state", "timestamp"])
+    df = pd.concat([df, df_ny]).reset_index().sort_values(["state", "timestamp"])
     # Add population info
     keep_columns.extend(["timestamp", "geo_id", "population"])
     gmpr = GeoMapper()
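The NY/NYC merge combines three migration idioms: pd.concat in place of the removed append, an explicit numeric_only=True, and a where mask that restores NaN wherever both inputs were missing (so NaN + NaN does not become 0). A small sketch with hypothetical frames, grouping on the index rather than a "timestamp" column:

import numpy as np
import pandas as pd

dates = pd.to_datetime(["2020-01-01", "2020-01-02"])
df_ny = pd.DataFrame({"deaths": [1.0, np.nan]}, index=dates)
df_nyc = pd.DataFrame({"deaths": [2.0, np.nan]}, index=dates)

# Cells where BOTH inputs are NaN should stay NaN instead of summing to 0.
mask = df_ny.isnull().values & df_nyc.isnull().values
combined = (pd.concat([df_ny, df_nyc])
            .groupby(level=0)
            .sum(numeric_only=True)
            .where(~mask, np.nan))
print(combined)   # 2020-01-01 -> 3.0, 2020-01-02 -> NaN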

quidel_covidtest/delphi_quidel_covidtest/data_tools.py

Lines changed: 9 additions & 5 deletions
@@ -30,14 +30,18 @@ def fill_dates(y_data, first_date, last_date):
     Returns: dataframe containing all dates given
     """
     cols = y_data.columns
+
+    df_list = [y_data]
     if first_date not in y_data.index:
-        y_data = y_data.append(pd.DataFrame(dict.fromkeys(cols, 0.),
-                               columns=cols, index=[first_date]))
+        df_list.append(
+            pd.DataFrame(dict.fromkeys(cols, 0.), columns=cols, index=[first_date])
+        )
     if last_date not in y_data.index:
-        y_data = y_data.append(pd.DataFrame(dict.fromkeys(cols, 0.),
-                               columns=cols, index=[last_date]))
+        df_list.append(
+            pd.DataFrame(dict.fromkeys(cols, 0.), columns=cols, index=[last_date])
+        )

-    y_data.sort_index(inplace=True)
+    y_data = pd.concat(df_list).sort_index()
     y_data = y_data.asfreq('D', fill_value=0)
     y_data.fillna(0, inplace=True)
     return y_data

quidel_covidtest/delphi_quidel_covidtest/generate_sensor.py

Lines changed: 20 additions & 14 deletions
@@ -27,8 +27,8 @@ def generate_sensor_for_nonparent_geo(state_groups, res_key, smooth, device,
     Returns:
         df: pd.DataFrame
     """
-    state_df = pd.DataFrame(columns=["geo_id", "val", "se", "sample_size", "timestamp"])
     state_list = list(state_groups.groups.keys())
+    df_list = []
     for state in state_list:
         state_group = state_groups.get_group(state)
         state_group = state_group.drop(columns=[res_key])
@@ -63,12 +63,15 @@ def generate_sensor_for_nonparent_geo(state_groups, res_key, smooth, device,
             stat = stat * 100

         se = se * 100
-        state_df = state_df.append(pd.DataFrame({"geo_id": state,
-                                                 "timestamp": state_group.index,
-                                                 "val": stat,
-                                                 "se": se,
-                                                 "sample_size": sample_size}))
-    return remove_null_samples(state_df)
+        df_list.append(
+            pd.DataFrame({"geo_id": state,
+                          "timestamp": state_group.index,
+                          "val": stat,
+                          "se": se,
+                          "sample_size": sample_size})
+        )
+
+    return remove_null_samples(pd.concat(df_list))

 def generate_sensor_for_parent_geo(state_groups, data, res_key, smooth,
                                    device, first_date, last_date, suffix):
@@ -88,9 +91,9 @@ def generate_sensor_for_parent_geo(state_groups, data, res_key, smooth,
         df: pd.DataFrame
     """
     has_parent = True
-    res_df = pd.DataFrame(columns=["geo_id", "val", "se", "sample_size"])
     if res_key == "fips": # Add rest-of-state report for county level
         data = add_megacounties(data, smooth)
+    df_list = []
     for loc, res_group in data.groupby(res_key):
         parent_state = res_group['state_id'].values[0]
         try:
@@ -147,9 +150,12 @@ def generate_sensor_for_parent_geo(state_groups, data, res_key, smooth,
             stat = stat * 100

         se = se * 100
-        res_df = res_df.append(pd.DataFrame({"geo_id": loc,
-                                             "timestamp": res_group.index,
-                                             "val": stat,
-                                             "se": se,
-                                             "sample_size": sample_size}))
-    return remove_null_samples(res_df)
+        df_list.append(
+            pd.DataFrame({"geo_id": loc,
+                          "timestamp": res_group.index,
+                          "val": stat,
+                          "se": se,
+                          "sample_size": sample_size})
+        )
+
+    return remove_null_samples(pd.concat(df_list))

quidel_covidtest/delphi_quidel_covidtest/geo_maps.py

Lines changed: 1 addition & 1 deletion
@@ -88,5 +88,5 @@ def add_parent_state(data, geo_res, geo_key):
     # Merge the info of parent state to the data
     data = data.merge(mix_map, how="left", on=geo_key).drop(
         columns=["population"]).dropna()
-    data = data.groupby(["timestamp", geo_key, "state_id"]).sum().reset_index()
+    data = data.groupby(["timestamp", geo_key, "state_id"]).sum(numeric_only=True).reset_index()
     return data
