From 03224e4300d7609ddb30580a46a1cc7028137762 Mon Sep 17 00:00:00 2001
From: Florian Hofstetter
Date: Sat, 1 Oct 2022 15:00:13 +0200
Subject: [PATCH 1/3] Fix: Rename duplicate function names in unittests

---
 .../tests/groupby/aggregate/test_aggregate.py |  8 ++---
 pandas/tests/groupby/test_groupby.py          | 30 +++++++++----------
 pandas/tests/io/pytables/test_append.py       | 18 +++++------
 .../tests/resample/test_resampler_grouper.py  | 16 +++++-----
 pandas/tests/window/test_groupby.py           |  8 ++---
 pyproject.toml                                |  1 -
 6 files changed, 39 insertions(+), 42 deletions(-)

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index bda4d0da9f6ce..3e1ee02aabce7 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -281,8 +281,8 @@ def test_groupby_mixed_cols_axis1(func, expected_data, result_dtype_dict):
 def test_aggregate_item_by_item(df):
     grouped = df.groupby("A")
 
-    aggfun = lambda ser: ser.size
-    result = grouped.agg(aggfun)
+    aggfun_0 = lambda ser: ser.size
+    result = grouped.agg(aggfun_0)
     foo = (df.A == "foo").sum()
     bar = (df.A == "bar").sum()
     K = len(result.columns)
@@ -294,10 +294,10 @@ def test_aggregate_item_by_item(df):
     exp = Series(np.array([bar] * K), index=list("BCD"), name="bar")
     tm.assert_almost_equal(result.xs("bar"), exp)
 
-    def aggfun(ser):
+    def aggfun_1(ser):
         return ser.size
 
-    result = DataFrame().groupby(df.A).agg(aggfun)
+    result = DataFrame().groupby(df.A).agg(aggfun_1)
     assert isinstance(result, DataFrame)
     assert len(result) == 0
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index b52f5e20b2286..a1648fd6fdfc3 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -112,6 +112,10 @@ def max_value(group):
 
 def test_groupby_return_type():
     # GH2893, return a reduced type
+
+    def func(dataf):
+        return dataf["val2"] - dataf["val2"].mean()
+
     df1 = DataFrame(
         [
             {"val1": 1, "val2": 20},
@@ -121,9 +125,6 @@ def test_groupby_return_type():
         ]
     )
 
-    def func(dataf):
-        return dataf["val2"] - dataf["val2"].mean()
-
     with tm.assert_produces_warning(FutureWarning):
         result = df1.groupby("val1", squeeze=True).apply(func)
     assert isinstance(result, Series)
@@ -137,9 +138,6 @@ def func(dataf):
         ]
     )
 
-    def func(dataf):
-        return dataf["val2"] - dataf["val2"].mean()
-
     with tm.assert_produces_warning(FutureWarning):
         result = df2.groupby("val1", squeeze=True).apply(func)
     assert isinstance(result, Series)
@@ -162,51 +160,51 @@ def test_inconsistent_return_type():
         }
     )
 
-    def f(grp):
+    def f_0(grp):
         return grp.iloc[0]
 
     expected = df.groupby("A").first()[["B"]]
-    result = df.groupby("A").apply(f)[["B"]]
+    result = df.groupby("A").apply(f_0)[["B"]]
     tm.assert_frame_equal(result, expected)
 
-    def f(grp):
+    def f_1(grp):
         if grp.name == "Tiger":
             return None
         return grp.iloc[0]
 
-    result = df.groupby("A").apply(f)[["B"]]
+    result = df.groupby("A").apply(f_1)[["B"]]
     e = expected.copy()
     e.loc["Tiger"] = np.nan
     tm.assert_frame_equal(result, e)
 
-    def f(grp):
+    def f_2(grp):
         if grp.name == "Pony":
             return None
         return grp.iloc[0]
 
-    result = df.groupby("A").apply(f)[["B"]]
+    result = df.groupby("A").apply(f_2)[["B"]]
     e = expected.copy()
     e.loc["Pony"] = np.nan
     tm.assert_frame_equal(result, e)
 
     # 5592 revisited, with datetimes
-    def f(grp):
+    def f_3(grp):
         if grp.name == "Pony":
             return None
         return grp.iloc[0]
 
-    result = df.groupby("A").apply(f)[["C"]]
+    result = df.groupby("A").apply(f_3)[["C"]]
     e = df.groupby("A").first()[["C"]]
e.loc["Pony"] = pd.NaT tm.assert_frame_equal(result, e) # scalar outputs - def f(grp): + def f_4(grp): if grp.name == "Pony": return None return grp.iloc[0].loc["C"] - result = df.groupby("A").apply(f) + result = df.groupby("A").apply(f_4) e = df.groupby("A").first()["C"].copy() e.loc["Pony"] = np.nan e.name = None diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py index 40a50c55de2a4..917a208465578 100644 --- a/pandas/tests/io/pytables/test_append.py +++ b/pandas/tests/io/pytables/test_append.py @@ -361,7 +361,7 @@ def test_append_with_strings(setup_path): with ensure_clean_store(setup_path) as store: with catch_warnings(record=True): - def check_col(key, name, size): + def check_col_0(key, name, size): assert ( getattr(store.get_storer(key).table.description, name).itemsize == size @@ -371,20 +371,20 @@ def check_col(key, name, size): df = DataFrame([[123, "asdqwerty"], [345, "dggnhebbsdfbdfb"]]) store.append("df_big", df) tm.assert_frame_equal(store.select("df_big"), df) - check_col("df_big", "values_block_1", 15) + check_col_0("df_big", "values_block_1", 15) # appending smaller string ok df2 = DataFrame([[124, "asdqy"], [346, "dggnhefbdfb"]]) store.append("df_big", df2) expected = concat([df, df2]) tm.assert_frame_equal(store.select("df_big"), expected) - check_col("df_big", "values_block_1", 15) + check_col_0("df_big", "values_block_1", 15) # avoid truncation on elements df = DataFrame([[123, "asdqwerty"], [345, "dggnhebbsdfbdfb"]]) store.append("df_big2", df, min_itemsize={"values": 50}) tm.assert_frame_equal(store.select("df_big2"), df) - check_col("df_big2", "values_block_1", 50) + check_col_0("df_big2", "values_block_1", 50) # bigger string on next append store.append("df_new", df) @@ -437,7 +437,7 @@ def check_col(key, name, size): with ensure_clean_store(setup_path) as store: - def check_col(key, name, size): + def check_col_1(key, name, size): assert getattr(store.get_storer(key).table.description, name).itemsize, size df = DataFrame({"A": "foo", "B": "bar"}, index=range(10)) @@ -445,20 +445,20 @@ def check_col(key, name, size): # a min_itemsize that creates a data_column _maybe_remove(store, "df") store.append("df", df, min_itemsize={"A": 200}) - check_col("df", "A", 200) + check_col_1("df", "A", 200) assert store.get_storer("df").data_columns == ["A"] # a min_itemsize that creates a data_column2 _maybe_remove(store, "df") store.append("df", df, data_columns=["B"], min_itemsize={"A": 200}) - check_col("df", "A", 200) + check_col_1("df", "A", 200) assert store.get_storer("df").data_columns == ["B", "A"] # a min_itemsize that creates a data_column2 _maybe_remove(store, "df") store.append("df", df, data_columns=["B"], min_itemsize={"values": 200}) - check_col("df", "B", 200) - check_col("df", "values_block_0", 200) + check_col_1("df", "B", 200) + check_col_1("df", "values_block_0", 200) assert store.get_storer("df").data_columns == ["B"] # infer the .typ on subsequent appends diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 8aff217cca5c1..ab7bb8b2eff99 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -65,10 +65,10 @@ def test_deferred_with_groupby(): df = DataFrame(data, columns=["date", "id", "score"]) df.date = pd.to_datetime(df.date) - def f(x): + def f_0(x): return x.set_index("date").resample("D").asfreq() - expected = df.groupby("id").apply(f) + expected = df.groupby("id").apply(f_0) result = 
df.set_index("date").groupby("id").resample("D").asfreq() tm.assert_frame_equal(result, expected) @@ -80,10 +80,10 @@ def f(x): } ).set_index("date") - def f(x): + def f_0(x): return x.resample("1D").ffill() - expected = df.groupby("group").apply(f) + expected = df.groupby("group").apply(f_0) result = df.groupby("group").resample("1D").ffill() tm.assert_frame_equal(result, expected) @@ -257,16 +257,16 @@ def test_apply(): # reduction expected = g.resample("2s").sum() - def f(x): + def f_0(x): return x.resample("2s").sum() - result = r.apply(f) + result = r.apply(f_0) tm.assert_frame_equal(result, expected) - def f(x): + def f_1(x): return x.resample("2s").apply(lambda y: y.sum()) - result = g.apply(f) + result = g.apply(f_1) # y.sum() results in int64 instead of int32 on 32-bit architectures expected = expected.astype("int64") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 5f4805eaa01d2..38ac6bb2e1c09 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -1064,10 +1064,10 @@ def test_expanding_corr_cov(self, f): result = getattr(r, f)(self.frame) - def func(x): + def func_0(x): return getattr(x.expanding(), f)(self.frame) - expected = g.apply(func) + expected = g.apply(func_0) # GH 39591: groupby.apply returns 1 instead of nan for windows # with all nan values null_idx = list(range(20, 61)) + list(range(72, 113)) @@ -1079,10 +1079,10 @@ def func(x): result = getattr(r.B, f)(pairwise=True) - def func(x): + def func_1(x): return getattr(x.B.expanding(), f)(pairwise=True) - expected = g.apply(func) + expected = g.apply(func_1) tm.assert_series_equal(result, expected) def test_expanding_apply(self, raw): diff --git a/pyproject.toml b/pyproject.toml index 3e87d237170aa..a30bdf2de7456 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,6 @@ disable = [ "access-member-before-definition", "bad-super-call", "c-extension-no-member", - "function-redefined", "import-error", "inherit-non-class", "invalid-repr-returned", From 92e952a0bed9e769fe64a03696b027d0242ecbf6 Mon Sep 17 00:00:00 2001 From: Florian Hofstetter Date: Sat, 1 Oct 2022 16:22:57 +0200 Subject: [PATCH 2/3] Fix: Reanme duplicate function names in unittests --- pandas/tests/resample/test_resampler_grouper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index ab7bb8b2eff99..ceb9d6e2fda4d 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -80,10 +80,10 @@ def f_0(x): } ).set_index("date") - def f_0(x): + def f_1(x): return x.resample("1D").ffill() - expected = df.groupby("group").apply(f_0) + expected = df.groupby("group").apply(f_1) result = df.groupby("group").resample("1D").ffill() tm.assert_frame_equal(result, expected) From 6011b11d0d098b53d9393245a5c5642402c19991 Mon Sep 17 00:00:00 2001 From: Florian Hofstetter Date: Sun, 2 Oct 2022 11:00:44 +0200 Subject: [PATCH 3/3] Fix: Reanme duplicate function names in unittests --- pandas/tests/io/pytables/test_append.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py index 917a208465578..cae6c4924015d 100644 --- a/pandas/tests/io/pytables/test_append.py +++ b/pandas/tests/io/pytables/test_append.py @@ -361,7 +361,7 @@ def 
test_append_with_strings(setup_path): with ensure_clean_store(setup_path) as store: with catch_warnings(record=True): - def check_col_0(key, name, size): + def check_col(key, name, size): assert ( getattr(store.get_storer(key).table.description, name).itemsize == size @@ -371,20 +371,20 @@ def check_col_0(key, name, size): df = DataFrame([[123, "asdqwerty"], [345, "dggnhebbsdfbdfb"]]) store.append("df_big", df) tm.assert_frame_equal(store.select("df_big"), df) - check_col_0("df_big", "values_block_1", 15) + check_col("df_big", "values_block_1", 15) # appending smaller string ok df2 = DataFrame([[124, "asdqy"], [346, "dggnhefbdfb"]]) store.append("df_big", df2) expected = concat([df, df2]) tm.assert_frame_equal(store.select("df_big"), expected) - check_col_0("df_big", "values_block_1", 15) + check_col("df_big", "values_block_1", 15) # avoid truncation on elements df = DataFrame([[123, "asdqwerty"], [345, "dggnhebbsdfbdfb"]]) store.append("df_big2", df, min_itemsize={"values": 50}) tm.assert_frame_equal(store.select("df_big2"), df) - check_col_0("df_big2", "values_block_1", 50) + check_col("df_big2", "values_block_1", 50) # bigger string on next append store.append("df_new", df) @@ -437,28 +437,25 @@ def check_col_0(key, name, size): with ensure_clean_store(setup_path) as store: - def check_col_1(key, name, size): - assert getattr(store.get_storer(key).table.description, name).itemsize, size - df = DataFrame({"A": "foo", "B": "bar"}, index=range(10)) # a min_itemsize that creates a data_column _maybe_remove(store, "df") store.append("df", df, min_itemsize={"A": 200}) - check_col_1("df", "A", 200) + check_col("df", "A", 200) assert store.get_storer("df").data_columns == ["A"] # a min_itemsize that creates a data_column2 _maybe_remove(store, "df") store.append("df", df, data_columns=["B"], min_itemsize={"A": 200}) - check_col_1("df", "A", 200) + check_col("df", "A", 200) assert store.get_storer("df").data_columns == ["B", "A"] # a min_itemsize that creates a data_column2 _maybe_remove(store, "df") store.append("df", df, data_columns=["B"], min_itemsize={"values": 200}) - check_col_1("df", "B", 200) - check_col_1("df", "values_block_0", 200) + check_col("df", "B", 200) + check_col("df", "values_block_0", 200) assert store.get_storer("df").data_columns == ["B"] # infer the .typ on subsequent appends
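
Background on the renames: in Python, a second def statement with the same name in the same scope simply rebinds that name, so every call made after the rebinding silently resolves to the newest definition. That is the pattern pylint's function-redefined check reports, and it is why the series can drop "function-redefined" from the disable list in pyproject.toml once the duplicated test helpers get distinct names. A minimal standalone sketch of the behaviour, not taken from the pandas test suite:

    def check_col(value):
        # first definition
        return value * 2

    def check_col(value):
        # rebinds the name; the first definition is no longer reachable by name
        return value + 1

    print(check_col(10))  # prints 11, not 20

Suffixing the helpers (check_col_0, check_col_1, f_0 through f_4, aggfun_0, aggfun_1) keeps every definition addressable and lets the lint rule stay enabled.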