diff --git a/pandas/tests/groupby/test_apply_mutate.py b/pandas/tests/groupby/test_apply_mutate.py new file mode 100644 index 0000000000000..529f76bf692ce --- /dev/null +++ b/pandas/tests/groupby/test_apply_mutate.py @@ -0,0 +1,70 @@ +import numpy as np + +import pandas as pd +import pandas._testing as tm + + +def test_mutate_groups(): + + # GH3380 + + df = pd.DataFrame( + { + "cat1": ["a"] * 8 + ["b"] * 6, + "cat2": ["c"] * 2 + + ["d"] * 2 + + ["e"] * 2 + + ["f"] * 2 + + ["c"] * 2 + + ["d"] * 2 + + ["e"] * 2, + "cat3": [f"g{x}" for x in range(1, 15)], + "val": np.random.randint(100, size=14), + } + ) + + def f_copy(x): + x = x.copy() + x["rank"] = x.val.rank(method="min") + return x.groupby("cat2")["rank"].min() + + def f_no_copy(x): + x["rank"] = x.val.rank(method="min") + return x.groupby("cat2")["rank"].min() + + grpby_copy = df.groupby("cat1").apply(f_copy) + grpby_no_copy = df.groupby("cat1").apply(f_no_copy) + tm.assert_series_equal(grpby_copy, grpby_no_copy) + + +def test_no_mutate_but_looks_like(): + + # GH 8467 + # first shows mutation indicator + # second does not, but should yield the same results + df = pd.DataFrame({"key": [1, 1, 1, 2, 2, 2, 3, 3, 3], "value": range(9)}) + + result1 = df.groupby("key", group_keys=True).apply(lambda x: x[:].key) + result2 = df.groupby("key", group_keys=True).apply(lambda x: x.key) + tm.assert_series_equal(result1, result2) + + +def test_apply_function_with_indexing(): + # GH: 33058 + df = pd.DataFrame( + {"col1": ["A", "A", "A", "B", "B", "B"], "col2": [1, 2, 3, 4, 5, 6]} + ) + + def fn(x): + x.col2[x.index[-1]] = 0 + return x.col2 + + result = df.groupby(["col1"], as_index=False).apply(fn) + expected = pd.Series( + [1, 2, 0, 4, 5, 0], + index=pd.MultiIndex.from_tuples( + [(0, 0), (0, 1), (0, 2), (1, 3), (1, 4), (1, 5)] + ), + name="col2", + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index b8d8f56512a69..c88d16e34eab8 100644 --- 
a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -921,51 +921,6 @@ def test_groupby_complex(): tm.assert_series_equal(result, expected) -def test_mutate_groups(): - - # GH3380 - - df = DataFrame( - { - "cat1": ["a"] * 8 + ["b"] * 6, - "cat2": ["c"] * 2 - + ["d"] * 2 - + ["e"] * 2 - + ["f"] * 2 - + ["c"] * 2 - + ["d"] * 2 - + ["e"] * 2, - "cat3": [f"g{x}" for x in range(1, 15)], - "val": np.random.randint(100, size=14), - } - ) - - def f_copy(x): - x = x.copy() - x["rank"] = x.val.rank(method="min") - return x.groupby("cat2")["rank"].min() - - def f_no_copy(x): - x["rank"] = x.val.rank(method="min") - return x.groupby("cat2")["rank"].min() - - grpby_copy = df.groupby("cat1").apply(f_copy) - grpby_no_copy = df.groupby("cat1").apply(f_no_copy) - tm.assert_series_equal(grpby_copy, grpby_no_copy) - - -def test_no_mutate_but_looks_like(): - - # GH 8467 - # first show's mutation indicator - # second does not, but should yield the same results - df = DataFrame({"key": [1, 1, 1, 2, 2, 2, 3, 3, 3], "value": range(9)}) - - result1 = df.groupby("key", group_keys=True).apply(lambda x: x[:].key) - result2 = df.groupby("key", group_keys=True).apply(lambda x: x.key) - tm.assert_series_equal(result1, result2) - - def test_groupby_series_indexed_differently(): s1 = Series( [5.0, -9.0, 4.0, 100.0, -5.0, 55.0, 6.7],