|
| 1 | +import numpy as np |
| 2 | + |
| 3 | +import pandas as pd |
| 4 | +import pandas._testing as tm |
| 5 | + |
| 6 | + |
def test_mutate_groups():
    """Mutating the group inside ``apply`` must not change the result.

    Two functions compute the same per-``cat2`` minimum rank of ``val``;
    one adds the helper ``rank`` column to a copy of the group, the other
    adds it to the group object it was handed. Both are applied per
    ``cat1`` group and the resulting Series are required to be equal.
    """
    # GH3380

    # Locally seeded generator: the data is identical on every run, so a
    # failure can be reproduced, and the global NumPy random state is left
    # untouched.
    rng = np.random.default_rng(2)

    df = pd.DataFrame(
        {
            "cat1": ["a"] * 8 + ["b"] * 6,
            "cat2": ["c"] * 2
            + ["d"] * 2
            + ["e"] * 2
            + ["f"] * 2
            + ["c"] * 2
            + ["d"] * 2
            + ["e"] * 2,
            "cat3": [f"g{x}" for x in range(1, 15)],
            "val": rng.integers(100, size=14),
        }
    )

    def f_copy(x):
        # Work on a private copy so the group passed in by apply is untouched.
        x = x.copy()
        x["rank"] = x.val.rank(method="min")
        return x.groupby("cat2")["rank"].min()

    def f_no_copy(x):
        # Add the column directly to the object handed over by apply.
        x["rank"] = x.val.rank(method="min")
        return x.groupby("cat2")["rank"].min()

    grpby_copy = df.groupby("cat1").apply(f_copy)
    grpby_no_copy = df.groupby("cat1").apply(f_no_copy)
    tm.assert_series_equal(grpby_copy, grpby_no_copy)
| 38 | + |
| 39 | + |
def test_no_mutate_but_looks_like():
    """Selecting a column via a full slice or directly must give equal results.

    Neither applied function modifies its group; the first merely goes
    through ``x[:]`` before selecting the column.
    """
    # GH 8467
    # first shows the mutation indicator
    # second does not, but should yield the same results
    df = pd.DataFrame({"key": [1, 1, 1, 2, 2, 2, 3, 3, 3], "value": range(9)})

    # Select the non-grouping column "value": reading the grouping column
    # ("key") from inside apply is deprecated in newer pandas, while
    # "value" is available to the applied function on every version.
    result1 = df.groupby("key", group_keys=True).apply(lambda x: x[:].value)
    result2 = df.groupby("key", group_keys=True).apply(lambda x: x.value)
    tm.assert_series_equal(result1, result2)
| 50 | + |
| 51 | + |
def test_apply_function_with_indexing():
    """An applied function that overwrites a group's last ``col2`` value.

    ``fn`` sets the final ``col2`` entry of each group to 0 and returns the
    column; the combined result must carry that zero for both groups.
    """
    # GH: 33058
    df = pd.DataFrame(
        {"col1": ["A", "A", "A", "B", "B", "B"], "col2": [1, 2, 3, 4, 5, 6]}
    )

    def fn(x):
        # One .loc assignment on the frame itself. The chained form
        # ``x.col2[label] = 0`` assigns into an intermediate Series and is
        # not guaranteed to update ``x`` (it never does under Copy-on-Write).
        x.loc[x.index[-1], "col2"] = 0
        return x.col2

    result = df.groupby(["col1"], as_index=False).apply(fn)
    expected = pd.Series(
        [1, 2, 0, 4, 5, 0],
        index=pd.MultiIndex.from_tuples(
            [(0, 0), (0, 1), (0, 2), (1, 3), (1, 4), (1, 5)]
        ),
        name="col2",
    )
    tm.assert_series_equal(result, expected)
0 commit comments