|
| 1 | +import pytest |
| 2 | + |
| 3 | +import pandas.util.testing as tm |
| 4 | +import pandas as pd |
| 5 | +from .base import BaseExtensionTests |
| 6 | + |
| 7 | + |
class BaseGroupbyTests(BaseExtensionTests):
    """Groupby-specific tests.

    Every test consumes the ``data_for_grouping`` fixture. The literal
    columns paired with it have eight entries (six once missing values are
    dropped), and the hard-coded expectations presumably assume the layout
    ``[B, B, NA, NA, A, A, B, C]`` with ``A < B < C`` -- confirm against
    the fixture definition.
    """

    def test_grouping_grouper(self, data_for_grouping):
        """Compare each grouping's ``grouper`` with the column it came from."""
        frame = pd.DataFrame({
            "A": ["B", "B", None, None, "A", "A", "B", "C"],
            "B": data_for_grouping
        })
        object_grouping = frame.groupby("A").grouper.groupings[0]
        extension_grouping = frame.groupby("B").grouper.groupings[0]

        tm.assert_numpy_array_equal(object_grouping.grouper, frame.A.values)
        tm.assert_extension_array_equal(
            extension_grouping.grouper, data_for_grouping
        )

    @pytest.mark.parametrize('as_index', [True, False])
    def test_groupby_extension_agg(self, as_index, data_for_grouping):
        """Aggregate ``A`` by the extension column with sorted group keys.

        With ``as_index=True`` the result is compared as a Series; otherwise
        the expected Series is reset to a frame before comparing.
        """
        columns = {"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}
        frame = pd.DataFrame(columns)
        result = frame.groupby("B", as_index=as_index)["A"].mean()

        # Only the uniques are needed; sort=True mirrors groupby's default.
        uniques = pd.factorize(data_for_grouping, sort=True)[1]
        # TODO(ExtensionIndex): remove astype
        keys = pd.Index(uniques.astype(object), name="B")
        expected = pd.Series([3, 1, 4], index=keys, name="A")

        if not as_index:
            self.assert_frame_equal(result, expected.reset_index())
            return
        self.assert_series_equal(result, expected)

    def test_groupby_extension_no_sort(self, data_for_grouping):
        """Aggregate ``A`` by the extension column with ``sort=False``."""
        columns = {"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}
        frame = pd.DataFrame(columns)
        result = frame.groupby("B", sort=False)["A"].mean()

        # sort=False on both sides so the expected keys use the same order.
        uniques = pd.factorize(data_for_grouping, sort=False)[1]
        # TODO(ExtensionIndex): remove astype
        keys = pd.Index(uniques.astype(object), name="B")
        expected = pd.Series([1, 3, 4], index=keys, name="A")

        self.assert_series_equal(result, expected)

    def test_groupby_extension_transform(self, data_for_grouping):
        """Transform ``A`` with ``len`` after dropping missing group keys."""
        not_missing = ~data_for_grouping.isna()
        valid = data_for_grouping[not_missing]
        frame = pd.DataFrame({"A": [1, 1, 3, 3, 1, 4], "B": valid})

        result = frame.groupby("B")["A"].transform(len)
        expected = pd.Series([3, 3, 2, 2, 3, 1], name="A")

        self.assert_series_equal(result, expected)

    @pytest.mark.parametrize('op', [
        lambda group: 1,
        lambda group: [1] * len(group),
        lambda group: pd.Series([1] * len(group)),
        lambda group: group,
    ], ids=['scalar', 'list', 'series', 'object'])
    def test_groupby_extension_apply(self, data_for_grouping, op):
        """Smoke test: run ``apply`` with each op; no result is asserted."""
        columns = {"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}
        frame = pd.DataFrame(columns)

        # Group by the extension column first, then by the plain column;
        # each time apply to the whole frame and then to the other column.
        for key, other in (("B", "A"), ("A", "B")):
            frame.groupby(key).apply(op)
            frame.groupby(key)[other].apply(op)
0 commit comments