diff --git a/pandas/core/apply.py b/pandas/core/apply.py index ca1be3154757a..9947866a76e3f 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -14,7 +14,7 @@ is_list_like, is_sequence, ) -from pandas.core.dtypes.generic import ABCMultiIndex, ABCSeries +from pandas.core.dtypes.generic import ABCSeries from pandas.core.construction import create_series_with_explicit_dtype @@ -278,9 +278,8 @@ def apply_standard(self): if ( self.result_type in ["reduce", None] and not self.dtypes.apply(is_extension_array_dtype).any() - # Disallow complex_internals since libreduction shortcut - # cannot handle MultiIndex - and not isinstance(self.agg_axis, ABCMultiIndex) + # Disallow complex_internals since libreduction shortcut raises a TypeError + and not self.agg_axis._has_complex_internals ): values = self.values diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 37067a1897a52..679d3668523c2 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -164,8 +164,8 @@ def apply(self, f, data: FrameOrSeries, axis: int = 0): com.get_callable_name(f) not in base.plotting_methods and isinstance(splitter, FrameSplitter) and axis == 0 - # apply_frame_axis0 doesn't allow MultiIndex - and not isinstance(sdata.index, MultiIndex) + # fast_apply/libreduction doesn't allow non-numpy backed indexes + and not sdata.index._has_complex_internals ): try: result_values, mutated = splitter.fast_apply(f, group_keys) @@ -616,8 +616,8 @@ def agg_series(self, obj: Series, func): # TODO: can we get a performant workaround for EAs backed by ndarray? return self._aggregate_series_pure_python(obj, func) - elif isinstance(obj.index, MultiIndex): - # MultiIndex; Pre-empt TypeError in _aggregate_series_fast + elif obj.index._has_complex_internals: + # Pre-empt TypeError in _aggregate_series_fast return self._aggregate_series_pure_python(obj, func) try: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index bab3d2d1b5431..00f0984d43578 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4109,6 +4109,14 @@ def _assert_can_do_op(self, value): if not is_scalar(value): raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}") + @property + def _has_complex_internals(self): + """ + Indicates if an index is not directly backed by a numpy array + """ + # used to avoid libreduction code paths, which raise or require conversion + return False + def _is_memory_usage_qualified(self) -> bool: """ Return a boolean if we need a qualified .info display. diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 268ab9ba4e4c4..36ba86bdc9f07 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -378,6 +378,11 @@ def values(self): """ return the underlying data, which is a Categorical """ return self._data + @property + def _has_complex_internals(self): + # used to avoid libreduction code paths, which raise or require conversion + return True + def _wrap_setop_result(self, other, result): name = get_op_result_name(self, other) # We use _shallow_copy rather than the Index implementation diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index a756900ff9ae5..89db2b4a7a379 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -411,6 +411,11 @@ def values(self): """ return self._data + @property + def _has_complex_internals(self): + # used to avoid libreduction code paths, which raise or require conversion + return True + def __array_wrap__(self, result, context=None): # we don't want the superclass implementation return result diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 488f3617f2c3c..2aa7a00707f24 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1346,6 +1346,11 @@ def values(self): self._tuples = lib.fast_zip(values) return self._tuples + @property + def _has_complex_internals(self): + # used to avoid libreduction code paths, which raise or require conversion + return True + @cache_readonly def is_monotonic_increasing(self) -> bool: """ diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index fe6c1ba808f9a..106aaeec4f8be 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -255,6 +255,11 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): def values(self): return np.asarray(self) + @property + def _has_complex_internals(self): + # used to avoid libreduction code paths, which raise or require conversion + return True + def _shallow_copy(self, values=None, **kwargs): # TODO: simplify, figure out type of values if values is None: diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 0a7272bbc131c..67bdcc246579e 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -360,6 +360,23 @@ def test_func_duplicates_raises(): df.groupby("A").agg(["min", "min"]) +@pytest.mark.parametrize( + "index", + [ + pd.CategoricalIndex(list("abc")), + pd.interval_range(0, 3), + pd.period_range("2020", periods=3, freq="D"), + pd.MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]), + ], +) +def test_agg_index_has_complex_internals(index): + # GH 31223 + df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index) + result = df.groupby("group").agg({"value": Series.nunique}) + expected = DataFrame({"group": [1, 2], "value": [2, 1]}).set_index("group") + tm.assert_frame_equal(result, expected) + + class TestNamedAggregationSeries: def test_series_named_agg(self): df = pd.Series([1, 2, 3, 4]) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index fc7b9f56002d8..c18ef73203914 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -811,3 +811,19 @@ def test_groupby_apply_datetime_result_dtypes(): index=["observation", "color", "mood", "intensity", "score"], ) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "index", + [ + pd.CategoricalIndex(list("abc")), + pd.interval_range(0, 3), + pd.period_range("2020", periods=3, freq="D"), + pd.MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]), + ], +) +def test_apply_index_has_complex_internals(index): + # GH 31248 + df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index) + result = df.groupby("group").apply(lambda x: x) + tm.assert_frame_equal(result, df)