Skip to content

Commit 50e8ef9

Browse files
committed
BUG: GroupBy.min/max with unordered Categorical and no groups
1 parent 7674874 commit 50e8ef9

File tree

3 files changed

+45
-52
lines changed

3 files changed

+45
-52
lines changed

doc/source/whatsnew/v2.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -955,6 +955,7 @@ Categorical
955955
- Bug in :meth:`Series.replace` with categorical dtype losing nullable dtypes of underlying categories (:issue:`49404`)
956956
- Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` would reorder categories when used as a grouper (:issue:`48749`)
957957
- Bug in :class:`Categorical` constructor when constructing from a :class:`Categorical` object and ``dtype="category"`` losing ordered-ness (:issue:`49309`)
958+
- Bug in :meth:`SeriesGroupBy.min`, :meth:`SeriesGroupBy.max`, :meth:`DataFrameGroupBy.min`, and :meth:`DataFrameGroupBy.max` with unordered :class:`CategoricalDtype` with no groups failing to raise ``TypeError`` (:issue:`??`)
958959
-
959960

960961
Datetimelike

pandas/core/groupby/ops.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -226,9 +226,10 @@ def _disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False):
226226
227227
Raises
228228
------
229+
TypeError
230+
This is not a valid operation for this dtype.
229231
NotImplementedError
230-
This is either not a valid function for this dtype, or
231-
valid but not implemented in cython.
232+
This may be a valid operation, but does not have a cython implementation.
232233
"""
233234
how = self.how
234235

@@ -237,16 +238,16 @@ def _disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False):
237238
return
238239

239240
if isinstance(dtype, CategoricalDtype):
240-
# NotImplementedError for methods that can fall back to a
241-
# non-cython implementation.
242241
if how in ["sum", "prod", "cumsum", "cumprod"]:
243242
raise TypeError(f"{dtype} type does not support {how} operations")
243+
if how in ["min", "max", "rank"] and not dtype.ordered:
244+
# raise TypeError instead of NotImplementedError to ensure we
245+
# don't go down a group-by-group path, since in the empty-groups
246+
# case that would fail to raise
247+
raise TypeError(f"Cannot perform {how} with non-ordered Categorical")
244248
if how not in ["rank"]:
245249
# only "rank" is implemented in cython
246250
raise NotImplementedError(f"{dtype} dtype not supported")
247-
if not dtype.ordered:
248-
# TODO: TypeError?
249-
raise NotImplementedError(f"{dtype} dtype not supported")
250251

251252
elif is_sparse(dtype):
252253
# categoricals are only 1d, so we

pandas/tests/groupby/test_groupby.py

Lines changed: 36 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1901,25 +1901,12 @@ def test_empty_groupby(
19011901
raises=ValueError, match="attempt to get arg(min|max) of an empty sequence"
19021902
)
19031903
request.node.add_marker(mark)
1904-
elif (
1905-
isinstance(values, Categorical)
1906-
and len(keys) == 1
1907-
and not isinstance(columns, list)
1908-
):
1909-
mark = pytest.mark.xfail(
1910-
raises=TypeError, match="'Categorical' does not implement"
1911-
)
1912-
request.node.add_marker(mark)
19131904
elif isinstance(values, Categorical) and len(keys) == 1 and op in ["sum", "prod"]:
19141905
mark = pytest.mark.xfail(
19151906
raises=AssertionError, match="(DataFrame|Series) are different"
19161907
)
19171908
request.node.add_marker(mark)
1918-
elif (
1919-
isinstance(values, Categorical)
1920-
and len(keys) == 2
1921-
and op in ["min", "max", "sum"]
1922-
):
1909+
elif isinstance(values, Categorical) and len(keys) == 2 and op in ["sum"]:
19231910
mark = pytest.mark.xfail(
19241911
raises=AssertionError, match="(DataFrame|Series) are different"
19251912
)
@@ -1949,6 +1936,31 @@ def get_result(**kwargs):
19491936
else:
19501937
return getattr(gb, method)(op, **kwargs)
19511938

1939+
if isinstance(values, Categorical) and not values.ordered and op in ["min", "max"]:
1940+
msg = f"Cannot perform {op} with non-ordered Categorical"
1941+
with pytest.raises(TypeError, match=msg):
1942+
get_result()
1943+
1944+
if isinstance(columns, list):
1945+
# i.e. DataFrameGroupBy, not SeriesGroupBy
1946+
result = get_result(numeric_only=True)
1947+
1948+
# Categorical is special without 'observed=True': we get a NaN entry
1949+
# corresponding to the unobserved group. If we passed observed=True
1950+
# to groupby, expected would just be 'df.set_index(keys)[columns]'
1951+
# as below
1952+
lev = Categorical([0], dtype=values.dtype)
1953+
if len(keys) != 1:
1954+
idx = MultiIndex.from_product([lev, lev], names=keys)
1955+
else:
1956+
# all columns are dropped, but we end up with one row
1957+
# Categorical is special without 'observed=True'
1958+
idx = Index(lev, name=keys[0])
1959+
1960+
expected = DataFrame([], columns=[], index=idx)
1961+
tm.assert_equal(result, expected)
1962+
return
1963+
19521964
if columns == "C":
19531965
# i.e. SeriesGroupBy
19541966
if op in ["prod", "sum", "skew"]:
@@ -2018,38 +2030,17 @@ def get_result(**kwargs):
20182030
tm.assert_equal(result, expected)
20192031
return
20202032

2021-
if (op in ["min", "max", "skew"] and isinstance(values, Categorical)) or (
2022-
op == "skew" and df.dtypes[0].kind == "M"
2033+
if op == "skew" and (
2034+
isinstance(values, Categorical) or df.dtypes[0].kind == "M"
20232035
):
2024-
if op == "skew" or len(keys) == 1:
2025-
msg = "|".join(
2026-
[
2027-
"Categorical is not ordered",
2028-
"does not support reduction",
2029-
]
2030-
)
2031-
with pytest.raises(TypeError, match=msg):
2032-
get_result()
2033-
return
2034-
# Categorical doesn't implement, so with numeric_only=True
2035-
# these are dropped and we get an empty DataFrame back
2036-
result = get_result()
2037-
2038-
# with numeric_only=True, these are dropped, and we get
2039-
# an empty DataFrame back
2040-
if len(keys) != 1:
2041-
# Categorical is special without 'observed=True'
2042-
lev = Categorical([0], dtype=values.dtype)
2043-
mi = MultiIndex.from_product([lev, lev], names=keys)
2044-
expected = DataFrame([], columns=[], index=mi)
2045-
else:
2046-
# all columns are dropped, but we end up with one row
2047-
# Categorical is special without 'observed=True'
2048-
lev = Categorical([0], dtype=values.dtype)
2049-
ci = Index(lev, name=keys[0])
2050-
expected = DataFrame([], columns=[], index=ci)
2051-
2052-
tm.assert_equal(result, expected)
2036+
msg = "|".join(
2037+
[
2038+
"Categorical is not ordered",
2039+
"does not support reduction",
2040+
]
2041+
)
2042+
with pytest.raises(TypeError, match=msg):
2043+
get_result()
20532044
return
20542045

20552046
result = get_result()

0 commit comments

Comments
 (0)