Skip to content

Commit 8267427

Browse files
authored
BUG: Fix SeriesGroupBy.quantile for nullable integers (#33138)
1 parent 45c13a9 commit 8267427

File tree

3 files changed

+33
-2
lines changed

3 files changed

+33
-2
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,7 @@ Groupby/resample/rolling
445445
- Bug in :meth:`DataFrameGroupBy.transform` producing incorrect results with transformation functions (:issue:`30918`)
446446
- Bug in :meth:`GroupBy.count` causing a segmentation fault when the grouped-by column contains NaNs (:issue:`32841`)
447447
- Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` producing inconsistent types when aggregating Boolean series (:issue:`32894`)
448+
- Bug in :meth:`SeriesGroupBy.quantile` raising on nullable integers and nullable Booleans (:issue:`33136`)
448449
- Bug in :meth:`SeriesGroupBy.first`, :meth:`SeriesGroupBy.last`, :meth:`SeriesGroupBy.min`, and :meth:`SeriesGroupBy.max` returning floats when applied to nullable Booleans (:issue:`33071`)
449450
- Bug in :meth:`DataFrameGroupBy.agg` with dictionary input losing ``ExtensionArray`` dtypes (:issue:`32194`)
450451
- Bug in :meth:`DataFrame.resample` where an ``AmbiguousTimeError`` would be raised when the resulting timezone aware :class:`DatetimeIndex` had a DST transition at midnight (:issue:`25758`)

pandas/core/groupby/groupby.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ class providing the base-class of operations.
4444
from pandas.core.dtypes.cast import maybe_cast_result
4545
from pandas.core.dtypes.common import (
4646
ensure_float,
47+
is_bool_dtype,
4748
is_datetime64_dtype,
49+
is_extension_array_dtype,
4850
is_integer_dtype,
4951
is_numeric_dtype,
5052
is_object_dtype,
@@ -1867,9 +1869,13 @@ def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]:
18671869
)
18681870

18691871
inference = None
1870-
if is_integer_dtype(vals):
1872+
if is_integer_dtype(vals.dtype):
1873+
if is_extension_array_dtype(vals.dtype):
1874+
vals = vals.to_numpy(dtype=float, na_value=np.nan)
18711875
inference = np.int64
1872-
elif is_datetime64_dtype(vals):
1876+
elif is_bool_dtype(vals.dtype) and is_extension_array_dtype(vals.dtype):
1877+
vals = vals.to_numpy(dtype=float, na_value=np.nan)
1878+
elif is_datetime64_dtype(vals.dtype):
18731879
inference = "datetime64[ns]"
18741880
vals = np.asarray(vals).astype(np.float)
18751881

pandas/tests/groupby/test_function.py

+24
Original file line numberDiff line numberDiff line change
@@ -1519,6 +1519,30 @@ def test_quantile_missing_group_values_correct_results():
15191519
tm.assert_frame_equal(result, expected)
15201520

15211521

1522+
@pytest.mark.parametrize(
1523+
"values",
1524+
[
1525+
pd.array([1, 0, None] * 2, dtype="Int64"),
1526+
pd.array([True, False, None] * 2, dtype="boolean"),
1527+
],
1528+
)
1529+
@pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]])
1530+
def test_groupby_quantile_nullable_array(values, q):
1531+
# https://github.com/pandas-dev/pandas/issues/33136
1532+
df = pd.DataFrame({"a": ["x"] * 3 + ["y"] * 3, "b": values})
1533+
result = df.groupby("a")["b"].quantile(q)
1534+
1535+
if isinstance(q, list):
1536+
idx = pd.MultiIndex.from_product((["x", "y"], q), names=["a", None])
1537+
true_quantiles = [0.0, 0.5, 1.0]
1538+
else:
1539+
idx = pd.Index(["x", "y"], name="a")
1540+
true_quantiles = [0.5]
1541+
1542+
expected = pd.Series(true_quantiles * 2, index=idx, name="b")
1543+
tm.assert_series_equal(result, expected)
1544+
1545+
15221546
# pipe
15231547
# --------------------------------
15241548

0 commit comments

Comments
 (0)