Skip to content

Commit 4ec87eb

Browse files
BUG: GroupBy.quantile fails with pd.NA (#43150)
1 parent baa1032 commit 4ec87eb

File tree

3 files changed

+44
-0
lines changed

3 files changed

+44
-0
lines changed

doc/source/whatsnew/v1.3.3.rst

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Fixed regressions
1717
- Fixed regression in :class:`DataFrame` constructor failing to broadcast when given a defined :class:`Index` and a length-one list of :class:`Timestamp` (:issue:`42810`)
1818
- Fixed performance regression in :meth:`core.window.ewm.ExponentialMovingWindow.mean` (:issue:`42333`)
1919
- Fixed regression in :meth:`.GroupBy.agg` incorrectly raising in some cases (:issue:`42390`)
20+
- Fixed regression in :meth:`.GroupBy.quantile` which was failing with ``pandas.NA`` (:issue:`42849`)
2021
- Fixed regression in :meth:`merge` where ``on`` columns with ``ExtensionDtype`` or ``bool`` data types were cast to ``object`` in ``right`` and ``outer`` merge (:issue:`40073`)
2122
- Fixed regression in :meth:`RangeIndex.where` and :meth:`RangeIndex.putmask` raising ``AssertionError`` when result did not represent a :class:`RangeIndex` (:issue:`43240`)
2223
- Fixed regression in :meth:`read_parquet` where the ``fastparquet`` engine would not work properly with fastparquet 0.7.0 (:issue:`43075`)

pandas/core/groupby/groupby.py

+4
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ class providing the base-class of operations.
6363
from pandas.core.dtypes.common import (
6464
is_bool_dtype,
6565
is_datetime64_dtype,
66+
is_float_dtype,
6667
is_integer_dtype,
6768
is_numeric_dtype,
6869
is_object_dtype,
@@ -2453,6 +2454,9 @@ def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, np.dtype | None]:
24532454
elif is_timedelta64_dtype(vals.dtype):
24542455
inference = np.dtype("timedelta64[ns]")
24552456
out = np.asarray(vals).astype(float)
2457+
elif isinstance(vals, ExtensionArray) and is_float_dtype(vals):
2458+
inference = np.dtype(np.float64)
2459+
out = vals.to_numpy(dtype=float, na_value=np.nan)
24562460
else:
24572461
out = np.asarray(vals)
24582462

pandas/tests/groupby/test_quantile.py

+39
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,45 @@ def test_groupby_quantile_skips_invalid_dtype(q):
248248
tm.assert_frame_equal(result, expected)
249249

250250

251+
def test_groupby_quantile_NA_float(any_float_dtype):
    """Groupby quantile skips a missing value in a float column.

    The single group holds one valid value (0.2) and one missing value, so
    every requested quantile is expected to be 0.2. The expected result is
    built as float64 for every parametrized dtype.

    NOTE(review): ``any_float_dtype`` is a conftest fixture not visible here;
    presumably it covers both numpy and nullable float dtypes -- confirm.
    """
    # GH#42849
    df = DataFrame({"x": [1, 1], "y": [0.2, np.nan]}, dtype=any_float_dtype)

    # Scalar quantile: the result is indexed by the group key only.
    result = df.groupby("x")["y"].quantile(0.5)
    expected = pd.Series([0.2], dtype=float, index=[1.0], name="y")
    expected.index.name = "x"
    tm.assert_series_equal(result, expected)

    # List of quantiles: the result gains an unnamed inner index level
    # holding the requested quantiles.
    result = df.groupby("x")["y"].quantile([0.5, 0.75])
    expected = pd.Series(
        [0.2] * 2,
        index=pd.MultiIndex.from_product(([1.0], [0.5, 0.75]), names=["x", None]),
        name="y",
    )
    tm.assert_series_equal(result, expected)
266+
267+
268+
def test_groupby_quantile_NA_int(any_int_ea_dtype):
    """Groupby quantile works on integer extension-array columns.

    The single group holds the values 2 and 5, so the median is 3.5. Both the
    Series path (column selected) and the DataFrame path are checked against
    float64 expectations.

    NOTE(review): ``any_int_ea_dtype`` is a conftest fixture not visible here;
    presumably it parametrizes over the nullable integer dtypes -- confirm.
    """
    # GH#42849
    df = DataFrame({"x": [1, 1], "y": [2, 5]}, dtype=any_int_ea_dtype)

    # SeriesGroupBy path.
    result = df.groupby("x")["y"].quantile(0.5)
    expected = pd.Series([3.5], dtype=float, index=Index([1], name="x"), name="y")
    tm.assert_series_equal(result, expected)

    # DataFrameGroupBy path.
    result = df.groupby("x").quantile(0.5)
    expected = DataFrame({"y": 3.5}, index=Index([1], name="x"))
    tm.assert_frame_equal(result, expected)
278+
279+
280+
@pytest.mark.parametrize("dtype", ["Float64", "Float32"])
def test_groupby_quantile_allNA_column(dtype):
    """Groupby quantile of an all-``pd.NA`` column yields NaN.

    Every value in the only group is missing, so the expected quantile is
    ``np.nan`` in a float64 Series indexed by the group key.
    """
    # GH#42849
    df = DataFrame({"x": [1, 1], "y": [pd.NA] * 2}, dtype=dtype)
    result = df.groupby("x")["y"].quantile(0.5)
    expected = pd.Series([np.nan], dtype=float, index=[1.0], name="y")
    expected.index.name = "x"
    tm.assert_series_equal(result, expected)
288+
289+
251290
def test_groupby_timedelta_quantile():
252291
# GH: 29485
253292
df = DataFrame(

0 commit comments

Comments
 (0)