Skip to content

Commit 5d6e352

Browse files
Backport PR #43150 on branch 1.3.x (BUG: GroupBy.quantile fails with pd.NA) (#43417)
1 parent ac09649 commit 5d6e352

File tree

3 files changed

+46
-0
lines changed

3 files changed

+46
-0
lines changed

doc/source/whatsnew/v1.3.3.rst

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Fixed regressions
1717
- Fixed regression in :class:`DataFrame` constructor failing to broadcast for a defined :class:`Index` and a length-one list of :class:`Timestamp` (:issue:`42810`)
1818
- Fixed performance regression in :meth:`core.window.ewm.ExponentialMovingWindow.mean` (:issue:`42333`)
1919
- Fixed regression in :meth:`.GroupBy.agg` incorrectly raising in some cases (:issue:`42390`)
20+
- Fixed regression in :meth:`.GroupBy.quantile` which was failing with ``pandas.NA`` (:issue:`42849`)
2021
- Fixed regression in :meth:`.GroupBy.apply` where ``nan`` values were dropped even with ``dropna=False`` (:issue:`43205`)
2122
- Fixed regression in :meth:`merge` where ``on`` columns with ``ExtensionDtype`` or ``bool`` data types were cast to ``object`` in ``right`` and ``outer`` merge (:issue:`40073`)
2223
- Fixed regression in :meth:`RangeIndex.where` and :meth:`RangeIndex.putmask` raising ``AssertionError`` when result did not represent a :class:`RangeIndex` (:issue:`43240`)

pandas/core/groupby/groupby.py

+4
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ class providing the base-class of operations.
6363
from pandas.core.dtypes.common import (
6464
is_bool_dtype,
6565
is_datetime64_dtype,
66+
is_float_dtype,
6667
is_integer_dtype,
6768
is_numeric_dtype,
6869
is_object_dtype,
@@ -2450,6 +2451,9 @@ def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, np.dtype | None]:
24502451
elif is_timedelta64_dtype(vals.dtype):
24512452
inference = np.dtype("timedelta64[ns]")
24522453
out = np.asarray(vals).astype(float)
2454+
elif isinstance(vals, ExtensionArray) and is_float_dtype(vals):
2455+
inference = np.dtype(np.float64)
2456+
out = vals.to_numpy(dtype=float, na_value=np.nan)
24532457
else:
24542458
out = np.asarray(vals)
24552459

pandas/tests/groupby/test_quantile.py

+41
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,47 @@ def test_groupby_quantile_skips_invalid_dtype(q):
248248
tm.assert_frame_equal(result, expected)
249249

250250

251+
def test_groupby_quantile_NA_float(any_float_allowed_nullable_dtype):
    # GH#42849
    # Single group (x == 1 for both rows) whose "y" column holds one real
    # value and one missing value; the expected quantiles below equal the
    # lone non-missing value, 0.2.
    df = DataFrame(
        {"x": [1, 1], "y": [0.2, np.nan]}, dtype=any_float_allowed_nullable_dtype
    )

    # Scalar quantile: one row per group, indexed by the group key.
    result = df.groupby("x")["y"].quantile(0.5)
    expected = pd.Series([0.2], dtype=float, index=Index(df["x"][:1]), name="y")
    # Pass (result, expected) so failure messages label "left" as the actual
    # value, matching the assertion at the end of this test.
    tm.assert_series_equal(result, expected)

    # List of quantiles: result is indexed by (group key, quantile).
    result = df.groupby("x")["y"].quantile([0.5, 0.75])
    expected = pd.Series(
        [0.2] * 2,
        index=pd.MultiIndex.from_arrays(
            [Index(df["x"]), [0.5, 0.75]], names=["x", None]
        ),
        name="y",
    )
    tm.assert_series_equal(result, expected)
269+
270+
271+
def test_groupby_quantile_NA_int(any_nullable_int_dtype):
    # GH#42849
    # Single group (x == 1 for both rows) with integer values 2 and 5 in "y";
    # the expected median is the float 3.5.
    df = DataFrame({"x": [1, 1], "y": [2, 5]}, dtype=any_nullable_int_dtype)

    # SeriesGroupBy path.
    result = df.groupby("x")["y"].quantile(0.5)
    expected = pd.Series([3.5], dtype=float, index=Index(df["x"][:1]), name="y")
    # Pass (result, expected) so failure messages label "left" as the actual
    # value, matching the frame assertion below.
    tm.assert_series_equal(result, expected)

    # DataFrameGroupBy path: same value, returned as a one-column frame.
    result = df.groupby("x").quantile(0.5)
    expected = DataFrame({"y": 3.5}, index=Index(df["x"][:1]))
    tm.assert_frame_equal(result, expected)
281+
282+
283+
@pytest.mark.parametrize("dtype", ["Float64", "Float32"])
def test_groupby_quantile_allNA_column(dtype):
    # GH#42849
    # Every value in "y" is pd.NA, so the group has nothing to take a
    # quantile of; the expected result is a single NaN of dtype float.
    df = DataFrame({"x": [1, 1], "y": [pd.NA] * 2}, dtype=dtype)
    result = df.groupby("x")["y"].quantile(0.5)
    expected = pd.Series([np.nan], dtype=float, index=Index(df["x"][:1]), name="y")
    # Pass (result, expected) so failure messages label "left" as the actual
    # value, consistent with the other quantile tests in this module.
    tm.assert_series_equal(result, expected)
290+
291+
251292
def test_groupby_timedelta_quantile():
252293
# GH: 29485
253294
df = DataFrame(

0 commit comments

Comments
 (0)