Skip to content

Commit 1dac7e7

Browse files
dsaxton authored and meeseeksmachine committed
Backport PR pandas-dev#31440: BUG: Fix qcut for nullable integers
1 parent 172b33e commit 1dac7e7

File tree

5 files changed

+33
-21
lines changed

5 files changed

+33
-21
lines changed

doc/source/whatsnew/v1.0.1.rst

+1
Original file line number | Diff line number | Diff line change
@@ -121,6 +121,7 @@ ExtensionArray
121121
^^^^^^^^^^^^^^
122122

123123
- Bug in dtype being lost in ``__invert__`` (``~`` operator) for extension-array backed ``Series`` and ``DataFrame`` (:issue:`23087`)
124+
- Bug where :meth:`qcut` would raise when passed a nullable integer. (:issue:`31389`)
124125
-
125126

126127

pandas/core/reshape/tile.py

+7 -8
Original file line number | Diff line number | Diff line change
@@ -202,17 +202,10 @@ def cut(
202202
"""
203203
# NOTE: this binning code is changed a bit from histogram for var(x) == 0
204204

205-
# for handling the cut for datetime and timedelta objects
206205
original = x
207206
x = _preprocess_for_cut(x)
208207
x, dtype = _coerce_to_type(x)
209208

210-
# To support cut(IntegerArray), we convert to object dtype with NaN
211-
# Will properly support in the future.
212-
# https://github.com/pandas-dev/pandas/pull/31290
213-
if is_extension_array_dtype(x.dtype) and is_integer_dtype(x.dtype):
214-
x = x.to_numpy(dtype=object, na_value=np.nan)
215-
216209
if not np.iterable(bins):
217210
if is_scalar(bins) and bins < 1:
218211
raise ValueError("`bins` should be a positive integer.")
@@ -435,7 +428,7 @@ def _bins_to_cuts(
435428

436429
def _coerce_to_type(x):
437430
"""
438-
if the passed data is of datetime/timedelta or bool type,
431+
if the passed data is of datetime/timedelta, bool or nullable int type,
439432
this method converts it to numeric so that cut or qcut method can
440433
handle it
441434
"""
@@ -452,6 +445,12 @@ def _coerce_to_type(x):
452445
elif is_bool_dtype(x):
453446
# GH 20303
454447
x = x.astype(np.int64)
448+
# To support cut and qcut for IntegerArray we convert to float dtype.
449+
# Will properly support in the future.
450+
# https://github.com/pandas-dev/pandas/pull/31290
451+
# https://github.com/pandas-dev/pandas/issues/31389
452+
elif is_extension_array_dtype(x) and is_integer_dtype(x):
453+
x = x.to_numpy(dtype=np.float64, na_value=np.nan)
455454

456455
if dtype is not None:
457456
# GH 19768: force NaT to NaN during integer conversion

pandas/tests/arrays/test_integer.py

-13
Original file line number | Diff line number | Diff line change
@@ -1059,19 +1059,6 @@ def test_value_counts_na():
10591059
tm.assert_series_equal(result, expected)
10601060

10611061

1062-
@pytest.mark.parametrize("bins", [3, [0, 5, 15]])
1063-
@pytest.mark.parametrize("right", [True, False])
1064-
@pytest.mark.parametrize("include_lowest", [True, False])
1065-
def test_cut(bins, right, include_lowest):
1066-
a = np.random.randint(0, 10, size=50).astype(object)
1067-
a[::2] = np.nan
1068-
result = pd.cut(
1069-
pd.array(a, dtype="Int64"), bins, right=right, include_lowest=include_lowest
1070-
)
1071-
expected = pd.cut(a, bins, right=right, include_lowest=include_lowest)
1072-
tm.assert_categorical_equal(result, expected)
1073-
1074-
10751062
def test_array_setitem_nullable_boolean_mask():
10761063
# GH 31446
10771064
ser = pd.Series([1, 2], dtype="Int64")

pandas/tests/reshape/test_cut.py

+13
Original file line number | Diff line number | Diff line change
@@ -612,3 +612,16 @@ def test_cut_incorrect_labels(labels):
612612
msg = "Bin labels must either be False, None or passed in as a list-like argument"
613613
with pytest.raises(ValueError, match=msg):
614614
cut(values, 4, labels=labels)
615+
616+
617+
@pytest.mark.parametrize("bins", [3, [0, 5, 15]])
618+
@pytest.mark.parametrize("right", [True, False])
619+
@pytest.mark.parametrize("include_lowest", [True, False])
620+
def test_cut_nullable_integer(bins, right, include_lowest):
621+
a = np.random.randint(0, 10, size=50).astype(float)
622+
a[::2] = np.nan
623+
result = cut(
624+
pd.array(a, dtype="Int64"), bins, right=right, include_lowest=include_lowest
625+
)
626+
expected = cut(a, bins, right=right, include_lowest=include_lowest)
627+
tm.assert_categorical_equal(result, expected)

pandas/tests/reshape/test_qcut.py

+12
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import numpy as np
44
import pytest
55

6+
import pandas as pd
67
from pandas import (
78
Categorical,
89
DatetimeIndex,
@@ -286,3 +287,14 @@ def test_qcut_bool_coercion_to_int(bins, box, compare):
286287
expected = qcut(data_expected, bins, duplicates="drop")
287288
result = qcut(data_result, bins, duplicates="drop")
288289
compare(result, expected)
290+
291+
292+
@pytest.mark.parametrize("q", [2, 5, 10])
293+
def test_qcut_nullable_integer(q, any_nullable_int_dtype):
294+
arr = pd.array(np.arange(100), dtype=any_nullable_int_dtype)
295+
arr[::2] = pd.NA
296+
297+
result = qcut(arr, q)
298+
expected = qcut(arr.astype(float), q)
299+
300+
tm.assert_categorical_equal(result, expected)

0 commit comments

Comments
 (0)