Skip to content

Commit a5daff2

Browse files
TomAugspurger authored and jreback committed
BUG: Handle IntegerArray in pd.cut (#31290)
1 parent 74a5edc commit a5daff2

File tree

2 files changed: +21 -0 lines changed

pandas/core/reshape/tile.py

+8
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
is_datetime64_dtype,
1515
is_datetime64tz_dtype,
1616
is_datetime_or_timedelta_dtype,
17+
is_extension_array_dtype,
1718
is_integer,
19+
is_integer_dtype,
1820
is_list_like,
1921
is_scalar,
2022
is_timedelta64_dtype,
@@ -205,6 +207,12 @@ def cut(
205207
x = _preprocess_for_cut(x)
206208
x, dtype = _coerce_to_type(x)
207209

210+
# To support cut(IntegerArray), we convert to object dtype with NaN
211+
# IntegerArray will be properly supported in the future.
212+
# https://github.com/pandas-dev/pandas/pull/31290
213+
if is_extension_array_dtype(x.dtype) and is_integer_dtype(x.dtype):
214+
x = x.to_numpy(dtype=object, na_value=np.nan)
215+
208216
if not np.iterable(bins):
209217
if is_scalar(bins) and bins < 1:
210218
raise ValueError("`bins` should be a positive integer.")

pandas/tests/arrays/test_integer.py

+13
Original file line numberDiff line numberDiff line change
@@ -1061,6 +1061,19 @@ def test_value_counts_na():
10611061
tm.assert_series_equal(result, expected)
10621062

10631063

1064+
@pytest.mark.parametrize("bins", [3, [0, 5, 15]])
1065+
@pytest.mark.parametrize("right", [True, False])
1066+
@pytest.mark.parametrize("include_lowest", [True, False])
1067+
def test_cut(bins, right, include_lowest):
1068+
a = np.random.randint(0, 10, size=50).astype(object)
1069+
a[::2] = np.nan
1070+
result = pd.cut(
1071+
pd.array(a, dtype="Int64"), bins, right=right, include_lowest=include_lowest
1072+
)
1073+
expected = pd.cut(a, bins, right=right, include_lowest=include_lowest)
1074+
tm.assert_categorical_equal(result, expected)
1075+
1076+
10641077
# TODO(jreback) - these need testing / are broken
10651078

10661079
# shift

0 commit comments

Comments
 (0)