Skip to content

Commit 9c99be9

Browse files
ryankarlosNico Cernek
authored and
Nico Cernek
committed
BUG: Coercing bool types to int in qcut (pandas-dev#28802)
1 parent 1c6f99f commit 9c99be9

File tree

4 files changed

+43
-2
lines changed

4 files changed

+43
-2
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ Reshaping
342342
- :meth:`DataFrame.merge` now preserves right frame's row order when executing a right merge (:issue:`27453`)
343343
- Bug in :func:`merge_asof` where :class:`datetime.timedelta` could not be used for the ``tolerance`` kwarg (:issue:`28098`)
344344
- Bug in :func:`merge` where suffixes were not appended correctly with a MultiIndex (:issue:`28518`)
345+
- :func:`qcut` and :func:`cut` now handle boolean input (:issue:`20303`)
345346

346347
Sparse
347348
^^^^^^

pandas/core/reshape/tile.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from pandas.core.dtypes.common import (
1212
_NS_DTYPE,
1313
ensure_int64,
14+
is_bool_dtype,
1415
is_categorical_dtype,
1516
is_datetime64_dtype,
1617
is_datetime64tz_dtype,
@@ -423,8 +424,8 @@ def _bins_to_cuts(
423424

424425
def _coerce_to_type(x):
425426
"""
426-
if the passed data is of datetime/timedelta type,
427-
this method converts it to numeric so that cut method can
427+
if the passed data is of datetime/timedelta or bool type,
428+
this method converts it to numeric so that the cut or qcut method can
428429
handle it
429430
"""
430431
dtype = None
@@ -437,6 +438,9 @@ def _coerce_to_type(x):
437438
elif is_timedelta64_dtype(x):
438439
x = to_timedelta(x)
439440
dtype = np.dtype("timedelta64[ns]")
441+
elif is_bool_dtype(x):
442+
# GH 20303
443+
x = x.astype(np.int64)
440444

441445
if dtype is not None:
442446
# GH 19768: force NaT to NaN during integer conversion

pandas/tests/reshape/test_cut.py

+18
Original file line numberDiff line numberDiff line change
@@ -585,3 +585,21 @@ def test_timedelta_cut_roundtrip():
585585
["0 days 23:57:07.200000", "2 days 00:00:00", "3 days 00:00:00"]
586586
)
587587
tm.assert_index_equal(result_bins, expected_bins)
588+
589+
590+
@pytest.mark.parametrize("bins", [6, 7])
591+
@pytest.mark.parametrize(
592+
"box, compare",
593+
[
594+
(Series, tm.assert_series_equal),
595+
(np.array, tm.assert_categorical_equal),
596+
(list, tm.assert_equal),
597+
],
598+
)
599+
def test_cut_bool_coercion_to_int(bins, box, compare):
600+
# issue 20303
601+
data_expected = box([0, 1, 1, 0, 1] * 10)
602+
data_result = box([False, True, True, False, True] * 10)
603+
expected = cut(data_expected, bins, duplicates="drop")
604+
result = cut(data_result, bins, duplicates="drop")
605+
compare(result, expected)

pandas/tests/reshape/test_qcut.py

+18
Original file line numberDiff line numberDiff line change
@@ -236,3 +236,21 @@ def test_date_like_qcut_bins(arg, expected_bins):
236236
ser = Series(arg)
237237
result, result_bins = qcut(ser, 2, retbins=True)
238238
tm.assert_index_equal(result_bins, expected_bins)
239+
240+
241+
@pytest.mark.parametrize("bins", [6, 7])
242+
@pytest.mark.parametrize(
243+
"box, compare",
244+
[
245+
(Series, tm.assert_series_equal),
246+
(np.array, tm.assert_categorical_equal),
247+
(list, tm.assert_equal),
248+
],
249+
)
250+
def test_qcut_bool_coercion_to_int(bins, box, compare):
251+
# issue 20303
252+
data_expected = box([0, 1, 1, 0, 1] * 10)
253+
data_result = box([False, True, True, False, True] * 10)
254+
expected = qcut(data_expected, bins, duplicates="drop")
255+
result = qcut(data_result, bins, duplicates="drop")
256+
compare(result, expected)

0 commit comments

Comments
 (0)