diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 53041441ba040..605b9fd916348 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -337,6 +337,7 @@ Reshaping - Bug in :meth:`DataFrame.stack` not handling non-unique indexes correctly when creating MultiIndex (:issue: `28301`) - Bug :func:`merge_asof` could not use :class:`datetime.timedelta` for ``tolerance`` kwarg (:issue:`28098`) - Bug in :func:`merge`, did not append suffixes correctly with MultiIndex (:issue:`28518`) +- :func:`qcut` and :func:`cut` now handle boolean input (:issue:`20303`) Sparse ^^^^^^ diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index ab354a21a33df..be5d75224e77d 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -11,6 +11,7 @@ from pandas.core.dtypes.common import ( _NS_DTYPE, ensure_int64, + is_bool_dtype, is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, @@ -423,8 +424,8 @@ def _bins_to_cuts( def _coerce_to_type(x): """ - if the passed data is of datetime/timedelta type, - this method converts it to numeric so that cut method can + if the passed data is of datetime/timedelta or bool type, + this method converts it to numeric so that cut or qcut method can handle it """ dtype = None @@ -437,6 +438,9 @@ def _coerce_to_type(x): elif is_timedelta64_dtype(x): x = to_timedelta(x) dtype = np.dtype("timedelta64[ns]") + elif is_bool_dtype(x): + # GH 20303 + x = x.astype(np.int64) if dtype is not None: # GH 19768: force NaT to NaN during integer conversion diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index a2ebf2359f55f..611c3272c123f 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -585,3 +585,21 @@ def test_timedelta_cut_roundtrip(): ["0 days 23:57:07.200000", "2 days 00:00:00", "3 days 00:00:00"] ) tm.assert_index_equal(result_bins, expected_bins) + + +@pytest.mark.parametrize("bins", [6, 7]) +@pytest.mark.parametrize( + "box, compare", + [ + (Series, tm.assert_series_equal), + (np.array, tm.assert_categorical_equal), + (list, tm.assert_equal), + ], +) +def test_cut_bool_coercion_to_int(bins, box, compare): + # issue 20303 + data_expected = box([0, 1, 1, 0, 1] * 10) + data_result = box([False, True, True, False, True] * 10) + expected = cut(data_expected, bins, duplicates="drop") + result = cut(data_result, bins, duplicates="drop") + compare(result, expected) diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index cb46918157e89..eca9b11bd4364 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -236,3 +236,21 @@ def test_date_like_qcut_bins(arg, expected_bins): ser = Series(arg) result, result_bins = qcut(ser, 2, retbins=True) tm.assert_index_equal(result_bins, expected_bins) + + +@pytest.mark.parametrize("bins", [6, 7]) +@pytest.mark.parametrize( + "box, compare", + [ + (Series, tm.assert_series_equal), + (np.array, tm.assert_categorical_equal), + (list, tm.assert_equal), + ], +) +def test_qcut_bool_coercion_to_int(bins, box, compare): + # issue 20303 + data_expected = box([0, 1, 1, 0, 1] * 10) + data_result = box([False, True, True, False, True] * 10) + expected = qcut(data_expected, bins, duplicates="drop") + result = qcut(data_result, bins, duplicates="drop") + compare(result, expected)