From 1ffdf50bc460f225276f7a7298b8eb3781a2aced Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Sat, 5 Oct 2019 15:54:51 +0100 Subject: [PATCH 1/8] Update whatsnew file --- doc/source/whatsnew/v0.25.2.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst index 9789c9fce3541..380f081e0650f 100644 --- a/doc/source/whatsnew/v0.25.2.rst +++ b/doc/source/whatsnew/v0.25.2.rst @@ -103,6 +103,7 @@ Other - Compatibility with Python 3.8 in :meth:`DataFrame.query` (:issue:`27261`) - Fix to ensure that tab-completion in an IPython console does not raise warnings for deprecated attributes (:issue:`27900`). +- :func:`qcut` now handles bool ndarray/Series (:issue:`20303`) .. _whatsnew_0.252.contributors: From aabbd95475458bf642cc4932180672b59f2e50c6 Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Sat, 5 Oct 2019 16:08:08 +0100 Subject: [PATCH 2/8] Add bool to int coerce --- pandas/core/reshape/tile.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index ab354a21a33df..0127e5e520d46 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -18,6 +18,7 @@ is_integer, is_scalar, is_timedelta64_dtype, + is_bool_dtype ) from pandas.core.dtypes.missing import isna @@ -423,8 +424,8 @@ def _bins_to_cuts( def _coerce_to_type(x): """ - if the passed data is of datetime/timedelta type, - this method converts it to numeric so that cut method can + if the passed data is of datetime/timedelta or bool type, + this method converts it to numeric so that cut or qcut method can handle it """ dtype = None @@ -437,6 +438,9 @@ def _coerce_to_type(x): elif is_timedelta64_dtype(x): x = to_timedelta(x) dtype = np.dtype("timedelta64[ns]") + elif is_bool_dtype(x): + x = x.astype(int) + dtype = x.dtype if dtype is not None: # GH 19768: force NaT to NaN during integer conversion From 7ecaf7981bcc4831a05edee81e7aa12f64409d9f Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Sat, 5 Oct 2019 17:14:33 +0100 Subject: [PATCH 3/8] Add bool to int coercion tests for cut and qcut --- pandas/tests/reshape/test_cut.py | 11 +++++++++++ pandas/tests/reshape/test_qcut.py | 10 ++++++++++ 2 files changed, 21 insertions(+) diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index a2ebf2359f55f..aeffc2a4efd56 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -585,3 +585,14 @@ def test_timedelta_cut_roundtrip(): ["0 days 23:57:07.200000", "2 days 00:00:00", "3 days 00:00:00"] ) tm.assert_index_equal(result_bins, expected_bins) + + +@pytest.mark.parametrize("bins", [6, 7]) +@pytest.mark.parametrize("box", "compare", [(Series, tm.assert_series_equal), (np.array,tm.assert_categorical_equal)]) +def test_cut_bool_coercion_to_int(bins, box, compare): + # issue 20303 + x = box(np.random.randint(2, size=200)) + expected = cut(x, bins, duplicates='drop') + data = x.astype(bool) + result = cut(data, bins, duplicates='drop') + compare(result, expected) \ No newline at end of file diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index cb46918157e89..7a5920c87fb6f 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -236,3 +236,13 @@ def test_date_like_qcut_bins(arg, expected_bins): ser = Series(arg) result, result_bins = qcut(ser, 2, retbins=True) tm.assert_index_equal(result_bins, expected_bins) + +@pytest.mark.parametrize("bins", [6, 7]) +@pytest.mark.parametrize("box", "compare", [(Series, tm.assert_series_equal), (np.array,tm.assert_categorical_equal)]) +def test_qcut_bool_coercion_to_int(bins, box, compare): + # issue 20303 + x = box(np.random.randint(2, size=200)) + expected = qcut(x, bins, duplicates='drop') + data = x.astype(bool) + result = qcut(data, bins, duplicates='drop') + compare(result, expected) \ No newline at end of file From 311106a3fbb275fa6f8ba2b8d02904bdf5cc8a92 Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Sat, 5 Oct 2019 22:06:36 +0100 Subject: [PATCH 4/8] syntax changes --- pandas/core/reshape/tile.py | 8 +++++--- pandas/tests/reshape/test_cut.py | 11 +++++++---- pandas/tests/reshape/test_qcut.py | 12 ++++++++---- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 0127e5e520d46..8004ab245f05e 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -11,6 +11,7 @@ from pandas.core.dtypes.common import ( _NS_DTYPE, ensure_int64, + is_bool_dtype, is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, @@ -18,7 +19,6 @@ is_integer, is_scalar, is_timedelta64_dtype, - is_bool_dtype ) from pandas.core.dtypes.missing import isna @@ -439,12 +439,14 @@ def _coerce_to_type(x): x = to_timedelta(x) dtype = np.dtype("timedelta64[ns]") elif is_bool_dtype(x): - x = x.astype(int) dtype = x.dtype if dtype is not None: # GH 19768: force NaT to NaN during integer conversion - x = np.where(x.notna(), x.view(np.int64), np.nan) + if is_bool_dtype(x): + x = np.where(x.notna(), x.astype(int), np.nan) + else: + x = np.where(x.notna(), x.view(np.int64), np.nan) return x, dtype diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index aeffc2a4efd56..1a410c1a8fe32 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -588,11 +588,14 @@ def test_timedelta_cut_roundtrip(): @pytest.mark.parametrize("bins", [6, 7]) -@pytest.mark.parametrize("box", "compare", [(Series, tm.assert_series_equal), (np.array,tm.assert_categorical_equal)]) +@pytest.mark.parametrize( + "box, compare", + [(Series, tm.assert_series_equal), (np.array, tm.assert_categorical_equal)], +) def test_cut_bool_coercion_to_int(bins, box, compare): # issue 20303 x = box(np.random.randint(2, size=200)) - expected = cut(x, bins, duplicates='drop') + expected = cut(x, bins, duplicates="drop") data = x.astype(bool) - result = cut(data, bins, duplicates='drop') - compare(result, expected) \ No newline at end of file + result = cut(data, bins, duplicates="drop") + compare(result, expected) diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index 7a5920c87fb6f..77e14932cb0a9 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -237,12 +237,16 @@ def test_date_like_qcut_bins(arg, expected_bins): result, result_bins = qcut(ser, 2, retbins=True) tm.assert_index_equal(result_bins, expected_bins) + @pytest.mark.parametrize("bins", [6, 7]) -@pytest.mark.parametrize("box", "compare", [(Series, tm.assert_series_equal), (np.array,tm.assert_categorical_equal)]) +@pytest.mark.parametrize( + "box, compare", + [(Series, tm.assert_series_equal), (np.array, tm.assert_categorical_equal)], +) def test_qcut_bool_coercion_to_int(bins, box, compare): # issue 20303 x = box(np.random.randint(2, size=200)) - expected = qcut(x, bins, duplicates='drop') + expected = qcut(x, bins, duplicates="drop") data = x.astype(bool) - result = qcut(data, bins, duplicates='drop') - compare(result, expected) \ No newline at end of file + result = qcut(data, bins, duplicates="drop") + compare(result, expected) From 33c889cd77213e6d861b54fd7c4ca19b2fa9cd7f Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Sat, 5 Oct 2019 22:13:35 +0100 Subject: [PATCH 5/8] switch np.where condition to check if non nan --- pandas/core/reshape/tile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 8004ab245f05e..98c95a4a2f6e6 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -444,7 +444,7 @@ def _coerce_to_type(x): if dtype is not None: # GH 19768: force NaT to NaN during integer conversion if is_bool_dtype(x): - x = np.where(x.notna(), x.astype(int), np.nan) + x = np.where(~np.isnan(x), x.astype(int), np.nan) else: x = np.where(x.notna(), x.view(np.int64), np.nan) From 412f2d958259a1fd8cc573664c3903e2b9a2c2b2 Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Sun, 6 Oct 2019 00:31:56 +0100 Subject: [PATCH 6/8] Add list as box to tests and make more verbose --- pandas/core/reshape/tile.py | 7 ++----- pandas/tests/reshape/test_cut.py | 14 +++++++++----- pandas/tests/reshape/test_qcut.py | 14 +++++++++----- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 98c95a4a2f6e6..5f91f1ae1e722 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -439,14 +439,11 @@ def _coerce_to_type(x): x = to_timedelta(x) dtype = np.dtype("timedelta64[ns]") elif is_bool_dtype(x): - dtype = x.dtype + x = x.astype(np.int64) if dtype is not None: # GH 19768: force NaT to NaN during integer conversion - if is_bool_dtype(x): - x = np.where(~np.isnan(x), x.astype(int), np.nan) - else: - x = np.where(x.notna(), x.view(np.int64), np.nan) + x = np.where(x.notna(), x.view(np.int64), np.nan) return x, dtype diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 1a410c1a8fe32..611c3272c123f 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -590,12 +590,16 @@ def test_timedelta_cut_roundtrip(): @pytest.mark.parametrize("bins", [6, 7]) @pytest.mark.parametrize( "box, compare", - [(Series, tm.assert_series_equal), (np.array, tm.assert_categorical_equal)], + [ + (Series, tm.assert_series_equal), + (np.array, tm.assert_categorical_equal), + (list, tm.assert_equal), + ], ) def test_cut_bool_coercion_to_int(bins, box, compare): # issue 20303 - x = box(np.random.randint(2, size=200)) - expected = cut(x, bins, duplicates="drop") - data = x.astype(bool) - result = cut(data, bins, duplicates="drop") + data_expected = box([0, 1, 1, 0, 1] * 10) + data_result = box([False, True, True, False, True] * 10) + expected = cut(data_expected, bins, duplicates="drop") + result = cut(data_result, bins, duplicates="drop") compare(result, expected) diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index 77e14932cb0a9..eca9b11bd4364 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -241,12 +241,16 @@ def test_date_like_qcut_bins(arg, expected_bins): @pytest.mark.parametrize("bins", [6, 7]) @pytest.mark.parametrize( "box, compare", - [(Series, tm.assert_series_equal), (np.array, tm.assert_categorical_equal)], + [ + (Series, tm.assert_series_equal), + (np.array, tm.assert_categorical_equal), + (list, tm.assert_equal), + ], ) def test_qcut_bool_coercion_to_int(bins, box, compare): # issue 20303 - x = box(np.random.randint(2, size=200)) - expected = qcut(x, bins, duplicates="drop") - data = x.astype(bool) - result = qcut(data, bins, duplicates="drop") + data_expected = box([0, 1, 1, 0, 1] * 10) + data_result = box([False, True, True, False, True] * 10) + expected = qcut(data_expected, bins, duplicates="drop") + result = qcut(data_result, bins, duplicates="drop") compare(result, expected) From b397804ea0b0e19572cf51cdd6c4762e53dd6698 Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Sun, 6 Oct 2019 00:41:01 +0100 Subject: [PATCH 7/8] Moved whats new to 1.0 --- doc/source/whatsnew/v0.25.1.rst | 1 + doc/source/whatsnew/v0.25.2.rst | 1 - pandas/core/reshape/tile.py | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 63dd56f4a3793..00e110c68a9cc 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -110,6 +110,7 @@ Other - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when replacing timezone-aware timestamps using a dict-like replacer (:issue:`27720`) - Bug in :meth:`Series.rename` when using a custom type indexer. Now any value that isn't callable or dict-like is treated as a scalar. (:issue:`27814`) +- :func:`qcut` and `cut` now handle boolean input (:issue:`20303`) .. _whatsnew_0.251.contributors: diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst index 380f081e0650f..9789c9fce3541 100644 --- a/doc/source/whatsnew/v0.25.2.rst +++ b/doc/source/whatsnew/v0.25.2.rst @@ -103,7 +103,6 @@ Other - Compatibility with Python 3.8 in :meth:`DataFrame.query` (:issue:`27261`) - Fix to ensure that tab-completion in an IPython console does not raise warnings for deprecated attributes (:issue:`27900`). -- :func:`qcut` now handles bool ndarray/Series (:issue:`20303`) .. _whatsnew_0.252.contributors: diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 5f91f1ae1e722..be5d75224e77d 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -439,6 +439,7 @@ def _coerce_to_type(x): x = to_timedelta(x) dtype = np.dtype("timedelta64[ns]") elif is_bool_dtype(x): + # GH 20303 x = x.astype(np.int64) if dtype is not None: From 1cc400c52856b9c1a04305f7a4f18699720b7b77 Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Mon, 7 Oct 2019 01:50:11 +0100 Subject: [PATCH 8/8] Shift whatsnew to 1.0.0 --- doc/source/whatsnew/v0.25.1.rst | 1 - doc/source/whatsnew/v1.0.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 00e110c68a9cc..63dd56f4a3793 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -110,7 +110,6 @@ Other - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when replacing timezone-aware timestamps using a dict-like replacer (:issue:`27720`) - Bug in :meth:`Series.rename` when using a custom type indexer. Now any value that isn't callable or dict-like is treated as a scalar. (:issue:`27814`) -- :func:`qcut` and `cut` now handle boolean input (:issue:`20303`) .. _whatsnew_0.251.contributors: diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 53041441ba040..605b9fd916348 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -337,6 +337,7 @@ Reshaping - Bug in :meth:`DataFrame.stack` not handling non-unique indexes correctly when creating MultiIndex (:issue: `28301`) - Bug :func:`merge_asof` could not use :class:`datetime.timedelta` for ``tolerance`` kwarg (:issue:`28098`) - Bug in :func:`merge`, did not append suffixes correctly with MultiIndex (:issue:`28518`) +- :func:`qcut` and :func:`cut` now handle boolean input (:issue:`20303`) Sparse ^^^^^^