diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index ece9ff4a1adff..8dbc5673f1bcd 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -670,8 +670,8 @@ Bug Fixes - Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`, :issue:`15424`) - Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a numpy array (:issue:`15434`) - Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`) - - +- Bug in ``pd.cut()`` with a single bin on an array of all zeros raising ``ValueError`` (:issue:`15428`) +- Bug in ``pd.qcut()`` with a single quantile on an array of identical values raising ``ValueError`` (:issue:`15431`) - Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`) diff --git a/pandas/tests/tools/test_tile.py b/pandas/tests/tools/test_tile.py index de44eadc15751..11b242bc06e15 100644 --- a/pandas/tests/tools/test_tile.py +++ b/pandas/tests/tools/test_tile.py @@ -3,7 +3,7 @@ import numpy as np from pandas.compat import zip -from pandas import Series, Index +from pandas import Series, Index, Categorical import pandas.util.testing as tm from pandas.util.testing import assertRaisesRegexp import pandas.core.common as com @@ -239,7 +239,6 @@ def test_qcut_binning_issues(self): self.assertTrue(ep <= sn) def test_cut_return_categorical(self): - from pandas import Categorical s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8]) res = cut(s, 3) exp = Series(Categorical.from_codes([0, 0, 0, 1, 1, 1, 2, 2, 2], @@ -249,7 +248,6 @@ def test_cut_return_categorical(self): tm.assert_series_equal(res, exp) def test_qcut_return_categorical(self): - from pandas import Categorical s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8]) res = qcut(s, [0, 0.333, 0.666, 1]) exp = Series(Categorical.from_codes([0, 0, 0, 1, 1, 1, 2, 2, 2], @@ -285,6 +283,60 @@ def test_qcut_duplicates_bin(self): # invalid 
self.assertRaises(ValueError, qcut, values, 3, duplicates='foo') + def test_single_quantile(self): + # issue 15431 + expected = Series([0, 0]) + + s = Series([9., 9.]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + exp_lab = Series(Categorical.from_codes([0, 0], ["[9, 9]"], + ordered=True)) + tm.assert_series_equal(result, exp_lab) + + s = Series([-9., -9.]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + exp_lab = Series(Categorical.from_codes([0, 0], ["[-9, -9]"], + ordered=True)) + tm.assert_series_equal(result, exp_lab) + + s = Series([0., 0.]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + exp_lab = Series(Categorical.from_codes([0, 0], ["[0, 0]"], + ordered=True)) + tm.assert_series_equal(result, exp_lab) + + expected = Series([0]) + + s = Series([9]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + exp_lab = Series(Categorical.from_codes([0], ["[9, 9]"], + ordered=True)) + tm.assert_series_equal(result, exp_lab) + + s = Series([-9]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + exp_lab = Series(Categorical.from_codes([0], ["[-9, -9]"], + ordered=True)) + tm.assert_series_equal(result, exp_lab) + + s = Series([0]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + exp_lab = Series(Categorical.from_codes([0], ["[0, 0]"], + ordered=True)) + tm.assert_series_equal(result, exp_lab) + def test_single_bin(self): # issue 14652 expected = Series([0, 0]) @@ -297,6 +349,29 @@ def test_single_bin(self): result = cut(s, 1, labels=False) tm.assert_series_equal(result, expected) + expected = Series([0]) + + s = Series([9]) + result = cut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + + s = Series([-9]) + result = cut(s, 1, 
labels=False) + tm.assert_series_equal(result, expected) + + # issue 15428 + expected = Series([0, 0]) + + s = Series([0., 0.]) + result = cut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + + expected = Series([0]) + + s = Series([0]) + result = cut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + def test_datetime_cut(self): # GH 14714 # testing for time data to be present as series diff --git a/pandas/tools/tile.py b/pandas/tools/tile.py index feb4d4bfd5044..034ecd72bd41e 100644 --- a/pandas/tools/tile.py +++ b/pandas/tools/tile.py @@ -104,8 +104,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, mn, mx = [mi + 0.0 for mi in rng] if mn == mx: # adjust end points before binning - mn -= .001 * abs(mn) - mx += .001 * abs(mx) + mn -= .001 * abs(mn) if mn != 0 else .001 + mx += .001 * abs(mx) if mx != 0 else .001 bins = np.linspace(mn, mx, bins + 1, endpoint=True) else: # adjust end points after binning bins = np.linspace(mn, mx, bins + 1, endpoint=True) @@ -206,7 +206,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, "valid options are: raise, drop") unique_bins = algos.unique(bins) - if len(unique_bins) < len(bins): + if len(unique_bins) < len(bins) and len(bins) != 2: if duplicates == 'raise': raise ValueError("Bin edges must be unique: {}.\nYou " "can drop duplicate edges by setting "