diff --git a/doc/source/whatsnew/v0.15.1.txt b/doc/source/whatsnew/v0.15.1.txt index fa1b8b24e75b5..7b3aaa00e7ea9 100644 --- a/doc/source/whatsnew/v0.15.1.txt +++ b/doc/source/whatsnew/v0.15.1.txt @@ -47,3 +47,4 @@ Experimental Bug Fixes ~~~~~~~~~ +- Bug in ``cut``/``qcut`` when using ``Series`` and ``retbins=True`` (:issue:`8589`) diff --git a/pandas/tools/tests/test_tile.py b/pandas/tools/tests/test_tile.py index 3bdd49673ca71..4a0218bef6001 100644 --- a/pandas/tools/tests/test_tile.py +++ b/pandas/tools/tests/test_tile.py @@ -248,6 +248,16 @@ def test_qcut_return_categorical(self): ordered=True)) tm.assert_series_equal(res, exp) + def test_series_retbins(self): + # GH 8589 + s = Series(np.arange(4)) + result, bins = cut(s, 2, retbins=True) + assert_equal(result.cat.codes.values, [0, 0, 1, 1]) + assert_almost_equal(bins, [-0.003, 1.5, 3]) + + result, bins = qcut(s, 2, retbins=True) + assert_equal(result.cat.codes.values, [0, 0, 1, 1]) + assert_almost_equal(bins, [0, 1.5, 3]) def curpath(): diff --git a/pandas/tools/tile.py b/pandas/tools/tile.py index 5eddd2f8dec33..6830919d9c09f 100644 --- a/pandas/tools/tile.py +++ b/pandas/tools/tile.py @@ -109,11 +109,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, if (np.diff(bins) < 0).any(): raise ValueError('bins must increase monotonically.') - res = _bins_to_cuts(x, bins, right=right, labels=labels,retbins=retbins, precision=precision, - include_lowest=include_lowest) - if isinstance(x, Series): - res = Series(res, index=x.index) - return res + return _bins_to_cuts(x, bins, right=right, labels=labels,retbins=retbins, precision=precision, + include_lowest=include_lowest) @@ -168,18 +165,21 @@ def qcut(x, q, labels=None, retbins=False, precision=3): else: quantiles = q bins = algos.quantile(x, quantiles) - res = _bins_to_cuts(x, bins, labels=labels, retbins=retbins,precision=precision, - include_lowest=True) - if isinstance(x, Series): - res = Series(res, index=x.index) - return res + return _bins_to_cuts(x, bins, labels=labels, retbins=retbins,precision=precision, + include_lowest=True) def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False, precision=3, name=None, include_lowest=False): - if name is None and isinstance(x, Series): - name = x.name + x_is_series = isinstance(x, Series) + series_index = None + + if x_is_series: + series_index = x.index + if name is None: + name = x.name + x = np.asarray(x) side = 'left' if right else 'right' @@ -224,6 +224,9 @@ def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False, fac = fac.astype(np.float64) np.putmask(fac, na_mask, np.nan) + if x_is_series: + fac = Series(fac, index=series_index) + if not retbins: return fac