Skip to content

Commit e5f8082

Browse files
committed
allow pd.cut to take an IntervalIndex for bins
1 parent 4a5ebea commit e5f8082

File tree

2 files changed

+26
-1
lines changed

2 files changed

+26
-1
lines changed

pandas/tests/tools/test_tile.py

+12
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,18 @@ def test_arraylike(self):
5959
tm.assert_almost_equal(bins, np.array([0.1905, 3.36666667,
6060
6.53333333, 9.7]))
6161

62+
def test_bins_from_intervalindex(self):
63+
c = cut(range(5), 3)
64+
expected = c
65+
result = cut(range(5), bins=expected.categories)
66+
tm.assert_categorical_equal(result, expected)
67+
68+
expected = Categorical.from_codes(np.append(c.codes, -1),
69+
categories=c.categories,
70+
ordered=True)
71+
result = cut(range(6), bins=expected.categories)
72+
tm.assert_categorical_equal(result, expected)
73+
6274
def test_bins_not_monotonic(self):
6375
data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1]
6476
self.assertRaises(ValueError, cut, data, [0.1, 1.5, 1, 10])

pandas/tools/tile.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
2929
----------
3030
x : array-like
3131
Input array to be binned. It has to be 1-dimensional.
32-
bins : int or sequence of scalars
32+
bins : int, sequence of scalars, or IntervalIndex
3333
If `bins` is an int, it defines the number of equal-width bins in the
3434
range of `x`. However, in this case, the range of `x` is extended
3535
by .1% on each side to include the min or max values of `x`. If
@@ -78,10 +78,12 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
7878
(6.533, 9.7], (0.191, 3.367]]
7979
Categories (3, object): [(0.191, 3.367] < (3.367, 6.533] < (6.533, 9.7]],
8080
array([ 0.1905 , 3.36666667, 6.53333333, 9.7 ]))
81+
8182
>>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), 3,
8283
labels=["good","medium","bad"])
8384
[good, good, good, medium, bad, good]
8485
Categories (3, object): [good < medium < bad]
86+
8587
>>> pd.cut(np.ones(5), 4, labels=False)
8688
array([1, 1, 1, 1, 1], dtype=int64)
8789
"""
@@ -119,6 +121,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
119121
else:
120122
bins[-1] += adj
121123

124+
elif isinstance(bins, IntervalIndex):
125+
pass
122126
else:
123127
bins = np.asarray(bins)
124128
bins = _convert_bin_to_numeric_type(bins, dtype)
@@ -179,9 +183,11 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'):
179183
>>> pd.qcut(range(5), 4)
180184
[[0, 1], [0, 1], (1, 2], (2, 3], (3, 4]]
181185
Categories (4, object): [[0, 1] < (1, 2] < (2, 3] < (3, 4]]
186+
182187
>>> pd.qcut(range(5), 3, labels=["good","medium","bad"])
183188
[good, good, medium, bad, bad]
184189
Categories (3, object): [good < medium < bad]
190+
185191
>>> pd.qcut(range(5), 4, labels=False)
186192
array([0, 0, 1, 2, 3], dtype=int64)
187193
"""
@@ -210,6 +216,13 @@ def _bins_to_cuts(x, bins, right=True, labels=None,
210216
raise ValueError("invalid value for 'duplicates' parameter, "
211217
"valid options are: raise, drop")
212218

219+
if isinstance(bins, IntervalIndex):
220+
# we have a fast-path here
221+
ids = bins.get_indexer(x)
222+
result = algos.take_nd(bins, ids)
223+
result = Categorical(result, ordered=True)
224+
return result, bins
225+
213226
unique_bins = algos.unique(bins)
214227
if len(unique_bins) < len(bins) and len(bins) != 2:
215228
if duplicates == 'raise':

0 commit comments

Comments
 (0)