Skip to content

Commit d12db3a

Browse files
committed
REGR: ensure passed binlabels to pd.cut have a compat dtype on output (#10140)
1 parent 676cb95 commit d12db3a

File tree

3 files changed

+48
-5
lines changed

3 files changed

+48
-5
lines changed

doc/source/whatsnew/v0.16.2.txt

+1
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ Bug Fixes
6969
- Bug in ``Series`` arithmetic methods may incorrectly hold names (:issue:`10068`)
7070

7171
- Bug in ``DatetimeIndex`` and ``TimedeltaIndex`` where names are lost after timedelta arithmetic (:issue:`9926`)
72+
- Regression in ``pd.cut`` to ensure passed ``binlabels`` have a compatible dtype on output (:issue:`10140`)
7273

7374

7475
- Bug in ``Series.plot(label="LABEL")`` not correctly setting the label (:issue:`10119`)

pandas/tools/tests/test_tile.py

+27-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import numpy as np
55
from pandas.compat import zip
66

7-
from pandas import DataFrame, Series, unique
7+
from pandas import DataFrame, Series, unique, Index, Categorical, CategoricalIndex
88
import pandas.util.testing as tm
99
from pandas.util.testing import assertRaisesRegexp
1010
import pandas.core.common as com
@@ -97,6 +97,32 @@ def test_label_precision(self):
9797
'(0.54, 0.72]']
9898
self.assert_numpy_array_equal(result.categories, ex_levels)
9999

100+
def test_label_coercion(self):
101+
# GH10140
102+
103+
df = DataFrame({'x' : 100 * np.random.random(100)})
104+
df['y'] = df.x**2
105+
106+
binedges = np.arange(0,110,10)
107+
binlabels = np.arange(5,105,10)
108+
109+
for bl in [Index(binlabels), Categorical(binlabels), Index(binlabels).map(str)]:
110+
expected = Index(bl).dtype
111+
result = cut(df.x, bins=binedges, labels=bl)
112+
self.assertEqual(result.dtype, expected)
113+
z = df.groupby(result).y.mean()
114+
self.assertEqual(z.index.dtype, expected)
115+
116+
# reversed categories
117+
bl = Categorical(binlabels,categories=binlabels[::-1],ordered=True)
118+
expected = Index(bl).dtype
119+
result = cut(df.x, bins=binedges, labels=bl)
120+
self.assertEqual(result.dtype, expected)
121+
z = df.groupby(result).y.mean()
122+
self.assertEqual(z.index.dtype, expected)
123+
tm.assert_index_equal(z.index,
124+
CategoricalIndex(Categorical.from_codes(np.arange(len(bl)),categories=bl.categories,ordered=True),name='x'))
125+
100126
def test_na_handling(self):
101127
arr = np.arange(0, 0.75, 0.01)
102128
arr[::3] = np.nan

pandas/tools/tile.py

+20-4
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,14 @@ def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,
195195
has_nas = na_mask.any()
196196

197197
if labels is not False:
198+
199+
def to_categorical(levels):
200+
if com.is_categorical_dtype(levels):
201+
levels = levels.categories
202+
np.putmask(ids, na_mask, 0)
203+
fac = Categorical(ids - 1, levels, ordered=True, name=name, fastpath=True)
204+
return fac
205+
198206
if labels is None:
199207
increases = 0
200208
while True:
@@ -209,15 +217,23 @@ def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,
209217
else:
210218
break
211219

220+
221+
fac = to_categorical(levels)
222+
223+
212224
else:
213225
if len(labels) != len(bins) - 1:
214226
raise ValueError('Bin labels must be one fewer than '
215227
'the number of bin edges')
216-
levels = labels
217228

218-
levels = np.asarray(levels, dtype=object)
219-
np.putmask(ids, na_mask, 0)
220-
fac = Categorical(ids - 1, levels, ordered=True, name=name, fastpath=True)
229+
# we want to coerce the resultant Categorical to the binlabels type if supplied
230+
# if we are passed a Categorical in the binlabels, then use this dtype
231+
# GH10140
232+
labels = _ensure_index(labels)
233+
fac = to_categorical(labels)
234+
if not com.is_categorical_dtype(labels):
235+
fac = type(labels)(np.asarray(fac))
236+
221237
else:
222238
fac = ids - 1
223239
if has_nas:

0 commit comments

Comments
 (0)