Skip to content

Commit c8a6d8c

Browse files
ryankarlos authored and jreback committed
ERR: Improve error message and doc for invalid labels in cut/qcut (#30691)
1 parent 21fd692 commit c8a6d8c

File tree

4 files changed

+57
-6
lines changed

4 files changed

+57
-6
lines changed

doc/source/whatsnew/v1.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -1001,7 +1001,7 @@ Reshaping
10011001
- Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`)
10021002
- Dtypes are now preserved when transposing a ``DataFrame`` where each column is the same extension dtype (:issue:`30091`)
10031003
- Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`)
1004-
-
1004+
- Improved error message and docstring in :func:`cut` and :func:`qcut` when ``labels=True`` (:issue:`13318`)
10051005

10061006
Sparse
10071007
^^^^^^

pandas/core/reshape/tile.py

+15-5
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
is_datetime64tz_dtype,
1616
is_datetime_or_timedelta_dtype,
1717
is_integer,
18+
is_list_like,
1819
is_scalar,
1920
is_timedelta64_dtype,
2021
)
@@ -65,11 +66,12 @@ def cut(
6566
``right == True`` (the default), then the `bins` ``[1, 2, 3, 4]``
6667
indicate (1,2], (2,3], (3,4]. This argument is ignored when
6768
`bins` is an IntervalIndex.
68-
labels : array or bool, optional
69+
labels : array or False, default None
6970
Specifies the labels for the returned bins. Must be the same length as
7071
the resulting bins. If False, returns only integer indicators of the
7172
bins. This affects the type of the output container (see below).
72-
This argument is ignored when `bins` is an IntervalIndex.
73+
This argument is ignored when `bins` is an IntervalIndex. If True,
74+
raises an error.
7375
retbins : bool, default False
7476
Whether to return the bins or not. Useful when bins is provided
7577
as a scalar.
@@ -286,10 +288,10 @@ def qcut(
286288
q : int or list-like of int
287289
Number of quantiles. 10 for deciles, 4 for quartiles, etc. Alternately
288290
array of quantiles, e.g. [0, .25, .5, .75, 1.] for quartiles.
289-
labels : array or bool, default None
291+
labels : array or False, default None
290292
Used as labels for the resulting bins. Must be of the same length as
291293
the resulting bins. If False, return only integer indicators of the
292-
bins.
294+
bins. If True, raises an error.
293295
retbins : bool, optional
294296
Whether to return the (bins, labels) or not. Can be useful if bins
295297
is given as a scalar.
@@ -391,15 +393,23 @@ def _bins_to_cuts(
391393
has_nas = na_mask.any()
392394

393395
if labels is not False:
394-
if labels is None:
396+
if not (labels is None or is_list_like(labels)):
397+
raise ValueError(
398+
"Bin labels must either be False, None or passed in as a "
399+
"list-like argument"
400+
)
401+
402+
elif labels is None:
395403
labels = _format_labels(
396404
bins, precision, right=right, include_lowest=include_lowest, dtype=dtype
397405
)
406+
398407
else:
399408
if len(labels) != len(bins) - 1:
400409
raise ValueError(
401410
"Bin labels must be one fewer than the number of bin edges"
402411
)
412+
403413
if not is_categorical_dtype(labels):
404414
labels = Categorical(labels, categories=labels, ordered=True)
405415

pandas/tests/reshape/test_cut.py

+9
Original file line numberDiff line numberDiff line change
@@ -603,3 +603,12 @@ def test_cut_bool_coercion_to_int(bins, box, compare):
603603
expected = cut(data_expected, bins, duplicates="drop")
604604
result = cut(data_result, bins, duplicates="drop")
605605
compare(result, expected)
606+
607+
608+
@pytest.mark.parametrize("labels", ["foo", 1, True])
609+
def test_cut_incorrect_labels(labels):
610+
# GH 13318
611+
values = range(5)
612+
msg = "Bin labels must either be False, None or passed in as a list-like argument"
613+
with pytest.raises(ValueError, match=msg):
614+
cut(values, 4, labels=labels)

pandas/tests/reshape/test_qcut.py

+32
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,38 @@ def test_qcut_return_intervals():
130130
tm.assert_series_equal(res, exp)
131131

132132

133+
@pytest.mark.parametrize("labels", ["foo", 1, True])
134+
def test_qcut_incorrect_labels(labels):
135+
# GH 13318
136+
values = range(5)
137+
msg = "Bin labels must either be False, None or passed in as a list-like argument"
138+
with pytest.raises(ValueError, match=msg):
139+
qcut(values, 4, labels=labels)
140+
141+
142+
@pytest.mark.parametrize("labels", [["a", "b", "c"], list(range(3))])
143+
def test_qcut_wrong_length_labels(labels):
144+
# GH 13318
145+
values = range(10)
146+
msg = "Bin labels must be one fewer than the number of bin edges"
147+
with pytest.raises(ValueError, match=msg):
148+
qcut(values, 4, labels=labels)
149+
150+
151+
@pytest.mark.parametrize(
152+
"labels, expected",
153+
[
154+
(["a", "b", "c"], Categorical(["a", "b", "c"], ordered=True)),
155+
(list(range(3)), Categorical([0, 1, 2], ordered=True)),
156+
],
157+
)
158+
def test_qcut_list_like_labels(labels, expected):
159+
# GH 13318
160+
values = range(3)
161+
result = qcut(values, 3, labels=labels)
162+
tm.assert_categorical_equal(result, expected)
163+
164+
133165
@pytest.mark.parametrize(
134166
"kwargs,msg",
135167
[

0 commit comments

Comments
 (0)