Skip to content

Commit 55c086e

Browse files
committed
BUG: Update added tests to use Categorical.from_codes. Update whats new lines. Updated docstrings (#33141)
1 parent c7fc2ae commit 55c086e

File tree

3 files changed

+24
-25
lines changed

3 files changed

+24
-25
lines changed

doc/source/whatsnew/v1.1.0.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ Other enhancements
9191
- The :meth:`DataFrame.to_feather` method now supports additional keyword
9292
arguments (e.g. to set the compression) that are added in pyarrow 0.17
9393
(:issue:`33422`).
94+
- :func:`cut` now accepts the parameter ``ordered``, which defaults to ``ordered=True``. If ``ordered=False`` and no labels are provided, an error is raised (:issue:`33141`)
9495

9596
.. ---------------------------------------------------------------------------
9697
@@ -540,8 +541,7 @@ Reshaping
540541
- Bug in :meth:`concat` where passing a non-dict mapping as ``objs`` would raise a ``TypeError`` (:issue:`32863`)
541542
- :meth:`DataFrame.agg` now provides a more descriptive ``SpecificationError`` message when attempting to aggregate a non-existent column (:issue:`32755`)
542543
- Bug in :meth:`DataFrame.unstack` when MultiIndexed columns and MultiIndexed rows were used (:issue:`32624`, :issue:`24729` and :issue:`28306`)
543-
- Bug in :func:`cut` where an error was raised when non-unique labels were used. Added input parameter ``ordered`` to :func:`cut` with default (``ordered=True``).
544-
If ``ordered=False`` and no labels are provided, an error will be raised (:issue:`33141`)
544+
- Bug in :func:`cut` where an error was raised when non-unique labels were passed (:issue:`33141`)
545545

546546

547547
Sparse

pandas/core/reshape/tile.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -153,12 +153,13 @@ def cut(
153153
[bad, good, medium, medium, good, bad]
154154
Categories (3, object): [bad < medium < good]
155155
156-
``ordered=False`` will result in unordered categories when labels are passed:
156+
``ordered=False`` will result in unordered categories when labels are passed.
157+
This parameter can be used to allow non-unique labels:
157158
158-
>>> pd.cut(np.array([1, 7, 5, 4, 6, 3]),
159-
... 3, labels=["B", "C", "A"])
160-
[B, A, C, C, A, B]
161-
Categories (3, object): [B < C < A]
159+
>>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3,
160+
... labels=["B", "A", "B"], ordered=False)
161+
[B, B, A, A, B, B]
162+
Categories (2, object): [A, B]
162163
163164
``labels=False`` implies you just want the bins back.
164165

pandas/tests/reshape/test_cut.py

+16-18
Original file line numberDiff line numberDiff line change
@@ -628,40 +628,38 @@ def test_cut_nullable_integer(bins, right, include_lowest):
628628

629629

630630
@pytest.mark.parametrize(
631-
"data, bins, labels",
631+
"data, bins, labels, expected_codes, expected_labels",
632632
[
633-
([15, 17, 19], [14, 16, 18, 20], ["A", "B", "A"]),
634-
([1, 3, 5], [0, 2, 4, 6, 8], [2, 0, 1, 2]),
633+
([15, 17, 19], [14, 16, 18, 20], ["A", "B", "A"], [0, 1, 0], ["A", "B"]),
634+
([1, 3, 5], [0, 2, 4, 6, 8], [2, 0, 1, 2], [2, 0, 1], [0, 1, 2]),
635635
],
636636
)
637-
def test_cut_non_unique_labels(data, bins, labels):
637+
def test_cut_non_unique_labels(data, bins, labels, expected_codes, expected_labels):
638+
# GH 33141
638639
result = cut(data, bins=bins, labels=labels, ordered=False)
639-
expected = cut(
640-
data, bins=bins, labels=Categorical(labels, ordered=False), ordered=False
640+
expected = Categorical.from_codes(
641+
expected_codes, categories=expected_labels, ordered=False
641642
)
642643
tm.assert_categorical_equal(result, expected)
643644

644645

645646
@pytest.mark.parametrize(
646-
"data, bins, labels",
647+
"data, bins, labels, expected_codes, expected_labels",
647648
[
648-
([15, 17, 19], [14, 16, 18, 20], ["C", "B", "A"]),
649-
([1, 3, 5], [0, 2, 4, 6, 8], [3, 0, 1, 2]),
649+
([15, 17, 19], [14, 16, 18, 20], ["C", "B", "A"], [0, 1, 2], ["C", "B", "A"]),
650+
([1, 3, 5], [0, 2, 4, 6, 8], [3, 0, 1, 2], [0, 1, 2], [3, 0, 1, 2]),
650651
],
651652
)
652-
def test_cut_unordered_labels(data, bins, labels):
653+
def test_cut_unordered_labels(data, bins, labels, expected_codes, expected_labels):
654+
# GH 33141
653655
result = cut(data, bins=bins, labels=labels, ordered=False)
654-
expected = cut(
655-
data,
656-
bins=bins,
657-
labels=Categorical(labels, categories=labels, ordered=False),
658-
ordered=False,
656+
expected = Categorical.from_codes(
657+
expected_codes, categories=expected_labels, ordered=False
659658
)
660659
tm.assert_categorical_equal(result, expected)
661660

662661

663-
@pytest.mark.parametrize("data, bins,", [([0.5, 3], [0, 1, 2])])
664-
def test_cut_unordered_with_missing_labels_raises_error(data, bins):
662+
def test_cut_unordered_with_missing_labels_raises_error():
665663
msg = "'labels' must be provided if 'ordered = False'"
666664
with pytest.raises(ValueError, match=msg):
667-
cut(data, bins=bins, ordered=False)
665+
cut([0.5, 3], bins=[0, 1, 2], ordered=False)

0 commit comments

Comments
 (0)