Skip to content

API: remove ordered requirement in Categorical.searchsorted #21686

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ Categorical

- Added test to assert that :func:`fillna` raises the correct ``ValueError`` message when the value is not one of the categories (:issue:`13628`)
- Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`)
-
- :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` now also work on unordered categoricals (:issue:`21667`)
-


Expand Down
7 changes: 0 additions & 7 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1399,13 +1399,6 @@ def memory_usage(self, deep=False):
@Substitution(klass="Categorical")
@Appender(_shared_docs["searchsorted"])
def searchsorted(self, value, side="left", sorter=None):
if not self.ordered:
raise ValueError(
"Categorical not ordered\nyou can use "
".as_ordered() to change the Categorical to an "
"ordered one"
)

from pandas.core.series import Series

codes = _get_codes_for_values(Series(value).values, self.categories)
Expand Down
14 changes: 14 additions & 0 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1515,6 +1515,12 @@ def factorize(self, sort=False, na_sentinel=-1):
corresponding elements in `value` were inserted before the indices,
the order of `self` would be preserved.

.. note::

The %(klass)s *must* be monotonically sorted, otherwise
wrong locations will likely be returned. Pandas does *not*
check this for you.

Parameters
----------
value : array_like
Expand All @@ -1540,6 +1546,7 @@ def factorize(self, sort=False, na_sentinel=-1):

See Also
--------
sort_values
numpy.searchsorted

Notes
Expand Down Expand Up @@ -1578,6 +1585,13 @@ def factorize(self, sort=False, na_sentinel=-1):

>>> x.searchsorted(['bread'], side='right')
array([3])

If the values are not monotonically sorted, wrong locations
may be returned:

>>> x = pd.Series([2, 1, 3])
>>> x.searchsorted(1)
0 # wrong result, correct would be 1
"""

@Substitution(klass="Index")
Expand Down
41 changes: 14 additions & 27 deletions pandas/tests/arrays/categorical/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,65 +78,52 @@ def test_mode(self, values, categories, exp_mode):
exp = Categorical(exp_mode, categories=categories, ordered=True)
tm.assert_categorical_equal(res, exp)

def test_searchsorted(self):
def test_searchsorted(self, ordered_fixture):
# https://github.com/pandas-dev/pandas/issues/8420
# https://github.com/pandas-dev/pandas/issues/14522

c1 = Categorical(
["cheese", "milk", "apple", "bread", "bread"],
categories=["cheese", "milk", "apple", "bread"],
ordered=True,
)
s1 = Series(c1)
c2 = Categorical(
cat = Categorical(
["cheese", "milk", "apple", "bread", "bread"],
categories=["cheese", "milk", "apple", "bread"],
ordered=False,
ordered=ordered_fixture,
)
s2 = Series(c2)
ser = Series(cat)

# Searching for single item argument, side='left' (default)
res_cat = c1.searchsorted("apple")
res_cat = cat.searchsorted("apple")
assert res_cat == 2
assert is_scalar(res_cat)

res_ser = s1.searchsorted("apple")
res_ser = ser.searchsorted("apple")
assert res_ser == 2
assert is_scalar(res_ser)

# Searching for single item array, side='left' (default)
res_cat = c1.searchsorted(["bread"])
res_ser = s1.searchsorted(["bread"])
res_cat = cat.searchsorted(["bread"])
res_ser = ser.searchsorted(["bread"])
exp = np.array([3], dtype=np.intp)
tm.assert_numpy_array_equal(res_cat, exp)
tm.assert_numpy_array_equal(res_ser, exp)

# Searching for several items array, side='right'
res_cat = c1.searchsorted(["apple", "bread"], side="right")
res_ser = s1.searchsorted(["apple", "bread"], side="right")
res_cat = cat.searchsorted(["apple", "bread"], side="right")
res_ser = ser.searchsorted(["apple", "bread"], side="right")
exp = np.array([3, 5], dtype=np.intp)
tm.assert_numpy_array_equal(res_cat, exp)
tm.assert_numpy_array_equal(res_ser, exp)

# Searching for a single value that is not from the Categorical
msg = r"Value\(s\) to be inserted must be in categories"
with pytest.raises(KeyError, match=msg):
c1.searchsorted("cucumber")
cat.searchsorted("cucumber")
with pytest.raises(KeyError, match=msg):
s1.searchsorted("cucumber")
ser.searchsorted("cucumber")

# Searching for multiple values, one of which is not from the Categorical
with pytest.raises(KeyError, match=msg):
c1.searchsorted(["bread", "cucumber"])
cat.searchsorted(["bread", "cucumber"])
with pytest.raises(KeyError, match=msg):
s1.searchsorted(["bread", "cucumber"])

# searchsorted call for unordered Categorical
msg = "Categorical not ordered"
with pytest.raises(ValueError, match=msg):
c2.searchsorted("apple")
with pytest.raises(ValueError, match=msg):
s2.searchsorted("apple")
ser.searchsorted(["bread", "cucumber"])

def test_unique(self):
# categories are reordered based on value when ordered=False
Expand Down