Skip to content

Commit 01ade5a

Browse files
committed
categorical: searchsorted returns a scalar if input was scalar
1 parent 3283ae8 commit 01ade5a

File tree

5 files changed

+57
-14
lines changed

5 files changed

+57
-14
lines changed

doc/source/whatsnew/v0.23.0.txt

+3
Original file line numberDiff line numberDiff line change
@@ -1259,6 +1259,9 @@ Indexing
12591259
- Bug in performing in-place operations on a ``DataFrame`` with a duplicate ``Index`` (:issue:`17105`)
12601260
- Bug in :meth:`IntervalIndex.get_loc` and :meth:`IntervalIndex.get_indexer` when used with an :class:`IntervalIndex` containing a single interval (:issue:`17284`, :issue:`20921`)
12611261
- Bug in ``.loc`` with a ``uint64`` indexer (:issue:`20722`)
1262+
- Bug in ``CategoricalIndex.searchsorted`` where the method didn't return a scalar when the input value was a scalar (:issue:`21019`)
1263+
- Bug in ``CategoricalIndex`` where slicing beyond the range of the data raised a ``KeyError`` (:issue:`21019`)
1264+
12621265

12631266
MultiIndex
12641267
^^^^^^^^^^

pandas/core/arrays/categorical.py

+2
Original file line numberDiff line numberDiff line change
@@ -1341,6 +1341,8 @@ def searchsorted(self, value, side='left', sorter=None):
13411341

13421342
if -1 in values_as_codes:
13431343
raise ValueError("Value(s) to be inserted must be in categories.")
1344+
if is_scalar(value):
1345+
values_as_codes = np.asscalar(values_as_codes)
13441346

13451347
return self.codes.searchsorted(values_as_codes, side=side,
13461348
sorter=sorter)

pandas/core/indexes/category.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -432,13 +432,14 @@ def get_loc(self, key, method=None):
432432
>>> monotonic_index.get_loc('b')
433433
slice(1, 3, None)
434434
435-
>>> non_monotonic_index = p.dCategoricalIndex(list('abcb'))
435+
>>> non_monotonic_index = pd.CategoricalIndex(list('abcb'))
436436
>>> non_monotonic_index.get_loc('b')
437437
array([False, True, False, True], dtype=bool)
438438
"""
439-
codes = self.categories.get_loc(key)
440-
if (codes == -1):
441-
raise KeyError(key)
439+
try:
440+
codes = self.categories.get_loc(key)
441+
except KeyError:
442+
raise KeyError("Category `{}` unknown".format(key))
442443
return self._engine.get_loc(codes)
443444

444445
def get_value(self, series, key):

pandas/tests/categorical/test_analytics.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,9 @@ def test_searchsorted(self):
8686
# Searching for single item argument, side='left' (default)
8787
res_cat = c1.searchsorted('apple')
8888
res_ser = s1.searchsorted('apple')
89-
exp = np.array([2], dtype=np.intp)
90-
tm.assert_numpy_array_equal(res_cat, exp)
91-
tm.assert_numpy_array_equal(res_ser, exp)
89+
exp = np.int64(2)
90+
assert res_cat == exp
91+
assert res_ser == exp
9292

9393
# Searching for single item array, side='left' (default)
9494
res_cat = c1.searchsorted(['bread'])

pandas/tests/indexing/test_categorical.py

+44-7
Original file line numberDiff line numberDiff line change
@@ -627,15 +627,52 @@ def test_reindexing(self):
627627
lambda: self.df2.reindex(['a'], limit=2))
628628

629629
def test_loc_slice(self):
630-
# slicing
631-
# not implemented ATM
632-
# GH9748
630+
# Raises KeyError since the left slice bound 'a' is not unique
631+
pytest.raises(KeyError, lambda: self.df.loc["a":"b"])
632+
result = self.df.loc["b":"c"]
633633

634-
pytest.raises(TypeError, lambda: self.df.loc[1:5])
634+
expected = DataFrame(
635+
{"A": [2, 3, 4]},
636+
index=CategoricalIndex(
637+
["b", "b", "c"], name="B", categories=list("cab")
638+
),
639+
)
640+
641+
assert_frame_equal(result, expected)
642+
643+
ordered_df = DataFrame(
644+
{"A": range(0, 6)},
645+
index=CategoricalIndex(list("aabcde"), name="B", ordered=True),
646+
)
647+
648+
result = ordered_df.loc["a":"b"]
649+
expected = DataFrame(
650+
{"A": range(0, 3)},
651+
index=CategoricalIndex(
652+
list("aab"), categories=list("abcde"), name="B", ordered=True
653+
),
654+
)
655+
assert_frame_equal(result, expected)
656+
657+
# This should select the entire dataframe
658+
result = ordered_df.loc["a":"e"]
659+
assert_frame_equal(result, ordered_df)
660+
661+
df_slice = ordered_df.loc["a":"b"]
662+
# Although the edge is not within the slice, this should fall back
663+
# to searchsorted slicing since the category is known
664+
result = df_slice.loc["a":"e"]
665+
assert_frame_equal(result, df_slice)
635666

636-
# result = df.loc[1:5]
637-
# expected = df.iloc[[1,2,3,4]]
638-
# assert_frame_equal(result, expected)
667+
# If the categorical is not ordered and the requested edge
668+
# is not in the slice we cannot perform slicing
669+
df_slice.index = df_slice.index.as_unordered()
670+
with pytest.raises(KeyError):
671+
df_slice.loc["a":"e"]
672+
673+
with pytest.raises(KeyError):
674+
# If the category is not known, there is nothing we can do
675+
ordered_df.loc["a":"z"]
639676

640677
def test_boolean_selection(self):
641678

0 commit comments

Comments
 (0)