Skip to content

Commit 04ca52f

Browse files
committed
Address PR comments
1 parent 25b5fd7 commit 04ca52f

File tree

3 files changed

+28
-31
lines changed

3 files changed

+28
-31
lines changed

doc/source/whatsnew/v0.23.0.txt

-3
Original file line numberDiff line numberDiff line change
@@ -1289,9 +1289,6 @@ Indexing
12891289
- Bug in performing in-place operations on a ``DataFrame`` with a duplicate ``Index`` (:issue:`17105`)
12901290
- Bug in :meth:`IntervalIndex.get_loc` and :meth:`IntervalIndex.get_indexer` when used with an :class:`IntervalIndex` containing a single interval (:issue:`17284`, :issue:`20921`)
12911291
- Bug in ``.loc`` with a ``uint64`` indexer (:issue:`20722`)
1292-
- Bug in :func:`CategoricalIndex.searchsorted` where the method did not return a scalar when the input values was scalar (:issue:`21019`)
1293-
- Bug in :class:`CategoricalIndex` where slicing beyond the range of the data raised a KeyError (:issue:`21019`)
1294-
12951292

12961293
MultiIndex
12971294
^^^^^^^^^^

doc/source/whatsnew/v0.23.1.txt

+1-2
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,7 @@ Indexing
8989
- Bug in :class:`IntervalIndex` constructors where creating an ``IntervalIndex`` from categorical data was not fully supported (:issue:`21243`, :issue:`21253`)
9090
- Bug in :meth:`MultiIndex.sort_index` which was not guaranteed to sort correctly with ``level=1``; this was also causing data misalignment in particular :meth:`DataFrame.stack` operations (:issue:`20994`, :issue:`20945`, :issue:`21052`)
9191
- Bug in :func:`CategoricalIndex.searchsorted` where the method did not return a scalar when the input value was a scalar (:issue:`21019`)
92-
- Bug in :class:`CategoricalIndex` where slicing beyond the range of the data raised a KeyError (:issue:`21019`)
93-
-
92+
- Bug in :class:`CategoricalIndex` where slicing beyond the range of the data raised a ``KeyError`` (:issue:`21019`)
9493

9594
I/O
9695
^^^

pandas/tests/indexing/test_categorical.py

+27-26
Original file line numberDiff line numberDiff line change
@@ -627,38 +627,29 @@ def test_reindexing(self):
627627
lambda: self.df2.reindex(['a'], limit=2))
628628

629629
def test_loc_slice(self):
630-
# Raises KeyError since the left slice 'a' is not unique
631-
pytest.raises(KeyError, lambda: self.df.loc["a":"b"])
632-
result = self.df.loc["b":"c"]
633-
634-
expected = DataFrame(
635-
{"A": [2, 3, 4]},
636-
index=CategoricalIndex(
637-
["b", "b", "c"], name="B", categories=list("cab")
638-
),
630+
df = DataFrame(
631+
{"A": range(0, 6)},
632+
index=CategoricalIndex(list("aabcde"), name="B"),
639633
)
640634

635+
# slice on an unordered categorical using in-sample, connected edges
636+
result = df.loc["b":"d"]
637+
expected = df.iloc[2:5]
641638
assert_frame_equal(result, expected)
642639

643-
ordered_df = DataFrame(
644-
{"A": range(0, 6)},
645-
index=CategoricalIndex(list("aabcde"), name="B", ordered=True),
646-
)
647-
648-
# This should select the entire dataframe
649-
result = ordered_df.loc["a":"e"]
650-
assert_frame_equal(result, ordered_df)
651-
result_iloc = ordered_df.iloc[0:6]
640+
# Slice the entire dataframe
641+
result = df.loc["a":"e"]
642+
assert_frame_equal(result, df)
643+
result_iloc = df.iloc[0:6]
652644
assert_frame_equal(result_iloc, result)
653645

654-
result = ordered_df.loc["a":"b"]
655-
expected = DataFrame(
656-
{"A": range(0, 3)},
657-
index=CategoricalIndex(
658-
list("aab"), categories=list("abcde"), name="B", ordered=True
659-
),
660-
)
661-
assert_frame_equal(result, expected)
646+
# check if the result is identical to an ordinary index
647+
df_non_cat_index = df.copy()
648+
df_non_cat_index.index = df_non_cat_index.index.astype(str)
649+
result = df.loc["a":"e"]
650+
result_non_cat = df_non_cat_index.loc["a": "e"]
651+
result.index = result.index.astype(str)
652+
assert_frame_equal(result_non_cat, result)
662653

663654
@pytest.mark.parametrize(
664655
"content",
@@ -669,6 +660,8 @@ def test_loc_beyond_edge_slicing(self, content):
669660
"""
670661
This test ensures that no `KeyError` is raised if trying to slice
671662
beyond the edges of known, ordered categories.
663+
664+
see GH21019
672665
"""
673666
# This dataframe might be a slice of a larger categorical
674667
# (i.e. more categories are known than there are in the column)
@@ -701,6 +694,14 @@ def test_loc_beyond_edge_slicing(self, content):
701694
# If the category is not known, there is nothing we can do
702695
ordered_df.loc["a":"z"]
703696

697+
unordered_df = ordered_df.copy()
698+
unordered_df.index = unordered_df.index.as_unordered()
699+
with pytest.raises(KeyError):
700+
# This operation previously succeeded for an ordered index. Since
701+
# this index is no longer ordered, we cannot perform out of range
702+
# slicing / searchsorted
703+
unordered_df.loc["a": "d"]
704+
704705
def test_boolean_selection(self):
705706

706707
df3 = self.df3

0 commit comments

Comments
 (0)