@@ -645,6 +645,12 @@ def test_loc_slice(self):
645
645
index = CategoricalIndex (list ("aabcde" ), name = "B" , ordered = True ),
646
646
)
647
647
648
+ # This should select the entire dataframe
649
+ result = ordered_df .loc ["a" :"e" ]
650
+ assert_frame_equal (result , ordered_df )
651
+ result_iloc = ordered_df .iloc [0 :6 ]
652
+ assert_frame_equal (result_iloc , result )
653
+
648
654
result = ordered_df .loc ["a" :"b" ]
649
655
expected = DataFrame (
650
656
{"A" : range (0 , 3 )},
@@ -654,21 +660,42 @@ def test_loc_slice(self):
654
660
)
655
661
assert_frame_equal (result , expected )
656
662
657
- # This should select the entire dataframe
658
- result = ordered_df .loc ["a" :"e" ]
659
- assert_frame_equal (result , ordered_df )
663
+ @pytest .mark .parametrize (
664
+ "content" ,
665
+ [list ("aab" ), list ("bbc" ), list ('bbc' )],
666
+ ids = ["right_edge" , "left_edge" , "both_edges" ],
667
+ )
668
+ def test_loc_beyond_edge_slicing (self , content ):
669
+ """
670
+ This test ensures that no `KeyError` is raised if trying to slice
671
+ beyond the edges of known, ordered categories.
672
+ """
673
+ # This dataframe might be a slice of a larger categorical
674
+ # (i.e. more categories are known than there are in the column)
675
+
676
+ ordered_df = DataFrame (
677
+ {"A" : range (0 , 3 )},
678
+ index = CategoricalIndex (
679
+ content , categories = list ("abcde" ), name = "B" , ordered = True
680
+ ),
681
+ )
660
682
661
- df_slice = ordered_df .loc ["a" :"b" ]
662
683
# Although the edge is not within the slice, this should fall back
663
- # to searchsorted slicing since the category is known
664
- result = df_slice .loc ["a" :"e" ]
665
- assert_frame_equal (result , df_slice )
684
+ # to searchsorted slicing since the category is known and the index
685
+ # is ordered. Since we're selecting a value larger/smaller than the
686
+ # right/left edge we should get the original slice again.
687
+ result = ordered_df .loc ["a" : "d" ]
688
+ assert_frame_equal (result , ordered_df )
689
+
690
+ # Ensure that index based slicing gives the same result
691
+ result_iloc = ordered_df .iloc [0 :4 ]
692
+ assert_frame_equal (result , result_iloc )
666
693
667
694
# If the categorical is unordered and the requested edge
668
695
# is not in the slice we cannot perform slicing
669
- df_slice .index = df_slice .index .as_unordered ()
696
+ ordered_df .index = ordered_df .index .as_unordered ()
670
697
with pytest .raises (KeyError ):
671
- df_slice .loc ["a" :"e " ]
698
+ ordered_df .loc ["a" : "d " ]
672
699
673
700
with pytest .raises (KeyError ):
674
701
# If the category is not known, there is nothing we can do
0 commit comments