diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index dfa87e3cd4574..2e0c942806f72 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -408,6 +408,9 @@ Missing MultiIndex ^^^^^^^^^^ +- Bug in :meth:`DataFrame.loc` returning an empty result when slicing a :class:`MultiIndex` with a negative step size and non-null start/stop values (:issue:`46156`) +- Bug in :meth:`DataFrame.loc` raising when slicing a :class:`MultiIndex` with a negative step size other than -1 (:issue:`46156`) +- Bug in :meth:`DataFrame.loc` raising when slicing a non-integer labeled level of a :class:`MultiIndex` with a negative step size (:issue:`46156`) - Bug in :meth:`Series.to_numpy` where multiindexed Series could not be converted to numpy arrays when an ``na_value`` was supplied (:issue:`45774`) - Bug in :class:`MultiIndex.equals` not commutative when only one side has extension array dtype (:issue:`46026`) - diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index cdde510927081..d192e6d7f8e32 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3162,9 +3162,6 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): # given the inputs and the codes/indexer, compute an indexer set # if we have a provided indexer, then this need not consider # the entire labels set - if step is not None and step < 0: - # Switch elements for negative step size - start, stop = stop - 1, start - 1 r = np.arange(start, stop, step) if indexer is not None and len(indexer) != len(codes): @@ -3196,19 +3193,25 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): if isinstance(key, slice): # handle a slice, returning a slice if we can # otherwise a boolean indexer + step = key.step + is_negative_step = step is not None and step < 0 try: if key.start is not None: start = level_index.get_loc(key.start) + elif is_negative_step: + start = len(level_index) - 1 
else: start = 0 + if key.stop is not None: stop = level_index.get_loc(key.stop) + elif is_negative_step: + stop = 0 elif isinstance(start, slice): stop = len(level_index) else: stop = len(level_index) - 1 - step = key.step except KeyError: # we have a partial slice (like looking up a partial date @@ -3228,8 +3231,9 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): elif level > 0 or self._lexsort_depth == 0 or step is not None: # need to have like semantics here to right # searching as when we are using a slice - # so include the stop+1 (so we include stop) - return convert_indexer(start, stop + 1, step) + # so adjust the stop by 1 (so we include stop) + stop = (stop - 1) if is_negative_step else (stop + 1) + return convert_indexer(start, stop, step) else: # sorted, so can return slice object -> view i = algos.searchsorted(level_codes, start, side="left") @@ -3520,7 +3524,8 @@ def _reorder_indexer( new_order = key_order_map[self.codes[i][indexer]] elif isinstance(k, slice) and k.step is not None and k.step < 0: - new_order = np.arange(n)[k][indexer] + # flip order for negative step + new_order = np.arange(n)[::-1][indexer] elif isinstance(k, slice) and k.start is None and k.stop is None: # slice(None) should not determine order GH#31330 new_order = np.ones((n,))[indexer] diff --git a/pandas/tests/indexing/multiindex/test_slice.py b/pandas/tests/indexing/multiindex/test_slice.py index 55d45a21d643a..91ea1f7cec324 100644 --- a/pandas/tests/indexing/multiindex/test_slice.py +++ b/pandas/tests/indexing/multiindex/test_slice.py @@ -758,12 +758,48 @@ def test_int_series_slicing(self, multiindex_year_month_day_dataframe_random_dat expected = ymd.reindex(s.index[5:]) tm.assert_frame_equal(result, expected) - def test_loc_slice_negative_stepsize(self): + @pytest.mark.parametrize( + "dtype, loc, iloc", + [ + # dtype = int, step = -1 + ("int", slice(None, None, -1), slice(None, None, -1)), + ("int", slice(3, None, -1), slice(3, None, -1)), + 
("int", slice(None, 1, -1), slice(None, 0, -1)), + ("int", slice(3, 1, -1), slice(3, 0, -1)), + # dtype = int, step = -2 + ("int", slice(None, None, -2), slice(None, None, -2)), + ("int", slice(3, None, -2), slice(3, None, -2)), + ("int", slice(None, 1, -2), slice(None, 0, -2)), + ("int", slice(3, 1, -2), slice(3, 0, -2)), + # dtype = str, step = -1 + ("str", slice(None, None, -1), slice(None, None, -1)), + ("str", slice("d", None, -1), slice(3, None, -1)), + ("str", slice(None, "b", -1), slice(None, 0, -1)), + ("str", slice("d", "b", -1), slice(3, 0, -1)), + # dtype = str, step = -2 + ("str", slice(None, None, -2), slice(None, None, -2)), + ("str", slice("d", None, -2), slice(3, None, -2)), + ("str", slice(None, "b", -2), slice(None, 0, -2)), + ("str", slice("d", "b", -2), slice(3, 0, -2)), + ], + ) + def test_loc_slice_negative_stepsize(self, dtype, loc, iloc): # GH#38071 - mi = MultiIndex.from_product([["a", "b"], [0, 1]]) - df = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], index=mi) - result = df.loc[("a", slice(None, None, -1)), :] - expected = DataFrame( - [[3, 4], [1, 2]], index=MultiIndex.from_tuples([("a", 1), ("a", 0)]) - ) - tm.assert_frame_equal(result, expected) + labels = { + "str": list("abcde"), + "int": range(5), + }[dtype] + + mi = MultiIndex.from_arrays([labels] * 2) + df = DataFrame(1.0, index=mi, columns=["A"]) + + SLC = pd.IndexSlice + + expected = df.iloc[iloc, :] + result_get_loc = df.loc[SLC[loc], :] + result_get_locs_level_0 = df.loc[SLC[loc, :], :] + result_get_locs_level_1 = df.loc[SLC[:, loc], :] + + tm.assert_frame_equal(result_get_loc, expected) + tm.assert_frame_equal(result_get_locs_level_0, expected) + tm.assert_frame_equal(result_get_locs_level_1, expected)