Skip to content

BUG: MultiIndex slicing with negative step #46156

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 11, 2022
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,9 @@ Missing

MultiIndex
^^^^^^^^^^
- Bug in :meth:`DataFrame.loc` returning empty result when slicing a :class:`MultiIndex` with a negative step size and non-null start/stop values (:issue:`46156`)
- Bug in :meth:`DataFrame.loc` raising when slicing a :class:`MultiIndex` with a negative step size other than -1 (:issue:`46156`)
- Bug in :meth:`DataFrame.loc` raising when slicing a non-int labeled level of a :class:`MultiIndex` with a negative step size (:issue:`46156`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you clarify that the df has a MultiIndex?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated, thanks

- Bug in :meth:`Series.to_numpy` where multiindexed Series could not be converted to numpy arrays when an ``na_value`` was supplied (:issue:`45774`)
- Bug in :meth:`MultiIndex.equals` not being commutative when only one side has extension array dtype (:issue:`46026`)
-
Expand Down
19 changes: 12 additions & 7 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3164,9 +3164,6 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
# given the inputs and the codes/indexer, compute an indexer set
# if we have a provided indexer, then this need not consider
# the entire labels set
if step is not None and step < 0:
# Switch elements for negative step size
start, stop = stop - 1, start - 1
r = np.arange(start, stop, step)

if indexer is not None and len(indexer) != len(codes):
Expand Down Expand Up @@ -3198,19 +3195,25 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
if isinstance(key, slice):
# handle a slice, returning a slice if we can
# otherwise a boolean indexer
step = key.step
is_negative_step = step is not None and step < 0

try:
if key.start is not None:
start = level_index.get_loc(key.start)
elif is_negative_step:
start = len(level_index) - 1
else:
start = 0

if key.stop is not None:
stop = level_index.get_loc(key.stop)
elif is_negative_step:
stop = 0
elif isinstance(start, slice):
stop = len(level_index)
else:
stop = len(level_index) - 1
step = key.step
except KeyError:

# we have a partial slice (like looking up a partial date
Expand All @@ -3230,8 +3233,9 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
elif level > 0 or self._lexsort_depth == 0 or step is not None:
# need to have like semantics here to right
# searching as when we are using a slice
# so include the stop+1 (so we include stop)
return convert_indexer(start, stop + 1, step)
# so adjust the stop by 1 (so we include stop)
stop = (stop - 1) if is_negative_step else (stop + 1)
return convert_indexer(start, stop, step)
else:
# sorted, so can return slice object -> view
i = algos.searchsorted(level_codes, start, side="left")
Expand Down Expand Up @@ -3522,7 +3526,8 @@ def _reorder_indexer(

new_order = key_order_map[self.codes[i][indexer]]
elif isinstance(k, slice) and k.step is not None and k.step < 0:
new_order = np.arange(n)[k][indexer]
# flip order for negative step
new_order = np.arange(n)[::-1][indexer]
elif isinstance(k, slice) and k.start is None and k.stop is None:
# slice(None) should not determine order GH#31330
new_order = np.ones((n,))[indexer]
Expand Down
52 changes: 44 additions & 8 deletions pandas/tests/indexing/multiindex/test_slice.py
Original file line number Diff line number Diff line change
Expand Up @@ -758,12 +758,48 @@ def test_int_series_slicing(self, multiindex_year_month_day_dataframe_random_dat
expected = ymd.reindex(s.index[5:])
tm.assert_frame_equal(result, expected)

def test_loc_slice_negative_stepsize(self):
@pytest.mark.parametrize(
"dtype, loc, iloc",
[
# dtype = int, step = -1
("int", slice(None, None, -1), slice(None, None, -1)),
("int", slice(3, None, -1), slice(3, None, -1)),
("int", slice(None, 1, -1), slice(None, 0, -1)),
("int", slice(3, 1, -1), slice(3, 0, -1)),
# dtype = int, step = -2
("int", slice(None, None, -2), slice(None, None, -2)),
("int", slice(3, None, -2), slice(3, None, -2)),
("int", slice(None, 1, -2), slice(None, 0, -2)),
("int", slice(3, 1, -2), slice(3, 0, -2)),
# dtype = str, step = -1
("str", slice(None, None, -1), slice(None, None, -1)),
("str", slice("d", None, -1), slice(3, None, -1)),
("str", slice(None, "b", -1), slice(None, 0, -1)),
("str", slice("d", "b", -1), slice(3, 0, -1)),
# dtype = str, step = -2
("str", slice(None, None, -2), slice(None, None, -2)),
("str", slice("d", None, -2), slice(3, None, -2)),
("str", slice(None, "b", -2), slice(None, 0, -2)),
("str", slice("d", "b", -2), slice(3, 0, -2)),
],
)
def test_loc_slice_negative_stepsize(self, dtype, loc, iloc):
# GH#38071
mi = MultiIndex.from_product([["a", "b"], [0, 1]])
df = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], index=mi)
result = df.loc[("a", slice(None, None, -1)), :]
expected = DataFrame(
[[3, 4], [1, 2]], index=MultiIndex.from_tuples([("a", 1), ("a", 0)])
)
tm.assert_frame_equal(result, expected)
labels = {
"str": list("abcde"),
"int": range(5),
}[dtype]

mi = MultiIndex.from_arrays([labels] * 2)
df = DataFrame(1.0, index=mi, columns=["A"])

SLC = pd.IndexSlice

expected = df.iloc[iloc, :]
result_get_loc = df.loc[SLC[loc], :]
result_get_locs_level_0 = df.loc[SLC[loc, :], :]
result_get_locs_level_1 = df.loc[SLC[:, loc], :]

tm.assert_frame_equal(result_get_loc, expected)
tm.assert_frame_equal(result_get_locs_level_0, expected)
tm.assert_frame_equal(result_get_locs_level_1, expected)