Skip to content

Commit 3274f27

Browse files
committed
Merge pull request #8753 from immerrr/refactor-slice-locs
API: allow negative steps for label-based indexing
2 parents 750151c + b735ffc commit 3274f27

File tree

11 files changed

+594
-193
lines changed

11 files changed

+594
-193
lines changed

doc/source/whatsnew/v0.15.2.txt

+23
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,30 @@ Bug Fixes
7171
- ``Timedelta`` kwargs may now be numpy ints and floats (:issue:`8757`).
7272
- ``sql_schema`` now generates dialect appropriate ``CREATE TABLE`` statements (:issue:`8697`)
7373
- ``slice`` string method now takes step into account (:issue:`8754`)
74+
- Fix negative step support for label-based slices (:issue:`8753`)
7475

76+
Old behavior:
77+
78+
.. code-block:: python
79+
80+
In [1]: s = pd.Series(np.arange(3), ['a', 'b', 'c'])
81+
Out[1]:
82+
a 0
83+
b 1
84+
c 2
85+
dtype: int64
86+
87+
In [2]: s.loc['c':'a':-1]
88+
Out[2]:
89+
c 2
90+
dtype: int64
91+
92+
New behavior:
93+
94+
.. ipython:: python
95+
96+
s = pd.Series(np.arange(3), ['a', 'b', 'c'])
97+
s.loc['c':'a':-1]
7598

7699

77100
- Imported categorical variables from Stata files retain the ordinal information in the underlying data (:issue:`8836`).

pandas/core/index.py

+136-66
Original file line numberDiff line numberDiff line change
@@ -1959,23 +1959,99 @@ def slice_indexer(self, start=None, end=None, step=None):
19591959
-----
19601960
This function assumes that the data is sorted, so use at your own peril
19611961
"""
1962-
start_slice, end_slice = self.slice_locs(start, end)
1962+
start_slice, end_slice = self.slice_locs(start, end, step=step)
19631963

19641964
# return a slice
1965-
if np.isscalar(start_slice) and np.isscalar(end_slice):
1965+
if not lib.isscalar(start_slice):
1966+
raise AssertionError("Start slice bound is non-scalar")
1967+
if not lib.isscalar(end_slice):
1968+
raise AssertionError("End slice bound is non-scalar")
19661969

1967-
# degenerate cases
1968-
if start is None and end is None:
1969-
return slice(None, None, step)
1970+
return slice(start_slice, end_slice, step)
19701971

1971-
return slice(start_slice, end_slice, step)
1972+
def _maybe_cast_slice_bound(self, label, side):
1973+
"""
1974+
This function should be overloaded in subclasses that allow non-trivial
1975+
casting on label-slice bounds, e.g. datetime-like indices allowing
1976+
strings containing formatted datetimes.
19721977
1973-
# loc indexers
1974-
return (Index(start_slice) & Index(end_slice)).values
1978+
Parameters
1979+
----------
1980+
label : object
1981+
side : {'left', 'right'}
1982+
1983+
Notes
1984+
-----
1985+
Value of `side` parameter should be validated in caller.
19751986
1976-
def slice_locs(self, start=None, end=None):
19771987
"""
1978-
For an ordered Index, compute the slice locations for input labels
1988+
return label
1989+
1990+
def get_slice_bound(self, label, side):
    """
    Calculate slice bound that corresponds to given label.

    Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
    of given label.

    Parameters
    ----------
    label : object
    side : {'left', 'right'}

    Returns
    -------
    int
        Position usable as a slice bound.

    Raises
    ------
    ValueError
        If `side` is neither 'left' nor 'right'.
    KeyError
        If the label is absent and the index is not monotonic, or if the
        label is non-unique and its locations cannot be represented by a
        slice.
    """
    if side not in ('left', 'right'):
        raise ValueError(
            "Invalid value for side kwarg,"
            " must be either 'left' or 'right': %s" % (side,))

    original_label = label
    # For datetime indices label may be a string that has to be converted
    # to datetime boundary according to its resolution.
    label = self._maybe_cast_slice_bound(label, side)

    try:
        slc = self.get_loc(label)
    except KeyError:
        if self.is_monotonic_increasing:
            return self.searchsorted(label, side=side)
        elif self.is_monotonic_decreasing:
            # np.searchsorted expects ascending sort order, have to reverse
            # everything for it to work (element ordering, search side and
            # resulting value).  The search side must be mirrored in *both*
            # directions: 'left' -> 'right' and 'right' -> 'left'.
            pos = self[::-1].searchsorted(
                label, side='right' if side == 'left' else 'left')
            return len(self) - pos

        # In all other cases, just re-raise the KeyError
        raise

    if isinstance(slc, np.ndarray):
        # get_loc may return a boolean array or an array of indices, which
        # is OK as long as they are representable by a slice.
        if com.is_bool_dtype(slc):
            slc = lib.maybe_booleans_to_slice(slc.view('u1'))
        else:
            slc = lib.maybe_indices_to_slice(slc.astype('i8'))
        if isinstance(slc, np.ndarray):
            # Still an array: the matching positions are not contiguous.
            raise KeyError(
                "Cannot get %s slice bound for non-unique label:"
                " %r" % (side, original_label))

    if isinstance(slc, slice):
        if side == 'left':
            return slc.start
        else:
            return slc.stop
    else:
        # Scalar position: the right bound is one past the match.
        if side == 'right':
            return slc + 1
        else:
            return slc
2051+
2052+
def slice_locs(self, start=None, end=None, step=None):
2053+
"""
2054+
Compute slice locations for input labels.
19792055
19802056
Parameters
19812057
----------
@@ -1986,51 +2062,51 @@ def slice_locs(self, start=None, end=None):
19862062
19872063
Returns
19882064
-------
1989-
(start, end) : (int, int)
2065+
start, end : int
19902066
1991-
Notes
1992-
-----
1993-
This function assumes that the data is sorted, so use at your own peril
19942067
"""
2068+
inc = (step is None or step >= 0)
19952069

1996-
is_unique = self.is_unique
1997-
1998-
def _get_slice(starting_value, offset, search_side, slice_property,
1999-
search_value):
2000-
if search_value is None:
2001-
return starting_value
2070+
if not inc:
2071+
# If it's a reverse slice, temporarily swap bounds.
2072+
start, end = end, start
20022073

2003-
try:
2004-
slc = self.get_loc(search_value)
2005-
2006-
if not is_unique:
2007-
2008-
# get_loc will return a boolean array for non_uniques
2009-
# if we are not monotonic
2010-
if isinstance(slc, (np.ndarray, Index)):
2011-
raise KeyError("cannot peform a slice operation "
2012-
"on a non-unique non-monotonic index")
2013-
2014-
if isinstance(slc, slice):
2015-
slc = getattr(slc, slice_property)
2016-
else:
2017-
slc += offset
2074+
start_slice = None
2075+
if start is not None:
2076+
start_slice = self.get_slice_bound(start, 'left')
2077+
if start_slice is None:
2078+
start_slice = 0
20182079

2019-
except KeyError:
2020-
if self.is_monotonic_increasing:
2021-
slc = self.searchsorted(search_value, side=search_side)
2022-
elif self.is_monotonic_decreasing:
2023-
search_side = 'right' if search_side == 'left' else 'left'
2024-
slc = len(self) - self[::-1].searchsorted(search_value,
2025-
side=search_side)
2026-
else:
2027-
raise
2028-
return slc
2080+
end_slice = None
2081+
if end is not None:
2082+
end_slice = self.get_slice_bound(end, 'right')
2083+
if end_slice is None:
2084+
end_slice = len(self)
20292085

2030-
start_slice = _get_slice(0, offset=0, search_side='left',
2031-
slice_property='start', search_value=start)
2032-
end_slice = _get_slice(len(self), offset=1, search_side='right',
2033-
slice_property='stop', search_value=end)
2086+
if not inc:
2087+
# Bounds at this moment are swapped, swap them back and shift by 1.
2088+
#
2089+
# slice_locs('B', 'A', step=-1): s='B', e='A'
2090+
#
2091+
# s='A' e='B'
2092+
# AFTER SWAP: | |
2093+
# v ------------------> V
2094+
# -----------------------------------
2095+
# | | |A|A|A|A| | | | | |B|B| | | | |
2096+
# -----------------------------------
2097+
# ^ <------------------ ^
2098+
# SHOULD BE: | |
2099+
# end=s-1 start=e-1
2100+
#
2101+
end_slice, start_slice = start_slice - 1, end_slice - 1
2102+
2103+
# i == -1 triggers ``len(self) + i`` selection that points to the
2104+
# last element, not before-the-first one, subtracting len(self)
2105+
# compensates that.
2106+
if end_slice == -1:
2107+
end_slice -= len(self)
2108+
if start_slice == -1:
2109+
start_slice -= len(self)
20342110

20352111
return start_slice, end_slice
20362112

@@ -3887,7 +3963,12 @@ def _tuple_index(self):
38873963
"""
38883964
return Index(self.values)
38893965

3890-
def slice_locs(self, start=None, end=None, strict=False):
3966+
def get_slice_bound(self, label, side):
    """
    Calculate the slice bound for a scalar or (partial) tuple label.

    A non-tuple label is wrapped into a one-element tuple before the lookup
    is delegated to ``_partial_tup_index``.

    Parameters
    ----------
    label : object or tuple
    side : {'left', 'right'}
    """
    key = label if isinstance(label, tuple) else (label,)
    return self._partial_tup_index(key, side=side)
3970+
3971+
def slice_locs(self, start=None, end=None, step=None):
38913972
"""
38923973
For an ordered MultiIndex, compute the slice locations for input
38933974
labels. They can be tuples representing partial levels, e.g. for a
@@ -3900,7 +3981,8 @@ def slice_locs(self, start=None, end=None, strict=False):
39003981
If None, defaults to the beginning
39013982
end : label or tuple
39023983
If None, defaults to the end
3903-
strict : boolean,
3984+
step : int or None
3985+
Slice step
39043986
39053987
Returns
39063988
-------
@@ -3910,21 +3992,9 @@ def slice_locs(self, start=None, end=None, strict=False):
39103992
-----
39113993
This function assumes that the data is sorted by the first level
39123994
"""
3913-
if start is None:
3914-
start_slice = 0
3915-
else:
3916-
if not isinstance(start, tuple):
3917-
start = start,
3918-
start_slice = self._partial_tup_index(start, side='left')
3919-
3920-
if end is None:
3921-
end_slice = len(self)
3922-
else:
3923-
if not isinstance(end, tuple):
3924-
end = end,
3925-
end_slice = self._partial_tup_index(end, side='right')
3926-
3927-
return start_slice, end_slice
3995+
# This function adds nothing to its parent implementation (the magic
3996+
# happens in get_slice_bound method), but it adds meaningful doc.
3997+
return super(MultiIndex, self).slice_locs(start, end, step)
39283998

39293999
def _partial_tup_index(self, tup, side='left'):
39304000
if len(tup) > self.lexsort_depth:

pandas/tests/test_index.py

+28-2
Original file line numberDiff line numberDiff line change
@@ -910,8 +910,34 @@ def test_slice_locs_na(self):
910910
self.assertEqual(idx.slice_locs(1), (1, 3))
911911
self.assertEqual(idx.slice_locs(np.nan), (0, 3))
912912

913-
idx = Index([np.nan, np.nan, 1, 2])
914-
self.assertRaises(KeyError, idx.slice_locs, np.nan)
913+
idx = Index([0, np.nan, np.nan, 1, 2])
914+
self.assertEqual(idx.slice_locs(np.nan), (1, 5))
915+
916+
def test_slice_locs_negative_step(self):
917+
idx = Index(list('bcdxy'))
918+
919+
SLC = pd.IndexSlice
920+
921+
def check_slice(in_slice, expected):
922+
s_start, s_stop = idx.slice_locs(in_slice.start, in_slice.stop,
923+
in_slice.step)
924+
result = idx[s_start:s_stop:in_slice.step]
925+
expected = pd.Index(list(expected))
926+
self.assertTrue(result.equals(expected))
927+
928+
for in_slice, expected in [
929+
(SLC[::-1], 'yxdcb'), (SLC['b':'y':-1], ''),
930+
(SLC['b'::-1], 'b'), (SLC[:'b':-1], 'yxdcb'),
931+
(SLC[:'y':-1], 'y'), (SLC['y'::-1], 'yxdcb'),
932+
(SLC['y'::-4], 'yb'),
933+
# absent labels
934+
(SLC[:'a':-1], 'yxdcb'), (SLC[:'a':-2], 'ydb'),
935+
(SLC['z'::-1], 'yxdcb'), (SLC['z'::-3], 'yc'),
936+
(SLC['m'::-1], 'dcb'), (SLC[:'m':-1], 'yx'),
937+
(SLC['a':'a':-1], ''), (SLC['z':'z':-1], ''),
938+
(SLC['m':'m':-1], '')
939+
]:
940+
check_slice(in_slice, expected)
915941

916942
def test_drop(self):
917943
n = len(self.strIndex)

pandas/tests/test_indexing.py

+58
Original file line numberDiff line numberDiff line change
@@ -4141,6 +4141,64 @@ def run_tests(df, rhs, right):
41414141

41424142
run_tests(df, rhs, right)
41434143

4144+
def test_str_label_slicing_with_negative_step(self):
    """
    Label-based slicing with a negative step must match the equivalent
    positional slice for string, integer and float indices.
    """
    SLC = pd.IndexSlice

    def assert_slices_equivalent(l_slc, i_slc):
        # ``s`` and ``idx`` are picked up from the enclosing loop below.
        assert_series_equal(s.loc[l_slc], s.iloc[i_slc])

        # ``is_integer`` is a method; it has to be called.  Testing the
        # bound method itself is always truthy, which would silently skip
        # the checks below for every index type.
        if not idx.is_integer():
            # For integer indices, ix and plain getitem are position-based.
            assert_series_equal(s[l_slc], s.iloc[i_slc])
            assert_series_equal(s.ix[l_slc], s.iloc[i_slc])

    for idx in [_mklbl('A', 20), np.arange(20) + 100,
                np.linspace(100, 150, 20)]:
        idx = Index(idx)
        s = Series(np.arange(20), index=idx)
        assert_slices_equivalent(SLC[idx[9]::-1], SLC[9::-1])
        assert_slices_equivalent(SLC[:idx[9]:-1], SLC[:8:-1])
        assert_slices_equivalent(SLC[idx[13]:idx[9]:-1], SLC[13:8:-1])
        # start label precedes stop label with a negative step: empty result
        assert_slices_equivalent(SLC[idx[9]:idx[13]:-1], SLC[:0])
4163+
4164+
def test_multiindex_label_slicing_with_negative_step(self):
    """
    Negative-step label slices on a two-level MultiIndex, given as scalars,
    partial tuples or full tuples, must match the equivalent positional
    slices through ``.loc``, plain getitem and ``.ix``.
    """
    s = Series(np.arange(20),
               MultiIndex.from_product([list('abcde'), np.arange(4)]))
    SLC = pd.IndexSlice

    # (label-based slice, equivalent positional slice)
    equivalents = [
        (SLC[::-1], SLC[::-1]),

        (SLC['d'::-1], SLC[15::-1]),
        (SLC[('d',)::-1], SLC[15::-1]),

        (SLC[:'d':-1], SLC[:11:-1]),
        (SLC[:('d',):-1], SLC[:11:-1]),

        (SLC['d':'b':-1], SLC[15:3:-1]),
        (SLC[('d',):'b':-1], SLC[15:3:-1]),
        (SLC['d':('b',):-1], SLC[15:3:-1]),
        (SLC[('d',):('b',):-1], SLC[15:3:-1]),
        (SLC['b':'d':-1], SLC[:0]),

        (SLC[('c', 2)::-1], SLC[10::-1]),
        (SLC[:('c', 2):-1], SLC[:9:-1]),
        (SLC[('e', 0):('c', 2):-1], SLC[16:9:-1]),
    ]

    for l_slc, i_slc in equivalents:
        assert_series_equal(s.loc[l_slc], s.iloc[i_slc])
        assert_series_equal(s[l_slc], s.iloc[i_slc])
        assert_series_equal(s.ix[l_slc], s.iloc[i_slc])
4191+
4192+
def test_slice_with_zero_step_raises(self):
    """A zero slice step must raise ValueError for getitem, .loc and .ix."""
    s = Series(np.arange(20), index=_mklbl('A', 20))

    zero_step_accessors = (
        lambda: s[::0],
        lambda: s.loc[::0],
        lambda: s.ix[::0],
    )
    for accessor in zero_step_accessors:
        self.assertRaisesRegexp(ValueError, 'slice step cannot be zero',
                                accessor)
4200+
4201+
41444202
class TestSeriesNoneCoercion(tm.TestCase):
41454203
EXPECTED_RESULTS = [
41464204
# For numeric series, we should coerce to NaN.

0 commit comments

Comments
 (0)