Skip to content

ENH: slicing with decreasing monotonic indexes #8680

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 2, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1166,6 +1166,8 @@ Attributes

Index.values
Index.is_monotonic
Index.is_monotonic_increasing
Index.is_monotonic_decreasing
Index.is_unique
Index.dtype
Index.inferred_type
Expand Down
28 changes: 26 additions & 2 deletions doc/source/whatsnew/v0.15.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,29 @@ API changes

s.dt.hour

- support for slicing with monotonic decreasing indexes, even if ``start`` or ``stop`` is
not found in the index (:issue:`7860`):

.. ipython:: python

s = pd.Series(['a', 'b', 'c', 'd'], [4, 3, 2, 1])
s

previous behavior:

.. code-block:: python

In [8]: s.loc[3.5:1.5]
KeyError: 3.5

current behavior:

.. ipython:: python

s.loc[3.5:1.5]

- added Index properties ``is_monotonic_increasing`` and ``is_monotonic_decreasing`` (:issue:`8680`).

.. _whatsnew_0151.enhancements:

Enhancements
Expand Down Expand Up @@ -208,8 +231,9 @@ Bug Fixes
- Bug in ix/loc block splitting on setitem (manifests with integer-like dtypes, e.g. datetime64) (:issue:`8607`)




- Bug when doing label-based indexing with integers not found in the index for
non-unique but monotonic indexes (:issue:`8680`).
- Bug when indexing a Float64Index with ``np.nan`` on numpy 1.7 (:issue:`8980`).



Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1461,7 +1461,7 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True):
name=self.index[loc])

else:
result = self[loc]
result = self.iloc[loc]
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was surprising to encounter (needed to change it to fix some tests), but maybe it was there for a reason? I don't think there is any reason for loc here to do label based rather than integer indexing?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could be
I have tried not to touch xs recently. It seems fragile, and multi-slicing obviates the need for it anyhow.

result.index = new_index

# this could be a view
Expand Down
43 changes: 28 additions & 15 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,8 +573,22 @@ def _mpl_repr(self):

@property
def is_monotonic(self):
""" return if the index has monotonic (only equaly or increasing) values """
return self._engine.is_monotonic
""" alias for is_monotonic_increasing (deprecated) """
return self._engine.is_monotonic_increasing

@property
def is_monotonic_increasing(self):
""" return if the index is monotonic increasing (only equal or
increasing) values
"""
return self._engine.is_monotonic_increasing

@property
def is_monotonic_decreasing(self):
""" return if the index is monotonic decreasing (only equal or
decreasing) values
"""
return self._engine.is_monotonic_decreasing

def is_lexsorted_for_tuple(self, tup):
return True
Expand Down Expand Up @@ -1988,16 +2002,12 @@ def _get_slice(starting_value, offset, search_side, slice_property,
slc += offset

except KeyError:
if self.is_monotonic:

# we are duplicated but non-unique
# so if we have an indexer then we are done
# else search for it (GH 7523)
if not is_unique and is_integer(search_value):
slc = search_value
else:
slc = self.searchsorted(search_value,
side=search_side)
if self.is_monotonic_increasing:
slc = self.searchsorted(search_value, side=search_side)
elif self.is_monotonic_decreasing:
search_side = 'right' if search_side == 'left' else 'left'
slc = len(self) - self[::-1].searchsorted(search_value,
side=search_side)
else:
raise
return slc
Expand Down Expand Up @@ -2431,10 +2441,13 @@ def __contains__(self, other):
def get_loc(self, key):
try:
if np.all(np.isnan(key)):
nan_idxs = self._nan_idxs
try:
return self._nan_idxs.item()
except ValueError:
return self._nan_idxs
return nan_idxs.item()
except (ValueError, IndexError):
# should only need to catch ValueError here but on numpy
# 1.7 .item() can raise IndexError when NaNs are present
return nan_idxs
except (TypeError, NotImplementedError):
pass
return super(Float64Index, self).get_loc(key)
Expand Down
39 changes: 25 additions & 14 deletions pandas/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ cdef class IndexEngine:
bint over_size_threshold

cdef:
bint unique, monotonic
bint unique, monotonic_inc, monotonic_dec
bint initialized, monotonic_check, unique_check

def __init__(self, vgetter, n):
Expand All @@ -89,7 +89,8 @@ cdef class IndexEngine:
self.monotonic_check = 0

self.unique = 0
self.monotonic = 0
self.monotonic_inc = 0
self.monotonic_dec = 0

def __contains__(self, object val):
self._ensure_mapping_populated()
Expand Down Expand Up @@ -134,7 +135,7 @@ cdef class IndexEngine:
if is_definitely_invalid_key(val):
raise TypeError

if self.over_size_threshold and self.is_monotonic:
if self.over_size_threshold and self.is_monotonic_increasing:
if not self.is_unique:
return self._get_loc_duplicates(val)
values = self._get_index_values()
Expand All @@ -158,7 +159,7 @@ cdef class IndexEngine:
cdef:
Py_ssize_t diff

if self.is_monotonic:
if self.is_monotonic_increasing:
values = self._get_index_values()
left = values.searchsorted(val, side='left')
right = values.searchsorted(val, side='right')
Expand Down Expand Up @@ -210,25 +211,35 @@ cdef class IndexEngine:

return self.unique == 1

property is_monotonic:
property is_monotonic_increasing:

def __get__(self):
if not self.monotonic_check:
self._do_monotonic_check()

return self.monotonic == 1
return self.monotonic_inc == 1

property is_monotonic_decreasing:

def __get__(self):
if not self.monotonic_check:
self._do_monotonic_check()

return self.monotonic_dec == 1

cdef inline _do_monotonic_check(self):
try:
values = self._get_index_values()
self.monotonic, unique = self._call_monotonic(values)
self.monotonic_inc, self.monotonic_dec, unique = \
self._call_monotonic(values)

if unique is not None:
self.unique = unique
self.unique_check = 1

except TypeError:
self.monotonic = 0
self.monotonic_inc = 0
self.monotonic_dec = 0
self.monotonic_check = 1

cdef _get_index_values(self):
Expand Down Expand Up @@ -345,7 +356,7 @@ cdef class Int64Engine(IndexEngine):
return _hash.Int64HashTable(n)

def _call_monotonic(self, values):
return algos.is_monotonic_int64(values)
return algos.is_monotonic_int64(values, timelike=False)

def get_pad_indexer(self, other, limit=None):
return algos.pad_int64(self._get_index_values(), other,
Expand Down Expand Up @@ -435,7 +446,7 @@ cdef class Float64Engine(IndexEngine):
return result

def _call_monotonic(self, values):
return algos.is_monotonic_float64(values)
return algos.is_monotonic_float64(values, timelike=False)

def get_pad_indexer(self, other, limit=None):
return algos.pad_float64(self._get_index_values(), other,
Expand Down Expand Up @@ -489,7 +500,7 @@ cdef class ObjectEngine(IndexEngine):
return _hash.PyObjectHashTable(n)

def _call_monotonic(self, values):
return algos.is_monotonic_object(values)
return algos.is_monotonic_object(values, timelike=False)

def get_pad_indexer(self, other, limit=None):
return algos.pad_object(self._get_index_values(), other,
Expand All @@ -506,7 +517,7 @@ cdef class DatetimeEngine(Int64Engine):
return 'M8[ns]'

def __contains__(self, object val):
if self.over_size_threshold and self.is_monotonic:
if self.over_size_threshold and self.is_monotonic_increasing:
if not self.is_unique:
return self._get_loc_duplicates(val)
values = self._get_index_values()
Expand All @@ -521,15 +532,15 @@ cdef class DatetimeEngine(Int64Engine):
return self.vgetter().view('i8')

def _call_monotonic(self, values):
return algos.is_monotonic_int64(values)
return algos.is_monotonic_int64(values, timelike=True)

cpdef get_loc(self, object val):
if is_definitely_invalid_key(val):
raise TypeError

# Welcome to the spaghetti factory

if self.over_size_threshold and self.is_monotonic:
if self.over_size_threshold and self.is_monotonic_increasing:
if not self.is_unique:
val = _to_i8(val)
return self._get_loc_duplicates(val)
Expand Down
32 changes: 26 additions & 6 deletions pandas/src/generate_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,31 +539,51 @@ def diff_2d_%(name)s(ndarray[%(c_type)s, ndim=2] arr,

is_monotonic_template = """@cython.boundscheck(False)
@cython.wraparound(False)
def is_monotonic_%(name)s(ndarray[%(c_type)s] arr):
def is_monotonic_%(name)s(ndarray[%(c_type)s] arr, bint timelike):
'''
Returns
-------
is_monotonic, is_unique
is_monotonic_inc, is_monotonic_dec, is_unique
'''
cdef:
Py_ssize_t i, n
%(c_type)s prev, cur
bint is_unique = 1
bint is_monotonic_inc = 1
bint is_monotonic_dec = 1

n = len(arr)

if n < 2:
return True, True
if n == 1:
if arr[0] != arr[0] or (timelike and arr[0] == iNaT):
# single value is NaN
return False, False, True
else:
return True, True, True
elif n < 2:
return True, True, True

if timelike and arr[0] == iNaT:
return False, False, None

prev = arr[0]
for i in range(1, n):
cur = arr[i]
if timelike and cur == iNaT:
return False, False, None
if cur < prev:
return False, None
is_monotonic_inc = 0
elif cur > prev:
is_monotonic_dec = 0
elif cur == prev:
is_unique = 0
else:
# cur or prev is NaN
return False, False, None
if not is_monotonic_inc and not is_monotonic_dec:
return False, False, None
prev = cur
return True, is_unique
return is_monotonic_inc, is_monotonic_dec, is_unique
"""

map_indices_template = """@cython.wraparound(False)
Expand Down
Loading