Skip to content

Commit 340c98b

Browse files
committed
CLN/COMPAT: IntervalIndex
1 parent 74162aa commit 340c98b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+3211
-3065
lines changed

asv_bench/benchmarks/indexing.py

+20
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,26 @@ def time_is_monotonic(self):
226226
self.miint.is_monotonic
227227

228228

229+
class IntervalIndexing(object):
230+
goal_time = 0.2
231+
232+
def setup(self):
233+
self.monotonic = Series(np.arange(1000000),
234+
index=IntervalIndex.from_breaks(np.arange(1000001)))
235+
236+
def time_getitem_scalar(self):
237+
self.monotonic[80000]
238+
239+
def time_loc_scalar(self):
240+
self.monotonic.loc[80000]
241+
242+
def time_getitem_list(self):
243+
self.monotonic[80000:]
244+
245+
def time_loc_list(self):
246+
self.monotonic.loc[80000:]
247+
248+
229249
class PanelIndexing(object):
230250
goal_time = 0.2
231251

doc/source/api.rst

+21
Original file line numberDiff line numberDiff line change
@@ -1405,6 +1405,27 @@ Categorical Components
14051405
CategoricalIndex.as_ordered
14061406
CategoricalIndex.as_unordered
14071407

1408+
.. _api.intervalindex:
1409+
1410+
IntervalIndex
1411+
-------------
1412+
1413+
.. autosummary::
1414+
:toctree: generated/
1415+
1416+
IntervalIndex
1417+
1418+
IntervalIndex Components
1419+
~~~~~~~~~~~~~~~~~~~~~~~~
1420+
1421+
.. autosummary::
1422+
:toctree: generated/
1423+
1424+
IntervalIndex.from_arrays
1425+
IntervalIndex.from_tuples
1426+
IntervalIndex.from_breaks
1427+
IntervalIndex.from_intervals
1428+
14081429
.. _api.multiindex:
14091430

14101431
MultiIndex

doc/source/whatsnew/v0.20.0.txt

+31
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ Highlights include:
1313
- ``Panel`` has been deprecated, see :ref:`here <whatsnew_0200.api_breaking.deprecate_panel>`
1414
- Improved user API when accessing levels in ``.groupby()``, see :ref:`here <whatsnew_0200.enhancements.groupby_access>`
1515
- Improved support for UInt64 dtypes, see :ref:`here <whatsnew_0200.enhancements.uint64_support>`
16+
- Addition of an ``IntervalIndex`` and ``Interval`` scalar type, see :ref:`here <whatsnew_0200.enhancements.intervalindex>`
1617
- A new orient for JSON serialization, ``orient='table'``, that uses the Table Schema spec, see :ref:`here <whatsnew_0200.enhancements.table_schema>`
1718
- Window Binary Corr/Cov operations return a MultiIndexed ``DataFrame`` rather than a ``Panel``, as ``Panel`` is now deprecated, see :ref:`here <whatsnew_0200.api_breaking.rolling_pairwise>`
1819
- Support for S3 handling now uses ``s3fs``, see :ref:`here <whatsnew_0200.api_breaking.s3>`
@@ -314,6 +315,36 @@ To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you
314315

315316
sdf.to_coo()
316317

318+
.. _whatsnew_0200.enhancements.intervalindex:
319+
320+
IntervalIndex
321+
^^^^^^^^^^^^^
322+
323+
pandas has gained an ``IntervalIndex`` with its own dtype, ``interval``, as well as the ``Interval`` scalar type. These allow first-class support for interval
324+
notation, specifically as the return type for ``pd.cut`` and ``pd.qcut``. (:issue:`7640`, :issue:`8625`)
325+
326+
**Previous behavior**:
327+
328+
.. code-block:: ipython
329+
330+
In [2]: pd.cut(range(3), 2)
331+
Out[2]:
332+
[(-0.002, 1], (-0.002, 1], (1, 2]]
333+
Categories (2, object): [(-0.002, 1] < (1, 2]]
334+
335+
# the returned categories are strings, representing Intervals
336+
In [3]: pd.cut(range(3), 2).categories
337+
Out[3]: Index(['(-0.002, 1]', '(1, 2]'], dtype='object')
338+
339+
**New behavior**:
340+
341+
.. ipython:: python
342+
343+
c = pd.cut(range(3), 2)
344+
c
345+
c.categories
346+
pd.api.types.is_interval_dtype(c.categories)
347+
317348
.. _whatsnew_0200.enhancements.other:
318349

319350
Other Enhancements

pandas/_libs/hashtable.pyx

+2
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ PyDateTime_IMPORT
3939
cdef extern from "Python.h":
4040
int PySlice_Check(object)
4141

42+
cdef size_t _INIT_VEC_CAP = 128
43+
4244
include "hashtable_class_helper.pxi"
4345
include "hashtable_func_helper.pxi"
4446

pandas/src/interval.pyx renamed to pandas/_libs/interval.pyx

+74-30
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@ cimport numpy as np
22
import numpy as np
33
import pandas as pd
44

5+
cimport util
56
cimport cython
67
import cython
8+
from numpy cimport *
9+
from tslib import Timestamp
710

811
from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE,
912
PyObject_RichCompare)
@@ -44,6 +47,20 @@ cdef _interval_like(other):
4447

4548

4649
cdef class Interval(IntervalMixin):
50+
"""
51+
Immutable object implementing an Interval, a bounded slice-like interval.
52+
53+
.. versionadded:: 0.20.0
54+
55+
Properties
56+
----------
57+
left, right : values
58+
Left and right bounds for each interval.
59+
closed : {'left', 'right', 'both', 'neither'}
60+
Whether the interval is closed on the left-side, right-side, both or
61+
neither. Defaults to 'right'.
62+
"""
63+
4764
cdef readonly object left, right
4865
cdef readonly str closed
4966

@@ -84,88 +101,115 @@ cdef class Interval(IntervalMixin):
84101
return NotImplemented
85102
else:
86103
op_str = {Py_LT: '<', Py_LE: '<=', Py_GT: '>', Py_GE: '>='}[op]
87-
raise TypeError('unorderable types: %s() %s %s()' %
88-
(type(self).__name__, op_str, type(other).__name__))
104+
raise TypeError(
105+
'unorderable types: %s() %s %s()' %
106+
(type(self).__name__, op_str, type(other).__name__))
89107

90108
def __reduce__(self):
91109
args = (self.left, self.right, self.closed)
92110
return (type(self), args)
93111

112+
def _repr_base(self):
113+
left = self.left
114+
right = self.right
115+
116+
# TODO: need more general formatting methodology here
117+
if isinstance(left, Timestamp) and isinstance(right, Timestamp):
118+
left = left._short_repr
119+
right = right._short_repr
120+
121+
return left, right
122+
94123
def __repr__(self):
124+
125+
left, right = self._repr_base()
95126
return ('%s(%r, %r, closed=%r)' %
96-
(type(self).__name__, self.left, self.right, self.closed))
127+
(type(self).__name__, left, right, self.closed))
97128

98129
def __str__(self):
130+
131+
left, right = self._repr_base()
99132
start_symbol = '[' if self.closed_left else '('
100133
end_symbol = ']' if self.closed_right else ')'
101-
return '%s%s, %s%s' % (start_symbol, self.left, self.right, end_symbol)
134+
return '%s%s, %s%s' % (start_symbol, left, right, end_symbol)
102135

103136
def __add__(self, y):
104137
if isinstance(y, numbers.Number):
105138
return Interval(self.left + y, self.right + y)
106139
elif isinstance(y, Interval) and isinstance(self, numbers.Number):
107140
return Interval(y.left + self, y.right + self)
108-
else:
109-
raise NotImplemented
141+
return NotImplemented
110142

111143
def __sub__(self, y):
112144
if isinstance(y, numbers.Number):
113145
return Interval(self.left - y, self.right - y)
114-
else:
115-
raise NotImplemented
146+
return NotImplemented
116147

117148
def __mul__(self, y):
118149
if isinstance(y, numbers.Number):
119150
return Interval(self.left * y, self.right * y)
120151
elif isinstance(y, Interval) and isinstance(self, numbers.Number):
121152
return Interval(y.left * self, y.right * self)
122-
else:
123-
return NotImplemented
153+
return NotImplemented
124154

125155
def __div__(self, y):
126156
if isinstance(y, numbers.Number):
127157
return Interval(self.left / y, self.right / y)
128-
else:
129-
return NotImplemented
158+
return NotImplemented
130159

131160
def __truediv__(self, y):
132161
if isinstance(y, numbers.Number):
133162
return Interval(self.left / y, self.right / y)
134-
else:
135-
return NotImplemented
163+
return NotImplemented
136164

137165
def __floordiv__(self, y):
138166
if isinstance(y, numbers.Number):
139167
return Interval(self.left // y, self.right // y)
140-
else:
141-
return NotImplemented
168+
return NotImplemented
142169

143170

144171
@cython.wraparound(False)
145172
@cython.boundscheck(False)
146-
cpdef interval_bounds_to_intervals(np.ndarray left, np.ndarray right,
147-
str closed):
148-
result = np.empty(len(left), dtype=object)
149-
nulls = pd.isnull(left) | pd.isnull(right)
150-
result[nulls] = np.nan
151-
for i in np.flatnonzero(~nulls):
152-
result[i] = Interval(left[i], right[i], closed)
153-
return result
173+
cpdef intervals_to_interval_bounds(ndarray intervals):
174+
"""
175+
Parameters
176+
----------
177+
intervals: ndarray object array of Intervals / nulls
154178
179+
Returns
180+
-------
181+
tuples (left: ndarray object array,
182+
right: ndarray object array,
183+
closed: str)
184+
185+
"""
186+
187+
cdef:
188+
object closed = None, interval
189+
int64_t n = len(intervals)
190+
ndarray left, right
191+
192+
left = np.empty(n, dtype=object)
193+
right = np.empty(n, dtype=object)
155194

156-
@cython.wraparound(False)
157-
@cython.boundscheck(False)
158-
cpdef intervals_to_interval_bounds(np.ndarray intervals):
159-
left = np.empty(len(intervals), dtype=object)
160-
right = np.empty(len(intervals), dtype=object)
161-
cdef str closed = None
162195
for i in range(len(intervals)):
163196
interval = intervals[i]
197+
if util._checknull(interval):
198+
left[i] = np.nan
199+
right[i] = np.nan
200+
continue
201+
202+
if not isinstance(interval, Interval):
203+
raise TypeError("type {} with value {} is not an interval".format(
204+
type(interval), interval))
205+
164206
left[i] = interval.left
165207
right[i] = interval.right
166208
if closed is None:
167209
closed = interval.closed
168210
elif closed != interval.closed:
169211
raise ValueError('intervals must all be closed on the same side')
212+
170213
return left, right, closed
171214

215+
include "intervaltree.pxi"

0 commit comments

Comments
 (0)