Skip to content

Commit c30ef44

Browse files
shoyer authored and jreback committed
API/ENH: IntervalIndex
closes pandas-dev#7640 closes pandas-dev#8625
1 parent 2cad4dd commit c30ef44

18 files changed

+3376
-160
lines changed

pandas/_libs/hashtable.pyx

-3
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,6 @@ PyDateTime_IMPORT
3939
cdef extern from "Python.h":
4040
int PySlice_Check(object)
4141

42-
cdef size_t _INIT_VEC_CAP = 128
43-
44-
4542
include "hashtable_class_helper.pxi"
4643
include "hashtable_func_helper.pxi"
4744

pandas/_libs/lib.pyx

+5-1
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,7 @@ cpdef bint isscalar(object val):
314314
- instances of datetime.timedelta
315315
- Period
316316
- instances of decimal.Decimal
317+
- Interval
317318
318319
"""
319320

@@ -327,7 +328,8 @@ cpdef bint isscalar(object val):
327328
or PyDelta_Check(val)
328329
or PyTime_Check(val)
329330
or util.is_period_object(val)
330-
or is_decimal(val))
331+
or is_decimal(val)
332+
or is_interval(val))
331333

332334

333335
def item_from_zerodim(object val):
@@ -1965,4 +1967,6 @@ cdef class BlockPlacement:
19651967

19661968
include "reduce.pyx"
19671969
include "properties.pyx"
1970+
include "interval.pyx"
1971+
include "intervaltree.pyx"
19681972
include "inference.pyx"

pandas/_libs/src/inference.pyx

+21
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,10 @@ def infer_dtype(object _values):
347347
if is_period_array(values):
348348
return 'period'
349349

350+
elif is_interval(val):
351+
if is_interval_array_fixed_closed(values):
352+
return 'interval'
353+
350354
for i in range(n):
351355
val = util.get_value_1d(values, i)
352356
if (util.is_integer_object(val) and
@@ -742,6 +746,23 @@ cpdef bint is_period_array(ndarray[object] values):
742746
return False
743747
return null_count != n
744748

749+
cdef inline bint is_interval(object o):
    """Return True when ``o`` is an instance of ``Interval``."""
    return isinstance(o, Interval)
751+
752+
def is_interval_array_fixed_closed(ndarray[object] values):
    """
    Check that ``values`` contains only Interval objects which all share
    the same ``closed`` attribute.

    Returns False for an empty array, as soon as an element is not an
    Interval, or as soon as an element's ``closed`` differs from the
    first element's; returns True otherwise.
    """
    cdef:
        Py_ssize_t idx, count = len(values)
        str first_closed
        object item

    if count == 0:
        return False

    # The first element fixes the ``closed`` value all others must match.
    item = values[0]
    if not is_interval(item):
        return False
    first_closed = item.closed

    for idx in range(1, count):
        item = values[idx]
        if not is_interval(item):
            return False
        if first_closed != item.closed:
            return False
    return True
765+
745766

746767
cdef extern from "parse_helper.h":
747768
inline int floatify(object, double *result, int *maybe_int) except -1

pandas/core/algorithms.py

-1
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,6 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
404404
cat, bins = cut(values, bins, retbins=True)
405405
except TypeError:
406406
raise TypeError("bins argument only works with numeric data.")
407-
values = cat.codes
408407

409408
if is_extension_type(values) and not is_datetimetz(values):
410409
# handle Categorical and sparse,

pandas/core/api.py

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pandas.core.index import (Index, CategoricalIndex, Int64Index,
1313
UInt64Index, RangeIndex, Float64Index,
1414
MultiIndex)
15+
from pandas.core.interval import Interval, IntervalIndex
1516

1617
from pandas.core.series import Series
1718
from pandas.core.frame import DataFrame

pandas/core/groupby.py

+16-8
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@
4242
from pandas.core.categorical import Categorical
4343
from pandas.core.frame import DataFrame
4444
from pandas.core.generic import NDFrame
45-
from pandas.core.index import (Index, MultiIndex, CategoricalIndex,
46-
_ensure_index)
45+
from pandas.core.interval import IntervalIndex
4746
from pandas.core.internals import BlockManager, make_block
4847
from pandas.core.series import Series
4948
from pandas.core.panel import Panel
@@ -3072,12 +3071,20 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
30723071
if bins is None:
30733072
lab, lev = algorithms.factorize(val, sort=True)
30743073
else:
3075-
cat, bins = cut(val, bins, retbins=True)
3074+
raise NotImplementedError('this is broken')
3075+
lab, bins = cut(val, bins, retbins=True)
30763076
# bins[:-1] for backward compat;
30773077
# o.w. cat.categories could be better
3078-
lab, lev, dropna = cat.codes, bins[:-1], False
3079-
3080-
sorter = np.lexsort((lab, ids))
3078+
# cat = Categorical(cat)
3079+
# lab, lev, dropna = cat.codes, bins[:-1], False
3080+
3081+
if (lab.dtype == object
3082+
and lib.is_interval_array_fixed_closed(lab[notnull(lab)])):
3083+
lab_index = Index(lab)
3084+
assert isinstance(lab_index, IntervalIndex)
3085+
sorter = np.lexsort((lab_index.left, lab_index.right, ids))
3086+
else:
3087+
sorter = np.lexsort((lab, ids))
30813088
ids, lab = ids[sorter], lab[sorter]
30823089

30833090
# group boundaries are where group ids change
@@ -3118,12 +3125,13 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
31183125
acc = rep(d)
31193126
out /= acc
31203127

3121-
if sort and bins is None:
3128+
if sort: # and bins is None:
31223129
cat = ids[inc][mask] if dropna else ids[inc]
31233130
sorter = np.lexsort((out if ascending else -out, cat))
31243131
out, labels[-1] = out[sorter], labels[-1][sorter]
31253132

3126-
if bins is None:
3133+
# if bins is None:
3134+
if True:
31273135
mi = MultiIndex(levels=levels, labels=labels, names=names,
31283136
verify_integrity=False)
31293137

0 commit comments

Comments
 (0)