Skip to content

Commit 2a58edb

Browse files
shoyer authored and jreback committed
API/ENH: IntervalIndex
closes pandas-dev#7640 closes pandas-dev#8625
1 parent 34c6bd0 commit 2a58edb

18 files changed

+3377 -160 lines changed

pandas/_libs/hashtable.pyx

-3
Original file line number | Diff line number | Diff line change
@@ -39,9 +39,6 @@ PyDateTime_IMPORT
3939
cdef extern from "Python.h":
4040
int PySlice_Check(object)
4141

42-
cdef size_t _INIT_VEC_CAP = 128
43-
44-
4542
include "hashtable_class_helper.pxi"
4643
include "hashtable_func_helper.pxi"
4744

pandas/_libs/lib.pyx

+5-1
Original file line number | Diff line number | Diff line change
@@ -245,6 +245,7 @@ cpdef bint isscalar(object val):
245245
- instances of datetime.timedelta
246246
- Period
247247
- instances of decimal.Decimal
248+
- Interval
248249
249250
"""
250251

@@ -258,7 +259,8 @@ cpdef bint isscalar(object val):
258259
or PyDelta_Check(val)
259260
or PyTime_Check(val)
260261
or util.is_period_object(val)
261-
or is_decimal(val))
262+
or is_decimal(val),
263+
or is_interval(val))
262264

263265

264266
def item_from_zerodim(object val):
@@ -1896,4 +1898,6 @@ cdef class BlockPlacement:
18961898

18971899
include "reduce.pyx"
18981900
include "properties.pyx"
1901+
include "interval.pyx"
1902+
include "intervaltree.pyx"
18991903
include "inference.pyx"

pandas/_libs/src/inference.pyx

+21
Original file line number | Diff line number | Diff line change
@@ -347,6 +347,10 @@ def infer_dtype(object _values):
347347
if is_period_array(values):
348348
return 'period'
349349

350+
elif is_interval(val):
351+
if is_interval_array_fixed_closed(values):
352+
return 'interval'
353+
350354
for i in range(n):
351355
val = util.get_value_1d(values, i)
352356
if (util.is_integer_object(val) and
@@ -742,6 +746,23 @@ cpdef bint is_period_array(ndarray[object] values):
742746
return False
743747
return null_count != n
744748

749+
cdef inline bint is_interval(object o):
    # True when `o` is an Interval instance; the check goes through
    # isinstance, so subclasses of Interval are accepted as well.
    return isinstance(o, Interval)
751+
752+
def is_interval_array_fixed_closed(ndarray[object] values):
    """
    Check whether an object array holds only Intervals with one shared
    ``closed`` value.

    Parameters
    ----------
    values : ndarray[object]
        One-dimensional object array to inspect.

    Returns
    -------
    bool
        False for an empty array, for an array containing any element that
        is not an Interval, or when two elements disagree on ``closed``;
        True otherwise.
    """
    cdef:
        Py_ssize_t pos, count = len(values)
        str expected

    # An empty array is never reported as an interval array.
    if count == 0:
        return False

    # The first element supplies the reference ``closed`` value; it has to
    # be an Interval itself before that attribute is read.
    if not is_interval(values[0]):
        return False
    expected = values[0].closed

    # Every remaining element must be an Interval that agrees with the
    # reference value.
    for pos in range(1, count):
        if not is_interval(values[pos]):
            return False
        if expected != values[pos].closed:
            return False

    return True
765+
745766

746767
cdef extern from "parse_helper.h":
747768
inline int floatify(object, double *result, int *maybe_int) except -1

pandas/core/algorithms.py

-1
Original file line number | Diff line number | Diff line change
@@ -442,7 +442,6 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
442442
cat, bins = cut(values, bins, retbins=True)
443443
except TypeError:
444444
raise TypeError("bins argument only works with numeric data.")
445-
values = cat.codes
446445

447446
if is_extension_type(values) and not is_datetimetz(values):
448447
# handle Categorical and sparse,

pandas/core/api.py

+1
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
from pandas.core.index import (Index, CategoricalIndex, Int64Index,
1313
UInt64Index, RangeIndex, Float64Index,
1414
MultiIndex)
15+
from pandas.core.interval import Interval, IntervalIndex
1516

1617
from pandas.core.series import Series
1718
from pandas.core.frame import DataFrame

pandas/core/groupby.py

+16-8
Original file line number | Diff line number | Diff line change
@@ -43,8 +43,7 @@
4343
from pandas.core.categorical import Categorical
4444
from pandas.core.frame import DataFrame
4545
from pandas.core.generic import NDFrame
46-
from pandas.core.index import (Index, MultiIndex, CategoricalIndex,
47-
_ensure_index)
46+
from pandas.core.interval import IntervalIndex
4847
from pandas.core.internals import BlockManager, make_block
4948
from pandas.core.series import Series
5049
from pandas.core.panel import Panel
@@ -3120,12 +3119,20 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
31203119
if bins is None:
31213120
lab, lev = algorithms.factorize(val, sort=True)
31223121
else:
3123-
cat, bins = cut(val, bins, retbins=True)
3122+
raise NotImplementedError('this is broken')
3123+
lab, bins = cut(val, bins, retbins=True)
31243124
# bins[:-1] for backward compat;
31253125
# o.w. cat.categories could be better
3126-
lab, lev, dropna = cat.codes, bins[:-1], False
3127-
3128-
sorter = np.lexsort((lab, ids))
3126+
# cat = Categorical(cat)
3127+
# lab, lev, dropna = cat.codes, bins[:-1], False
3128+
3129+
if (lab.dtype == object
3130+
and lib.is_interval_array_fixed_closed(lab[notnull(lab)])):
3131+
lab_index = Index(lab)
3132+
assert isinstance(lab, IntervalIndex)
3133+
sorter = np.lexsort((lab_index.left, lab_index.right, ids))
3134+
else:
3135+
sorter = np.lexsort((lab, ids))
31293136
ids, lab = ids[sorter], lab[sorter]
31303137

31313138
# group boundaries are where group ids change
@@ -3166,12 +3173,13 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
31663173
acc = rep(d)
31673174
out /= acc
31683175

3169-
if sort and bins is None:
3176+
if sort: # and bins is None:
31703177
cat = ids[inc][mask] if dropna else ids[inc]
31713178
sorter = np.lexsort((out if ascending else -out, cat))
31723179
out, labels[-1] = out[sorter], labels[-1][sorter]
31733180

3174-
if bins is None:
3181+
# if bins is None:
3182+
if True:
31753183
mi = MultiIndex(levels=levels, labels=labels, names=names,
31763184
verify_integrity=False)
31773185

0 commit comments

Comments
 (0)