Skip to content

Commit 74162aa

Browse files
shoyer authored and jreback committed
API/ENH: IntervalIndex
closes #7640 closes #8625
1 parent 7ee73ff commit 74162aa

18 files changed

+3377
-160
lines changed

pandas/_libs/hashtable.pyx

-3
Original file line number | Diff line number | Diff line change
@@ -39,9 +39,6 @@ PyDateTime_IMPORT
3939
cdef extern from "Python.h":
4040
int PySlice_Check(object)
4141

42-
cdef size_t _INIT_VEC_CAP = 128
43-
44-
4542
include "hashtable_class_helper.pxi"
4643
include "hashtable_func_helper.pxi"
4744

pandas/_libs/lib.pyx

+5-1
Original file line number | Diff line number | Diff line change
@@ -245,6 +245,7 @@ cpdef bint isscalar(object val):
245245
- instances of datetime.timedelta
246246
- Period
247247
- instances of decimal.Decimal
248+
- Interval
248249
249250
"""
250251

@@ -258,7 +259,8 @@ cpdef bint isscalar(object val):
258259
or PyDelta_Check(val)
259260
or PyTime_Check(val)
260261
or util.is_period_object(val)
261-
or is_decimal(val))
262+
or is_decimal(val),
263+
or is_interval(val))
262264

263265

264266
def item_from_zerodim(object val):
@@ -1896,4 +1898,6 @@ cdef class BlockPlacement:
18961898

18971899
include "reduce.pyx"
18981900
include "properties.pyx"
1901+
include "interval.pyx"
1902+
include "intervaltree.pyx"
18991903
include "inference.pyx"

pandas/_libs/src/inference.pyx

+21
Original file line number | Diff line number | Diff line change
@@ -429,6 +429,10 @@ def infer_dtype(object value):
429429
if is_period_array(values):
430430
return 'period'
431431

432+
elif is_interval(val):
433+
if is_interval_array_fixed_closed(values):
434+
return 'interval'
435+
432436
for i in range(n):
433437
val = util.get_value_1d(values, i)
434438
if (util.is_integer_object(val) and
@@ -879,6 +883,23 @@ cpdef bint is_period_array(ndarray[object] values):
879883
return False
880884
return null_count != n
881885

886+
cdef inline bint is_interval(object o):
    """Return True if ``o`` is an instance of ``Interval``."""
    return isinstance(o, Interval)
888+
889+
def is_interval_array_fixed_closed(ndarray[object] values):
    """
    Check that ``values`` holds only Interval objects sharing one ``closed``.

    Parameters
    ----------
    values : ndarray[object]
        Object array to inspect.

    Returns
    -------
    bool
        False for an empty array, for any element that is not an
        ``Interval``, or when an element's ``closed`` attribute differs
        from that of the first element; True otherwise.
    """
    cdef:
        Py_ssize_t pos, count = len(values)
        str expected_closed
        object item

    # An empty array carries no ``closed`` value to agree on.
    if count == 0:
        return False

    # The first element fixes the ``closed`` value every later one must match.
    item = values[0]
    if not is_interval(item):
        return False
    expected_closed = item.closed

    for pos in range(1, count):
        item = values[pos]
        if not is_interval(item):
            return False
        if expected_closed != item.closed:
            return False

    return True
902+
882903

883904
cdef extern from "parse_helper.h":
884905
inline int floatify(object, double *result, int *maybe_int) except -1

pandas/core/algorithms.py

-1
Original file line number | Diff line number | Diff line change
@@ -609,7 +609,6 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
609609
cat, bins = cut(values, bins, retbins=True)
610610
except TypeError:
611611
raise TypeError("bins argument only works with numeric data.")
612-
values = cat.codes
613612

614613
if is_categorical_dtype(values) or is_sparse(values):
615614

pandas/core/api.py

+1
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
from pandas.core.index import (Index, CategoricalIndex, Int64Index,
1313
UInt64Index, RangeIndex, Float64Index,
1414
MultiIndex)
15+
from pandas.core.interval import Interval, IntervalIndex
1516

1617
from pandas.core.series import Series
1718
from pandas.core.frame import DataFrame

pandas/core/groupby.py

+16-8
Original file line number | Diff line number | Diff line change
@@ -43,8 +43,7 @@
4343
from pandas.core.categorical import Categorical
4444
from pandas.core.frame import DataFrame
4545
from pandas.core.generic import NDFrame
46-
from pandas.core.index import (Index, MultiIndex, CategoricalIndex,
47-
_ensure_index)
46+
from pandas.core.interval import IntervalIndex
4847
from pandas.core.internals import BlockManager, make_block
4948
from pandas.core.series import Series
5049
from pandas.core.panel import Panel
@@ -3146,12 +3145,20 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
31463145
if bins is None:
31473146
lab, lev = algorithms.factorize(val, sort=True)
31483147
else:
3149-
cat, bins = cut(val, bins, retbins=True)
3148+
raise NotImplementedError('this is broken')
3149+
lab, bins = cut(val, bins, retbins=True)
31503150
# bins[:-1] for backward compat;
31513151
# o.w. cat.categories could be better
3152-
lab, lev, dropna = cat.codes, bins[:-1], False
3153-
3154-
sorter = np.lexsort((lab, ids))
3152+
# cat = Categorical(cat)
3153+
# lab, lev, dropna = cat.codes, bins[:-1], False
3154+
3155+
if (lab.dtype == object
3156+
and lib.is_interval_array_fixed_closed(lab[notnull(lab)])):
3157+
lab_index = Index(lab)
3158+
assert isinstance(lab, IntervalIndex)
3159+
sorter = np.lexsort((lab_index.left, lab_index.right, ids))
3160+
else:
3161+
sorter = np.lexsort((lab, ids))
31553162
ids, lab = ids[sorter], lab[sorter]
31563163

31573164
# group boundaries are where group ids change
@@ -3192,12 +3199,13 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
31923199
acc = rep(d)
31933200
out /= acc
31943201

3195-
if sort and bins is None:
3202+
if sort: # and bins is None:
31963203
cat = ids[inc][mask] if dropna else ids[inc]
31973204
sorter = np.lexsort((out if ascending else -out, cat))
31983205
out, labels[-1] = out[sorter], labels[-1][sorter]
31993206

3200-
if bins is None:
3207+
# if bins is None:
3208+
if True:
32013209
mi = MultiIndex(levels=levels, labels=labels, names=names,
32023210
verify_integrity=False)
32033211

0 commit comments

Comments
 (0)