Skip to content

Commit 455b3fd

Browse files
shoyer authored and jreback committed
API/ENH: IntervalIndex
closes pandas-dev#7640 closes pandas-dev#8625
1 parent 704cdbf commit 455b3fd

19 files changed

+3395
-153
lines changed

pandas/core/algorithms.py

-1
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,6 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
405405
cat, bins = cut(values, bins, retbins=True)
406406
except TypeError:
407407
raise TypeError("bins argument only works with numeric data.")
408-
values = cat.codes
409408

410409
if is_extension_type(values) and not is_datetimetz(values):
411410
# handle Categorical and sparse,

pandas/core/api.py

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pandas.core.index import (Index, CategoricalIndex, Int64Index,
1313
UInt64Index, RangeIndex, Float64Index,
1414
MultiIndex)
15+
from pandas.core.interval import Interval, IntervalIndex
1516

1617
from pandas.core.series import Series, TimeSeries
1718
from pandas.core.frame import DataFrame

pandas/core/groupby.py

+16 -8
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@
4242
from pandas.core.categorical import Categorical
4343
from pandas.core.frame import DataFrame
4444
from pandas.core.generic import NDFrame
45-
from pandas.core.index import (Index, MultiIndex, CategoricalIndex,
46-
_ensure_index)
45+
from pandas.core.interval import IntervalIndex
4746
from pandas.core.internals import BlockManager, make_block
4847
from pandas.core.series import Series
4948
from pandas.core.panel import Panel
@@ -3065,12 +3064,20 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
30653064
if bins is None:
30663065
lab, lev = algos.factorize(val, sort=True)
30673066
else:
3068-
cat, bins = cut(val, bins, retbins=True)
3067+
raise NotImplementedError('this is broken')
3068+
lab, bins = cut(val, bins, retbins=True)
30693069
# bins[:-1] for backward compat;
30703070
# o.w. cat.categories could be better
3071-
lab, lev, dropna = cat.codes, bins[:-1], False
3072-
3073-
sorter = np.lexsort((lab, ids))
3071+
# cat = Categorical(cat)
3072+
# lab, lev, dropna = cat.codes, bins[:-1], False
3073+
3074+
if (lab.dtype == object
3075+
and lib.is_interval_array_fixed_closed(lab[notnull(lab)])):
3076+
lab_index = Index(lab)
3077+
assert isinstance(lab, IntervalIndex)
3078+
sorter = np.lexsort((lab_index.left, lab_index.right, ids))
3079+
else:
3080+
sorter = np.lexsort((lab, ids))
30743081
ids, lab = ids[sorter], lab[sorter]
30753082

30763083
# group boundaries are where group ids change
@@ -3111,12 +3118,13 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
31113118
acc = rep(d)
31123119
out /= acc
31133120

3114-
if sort and bins is None:
3121+
if sort: # and bins is None:
31153122
cat = ids[inc][mask] if dropna else ids[inc]
31163123
sorter = np.lexsort((out if ascending else -out, cat))
31173124
out, labels[-1] = out[sorter], labels[-1][sorter]
31183125

3119-
if bins is None:
3126+
# if bins is None:
3127+
if True:
31203128
mi = MultiIndex(levels=levels, labels=labels, names=names,
31213129
verify_integrity=False)
31223130

0 commit comments

Comments
 (0)