Skip to content

Commit b67b098

Browse files
shoyer authored and jreback committed
API/ENH: IntervalIndex
closes pandas-dev#7640 closes pandas-dev#8625
1 parent e351ed0 commit b67b098

18 files changed: +3381 -153 lines changed

pandas/core/algorithms.py

-1
Original file line number | Diff line number | Diff line change
@@ -405,7 +405,6 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
405405
cat, bins = cut(values, bins, retbins=True)
406406
except TypeError:
407407
raise TypeError("bins argument only works with numeric data.")
408-
values = cat.codes
409408

410409
if is_extension_type(values) and not is_datetimetz(values):
411410
# handle Categorical and sparse,

pandas/core/api.py

+1
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
from pandas.core.index import (Index, CategoricalIndex, Int64Index,
1313
UInt64Index, RangeIndex, Float64Index,
1414
MultiIndex)
15+
from pandas.core.interval import Interval, IntervalIndex
1516

1617
from pandas.core.series import Series, TimeSeries
1718
from pandas.core.frame import DataFrame

pandas/core/groupby.py

+16-8
Original file line number | Diff line number | Diff line change
@@ -42,8 +42,7 @@
4242
from pandas.core.categorical import Categorical
4343
from pandas.core.frame import DataFrame
4444
from pandas.core.generic import NDFrame
45-
from pandas.core.index import (Index, MultiIndex, CategoricalIndex,
46-
_ensure_index)
45+
from pandas.core.interval import IntervalIndex
4746
from pandas.core.internals import BlockManager, make_block
4847
from pandas.core.series import Series
4948
from pandas.core.panel import Panel
@@ -3086,12 +3085,20 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
30863085
if bins is None:
30873086
lab, lev = algos.factorize(val, sort=True)
30883087
else:
3089-
cat, bins = cut(val, bins, retbins=True)
3088+
raise NotImplementedError('this is broken')
3089+
lab, bins = cut(val, bins, retbins=True)
30903090
# bins[:-1] for backward compat;
30913091
# o.w. cat.categories could be better
3092-
lab, lev, dropna = cat.codes, bins[:-1], False
3093-
3094-
sorter = np.lexsort((lab, ids))
3092+
# cat = Categorical(cat)
3093+
# lab, lev, dropna = cat.codes, bins[:-1], False
3094+
3095+
if (lab.dtype == object
3096+
and lib.is_interval_array_fixed_closed(lab[notnull(lab)])):
3097+
lab_index = Index(lab)
3098+
assert isinstance(lab, IntervalIndex)
3099+
sorter = np.lexsort((lab_index.left, lab_index.right, ids))
3100+
else:
3101+
sorter = np.lexsort((lab, ids))
30953102
ids, lab = ids[sorter], lab[sorter]
30963103

30973104
# group boundaries are where group ids change
@@ -3132,12 +3139,13 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
31323139
acc = rep(d)
31333140
out /= acc
31343141

3135-
if sort and bins is None:
3142+
if sort: # and bins is None:
31363143
cat = ids[inc][mask] if dropna else ids[inc]
31373144
sorter = np.lexsort((out if ascending else -out, cat))
31383145
out, labels[-1] = out[sorter], labels[-1][sorter]
31393146

3140-
if bins is None:
3147+
# if bins is None:
3148+
if True:
31413149
mi = MultiIndex(levels=levels, labels=labels, names=names,
31423150
verify_integrity=False)
31433151

0 commit comments

Comments
 (0)