pandas-dev
diff --git a/‎pandas/api/tests/test_api.py
+1-1 b/‎pandas/api/tests/test_api.py
+1-1
diff --git a/‎pandas/core/algorithms.py
+41-20 b/‎pandas/core/algorithms.py
+41-20
diff --git a/‎pandas/core/api.py
+2-2 b/‎pandas/core/api.py
+2-2
diff --git a/‎pandas/core/groupby.py
+25-19 b/‎pandas/core/groupby.py
+25-19
diff --git a/‎pandas/hashtable.pyx
+2 b/‎pandas/hashtable.pyx
+2
diff --git a/‎pandas/indexes/api.py
+2-1 b/‎pandas/indexes/api.py
+2-1
diff --git a/‎pandas/indexes/base.py
+24-2 b/‎pandas/indexes/base.py
+24-2
@@ -56,7 +56,7 @@ class TestPDApi(Base, tm.TestCase):
                'Period', 'PeriodIndex', 'RangeIndex', 'UInt64Index',
                'Series', 'SparseArray', 'SparseDataFrame',
                'SparseSeries', 'TimeGrouper', 'Timedelta',
-               'TimedeltaIndex', 'Timestamp']
+               'TimedeltaIndex', 'Timestamp', 'Interval', 'IntervalIndex']
 
     # these are already deprecated; awaiting removal
     deprecated_classes = ['TimeSeries', 'WidePanel',
 
@@ -8,7 +8,7 @@
 
 from pandas import compat, lib, tslib, _np_version_under1p8
 from pandas.types.cast import _maybe_promote
-from pandas.types.generic import ABCSeries, ABCIndex
+from pandas.types.generic import ABCSeries, ABCIndex, ABCIntervalIndex
 from pandas.types.common import (is_unsigned_integer_dtype,
                                  is_signed_integer_dtype,
                                  is_integer_dtype,
@@ -401,31 +401,47 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
     if bins is not None:
         try:
             from pandas.tools.tile import cut
-            values = Series(values).values
-            cat, bins = cut(values, bins, retbins=True)
+            values = Series(values)
+            ii, _, lev = cut(values, bins, retbins=True, include_lowest=True)
         except TypeError:
             raise TypeError("bins argument only works with numeric data.")
 
-    if is_extension_type(values) and not is_datetimetz(values):
-        # handle Categorical and sparse,
-        # datetime tz can be handeled in ndarray path
-        result = Series(values).values.value_counts(dropna=dropna)
-        result.name = name
-        counts = result.values
+        # if normalizing, we need the total (include NA's)
+        counts = np.array([len(ii)])
+
+        # remove NaN ii entries
+        if dropna:
+            mask = ii.notnull()
+            values = values[mask]
+            ii = ii[mask]
+
+        result = values.groupby(ii).count()
+
+        # reindex & fill in 0's for non-represented levels
+        # but don't if we have completely dropped everything
+        # as it's now a missing level
+        # this matches our groupby.value_counts behavior
+        if dropna and not len(values) and not len(result):
+            result.index = lev[0:0]
+        else:
+            result = result.reindex(lev).fillna(0).astype('i8')
+
     else:
-        # ndarray path. pass original to handle DatetimeTzBlock
-        keys, counts = _value_counts_arraylike(values, dropna=dropna)
 
-        from pandas import Index, Series
-        if not isinstance(keys, Index):
-            keys = Index(keys)
-        result = Series(counts, index=keys, name=name)
+        if is_extension_type(values) and not is_datetimetz(values):
+            # handle Categorical and sparse,
+            # datetime tz can be handled in ndarray path
+            result = Series(values).values.value_counts(dropna=dropna)
+            result.name = name
+            counts = result.values
+        else:
+            # ndarray path. pass original to handle DatetimeTzBlock
+            keys, counts = _value_counts_arraylike(values, dropna=dropna)
 
-    if bins is not None:
-        # TODO: This next line should be more efficient
-        result = result.reindex(np.arange(len(cat.categories)),
-                                fill_value=0)
-        result.index = bins[:-1]
+            from pandas import Index, Series
+            if not isinstance(keys, Index):
+                keys = Index(keys)
+            result = Series(counts, index=keys, name=name)
 
     if sort:
         result = result.sort_values(ascending=ascending)
@@ -1244,6 +1260,11 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None,
                            allow_fill=allow_fill)
     elif is_datetimetz(arr):
         return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
+    elif isinstance(arr, ABCIntervalIndex):
+        # TODO: we need to be sure we are taking on an actual IntervalIndex
+        # this is 'hacky' until we have a first class dtype
+        # ideally will use is_interval_dtype here
+        return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
 
     if indexer is None:
         indexer = np.arange(arr.shape[axis], dtype=np.int64)
 
@@ -11,8 +11,8 @@
 from pandas.formats.format import set_eng_float_format
 from pandas.core.index import (Index, CategoricalIndex, Int64Index,
                                UInt64Index, RangeIndex, Float64Index,
-                               MultiIndex)
-from pandas.core.interval import Interval, IntervalIndex
+                               MultiIndex, IntervalIndex)
+from pandas.indexes.interval import Interval
 
 from pandas.core.series import Series, TimeSeries
 from pandas.core.frame import DataFrame
 
@@ -17,6 +17,7 @@
 from pandas.types.common import (is_numeric_dtype,
                                  is_timedelta64_dtype, is_datetime64_dtype,
                                  is_categorical_dtype,
+                                 is_interval_dtype,
                                  is_datetimelike,
                                  is_datetime64_any_dtype,
                                  is_bool, is_integer_dtype,
@@ -39,10 +40,11 @@
 
 from pandas.core.base import (PandasObject, SelectionMixin, GroupByError,
                               DataError, SpecificationError)
+from pandas.core.index import (Index, MultiIndex,
+                               CategoricalIndex, _ensure_index)
 from pandas.core.categorical import Categorical
 from pandas.core.frame import DataFrame
 from pandas.core.generic import NDFrame
-from pandas.core.interval import IntervalIndex
 from pandas.core.internals import BlockManager, make_block
 from pandas.core.series import Series
 from pandas.core.panel import Panel
@@ -2579,7 +2581,7 @@ def _convert_grouper(axis, grouper):
             return grouper.reindex(axis)._values
     elif isinstance(grouper, (list, Series, Index, np.ndarray)):
         if len(grouper) != len(axis):
-            raise AssertionError('Grouper and axis must be same length')
+            raise ValueError('Grouper and axis must be same length')
         return grouper
     else:
         return grouper
@@ -3063,36 +3065,41 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
 
         if bins is None:
             lab, lev = algos.factorize(val, sort=True)
+            llab = lambda lab, inc: lab[inc]
         else:
-            raise NotImplementedError('this is broken')
-            lab, bins = cut(val, bins, retbins=True)
-            # bins[:-1] for backward compat;
-            # o.w. cat.categories could be better
-            # cat = Categorical(cat)
-            # lab, lev, dropna = cat.codes, bins[:-1], False
-
-        if (lab.dtype == object
-                and lib.is_interval_array_fixed_closed(lab[notnull(lab)])):
-            lab_index = Index(lab)
-            assert isinstance(lab, IntervalIndex)
-            sorter = np.lexsort((lab_index.left, lab_index.right, ids))
+
+            # lab is an IntervalIndex
+            # we get our last level of labels from the
+            # II indexer
+            # TODO: make this a method on II
+            lab, _, lev = cut(val, bins, retbins=True, include_lowest=True)
+
+            # we compute the levels here rather than use the bins
+            # because we may have adjusted them with include_lowest
+            llab = lambda lab, inc: lab[inc]._multiindex.labels[-1]
+
+        if is_interval_dtype(lab):
+            # TODO: should we do this inside II?
+            sorter = np.lexsort((lab.left, lab.right, ids))
         else:
             sorter = np.lexsort((lab, ids))
+
         ids, lab = ids[sorter], lab[sorter]
 
         # group boundaries are where group ids change
         idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]]
 
         # new values are where sorted labels change
-        inc = np.r_[True, lab[1:] != lab[:-1]]
+        lchanges = llab(lab, slice(1, None)) != llab(lab, slice(None, -1))
+        inc = np.r_[True, lchanges]
         inc[idx] = True  # group boundaries are also new values
         out = np.diff(np.nonzero(np.r_[inc, True])[0])  # value counts
 
         # num. of times each group should be repeated
         rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx))
 
         # multi-index components
-        labels = list(map(rep, self.grouper.recons_labels)) + [lab[inc]]
+        labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)]
         levels = [ping.group_index for ping in self.grouper.groupings] + [lev]
         names = self.grouper.names + [self.name]
 
@@ -3118,13 +3125,12 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
                 acc = rep(d)
             out /= acc
 
-        if sort:  # and bins is None:
+        if sort and bins is None:
             cat = ids[inc][mask] if dropna else ids[inc]
             sorter = np.lexsort((out if ascending else -out, cat))
             out, labels[-1] = out[sorter], labels[-1][sorter]
 
-        # if bins is None:
-        if True:
+        if bins is None:
             mi = MultiIndex(levels=levels, labels=labels, names=names,
                             verify_integrity=False)
 
 
@@ -39,6 +39,8 @@ PyDateTime_IMPORT
 cdef extern from "Python.h":
     int PySlice_Check(object)
 
+cdef size_t _INIT_VEC_CAP = 128
+
 include "hashtable_class_helper.pxi"
 include "hashtable_func_helper.pxi"
 
 
@@ -3,6 +3,7 @@
                                  InvalidIndexError)
 from pandas.indexes.category import CategoricalIndex  # noqa
 from pandas.indexes.multi import MultiIndex  # noqa
+from pandas.indexes.interval import IntervalIndex  # noqa
 from pandas.indexes.numeric import (NumericIndex, Float64Index,  # noqa
                                     Int64Index, UInt64Index)
 from pandas.indexes.range import RangeIndex  # noqa
@@ -13,7 +14,7 @@
 # TODO: there are many places that rely on these private methods existing in
 # pandas.core.index
 __all__ = ['Index', 'MultiIndex', 'NumericIndex', 'Float64Index', 'Int64Index',
-           'CategoricalIndex', 'RangeIndex', 'UInt64Index',
+           'CategoricalIndex', 'IntervalIndex', 'RangeIndex', 'UInt64Index',
            'InvalidIndexError',
            '_new_Index',
            '_ensure_index', '_get_na_value', '_get_combined_index',
 
@@ -26,6 +26,7 @@
                                  is_dtype_equal,
                                  is_object_dtype,
                                  is_categorical_dtype,
+                                 is_interval_dtype,
                                  is_bool_dtype,
                                  is_signed_integer_dtype,
                                  is_unsigned_integer_dtype,
@@ -268,6 +269,10 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
                 elif inferred in ['floating', 'mixed-integer-float']:
                     from .numeric import Float64Index
                     return Float64Index(subarr, copy=copy, name=name)
+                elif inferred == 'interval':
+                    from .interval import IntervalIndex
+                    return IntervalIndex.from_intervals(subarr, name=name,
+                                                        copy=copy)
                 elif inferred == 'boolean':
                     # don't support boolean explicity ATM
                     pass
@@ -1180,6 +1185,9 @@ def is_object(self):
     def is_categorical(self):
         return self.inferred_type in ['categorical']
 
+    def is_interval(self):
+        return self.inferred_type in ['interval']
+
     def is_mixed(self):
         return self.inferred_type in ['mixed']
 
@@ -3232,6 +3240,13 @@ def _searchsorted_monotonic(self, label, side='left'):
 
         raise ValueError('index must be monotonic increasing or decreasing')
 
+    def _get_loc_only_exact_matches(self, key):
+        """
+        This is overridden on subclasses (namely, IntervalIndex) to control
+        get_slice_bound.
+        """
+        return self.get_loc(key)
+
     def get_slice_bound(self, label, side, kind):
         """
         Calculate slice bound that corresponds to given label.
@@ -3261,7 +3276,7 @@ def get_slice_bound(self, label, side, kind):
 
         # we need to look up the label
         try:
-            slc = self.get_loc(label)
+            slc = self._get_loc_only_exact_matches(label)
         except KeyError as err:
             try:
                 return self._searchsorted_monotonic(label, side)
@@ -3501,7 +3516,11 @@ def _evaluate_compare(self, other):
                 if needs_i8_conversion(self) and needs_i8_conversion(other):
                     return self._evaluate_compare(other, op)
 
-                if is_object_dtype(self) and self.nlevels == 1:
+                # TODO: super hack
+                if (is_object_dtype(self) and
+                        self.nlevels == 1 and not
+                        is_interval_dtype(self)):
+
                     # don't pass MultiIndex
                     with np.errstate(all='ignore'):
                         result = _comp_method_OBJECT_ARRAY(
@@ -3813,6 +3832,9 @@ def _ensure_index(index_like, copy=False):
 
 
 def _get_na_value(dtype):
+    if is_datetime64_any_dtype(dtype) or is_timedelta64_dtype(dtype):
+        return tslib.NaT
+
     return {np.datetime64: tslib.NaT,
             np.timedelta64: tslib.NaT}.get(dtype, np.nan)