pandas-dev
diff --git a/‎.gitignore
+1-1 b/‎.gitignore
+1-1
diff --git a/‎doc/source/advanced.rst
+70-2 b/‎doc/source/advanced.rst
+70-2
diff --git a/‎doc/source/api.rst
+20 b/‎doc/source/api.rst
+20
diff --git a/‎doc/source/whatsnew/v0.16.1.txt
+40 b/‎doc/source/whatsnew/v0.16.1.txt
+40
diff --git a/‎pandas/core/api.py
+1-1 b/‎pandas/core/api.py
+1-1
diff --git a/‎pandas/core/categorical.py
+33-5 b/‎pandas/core/categorical.py
+33-5
diff --git a/‎pandas/core/common.py
+18 b/‎pandas/core/common.py
+18
@@ -55,7 +55,7 @@ dist
 ######################
 .directory
 .gdb_history
-.DS_Store?
+.DS_Store
 ehthumbs.db
 Icon?
 Thumbs.db
 
@@ -594,7 +594,76 @@ faster than fancy indexing.
    timeit ser.ix[indexer]
    timeit ser.take(indexer)
 
-.. _indexing.float64index:
+.. _indexing.categoricalindex:
+
+CategoricalIndex
+----------------
+
+.. versionadded:: 0.16.1
+
+We introduce a ``CategoricalIndex``, a new type of index object that is useful for supporting
+indexing with duplicates. This is a container around a ``Categorical`` (introduced in v0.15.0)
+and allows efficient indexing and storage of an index with a large number of duplicated elements. Prior to 0.16.1,
+setting the index of a ``DataFrame/Series`` with a ``category`` dtype would convert this to a regular object-based ``Index``.
+
+.. ipython:: python
+
+   df = DataFrame({'A' : np.arange(6),
+                   'B' : Series(list('aabbca')).astype('category',
+                                                       categories=list('cab'))
+                  })
+   df
+   df.dtypes
+   df.B.cat.categories
+
+Setting the index will create a ``CategoricalIndex``
+
+.. ipython:: python
+
+   df2 = df.set_index('B')
+   df2.index
+   df2.index.categories
+
+Indexing works similarly to an ``Index`` with duplicates
+
+.. ipython:: python
+
+   df2.loc['a']
+
+   # and preserves the CategoricalIndex
+   df2.loc['a'].index
+   df2.loc['a'].index.categories
+
+Sorting will order by the order of the categories
+
+.. ipython:: python
+
+   df2.sort_index()
+
+Groupby operations on the index will preserve the index nature as well
+
+.. ipython:: python
+
+   df2.groupby(level=0).sum()
+   df2.groupby(level=0).sum().index
+
+.. warning::
+
+   Reshaping and comparison operations on a ``CategoricalIndex`` must have the same categories
+   or a ``TypeError`` will be raised.
+
+   .. code-block:: python
+
+      In [10]: df3 = DataFrame({'A' : np.arange(6),
+                                'B' : Series(list('aabbca')).astype('category',
+                                                                    categories=list('abc'))
+                               }).set_index('B')
+
+      In [11]: df3.index.categories
+      Out[11]: Index([u'a', u'b', u'c'], dtype='object')
+
+      In [12]: pd.concat([df2, df3])
+      TypeError: categories must match existing categories when appending
 
 Float64Index
 ------------
@@ -706,4 +775,3 @@ Of course if you need integer based selection, then use ``iloc``
 .. ipython:: python
 
    dfir.iloc[0:5]
-
 
@@ -1289,6 +1289,26 @@ Selecting
    Index.slice_indexer
    Index.slice_locs
 
+.. _api.categoricalindex:
+
+CategoricalIndex
+----------------
+
+.. autosummary::
+   :toctree: generated/
+
+   CategoricalIndex
+
+Categorical Components
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. autosummary::
+   :toctree: generated/
+
+   CategoricalIndex.codes
+   CategoricalIndex.categories
+   CategoricalIndex.ordered
+
 .. _api.datetimeindex:
 
 DatetimeIndex
 
@@ -7,6 +7,10 @@ This is a minor bug-fix release from 0.16.0 and includes a a large number of
 bug fixes along several new features, enhancements, and performance improvements.
 We recommend that all users upgrade to this version.
 
+Highlights include:
+
+- Support for a ``CategoricalIndex``, a category based index, see :ref:`here <whatsnew_0161.enhancements.categoricalindex>`
+
 .. contents:: What's new in v0.16.1
     :local:
     :backlinks: none
@@ -17,10 +21,46 @@ We recommend that all users upgrade to this version.
 Enhancements
 ~~~~~~~~~~~~
 
+.. _whatsnew_0161.enhancements.categoricalindex:
+
+CategoricalIndex
+^^^^^^^^^^^^^^^^
+
+We introduce a ``CategoricalIndex``, a new type of index object that is useful for supporting
+indexing with duplicates. This is a container around a ``Categorical`` (introduced in v0.15.0)
+and allows efficient indexing and storage of an index with a large number of duplicated elements. Prior to 0.16.1,
+setting the index of a ``DataFrame/Series`` with a ``category`` dtype would convert this to a regular object-based ``Index``.
+
+.. ipython :: python
+
+   df = DataFrame({'A' : np.arange(6),
+                   'B' : Series(list('aabbca')).astype('category',
+                                                       categories=list('cab'))
+                  })
+   df
+   df.dtypes
+   df.B.cat.categories
+
+   # setting the index will create a CategoricalIndex
+   df2 = df.set_index('B')
+   df2.index
+   df2.index.categories
+
+   # indexing works similarly to an Index with duplicates
+   df2.loc['a']
 
+   # and preserves the CategoricalIndex
+   df2.loc['a'].index
+   df2.loc['a'].index.categories
 
+   # sorting will order by the order of the categories
+   df2.sort_index()
 
+   # groupby operations on the index will preserve the index nature as well
+   df2.groupby(level=0).sum()
+   df2.groupby(level=0).sum().index
 
+See the :ref:`documentation <indexing.categoricalindex>` for more. (:issue:`7629`)
 
 .. _whatsnew_0161.api:
 
 
@@ -8,7 +8,7 @@
 from pandas.core.categorical import Categorical
 from pandas.core.groupby import Grouper
 from pandas.core.format import set_eng_float_format
-from pandas.core.index import Index, Int64Index, Float64Index, MultiIndex
+from pandas.core.index import Index, CategoricalIndex, Int64Index, Float64Index, MultiIndex
 
 from pandas.core.series import Series, TimeSeries
 from pandas.core.frame import DataFrame
 
@@ -14,10 +14,11 @@
 import pandas.core.common as com
 from pandas.util.decorators import cache_readonly
 
-from pandas.core.common import (CategoricalDtype, ABCSeries, isnull, notnull,
+from pandas.core.common import (CategoricalDtype, ABCSeries, ABCCategoricalIndex, isnull, notnull,
                                 is_categorical_dtype, is_integer_dtype, is_object_dtype,
                                 _possibly_infer_to_datetimelike, get_dtype_kinds,
                                 is_list_like, is_sequence, is_null_slice, is_bool,
+                                is_dtypes_equal,
                                 _ensure_platform_int, _ensure_object, _ensure_int64,
                                 _coerce_indexer_dtype, _values_from_object, take_1d)
 from pandas.util.terminal import get_terminal_size
@@ -79,7 +80,7 @@ def f(self, other):
 
 def maybe_to_categorical(array):
     """ coerce to a categorical if a series is given """
-    if isinstance(array, ABCSeries):
+    if isinstance(array, (ABCSeries, ABCCategoricalIndex)):  # unwrap .values for a Series or a CategoricalIndex
         return array.values
     return array
 
@@ -233,12 +234,17 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
             cat = values
             if isinstance(values, ABCSeries):
                 cat = values.values
+            if isinstance(values, ABCCategoricalIndex):
+                ordered = values.ordered
+                cat = values.values
+
             if categories is None:
                 categories = cat.categories
             values = values.__array__()
 
         elif isinstance(values, Index):
-            pass
+            #values = np.array(values)
+            ordered = True
 
         else:
 
@@ -302,11 +308,27 @@ def copy(self):
         return Categorical(values=self._codes.copy(),categories=self.categories,
                            name=self.name, ordered=self.ordered, fastpath=True)
 
+    def astype(self, dtype):
+        """ coerce this Categorical to another dtype """
+        if is_categorical_dtype(dtype):
+            return self  # already categorical: hand back this same object, no copy
+        return np.array(self, dtype=dtype)  # otherwise build a numpy array of ``dtype``
+
     @cache_readonly
     def ndim(self):
         """Number of dimensions of the Categorical """
         return self._codes.ndim
 
+    @cache_readonly
+    def size(self):
+        """ return the number of elements, i.e. ``len(self)`` """
+        return len(self)
+
+    @cache_readonly
+    def itemsize(self):
+        """ return the itemsize of ``self.categories`` (the size of a single category) """
+        return self.categories.itemsize
+
     def reshape(self, new_shape, **kwargs):
         """ compat with .reshape """
         return self
@@ -1596,14 +1618,20 @@ def _delegate_method(self, name, *args, **kwargs):
 ##### utility routines #####
 
 def _get_codes_for_values(values, categories):
-    """"
+    """
     utility routine to turn values into codes given the specified categories
     """
 
     from pandas.core.algorithms import _get_data_algo, _hashtables
-    if values.dtype != categories.dtype:
+    if not is_dtypes_equal(values.dtype,categories.dtype):
+        values = _ensure_object(values)
+        categories = _ensure_object(categories)
+
+    if is_object_dtype(values):
         values = _ensure_object(values)
+    if is_object_dtype(categories):
         categories = _ensure_object(categories)
+
     (hash_klass, vec_klass), vals = _get_data_algo(values, _hashtables)
     t = hash_klass(len(categories))
     t.map_locations(_values_from_object(categories))
 
@@ -72,6 +72,7 @@ def _check(cls, inst):
 ABCDatetimeIndex = create_pandas_abc_type("ABCDatetimeIndex", "_typ", ("datetimeindex",))
 ABCTimedeltaIndex = create_pandas_abc_type("ABCTimedeltaIndex", "_typ", ("timedeltaindex",))
 ABCPeriodIndex = create_pandas_abc_type("ABCPeriodIndex", "_typ", ("periodindex",))
+ABCCategoricalIndex = create_pandas_abc_type("ABCCategoricalIndex", "_typ", ("categoricalindex",))
 ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series",))
 ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe",))
 ABCPanel = create_pandas_abc_type("ABCPanel", "_typ", ("panel",))
@@ -2438,9 +2439,26 @@ def _get_dtype_type(arr_or_dtype):
         return np.dtype(arr_or_dtype).type
     elif isinstance(arr_or_dtype, CategoricalDtype):
         return CategoricalDtypeType
+    elif isinstance(arr_or_dtype, compat.string_types):
+        if is_categorical_dtype(arr_or_dtype):
+            return CategoricalDtypeType
+        return _get_dtype_type(np.dtype(arr_or_dtype))
     return arr_or_dtype.dtype.type
 
 
+def is_dtypes_equal(source, target):
+    """ return True if source and target resolve to the same dtype type, False otherwise """
+    source = _get_dtype_type(source)
+    target = _get_dtype_type(target)
+
+    try:
+        return source == target
+    except TypeError:
+
+        # invalid comparison
+        # object == category will hit this
+        return False
+
 def is_any_int_dtype(arr_or_dtype):
     tipo = _get_dtype_type(arr_or_dtype)
     return issubclass(tipo, np.integer)