pandas-dev · jreback · Apr 20, 2015 · Mar 26, 2015 · jorisvandenbossche · Apr 14, 2015
diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst
@@ -594,6 +594,95 @@ faster than fancy indexing.
    timeit ser.ix[indexer]
    timeit ser.take(indexer)
 
+.. _indexing.categoricalindex:
+
+CategoricalIndex
+----------------
+
+.. versionadded:: 0.16.1
+
+We introduce a ``CategoricalIndex``, a new type of index object that is useful for supporting
+indexing with duplicates. This is a container around a ``Categorical`` (introduced in v0.15.0)
+and allows efficient indexing and storage of an index with a large number of duplicated elements. Prior to 0.16.1,
+setting the index of a ``DataFrame/Series`` with a ``category`` dtype would convert this to regular object-based ``Index``.
+
+.. ipython:: python
+
+   df = DataFrame({'A' : np.arange(6),
+                   'B' : Series(list('aabbca')).astype('category',
+                                                       categories=list('cab'))
+                  })
+   df
+   df.dtypes
+   df.B.cat.categories
+
+Setting the index, will create create a ``CategoricalIndex``
+
+.. ipython:: python
+
+   df2 = df.set_index('B')
+   df2.index
+
+Indexing with ``__getitem__/.iloc/.loc/.ix`` works similarly to an ``Index`` with duplicates.
+The indexers MUST be in the category or the operation will raise.
+
+.. ipython:: python
+
+   df2.loc['a']
+
+These PRESERVE the ``CategoricalIndex``
+
+.. ipython:: python
+
+   df2.loc['a'].index
+
+Sorting will order by the order of the categories
+
+.. ipython:: python
+
+   df2.sort_index()
+
+Groupby operations on the index will preserve the index nature as well
+
+.. ipython:: python
+
+   df2.groupby(level=0).sum()
+   df2.groupby(level=0).sum().index
+
+Reindexing operations, will return a resulting index based on the type of the passed
+indexer, meaning that passing a list will return a plain-old-``Index``; indexing with
+a ``Categorical`` will return a ``CategoricalIndex``, indexed according to the categories
+of the PASSED ``Categorical`` dtype. This allows one to arbitrarly index these even with
+values NOT in the categories, similarly to how you can reindex ANY pandas index.
+
+.. ipython :: python
+
+   df2.reindex(['a','e'])
+   df2.reindex(['a','e']).index
+   df2.reindex(pd.Categorical(['a','e'],categories=list('abcde')))
+   df2.reindex(pd.Categorical(['a','e'],categories=list('abcde'))).index
+
+.. warning::
+
+   Reshaping and Comparision operations on a ``CategoricalIndex`` must have the same categories
+   or a ``TypeError`` will be raised.
+
+   .. code-block:: python
+
+      In [10]: df3 = DataFrame({'A' : np.arange(6),
+                                'B' : Series(list('aabbca')).astype('category',
+                                                                    categories=list('abc'))
+                               }).set_index('B')
+
+      In [11]: df3.index
+      Out[11]:
+      CategoricalIndex([u'a', u'a', u'b', u'b', u'c', u'a'],
+                       categories=[u'a', u'b', u'c'],
+                       ordered=False)
+
+      In [12]: pd.concat([df2,df3]
+      TypeError: categories must match existing categories when appending
+
 .. _indexing.float64index:
 
 Float64Index
@@ -706,4 +795,3 @@ Of course if you need integer based selection, then use ``iloc``
 .. ipython:: python
 
    dfir.iloc[0:5]
-
diff --git a/doc/source/api.rst b/doc/source/api.rst
@@ -1291,6 +1291,34 @@ Selecting
    Index.slice_indexer
    Index.slice_locs
 
+.. _api.categoricalindex:
+
+CategoricalIndex
+----------------
+
+.. autosummary::
+   :toctree: generated/
+
+   CategoricalIndex
+
+Categorical Components
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. autosummary::
+   :toctree: generated/
+
+   CategoricalIndex.codes
+   CategoricalIndex.categories
+   CategoricalIndex.ordered
+   CategoricalIndex.rename_categories
+   CategoricalIndex.reorder_categories
+   CategoricalIndex.add_categories
+   CategoricalIndex.remove_categories
+   CategoricalIndex.remove_unused_categories
+   CategoricalIndex.set_categories
+   CategoricalIndex.as_ordered
+   CategoricalIndex.as_unordered
+
 .. _api.datetimeindex:
 
 DatetimeIndex

diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt
@@ -7,6 +7,10 @@ This is a minor bug-fix release from 0.16.0 and includes a a large number of
 bug fixes along several new features, enhancements, and performance improvements.
 We recommend that all users upgrade to this version.
 
+Highlights include:
+
+- Support for a ``CategoricalIndex``, a category based index, see :ref:`here <whatsnew_0161`.enhancements.categoricalindex>`
+
 .. contents:: What's new in v0.16.1
     :local:
     :backlinks: none
@@ -31,6 +35,7 @@ Enhancements
   will return a `np.array` instead of a boolean `Index` (:issue:`8875`). This enables the following expression
   to work naturally:
 
+
   .. ipython:: python
 
      idx = Index(['a1', 'a2', 'b1', 'b2'])
@@ -40,6 +45,7 @@ Enhancements
      s[s.index.str.startswith('a')]
 
 - ``DataFrame.mask()`` and ``Series.mask()`` now support same keywords as ``where`` (:issue:`8801`)
+
 - ``drop`` function can now accept ``errors`` keyword to suppress ValueError raised when any of label does not exist in the target data. (:issue:`6736`)
 
   .. ipython:: python
@@ -58,6 +64,75 @@ Enhancements
 
 - ``DataFrame`` and ``Series`` now have ``_constructor_expanddim`` property as overridable constructor for one higher dimensionality data. This should be used only when it is really needed, see :ref:`here <ref-subclassing-pandas>`
 
+.. _whatsnew_0161.enhancements.categoricalindex:
+
+CategoricalIndex
+^^^^^^^^^^^^^^^^
+
+We introduce a ``CategoricalIndex``, a new type of index object that is useful for supporting
+indexing with duplicates. This is a container around a ``Categorical`` (introduced in v0.15.0)
+and allows efficient indexing and storage of an index with a large number of duplicated elements. Prior to 0.16.1,
+setting the index of a ``DataFrame/Series`` with a ``category`` dtype would convert this to regular object-based ``Index``.
+
+.. ipython :: python
+
+   df = DataFrame({'A' : np.arange(6),
+                   'B' : Series(list('aabbca')).astype('category',
+                                                       categories=list('cab'))
+                  })
+   df
+   df.dtypes
+   df.B.cat.categories
+
+setting the index, will create create a CategoricalIndex
+
+.. ipython :: python
+
+   df2 = df.set_index('B')
+   df2.index
+
+indexing with ``__getitem__/.iloc/.loc/.ix`` works similarly to an Index with duplicates.
+The indexers MUST be in the category or the operation will raise.
+
+.. ipython :: python
+
+   df2.loc['a']
+
+and preserves the ``CategoricalIndex``
+
+.. ipython :: python
+
+   df2.loc['a'].index
+
+sorting will order by the order of the categories
+
+.. ipython :: python
+
+   df2.sort_index()
+
+groupby operations on the index will preserve the index nature as well
+
+.. ipython :: python
+
+   df2.groupby(level=0).sum()
+   df2.groupby(level=0).sum().index
+
+reindexing operations, will return a resulting index based on the type of the passed
+indexer, meaning that passing a list will return a plain-old-``Index``; indexing with
+a ``Categorical`` will return a ``CategoricalIndex``, indexed according to the categories
+of the PASSED ``Categorical`` dtype. This allows one to arbitrarly index these even with
+values NOT in the categories, similarly to how you can reindex ANY pandas index.
+
+.. ipython :: python
+
+   df2.reindex(['a','e'])
+   df2.reindex(['a','e']).index
+   df2.reindex(pd.Categorical(['a','e'],categories=list('abcde')))
+   df2.reindex(pd.Categorical(['a','e'],categories=list('abcde'))).index
+
+See the :ref:`documentation <advanced.categoricalindex>` for more. (:issue:`7629`)
+>>>>>>> support CategoricalIndex
+
 .. _whatsnew_0161.api:
 
 API changes

diff --git a/pandas/core/api.py b/pandas/core/api.py
@@ -8,7 +8,7 @@
 from pandas.core.categorical import Categorical
 from pandas.core.groupby import Grouper
 from pandas.core.format import set_eng_float_format
-from pandas.core.index import Index, Int64Index, Float64Index, MultiIndex
+from pandas.core.index import Index, CategoricalIndex, Int64Index, Float64Index, MultiIndex
 
 from pandas.core.series import Series, TimeSeries
 from pandas.core.frame import DataFrame

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -121,7 +121,7 @@ def _delegate_method(self, name, *args, **kwargs):
         raise TypeError("You cannot call method {name}".format(name=name))
 
     @classmethod
-    def _add_delegate_accessors(cls, delegate, accessors, typ):
+    def _add_delegate_accessors(cls, delegate, accessors, typ, overwrite=False):
         """
         add accessors to cls from the delegate class
 
@@ -131,6 +131,8 @@ def _add_delegate_accessors(cls, delegate, accessors, typ):
         delegate : the class to get methods/properties & doc-strings
         acccessors : string list of accessors to add
         typ : 'property' or 'method'
+        overwrite : boolean, default False
+           overwrite the method/property in the target class if it exists
 
         """
 
@@ -164,7 +166,7 @@ def f(self, *args, **kwargs):
                 f = _create_delegator_method(name)
 
             # don't overwrite existing methods/properties
-            if not hasattr(cls, name):
+            if overwrite or not hasattr(cls, name):
                 setattr(cls,name,f)