fixup! DOC: Update docs to use CategoricalDtype

TomAugspurger · TomAugspurger · commit 880dcb7b0cd6 · 2017-08-18T12:02:48.000-05:00
diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst
@@ -96,12 +96,17 @@ By passing a :class:`pandas.Categorical` object to a `Series` or assigning it to
     df["B"] = raw_cat
     df
 
-You can also specify differently ordered categories or make the resulting data
-ordered by passing a :class:`CategoricalDtype`:
+In the examples above where we passed ``dtype='category'``, we used the default behavior:
+
+1. Categories are inferred from the data.
+2. Categories are unordered.
+
+To control those behaviors, instead of passing ``'category'``, use an instance
+of :class:`CategoricalDtype`.
 
 .. ipython:: python
 
-    s = pd.Series(["a","b","c","a"])
+    s = pd.Series(["a", "b", "c", "a"])
     cat_type = pd.CategoricalDtype(categories=["b", "c", "d"], ordered=False)
     s_cat = s.astype(cat_type)
     s_cat
@@ -145,7 +150,7 @@ constructor to save the factorize step during normal constructor mode:
 CategoricalDtype
 ----------------
 
-.. versionadded:: 0.21.0
+.. versionchanged:: 0.21.0
 
 A categorical's type is fully described by 1.) its categories (an iterable with
 unique values and no missing values), and 2.) its orderedness (a boolean).
@@ -184,12 +189,19 @@ order of the ``categories`` is not considered
    # Unequal, since the second CategoricalDtype is ordered
    c1 == pd.CategoricalDtype(['a',  'b', 'c'], ordered=True)
 
-Finally, all instances of ``CategoricalDtype`` compare equal to the string ``'category'``
+All instances of ``CategoricalDtype`` compare equal to the string ``'category'``:
 
 .. ipython:: python
 
    c1 == 'category'
 
+
+.. warning::
+
+   Since ``dtype='category'`` is essentially ``CategoricalDtype(None, False)``,
+   and since all instances of ``CategoricalDtype`` compare equal to ``'category'``,
+   all instances of ``CategoricalDtype`` compare equal to a ``CategoricalDtype(None)``.
+
 Description
 -----------
 
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
@@ -128,6 +128,8 @@ string data to a ``Categorical``:
 The ``.dtype`` property of a ``Categorical``, ``CategoricalIndex`` or a
 ``Series`` with categorical type will now return an instance of ``CategoricalDtype``.
 
+See :ref:`CategoricalDtype <categorical.categoricaldtype>` for more.
+
 .. _whatsnew_0210.enhancements.other:
 
 Other Enhancements
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
@@ -692,6 +692,21 @@ def is_dtype_equal(source, target):
         return False
 
 
+def _is_dtype_union_equal(source, target):
+    """
+    Check whether two arrays have compatible dtypes to do a union.
+    numpy types are checked with ``is_dtype_equal``. Extension types are
+    checked separately.
+    """
+    source = _get_dtype(source)
+    target = _get_dtype(target)
+    if source == 'category' and target == 'category':
+        # both categorical: compatible only if their orderedness matches
+        return source.ordered is target.ordered
+    else:
+        return is_dtype_equal(source, target)
+
+
 def is_any_int_dtype(arr_or_dtype):
     """
     DEPRECATED: This function will be removed in a future version.
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -27,6 +27,7 @@
     is_integer,
     is_float,
     is_dtype_equal,
+    _is_dtype_union_equal,
     is_object_dtype,
     is_categorical_dtype,
     is_interval_dtype,
@@ -2177,7 +2178,11 @@ def union(self, other):
         if len(self) == 0:
             return other._get_consensus_name(self)
 
-        if not is_dtype_equal(self.dtype, other.dtype):
+        # TODO: _is_dtype_union_equal is a hack around
+        # 1. buggy Multiset joins
+        # 2. CategoricalIndex lacking setops
+        # I'd like to fix those before merging CategoricalDtype
+        if not _is_dtype_union_equal(self.dtype, other.dtype):
             this = self.astype('O')
             other = other.astype('O')
             return this.union(other)
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
@@ -687,20 +687,6 @@ def _evaluate_compare(self, other):
         cls.__le__ = _make_compare('__le__')
         cls.__ge__ = _make_compare('__ge__')
 
-    def union(self, other):
-        """
-        Set union of a CategoricalIndex with some iterable
-        """
-        from pandas.api.types import union_categoricals
-
-        if isinstance(other, CategoricalIndex):
-            categories = union_categoricals([self, other]).categories
-            left = self.set_categories(categories)
-            right = other.set_categories(categories)
-        else:
-            left, right = self, other
-        return super(CategoricalIndex, left).union(right)
-
     def _delegate_method(self, name, *args, **kwargs):
         """ method delegation to the ._values """
         method = getattr(self._values, name)
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
@@ -142,8 +142,6 @@ def test_constructor_tuples_datetimes(self):
                           (Timestamp('2010-01-02'),)], tupleize_cols=False)
         tm.assert_index_equal(result.categories, expected)
 
->>>>>>> REF/ENH/API: Add parametrized CategoricalDtype
-
     def test_constructor_unsortable(self):
 
         # it works!