TomAugspurger
diff --git a/‎doc/source/advanced.rst
+3-1 b/‎doc/source/advanced.rst
+3-1
diff --git a/‎doc/source/api.rst
+4-1 b/‎doc/source/api.rst
+4-1
diff --git a/‎doc/source/categorical.rst
+30-22 b/‎doc/source/categorical.rst
+30-22
diff --git a/‎doc/source/whatsnew/v0.21.0.txt
+2-2 b/‎doc/source/whatsnew/v0.21.0.txt
+2-2
diff --git a/‎pandas/core/categorical.py
+52-14 b/‎pandas/core/categorical.py
+52-14
diff --git a/‎pandas/core/dtypes/common.py
+23-4 b/‎pandas/core/dtypes/common.py
+23-4
@@ -638,9 +638,11 @@ and allows efficient indexing and storage of an index with a large number of dup
 
 .. ipython:: python
 
+   from pandas.api.types import CategoricalDtype
+
    df = pd.DataFrame({'A': np.arange(6),
                       'B': list('aabbca')})
-   df['B'] = df['B'].astype(pd.api.types.CategoricalDtype(list('cab')))
+   df['B'] = df['B'].astype(CategoricalDtype(list('cab')))
    df
    df.dtypes
    df.B.cat.categories
 
@@ -637,7 +637,10 @@ strings and apply several methods to it. These can be accessed like
 Categorical
 ~~~~~~~~~~~
 
-If the Series is of dtype ``category``, ``Series.cat`` can be used to change the the categorical
+.. autoclass:: api.types.CategoricalDtype
+   :members: categories, ordered
+
+If the Series is of dtype ``CategoricalDtype``, ``Series.cat`` can be used to change the categorical
 data. This accessor is similar to the ``Series.dt`` or ``Series.str`` and has the
 following usable methods and properties:
 
 
@@ -99,9 +99,11 @@ of :class:`~pd.api.types.CategoricalDtype`.
 
 .. ipython:: python
 
+    from pandas.api.types import CategoricalDtype
+
     s = pd.Series(["a", "b", "c", "a"])
-    cat_type = pd.api.types.CategoricalDtype(categories=["b", "c", "d"],
-                                             ordered=False)
+    cat_type = CategoricalDtype(categories=["b", "c", "d"],
+                                ordered=False)
     s_cat = s.astype(cat_type)
     s_cat
 
@@ -141,33 +143,40 @@ constructor to save the factorize step during normal constructor mode:
     splitter = np.random.choice([0,1], 5, p=[0.5,0.5])
     s = pd.Series(pd.Categorical.from_codes(splitter, categories=["train", "test"]))
 
+.. _categorical.categoricaldtype:
+
 CategoricalDtype
 ----------------
 
 .. versionchanged:: 0.21.0
 
-A categorical's type is fully described by 1.) its categories (an iterable with
-unique values and no missing values), and 2.) its orderedness (a boolean).
+A categorical's type is fully described by
+
+1. its categories: a sequence of unique values with no missing values
+2. its orderedness: a boolean
+
 This information can be stored in a :class:`~pandas.api.types.CategoricalDtype`.
 The ``categories`` argument is optional, which implies that the actual categories
 should be inferred from whatever is present in the data when the
 :class:`pandas.Categorical` is created.
 
 .. ipython:: python
 
-   pd.api.types.CategoricalDtype(['a', 'b', 'c'])
-   pd.api.types.CategoricalDtype(['a', 'b', 'c'], ordered=True)
-   pd.api.types.CategoricalDtype()
+   from pandas.api.types import CategoricalDtype
+
+   CategoricalDtype(['a', 'b', 'c'])
+   CategoricalDtype(['a', 'b', 'c'], ordered=True)
+   CategoricalDtype()
 
 A :class:`~pandas.api.types.CategoricalDtype` can be used in any place pandas
 expects a `dtype`. For example :func:`pandas.read_csv`,
-:func:`pandas.DataFrame.astype`, or the Series constructor.
+:func:`pandas.DataFrame.astype`, or in the Series constructor.
 
-As a convenience, you can use the string `'category'` in place of a
+As a convenience, you can use the string ``'category'`` in place of a
 :class:`~pandas.api.types.CategoricalDtype` when you want the default behavior of
 the categories being unordered, and equal to the set of values present in the
-array. On other words, ``dtype='category'`` is equivalent to
-``dtype=pd.api.types.CategoricalDtype()``.
+array. In other words, ``dtype='category'`` is equivalent to
+``dtype=CategoricalDtype()``.
 
 Equality Semantics
 ~~~~~~~~~~~~~~~~~~
@@ -178,19 +187,20 @@ order of the ``categories`` is not considered
 
 .. ipython:: python
 
-   c1 = pd.api.types.CategoricalDtype(['a', 'b', 'c'], ordered=False)
+   c1 = CategoricalDtype(['a', 'b', 'c'], ordered=False)
+
    # Equal, since order is not considered when ordered=False
-   c1 == pd.api.types.CategoricalDtype(['b', 'c', 'a'], ordered=False)
+   c1 == CategoricalDtype(['b', 'c', 'a'], ordered=False)
+
    # Unequal, since the second CategoricalDtype is ordered
-   c1 == pd.api.types.CategoricalDtype(['a',  'b', 'c'], ordered=True)
+   c1 == CategoricalDtype(['a',  'b', 'c'], ordered=True)
 
 All instances of ``CategoricalDtype`` compare equal to the string ``'category'``
 
 .. ipython:: python
 
    c1 == 'category'
 
-
 .. warning::
 
    Since ``dtype='category'`` is essentially ``CategoricalDtype(None, False)``,
@@ -246,9 +256,7 @@ It's also possible to pass in the categories in a specific order:
 
     .. ipython:: python
 
-         s = pd.Series(list('babc')).astype(
-             pd.api.types.CategoricalDtype(list('abcd'))
-         )
+         s = pd.Series(list('babc')).astype(CategoricalDtype(list('abcd')))
          s
 
          # categories
@@ -362,7 +370,7 @@ meaning and certain operations are possible. If the categorical is unordered, ``
     s = pd.Series(pd.Categorical(["a","b","c","a"], ordered=False))
     s.sort_values(inplace=True)
     s = pd.Series(["a","b","c","a"]).astype(
-        pd.api.types.CategoricalDtype(ordered=True)
+        CategoricalDtype(ordered=True)
     )
     s.sort_values(inplace=True)
     s
@@ -464,13 +472,13 @@ categories or a categorical with any list-like object, will raise a TypeError.
 .. ipython:: python
 
     cat = pd.Series([1,2,3]).astype(
-        pd.api.types.CategoricalDtype([3, 2, 1], ordered=True)
+        CategoricalDtype([3, 2, 1], ordered=True)
     )
     cat_base = pd.Series([2,2,2]).astype(
-        pd.api.types.CategoricalDtype([3, 2, 1], ordered=True)
+        CategoricalDtype([3, 2, 1], ordered=True)
     )
     cat_base2 = pd.Series([2,2,2]).astype(
-        pd.api.types.CategoricalDtype(ordered=True)
+        CategoricalDtype(ordered=True)
     )
 
     cat
 
@@ -10,6 +10,8 @@ users upgrade to this version.
 Highlights include:
 
 - Integration with `Apache Parquet <https://parquet.apache.org/>`__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here <io.parquet>`.
+- New user-facing :class:`pandas.api.types.CategoricalDtype` for specifying
+  categoricals independent of the data (:issue:`14711`, :issue:`15078`)
 
 Check the :ref:`API Changes <whatsnew_0210.api_breaking>` and :ref:`deprecations <whatsnew_0210.deprecations>` before updating.
 
@@ -22,8 +24,6 @@ Check the :ref:`API Changes <whatsnew_0210.api_breaking>` and :ref:`deprecations
 New features
 ~~~~~~~~~~~~
 
-- New user-facing :class:`pandas.api.types.CategoricalDtype` for specifying
-  categoricals independent of the data (:issue:`14711`, :issue:`15078`)
 - Support for `PEP 519 -- Adding a file system path protocol
   <https://www.python.org/dev/peps/pep-0519/>`_ on most readers and writers (:issue:`13823`)
 - Added ``__fspath__`` method to :class:`~pandas.HDFStore`, :class:`~pandas.ExcelFile`,
 
@@ -202,6 +202,7 @@ class Categorical(PandasObject):
         categorical, read only.
     ordered : boolean
         Whether or not this Categorical is ordered.
+    dtype : CategoricalDtype
 
     Raises
     ------
@@ -248,17 +249,30 @@ class Categorical(PandasObject):
     __array_priority__ = 1000
     _typ = 'categorical'
 
-    def __init__(self, values, categories=None, ordered=False, fastpath=False):
+    def __init__(self, values, categories=None, ordered=None, dtype=None,
+                 fastpath=False):
+
+        if dtype is not None:
+            if categories is not None or ordered is not None:
+                raise ValueError("Cannot specify both `dtype` and `categories`"
+                                 " or `ordered`.")
+            categories = dtype.categories
+            ordered = dtype.ordered
+
+        if ordered is None:
+            ordered = False
 
         if fastpath:
-            self._dtype = CategoricalDtype(categories, ordered)
+            if dtype is None:
+                dtype = CategoricalDtype(categories, ordered)
             self._codes = coerce_indexer_dtype(values, categories)
+            self._dtype = dtype
             return
 
         # sanitize input
         if is_categorical_dtype(values):
 
-            # we are either a Series, CategoricalIndex or CategoricalDtype
+            # we are either a Series or a CategoricalIndex
             if isinstance(values, (ABCSeries, ABCCategoricalIndex)):
                 values = values._values
 
@@ -308,7 +322,8 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False):
                 raise NotImplementedError("> 1 ndim Categorical are not "
                                           "supported at this time")
 
-            dtype = CategoricalDtype(categories, ordered)
+            if dtype is None or isinstance(dtype, str):
+                dtype = CategoricalDtype(categories, ordered)
 
         else:
             # there were two ways if categories are present
@@ -320,7 +335,9 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False):
 
             # make sure that we always have the same type here, no matter what
             # we get passed in
-            dtype = CategoricalDtype(categories, ordered)
+            if dtype is None or isinstance(dtype, str):
+                dtype = CategoricalDtype(categories, ordered)
+
             codes = _get_codes_for_values(values, dtype.categories)
 
             # TODO: check for old style usage. These warnings should be removed
@@ -496,16 +513,14 @@ def from_codes(cls, codes, categories, ordered=False):
             categorical. If not given, the resulting categorical will be
             unordered.
         """
-        from pandas import Index
-
         try:
             codes = np.asarray(codes, np.int64)
         except:
             raise ValueError(
                 "codes need to be convertible to an arrays of integers")
 
         # have to use the instance, not property
-        categories = cls._dtype._validate_categories(Index(categories))
+        categories = CategoricalDtype._validate_categories(categories)
 
         if len(codes) and (codes.max() >= len(categories) or codes.min() < -1):
             raise ValueError("codes need to be between -1 and "
@@ -558,13 +573,13 @@ def _set_categories(self, categories, fastpath=False):
 
         """
 
-        new = CategoricalDtype(categories, self.ordered, fastpath)
+        new_dtype = CategoricalDtype(categories, self.ordered, fastpath)
         if (not fastpath and self.dtype.categories is not None and
-                len(new.categories) != len(self.dtype.categories)):
+                len(new_dtype.categories) != len(self.dtype.categories)):
             raise ValueError("new categories need to have the same number of "
                              "items than the old categories!")
 
-        self._dtype = new
+        self._dtype = new_dtype
 
     def _codes_for_groupby(self, sort):
         """
@@ -606,6 +621,29 @@ def _codes_for_groupby(self, sort):
 
         return self.reorder_categories(cat.categories)
 
+    def _set_dtype(self, dtype):
+        """Internal method for directly updating the CategoricalDtype
+
+        Parameters
+        ----------
+        dtype : CategoricalDtype
+
+        Notes
+        -----
+        We don't do any validation here. It's assumed that the dtype is
+        a (valid) instance of `CategoricalDtype`.
+        """
+        # We want to convert old codes -> new codes *without* going to values
+        # [b, a, c, a, b, f]  | original dtype: [a, b, c, d]
+        # [1, 0, 2, 0, 1, .]  | original codes
+        # ------------------  | ----------
+        # [b, a, ., a, b, .]  | new dtype: [b, a, e]
+        # [0, 1, ., 1, 0, .]  | new codes
+        mapping = dtype.categories.get_indexer_for(self.categories)
+        codes = mapping[self.codes]
+        codes[self.codes == -1] = -1
+        return type(self)(codes, dtype=dtype, fastpath=True)
+
     def set_ordered(self, value, inplace=False):
         """
         Sets the ordered attribute to the boolean value
@@ -619,9 +657,9 @@ def set_ordered(self, value, inplace=False):
            of this categorical with ordered set to the value
         """
         inplace = validate_bool_kwarg(inplace, 'inplace')
-        new = CategoricalDtype(self.categories, ordered=value)
+        new_dtype = CategoricalDtype(self.categories, ordered=value)
         cat = self if inplace else self.copy()
-        cat._dtype = new
+        cat._dtype = new_dtype
         if not inplace:
             return cat
 
@@ -1222,7 +1260,7 @@ def value_counts(self, dropna=True):
             count = bincount(np.where(mask, code, ncat))
             ix = np.append(ix, -1)
 
-        ix = self._constructor(ix, categories=cat, ordered=obj.ordered,
+        ix = self._constructor(ix, dtype=self.dtype,
                                fastpath=True)
 
         return Series(count, index=CategoricalIndex(ix), dtype='int64')
 
@@ -692,19 +692,38 @@ def is_dtype_equal(source, target):
         return False
 
 
-def _is_dtype_union_equal(source, target):
+def is_dtype_union_equal(source, target):
     """
-    Check whether two arrays have compatible dtypes to do a unoin.
+    Check whether two arrays have compatible dtypes to do a union.
     numpy types are checked with ``is_dtype_equal``. Extension types are
     checked separately.
+
+    Parameters
+    ----------
+    source : The first dtype to compare
+    target : The second dtype to compare
+
+    Returns
+    -------
+    boolean : Whether or not the two dtypes are compatible for a union.
+
+    >>> is_dtype_union_equal("int", int)
+    True
+
+    >>> is_dtype_union_equal(CategoricalDtype(['a', 'b']),
+    ...                      CategoricalDtype(['b', 'c']))
+    True
+
+    >>> is_dtype_union_equal(CategoricalDtype(['a', 'b']),
+    ...                      CategoricalDtype(['b', 'c'], ordered=True))
+    False
     """
     source = _get_dtype(source)
     target = _get_dtype(target)
     if is_categorical_dtype(source) and is_categorical_dtype(target):
         # categoricals are union-compatible when their orderedness matches
         return source.ordered is target.ordered
-    else:
-        return is_dtype_equal(source, target)
+    return is_dtype_equal(source, target)
 
 
 def is_any_int_dtype(arr_or_dtype):