pandas-dev · jreback · Dec 13, 2017 · Dec 10, 2017 · Dec 12, 2017 · Dec 13, 2017
diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
@@ -263,6 +263,7 @@ Conversion
 - Adding a ``Period`` object to a ``datetime`` or ``Timestamp`` object will now correctly raise a ``TypeError`` (:issue:`17983`)
 - Fixed a bug where ``FY5253`` date offsets could incorrectly raise an ``AssertionError`` in arithmetic operatons (:issue:`14774`)
 - Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`)
+- Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`)
 
 
 Indexing

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
@@ -436,9 +436,12 @@ def astype(self, dtype, copy=True):
 
         """
         if is_categorical_dtype(dtype):
-            if copy is True:
-                return self.copy()
-            return self
+            # GH 10696/18593
+            dtype = self.dtype._update_dtype(dtype)
     @pytest.mark.parametrize('dtype', [ 
         CategoricalDtype(list('abc'), False), 
         CategoricalDtype(list('abc'), True)]) 
     @pytest.mark.parametrize('new_dtype', [ 
         'category', 
         CategoricalDtype(None, False), 
         CategoricalDtype(None, True), 
         CategoricalDtype(list('abc'), False), 
         CategoricalDtype(list('abc'), True), 
         CategoricalDtype(list('cba'), False), 
         CategoricalDtype(list('cba'), True), 
         CategoricalDtype(list('wxyz'), False), 
         CategoricalDtype(list('wxyz'), True)]) 
     def test_update_dtype(self, dtype, new_dtype): 
         if isinstance(new_dtype, string_types) and new_dtype == 'category': 
             expected_categories = dtype.categories 
             expected_ordered = dtype.ordered 
         else: 
             expected_categories = new_dtype.categories 
             if expected_categories is None: 
                 expected_categories = dtype.categories 
             expected_ordered = new_dtype.ordered 
         result = dtype._update_dtype(new_dtype) 
         tm.assert_index_equal(result.categories, expected_categories) 
         assert result.ordered is expected_ordered 
     @pytest.mark.parametrize('dtype', [ 
         CategoricalDtype(list('abc'), False), 
         CategoricalDtype(list('abc'), True)]) 
     @pytest.mark.parametrize('new_dtype', [ 
         'category', 
         CategoricalDtype(None, False), 
         CategoricalDtype(None, True), 
         CategoricalDtype(list('abc'), False), 
         CategoricalDtype(list('abc'), True), 
         CategoricalDtype(list('cba'), False), 
         CategoricalDtype(list('cba'), True), 
         CategoricalDtype(list('wxyz'), False), 
         CategoricalDtype(list('wxyz'), True)]) 
     def test_update_dtype(self, dtype, new_dtype): 
         if isinstance(new_dtype, string_types) and new_dtype == 'category': 
             expected_categories = dtype.categories 
             expected_ordered = dtype.ordered 
         else: 
             expected_categories = new_dtype.categories 
             if expected_categories is None: 
                 expected_categories = dtype.categories 
             expected_ordered = new_dtype.ordered 
  
         result = dtype._update_dtype(new_dtype) 
         tm.assert_index_equal(result.categories, expected_categories) 
         assert result.ordered is expected_ordered 
+            self = self.copy() if copy else self
+            if dtype == self.dtype:
+                return self
+            return self._set_dtype(dtype)
         return np.array(self, dtype=dtype, copy=copy)
 
     @cache_readonly

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -54,7 +54,7 @@
 import pandas.core.dtypes.concat as _concat
 
 from pandas.core.dtypes.generic import ABCSeries, ABCDatetimeIndex
-from pandas.core.common import is_null_slice
+from pandas.core.common import is_null_slice, _any_not_none
 import pandas.core.algorithms as algos
 
 from pandas.core.index import Index, MultiIndex, _ensure_index
@@ -573,7 +573,6 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
             raise TypeError(msg)
 
         # may need to convert to categorical
-        # this is only called for non-categoricals
         if self.is_categorical_astype(dtype):
 
             # deprecated 17636
@@ -589,13 +588,16 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
                               "CategoricalDtype instead",
                               FutureWarning, stacklevel=7)
 
-            kwargs = kwargs.copy()
-            categories = getattr(dtype, 'categories', None)
-            ordered = getattr(dtype, 'ordered', False)
+            categories = kwargs.get('categories', None)
+            ordered = kwargs.get('ordered', None)
+            if _any_not_none(categories, ordered):
+                dtype = CategoricalDtype(categories, ordered)
 
-            kwargs.setdefault('categories', categories)
-            kwargs.setdefault('ordered', ordered)
-            return self.make_block(Categorical(self.values, **kwargs))
+            if is_categorical_dtype(self.values):
+                # GH 10696/18593: update an existing categorical efficiently
+                return self.make_block(self.values.astype(dtype, copy=copy))
+
+            return self.make_block(Categorical(self.values, dtype=dtype))
 
         # astype processing
         dtype = np.dtype(dtype)
@@ -2427,23 +2429,6 @@ def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
 
         return self.make_block_same_class(new_values, new_mgr_locs)
 
-    def _astype(self, dtype, copy=False, errors='raise', values=None,
-                klass=None, mgr=None):
-        """
-        Coerce to the new type (if copy=True, return a new copy)
-        raise on an except if raise == True
-        """
-
-        if self.is_categorical_astype(dtype):
-            values = self.values
-        else:
-            values = np.asarray(self.values).astype(dtype, copy=False)
-
-        if copy:
-            values = values.copy()
-
-        return self.make_block(values)
-
     def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs):
         """ convert to our native types format, slicing if desired """
 

diff --git a/pandas/tests/categorical/test_dtypes.py b/pandas/tests/categorical/test_dtypes.py
@@ -99,10 +99,54 @@ def test_codes_dtypes(self):
         result = result.remove_categories(['foo%05d' % i for i in range(300)])
         assert result.codes.dtype == 'int8'
 
-    def test_astype_categorical(self):
+    @pytest.mark.parametrize('ordered', [True, False])
+    def test_astype(self, ordered):
+        # string
+        cat = Categorical(list('abbaaccc'), ordered=ordered)
+        result = cat.astype(object)
+        expected = np.array(cat)
+        tm.assert_numpy_array_equal(result, expected)
+
+        msg = 'could not convert string to float'
+        with tm.assert_raises_regex(ValueError, msg):
+            cat.astype(float)
+
+        # numeric
+        cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered)
+        result = cat.astype(object)
+        expected = np.array(cat, dtype=object)
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = cat.astype(int)
+        expected = np.array(cat, dtype=np.int)
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = cat.astype(float)
+        expected = np.array(cat, dtype=np.float)
+        tm.assert_numpy_array_equal(result, expected)
+
+    @pytest.mark.parametrize('dtype_ordered', [True, False])
+    @pytest.mark.parametrize('cat_ordered', [True, False])
+    def test_astype_category(self, dtype_ordered, cat_ordered):
+        # GH 10696/18593
+        data = list('abcaacbab')
+        cat = Categorical(data, categories=list('bac'), ordered=cat_ordered)
+
+        # standard categories
+        dtype = CategoricalDtype(ordered=dtype_ordered)
+        result = cat.astype(dtype)
+        expected = Categorical(
+            data, categories=cat.categories, ordered=dtype_ordered)
+        tm.assert_categorical_equal(result, expected)
 
-        cat = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])
-        tm.assert_categorical_equal(cat, cat.astype('category'))
-        tm.assert_almost_equal(np.array(cat), cat.astype('object'))
+        # non-standard categories
+        dtype = CategoricalDtype(list('adc'), dtype_ordered)
+        result = cat.astype(dtype)
+        expected = Categorical(data, dtype=dtype)
+        tm.assert_categorical_equal(result, expected)
 
-        pytest.raises(ValueError, lambda: cat.astype(float))
+        if dtype_ordered is False:
+            # dtype='category' can't specify ordered, so only test once
+            result = cat.astype('category')
+            expected = cat
+            tm.assert_categorical_equal(result, expected)
diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
@@ -411,19 +411,18 @@ def test_astype(self):
         result = IntervalIndex.from_intervals(result.values)
         tm.assert_index_equal(result, expected)
 
-    @pytest.mark.parametrize('copy', [True, False])
     @pytest.mark.parametrize('name', [None, 'foo'])
     @pytest.mark.parametrize('dtype_ordered', [True, False])
     @pytest.mark.parametrize('index_ordered', [True, False])
-    def test_astype_category(self, copy, name, dtype_ordered, index_ordered):
+    def test_astype_category(self, name, dtype_ordered, index_ordered):
         # GH 18630
         index = self.create_index(ordered=index_ordered)
         if name:
             index = index.rename(name)
 
         # standard categories
         dtype = CategoricalDtype(ordered=dtype_ordered)
-        result = index.astype(dtype, copy=copy)
+        result = index.astype(dtype)
         expected = CategoricalIndex(index.tolist(),
                                     name=name,
                                     categories=index.categories,
@@ -432,13 +431,13 @@ def test_astype_category(self, copy, name, dtype_ordered, index_ordered):
 
         # non-standard categories
         dtype = CategoricalDtype(index.unique().tolist()[:-1], dtype_ordered)
-        result = index.astype(dtype, copy=copy)
+        result = index.astype(dtype)
         expected = CategoricalIndex(index.tolist(), name=name, dtype=dtype)
         tm.assert_index_equal(result, expected)
 
         if dtype_ordered is False:
             # dtype='category' can't specify ordered, so only test once
-            result = index.astype('category', copy=copy)
+            result = index.astype('category')
             expected = index
             tm.assert_index_equal(result, expected)
 

diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py
@@ -322,6 +322,44 @@ def cmp(a, b):
                         lambda x: x.astype('object').astype(Categorical)]:
             pytest.raises(TypeError, lambda: invalid(s))
 
+    @pytest.mark.parametrize('name', [None, 'foo'])
+    @pytest.mark.parametrize('dtype_ordered', [True, False])
+    @pytest.mark.parametrize('series_ordered', [True, False])
+    def test_astype_categorical_to_categorical(self, name, dtype_ordered,
+                                               series_ordered):
+        # GH 10696/18593
+        s_data = list('abcaacbab')
+        s_dtype = CategoricalDtype(list('bac'), ordered=series_ordered)
+        s = Series(s_data, dtype=s_dtype, name=name)
+
+        # unspecified categories
+        dtype = CategoricalDtype(ordered=dtype_ordered)
+        result = s.astype(dtype)
+        exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered)
+        expected = Series(s_data, name=name, dtype=exp_dtype)
+        tm.assert_series_equal(result, expected)
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            result = s.astype('category', ordered=dtype_ordered)
+        tm.assert_series_equal(result, expected)
+
+        # different categories
+        dtype = CategoricalDtype(list('adc'), dtype_ordered)
+        result = s.astype(dtype)
+        expected = Series(s_data, name=name, dtype=dtype)
+        tm.assert_series_equal(result, expected)
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            result = s.astype(
+                'category', categories=list('adc'), ordered=dtype_ordered)
+        tm.assert_series_equal(result, expected)
+
+        if dtype_ordered is False:
+            # not specifying ordered, so only test once
+            expected = s
+            result = s.astype('category')
+            tm.assert_series_equal(result, expected)
+
     def test_astype_categoricaldtype(self):
         s = Series(['a', 'b', 'a'])
         result = s.astype(CategoricalDtype(['a', 'b'], ordered=True))