pandas-dev · jbrockmendel · Jun 24, 2019 · Jun 26, 2019 · gfyoung · Jun 26, 2019
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -457,11 +457,12 @@ def _formatter(self, boxed=False):
         # Defer to CategoricalFormatter's formatter.
         return None
 
-    def copy(self):
-        """
-        Copy constructor.
-        """
-        return self._constructor(values=self._codes.copy(),
+    @Appender(ExtensionArray.copy.__doc__)
+    def copy(self, deep: bool = False):
+        values = self._codes
+        if deep:
+            values = values.copy()
+        return self._constructor(values=values,
                                  dtype=self.dtype,
                                  fastpath=True)
 
@@ -483,7 +484,7 @@ def astype(self, dtype, copy=True):
         if is_categorical_dtype(dtype):
             # GH 10696/18593
             dtype = self.dtype.update_dtype(dtype)
-            self = self.copy() if copy else self
+            self = self.copy(deep=True) if copy else self
             if dtype == self.dtype:
                 return self
             return self._set_dtype(dtype)
@@ -578,7 +579,7 @@ def _from_inferred_categories(cls, inferred_categories, inferred_codes,
             codes = _recode_for_categories(inferred_codes, cats, categories)
         elif not cats.is_monotonic_increasing:
             # Sort categories and recode for unknown categories.
-            unsorted = cats.copy()
+            unsorted = cats.copy(deep=True)
             categories = cats.sort_values()
 
             codes = _recode_for_categories(inferred_codes, unsorted,
@@ -751,7 +752,7 @@ def set_ordered(self, value, inplace=False):
         """
         inplace = validate_bool_kwarg(inplace, 'inplace')
         new_dtype = CategoricalDtype(self.categories, ordered=value)
-        cat = self if inplace else self.copy()
+        cat = self if inplace else self.copy(deep=True)
         cat._dtype = new_dtype
         if not inplace:
             return cat
@@ -849,7 +850,7 @@ def set_categories(self, new_categories, ordered=None, rename=False,
             ordered = self.dtype.ordered
         new_dtype = CategoricalDtype(new_categories, ordered=ordered)
 
-        cat = self if inplace else self.copy()
+        cat = self if inplace else self.copy(deep=True)
         if rename:
             if (cat.dtype.categories is not None and
                     len(new_dtype.categories) < len(cat.dtype.categories)):
@@ -937,7 +938,7 @@ def rename_categories(self, new_categories, inplace=False):
         Categories (2, object): [A, B]
         """
         inplace = validate_bool_kwarg(inplace, 'inplace')
-        cat = self if inplace else self.copy()
+        cat = self if inplace else self.copy(deep=True)
 
         if isinstance(new_categories, ABCSeries):
             msg = ("Treating Series 'new_categories' as a list-like and using "
@@ -1045,7 +1046,7 @@ def add_categories(self, new_categories, inplace=False):
         new_categories = list(self.dtype.categories) + list(new_categories)
         new_dtype = CategoricalDtype(new_categories, self.ordered)
 
-        cat = self if inplace else self.copy()
+        cat = self if inplace else self.copy(deep=True)
         cat._dtype = new_dtype
         cat._codes = coerce_indexer_dtype(cat._codes, new_dtype.categories)
         if not inplace:
@@ -1127,7 +1128,7 @@ def remove_unused_categories(self, inplace=False):
         set_categories
         """
         inplace = validate_bool_kwarg(inplace, 'inplace')
-        cat = self if inplace else self.copy()
+        cat = self if inplace else self.copy(deep=True)
         idx, inv = np.unique(cat._codes, return_inverse=True)
 
         if idx.size != 0 and idx[0] == -1:  # na sentinel
@@ -2295,6 +2296,8 @@ def unique(self):
 
         # unlike np.unique, unique1d does not sort
         unique_codes = unique1d(self.codes)
+
+        # We don't need a deep copy since we overwrite cat._codes immediately
         cat = self.copy()
 
         # keep nan in codes

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -720,7 +720,10 @@ def copy(self, deep=True):
         """ copy constructor """
         values = self.values
         if deep:
-            values = values.copy()
+            if self.is_extension:
+                values = values.copy(deep=True)
+            else:
+                values = values.copy()
         return self.make_block_same_class(values, ndim=self.ndim)
 
     def replace(self, to_replace, value, inplace=False, filter=None,
@@ -1855,7 +1858,7 @@ def where(self, other, cond, align=True, errors='raise',
             dtype = self.dtype
 
         try:
-            result = self.values.copy()
+            result = self.values.copy(deep=True)
             icond = ~cond
             if lib.is_scalar(other):
                 result[icond] = other

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
@@ -594,7 +594,7 @@ def sanitize_array(data, index, dtype=None, copy=False,
             subarr = data.astype(dtype)
 
         if copy:
-            subarr = data.copy()
+            subarr = data.copy(deep=True)
         return subarr
 
     elif isinstance(data, (list, tuple)) and len(data) > 0:

diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py
@@ -18,7 +18,9 @@ def test_setitem_scalar_series(self, data, box_in_series):
     def test_setitem_sequence(self, data, box_in_series):
         if box_in_series:
             data = pd.Series(data)
-        original = data.copy()
+            original = data.copy()
+        else:
+            original = data.copy(deep=True)
 
         data[[0, 1]] = [data[1], data[0]]
         assert data[0] == original[1]

diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py
@@ -243,3 +243,29 @@ def _compare_other(self, s, data, op_name, other):
 
 class TestParsing(base.BaseParsingTests):
     pass
+
+
+def test_copy_deep(data):
+    # GH#27024
+    assert data[0] != data[1]
+
+    orig = data.copy(deep=True)
+    other = data.copy(deep=True)
+
+    # Modifying other will _not_ modify `data`
+    other[0] = other[1]
+    assert other[0] == other[1]
+    assert data[0] != data[1]
+
+    # Modifying other2 (a deep=False copy) _will_ modify `data`
+    other2 = data.copy(deep=False)
+    other2[0] = other2[1]
+    assert other2[0] == other2[1]
+    assert data[0] == data[1]
+
+    # Default behavior should be deep=False
+    data = orig.copy(deep=True)
+    other3 = data.copy()
+
+    other3[0] = other3[1]
+    assert data[0] == data[1]