diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 155638aca5560..a44c70f66a8b0 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -457,11 +457,12 @@ def _formatter(self, boxed=False): # Defer to CategoricalFormatter's formatter. return None - def copy(self): - """ - Copy constructor. - """ - return self._constructor(values=self._codes.copy(), + @Appender(ExtensionArray.copy.__doc__) + def copy(self, deep: bool = False): + values = self._codes + if deep: + values = values.copy() + return self._constructor(values=values, dtype=self.dtype, fastpath=True) @@ -483,7 +484,7 @@ def astype(self, dtype, copy=True): if is_categorical_dtype(dtype): # GH 10696/18593 dtype = self.dtype.update_dtype(dtype) - self = self.copy() if copy else self + self = self.copy(deep=True) if copy else self if dtype == self.dtype: return self return self._set_dtype(dtype) @@ -578,7 +579,7 @@ def _from_inferred_categories(cls, inferred_categories, inferred_codes, codes = _recode_for_categories(inferred_codes, cats, categories) elif not cats.is_monotonic_increasing: # Sort categories and recode for unknown categories. - unsorted = cats.copy() + unsorted = cats.copy(deep=True) categories = cats.sort_values() codes = _recode_for_categories(inferred_codes, unsorted, @@ -751,7 +752,7 @@ def set_ordered(self, value, inplace=False): """ inplace = validate_bool_kwarg(inplace, 'inplace') new_dtype = CategoricalDtype(self.categories, ordered=value) - cat = self if inplace else self.copy() + cat = self if inplace else self.copy(deep=True) cat._dtype = new_dtype if not inplace: return cat @@ -849,7 +850,7 @@ def set_categories(self, new_categories, ordered=None, rename=False, ordered = self.dtype.ordered new_dtype = CategoricalDtype(new_categories, ordered=ordered) - cat = self if inplace else self.copy() + cat = self if inplace else self.copy(deep=True) if rename: if (cat.dtype.categories is not None and len(new_dtype.categories) < len(cat.dtype.categories)): @@ -937,7 +938,7 @@ def rename_categories(self, new_categories, inplace=False): Categories (2, object): [A, B] """ inplace = validate_bool_kwarg(inplace, 'inplace') - cat = self if inplace else self.copy() + cat = self if inplace else self.copy(deep=True) if isinstance(new_categories, ABCSeries): msg = ("Treating Series 'new_categories' as a list-like and using " @@ -1045,7 +1046,7 @@ def add_categories(self, new_categories, inplace=False): new_categories = list(self.dtype.categories) + list(new_categories) new_dtype = CategoricalDtype(new_categories, self.ordered) - cat = self if inplace else self.copy() + cat = self if inplace else self.copy(deep=True) cat._dtype = new_dtype cat._codes = coerce_indexer_dtype(cat._codes, new_dtype.categories) if not inplace: @@ -1127,7 +1128,7 @@ def remove_unused_categories(self, inplace=False): set_categories """ inplace = validate_bool_kwarg(inplace, 'inplace') - cat = self if inplace else self.copy() + cat = self if inplace else self.copy(deep=True) idx, inv = np.unique(cat._codes, return_inverse=True) if idx.size != 0 and idx[0] == -1: # na sentinel @@ -2295,6 +2296,8 @@ def unique(self): # unlike np.unique, unique1d does not sort unique_codes = unique1d(self.codes) + + # We don't need a deep copy since we overwrite cat._codes immediately cat = self.copy() # keep nan in codes diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4cc6c86417b3b..c0b715d1236bc 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -720,7 +720,10 @@ def copy(self, deep=True): """ copy constructor """ values = self.values if deep: - values = values.copy() + if self.is_extension: + values = values.copy(deep=True) + else: + values = values.copy() return self.make_block_same_class(values, ndim=self.ndim) def replace(self, to_replace, value, inplace=False, filter=None, @@ -1855,7 +1858,7 @@ def where(self, other, cond, align=True, errors='raise', dtype = self.dtype try: - result = self.values.copy() + result = self.values.copy(deep=True) icond = ~cond if lib.is_scalar(other): result[icond] = other diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index d766d7f06d34a..8ef1771586c23 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -594,7 +594,7 @@ def sanitize_array(data, index, dtype=None, copy=False, subarr = data.astype(dtype) if copy: - subarr = data.copy() + subarr = data.copy(deep=True) return subarr elif isinstance(data, (list, tuple)) and len(data) > 0: diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index db6328e39e6cc..a60d54ef05708 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -18,7 +18,9 @@ def test_setitem_scalar_series(self, data, box_in_series): def test_setitem_sequence(self, data, box_in_series): if box_in_series: data = pd.Series(data) - original = data.copy() + original = data.copy() + else: + original = data.copy(deep=True) data[[0, 1]] = [data[1], data[0]] assert data[0] == original[1] diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 4cf9f78e1531d..ba9a156e094b6 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -243,3 +243,29 @@ def _compare_other(self, s, data, op_name, other): class TestParsing(base.BaseParsingTests): pass + + +def test_copy_deep(data): + # GH#27024 + assert data[0] != data[1] + + orig = data.copy(deep=True) + other = data.copy(deep=True) + + # Modifying other will _not_ modify `data` + other[0] = other[1] + assert other[0] == other[1] + assert data[0] != data[1] + + # Modifying other _will_ modify `data` + other2 = data.copy(deep=False) + other2[0] = other2[1] + assert other2[0] == other2[1] + assert data[0] == data[1] + + # Default behavior should be deep=False + data = orig.copy(deep=True) + other3 = data.copy() + + other3[0] = other3[1] + assert data[0] == data[1]