diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 0853a5962272a..f6971158ee35f 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -804,6 +804,7 @@ ExtensionArray - Bug in :func:`factorize` when passing an ``ExtensionArray`` with a custom ``na_sentinel`` (:issue:`25696`). - :meth:`Series.count` miscounts NA values in ExtensionArrays (:issue:`26835`) +- Keyword argument ``deep`` has been removed from :method:`ExtensionArray.copy` (:issue:`27083`) Other ^^^^^ diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index d1dfb6b5e8599..6340cc732d6c1 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -820,15 +820,10 @@ def take(self, indices, allow_fill=False, fill_value=None): # pandas.api.extensions.take raise AbstractMethodError(self) - def copy(self, deep: bool = False) -> ABCExtensionArray: + def copy(self) -> ABCExtensionArray: """ Return a copy of the array. - Parameters - ---------- - deep : bool, default False - Also copy the underlying data backing this array. - Returns ------- ExtensionArray diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index ebf1f692ccde6..93166759d8dbd 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -605,7 +605,7 @@ def _concat_same_type(cls, to_concat): values = np.concatenate([x.asi8 for x in to_concat]) return cls(values, dtype=dtype) - def copy(self, deep=False): + def copy(self): values = self.asi8.copy() return type(self)._simple_new(values, dtype=self.dtype, freq=self.freq) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 07d5664f98714..88de497a3329f 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -1,4 +1,3 @@ -import copy import sys from typing import Type import warnings @@ -375,14 +374,10 @@ def take(self, indexer, allow_fill=False, fill_value=None): return type(self)(result, mask, copy=False) - def copy(self, deep=False): + def copy(self): data, mask = self._data, self._mask - if deep: - data = copy.deepcopy(data) - mask = copy.deepcopy(mask) - else: - data = data.copy() - mask = mask.copy() + data = data.copy() + mask = mask.copy() return type(self)(data, mask, copy=False) def __setitem__(self, key, value): diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 71f4cbae7c58d..aaa4124182598 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -680,21 +680,16 @@ def _shallow_copy(self, left=None, right=None, closed=None): return self._simple_new( left, right, closed=closed, verify_integrity=False) - def copy(self, deep=False): + def copy(self): """ Return a copy of the array. - Parameters - ---------- - deep : bool, default False - Also copy the underlying data backing this array. - Returns ------- IntervalArray """ - left = self.left.copy(deep=True) if deep else self.left - right = self.right.copy(deep=True) if deep else self.right + left = self.left.copy(deep=True) + right = self.right.copy(deep=True) closed = self.closed # TODO: Could skip verify_integrity here. return type(self).from_arrays(left, right, closed=closed) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index f651f89fab834..1c5dc7666c3a1 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -285,7 +285,7 @@ def take(self, indices, allow_fill=False, fill_value=None): fill_value=fill_value) return type(self)(result) - def copy(self, deep=False): + def copy(self): return type(self)(self._ndarray.copy()) def _values_for_argsort(self): diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 1c4ab70fa9332..3512d4e9e29db 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -1262,12 +1262,8 @@ def searchsorted(self, v, side="left", sorter=None): v, side, sorter ) - def copy(self, deep=False): - if deep: - values = self.sp_values.copy() - else: - values = self.sp_values - + def copy(self): + values = self.sp_values.copy() return self._simple_new(values, self.sp_index, self.dtype) @classmethod diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 49f657332bbbf..777fa2eadd289 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -429,7 +429,9 @@ def __reduce__(self): @Appender(_index_shared_docs['copy']) def copy(self, deep=False, name=None): - array = self._data.copy(deep=deep) + array = self._data + if deep: + array = array.copy() attributes = self._get_attributes_dict() if name is not None: attributes.update(name=name) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 0b2af9391784c..db0eb44eabbfe 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2212,7 +2212,7 @@ def copy(self, deep=True): """ copy constructor """ values = self.values if deep: - values = values.copy(deep=True) + values = values.copy() return self.make_block_same_class(values) def get_values(self, dtype=None): diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index d766d7f06d34a..6757c698de027 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -199,8 +199,10 @@ def init_dict(data, index, columns, dtype=None): arrays = (com.maybe_iterable_to_list(data[k]) for k in keys) # GH#24096 need copy to be deep for datetime64tz case # TODO: See if we can avoid these copies + arrays = [arr if not isinstance(arr, ABCIndexClass) else arr._data + for arr in arrays] arrays = [arr if not is_datetime64tz_dtype(arr) else - arr.copy(deep=True) for arr in arrays] + arr.copy() for arr in arrays] return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype) diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 3e3bae6444082..2e740c0acc465 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -450,7 +450,9 @@ def copy(self, deep=True): """ # TODO: https://github.com/pandas-dev/pandas/issues/22314 # We skip the block manager till that is resolved. - new_data = self.values.copy(deep=deep) + new_data = self.values + if deep: + new_data = new_data.copy() return self._constructor(new_data, sparse_index=self.sp_index, fill_value=self.fill_value, index=self.index.copy(), diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 231b5a92dbb3a..fbf86f66e437f 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -591,9 +591,9 @@ def test_set_fill_invalid_non_scalar(self, val): with pytest.raises(ValueError, match=msg): arr.fill_value = val - def test_copy_shallow(self): - arr2 = self.arr.copy(deep=False) - assert arr2.sp_values is self.arr.sp_values + def test_copy(self): + arr2 = self.arr.copy() + assert arr2.sp_values is not self.arr.sp_values assert arr2.sp_index is self.arr.sp_index def test_values_asarray(self): diff --git a/pandas/tests/extension/arrow/bool.py b/pandas/tests/extension/arrow/bool.py index 2263f53544e41..0d6396033fac7 100644 --- a/pandas/tests/extension/arrow/bool.py +++ b/pandas/tests/extension/arrow/bool.py @@ -108,11 +108,8 @@ def take(self, indices, allow_fill=False, fill_value=None): allow_fill=allow_fill) return self._from_sequence(result, dtype=self.dtype) - def copy(self, deep=False): - if deep: - return type(self)(copy.deepcopy(self._data)) - else: - return type(self)(copy.copy(self._data)) + def copy(self): + return type(self)(copy.copy(self._data)) @classmethod def _concat_same_type(cls, to_concat): diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index a7f28310b7554..21ce5e999334e 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -17,8 +17,9 @@ def dtype(): @pytest.fixture def data(): - return ArrowBoolArray.from_scalars(np.random.randint(0, 2, size=100, - dtype=bool)) + values = np.random.randint(0, 2, size=100, dtype=bool) + values[1] = ~values[0] + return ArrowBoolArray.from_scalars(values) @pytest.fixture @@ -36,7 +37,9 @@ def test_array_type_with_arg(self, data, dtype): class TestInterface(BaseArrowTests, base.BaseInterfaceTests): - pass + def test_copy(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.copy() class TestConstructors(BaseArrowTests, base.BaseConstructorsTests): diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 6388902e45627..fd47ae6f31290 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -66,3 +66,11 @@ def test_isna_extension_array(self, data_missing): assert not na.all() assert na.dtype._is_boolean + + def test_copy(self, data): + # GH#27083 removing deep keyword from EA.copy + assert data[0] != data[1] + result = data.copy() + + data[1] = data[0] + assert result[1] != result[0] diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index b6e839f250e4e..6fbd43e46495f 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -16,7 +16,7 @@ def data(): """Length-100 array for this type. * data[0] and data[1] should both be non missing - * data[0] and data[1] should not gbe equal + * data[0] and data[1] should not be equal """ raise NotImplementedError diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 3b95c8d919eb1..2b1bb53e962be 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -101,10 +101,8 @@ def take(self, indexer, allow_fill=False, fill_value=None): allow_fill=allow_fill) return self._from_sequence(result) - def copy(self, deep=False): - if deep: - return type(self)(self._data.copy()) - return type(self)(self) + def copy(self): + return type(self)(self._data.copy()) def astype(self, dtype, copy=True): if isinstance(dtype, type(self.dtype)): diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 4b93f0e12e32a..1b5009830303b 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -143,7 +143,7 @@ def take(self, indexer, allow_fill=False, fill_value=None): return self._from_sequence(output) - def copy(self, deep=False): + def copy(self): return type(self)(self.data[:]) def astype(self, dtype, copy=True): diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index faf1905ea1763..86ca3e230ddd5 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -98,6 +98,10 @@ class TestInterface(BaseSparseTests, base.BaseInterfaceTests): def test_no_values_attribute(self, data): pytest.skip("We have values") + def test_copy(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.copy() + class TestConstructors(BaseSparseTests, base.BaseConstructorsTests): pass