From 05ef547255e24e69e74385b9f912931db4411b34 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Jun 2019 13:32:34 -0500 Subject: [PATCH 1/4] remove deep keyword from EA.copy --- pandas/core/arrays/base.py | 17 ++++++++++++++--- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/arrays/integer.py | 11 +++-------- pandas/core/arrays/interval.py | 11 +++-------- pandas/core/arrays/numpy_.py | 2 +- pandas/core/arrays/sparse.py | 8 ++------ pandas/core/indexes/interval.py | 4 +++- pandas/core/internals/blocks.py | 2 +- pandas/core/internals/construction.py | 4 +++- pandas/core/sparse/series.py | 4 +++- pandas/tests/arrays/sparse/test_array.py | 9 +++++++-- pandas/tests/extension/arrow/bool.py | 7 ++----- pandas/tests/extension/decimal/array.py | 6 ++---- pandas/tests/extension/json/array.py | 2 +- 14 files changed, 46 insertions(+), 43 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index d1dfb6b5e8599..256f8bc022bd0 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -820,19 +820,30 @@ def take(self, indices, allow_fill=False, fill_value=None): # pandas.api.extensions.take raise AbstractMethodError(self) - def copy(self, deep: bool = False) -> ABCExtensionArray: + def copy(self) -> ABCExtensionArray: """ Return a copy of the array. + Returns + ------- + ExtensionArray + """ + raise AbstractMethodError(self) + + def view(self, dtype=None) -> ABCExtensionArray: + """ + Return a view on the array. + Parameters ---------- - deep : bool, default False - Also copy the underlying data backing this array. + dtype : NumPy dtype or pandas dtype Returns ------- ExtensionArray """ + if dtype is None: + return self raise AbstractMethodError(self) # ------------------------------------------------------------------------ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index ebf1f692ccde6..93166759d8dbd 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -605,7 +605,7 @@ def _concat_same_type(cls, to_concat): values = np.concatenate([x.asi8 for x in to_concat]) return cls(values, dtype=dtype) - def copy(self, deep=False): + def copy(self): values = self.asi8.copy() return type(self)._simple_new(values, dtype=self.dtype, freq=self.freq) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 07d5664f98714..88de497a3329f 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -1,4 +1,3 @@ -import copy import sys from typing import Type import warnings @@ -375,14 +374,10 @@ def take(self, indexer, allow_fill=False, fill_value=None): return type(self)(result, mask, copy=False) - def copy(self, deep=False): + def copy(self): data, mask = self._data, self._mask - if deep: - data = copy.deepcopy(data) - mask = copy.deepcopy(mask) - else: - data = data.copy() - mask = mask.copy() + data = data.copy() + mask = mask.copy() return type(self)(data, mask, copy=False) def __setitem__(self, key, value): diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 71f4cbae7c58d..aaa4124182598 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -680,21 +680,16 @@ def _shallow_copy(self, left=None, right=None, closed=None): return self._simple_new( left, right, closed=closed, verify_integrity=False) - def copy(self, deep=False): + def copy(self): """ Return a copy of the array. - Parameters - ---------- - deep : bool, default False - Also copy the underlying data backing this array. - Returns ------- IntervalArray """ - left = self.left.copy(deep=True) if deep else self.left - right = self.right.copy(deep=True) if deep else self.right + left = self.left.copy(deep=True) + right = self.right.copy(deep=True) closed = self.closed # TODO: Could skip verify_integrity here. return type(self).from_arrays(left, right, closed=closed) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index f651f89fab834..1c5dc7666c3a1 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -285,7 +285,7 @@ def take(self, indices, allow_fill=False, fill_value=None): fill_value=fill_value) return type(self)(result) - def copy(self, deep=False): + def copy(self): return type(self)(self._ndarray.copy()) def _values_for_argsort(self): diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index d692fe6d7cabe..abf74ee56cee9 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -1262,12 +1262,8 @@ def searchsorted(self, v, side="left", sorter=None): v, side, sorter ) - def copy(self, deep=False): - if deep: - values = self.sp_values.copy() - else: - values = self.sp_values - + def copy(self): + values = self.sp_values.copy() return self._simple_new(values, self.sp_index, self.dtype) @classmethod diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 49f657332bbbf..777fa2eadd289 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -429,7 +429,9 @@ def __reduce__(self): @Appender(_index_shared_docs['copy']) def copy(self, deep=False, name=None): - array = self._data.copy(deep=deep) + array = self._data + if deep: + array = array.copy() attributes = self._get_attributes_dict() if name is not None: attributes.update(name=name) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c6be56df7ae0c..b535461f9a863 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2212,7 +2212,7 @@ def copy(self, deep=True): """ copy constructor """ values = self.values if deep: - values = values.copy(deep=True) + values = values.copy() return self.make_block_same_class(values) def get_values(self, dtype=None): diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index d766d7f06d34a..6757c698de027 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -199,8 +199,10 @@ def init_dict(data, index, columns, dtype=None): arrays = (com.maybe_iterable_to_list(data[k]) for k in keys) # GH#24096 need copy to be deep for datetime64tz case # TODO: See if we can avoid these copies + arrays = [arr if not isinstance(arr, ABCIndexClass) else arr._data + for arr in arrays] arrays = [arr if not is_datetime64tz_dtype(arr) else - arr.copy(deep=True) for arr in arrays] + arr.copy() for arr in arrays] return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype) diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 3e3bae6444082..2e740c0acc465 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -450,7 +450,9 @@ def copy(self, deep=True): """ # TODO: https://github.com/pandas-dev/pandas/issues/22314 # We skip the block manager till that is resolved. - new_data = self.values.copy(deep=deep) + new_data = self.values + if deep: + new_data = new_data.copy() return self._constructor(new_data, sparse_index=self.sp_index, fill_value=self.fill_value, index=self.index.copy(), diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 231b5a92dbb3a..d8ee9f640e86c 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -591,11 +591,16 @@ def test_set_fill_invalid_non_scalar(self, val): with pytest.raises(ValueError, match=msg): arr.fill_value = val - def test_copy_shallow(self): - arr2 = self.arr.copy(deep=False) + def test_view(self): + arr2 = self.arr.view() assert arr2.sp_values is self.arr.sp_values assert arr2.sp_index is self.arr.sp_index + def test_copy_shallow(self): + arr2 = self.arr.copy() + assert arr2.sp_values is not self.arr.sp_values + assert arr2.sp_index is self.arr.sp_index + def test_values_asarray(self): assert_almost_equal(self.arr.to_dense(), self.arr_data) diff --git a/pandas/tests/extension/arrow/bool.py b/pandas/tests/extension/arrow/bool.py index 2263f53544e41..0d6396033fac7 100644 --- a/pandas/tests/extension/arrow/bool.py +++ b/pandas/tests/extension/arrow/bool.py @@ -108,11 +108,8 @@ def take(self, indices, allow_fill=False, fill_value=None): allow_fill=allow_fill) return self._from_sequence(result, dtype=self.dtype) - def copy(self, deep=False): - if deep: - return type(self)(copy.deepcopy(self._data)) - else: - return type(self)(copy.copy(self._data)) + def copy(self): + return type(self)(copy.copy(self._data)) @classmethod def _concat_same_type(cls, to_concat): diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 3b95c8d919eb1..2b1bb53e962be 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -101,10 +101,8 @@ def take(self, indexer, allow_fill=False, fill_value=None): allow_fill=allow_fill) return self._from_sequence(result) - def copy(self, deep=False): - if deep: - return type(self)(self._data.copy()) - return type(self)(self) + def copy(self): + return type(self)(self._data.copy()) def astype(self, dtype, copy=True): if isinstance(dtype, type(self.dtype)): diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 4b93f0e12e32a..1b5009830303b 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -143,7 +143,7 @@ def take(self, indexer, allow_fill=False, fill_value=None): return self._from_sequence(output) - def copy(self, deep=False): + def copy(self): return type(self)(self.data[:]) def astype(self, dtype, copy=True): From 4a599d3031191a8c3307063041648fce7a3009cf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Jun 2019 14:07:54 -0500 Subject: [PATCH 2/4] add tests for copy and view --- pandas/tests/extension/arrow/test_bool.py | 13 ++++++++++--- pandas/tests/extension/base/interface.py | 16 ++++++++++++++++ pandas/tests/extension/conftest.py | 2 +- pandas/tests/extension/test_sparse.py | 8 ++++++++ 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index a7f28310b7554..42944f31eea8a 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -17,8 +17,9 @@ def dtype(): @pytest.fixture def data(): - return ArrowBoolArray.from_scalars(np.random.randint(0, 2, size=100, - dtype=bool)) + values = np.random.randint(0, 2, size=100, dtype=bool) + values[1] = ~values[0] + return ArrowBoolArray.from_scalars(values) @pytest.fixture @@ -36,7 +37,13 @@ def test_array_type_with_arg(self, data, dtype): class TestInterface(BaseArrowTests, base.BaseInterfaceTests): - pass + def test_copy(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.copy() + + def test_view(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.view() class TestConstructors(BaseArrowTests, base.BaseConstructorsTests): diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 6388902e45627..f0fd99ac6ee89 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -66,3 +66,19 @@ def test_isna_extension_array(self, data_missing): assert not na.all() assert na.dtype._is_boolean + + def test_copy(self, data): + # GH#27083 removing deep keyword from EA.copy, implement view + assert data[0] != data[1] + result = data.copy() + + data[1] = data[0] + assert result[1] != result[0] + + def test_view(self, data): + # GH#27083 removing deep keyword from EA.copy, implement view + assert data[0] != data[1] + result = data.view() + + data[1] = data[0] + assert result[1] == result[0] diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index b6e839f250e4e..6fbd43e46495f 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -16,7 +16,7 @@ def data(): """Length-100 array for this type. * data[0] and data[1] should both be non missing - * data[0] and data[1] should not gbe equal + * data[0] and data[1] should not be equal """ raise NotImplementedError diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index faf1905ea1763..96ceaf5b5d940 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -98,6 +98,14 @@ class TestInterface(BaseSparseTests, base.BaseInterfaceTests): def test_no_values_attribute(self, data): pytest.skip("We have values") + def test_copy(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.copy() + + def test_view(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.view() + class TestConstructors(BaseSparseTests, base.BaseConstructorsTests): pass From 5475643b78443f6b7323f44b2a599b917efa4d20 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Jun 2019 15:06:48 -0500 Subject: [PATCH 3/4] whatsnew --- doc/source/whatsnew/v0.25.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 0853a5962272a..f6971158ee35f 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -804,6 +804,7 @@ ExtensionArray - Bug in :func:`factorize` when passing an ``ExtensionArray`` with a custom ``na_sentinel`` (:issue:`25696`). - :meth:`Series.count` miscounts NA values in ExtensionArrays (:issue:`26835`) +- Keyword argument ``deep`` has been removed from :method:`ExtensionArray.copy` (:issue:`27083`) Other ^^^^^ From cb5e1f9a5c1c7819e4b73f430b65c9df6bccd608 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Jun 2019 15:24:34 -0500 Subject: [PATCH 4/4] remove view --- pandas/core/arrays/base.py | 16 ---------------- pandas/tests/arrays/sparse/test_array.py | 7 +------ pandas/tests/extension/arrow/test_bool.py | 4 ---- pandas/tests/extension/base/interface.py | 10 +--------- pandas/tests/extension/test_sparse.py | 4 ---- 5 files changed, 2 insertions(+), 39 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 256f8bc022bd0..6340cc732d6c1 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -830,22 +830,6 @@ def copy(self) -> ABCExtensionArray: """ raise AbstractMethodError(self) - def view(self, dtype=None) -> ABCExtensionArray: - """ - Return a view on the array. - - Parameters - ---------- - dtype : NumPy dtype or pandas dtype - - Returns - ------- - ExtensionArray - """ - if dtype is None: - return self - raise AbstractMethodError(self) - # ------------------------------------------------------------------------ # Printing # ------------------------------------------------------------------------ diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index d8ee9f640e86c..fbf86f66e437f 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -591,12 +591,7 @@ def test_set_fill_invalid_non_scalar(self, val): with pytest.raises(ValueError, match=msg): arr.fill_value = val - def test_view(self): - arr2 = self.arr.view() - assert arr2.sp_values is self.arr.sp_values - assert arr2.sp_index is self.arr.sp_index - - def test_copy_shallow(self): + def test_copy(self): arr2 = self.arr.copy() assert arr2.sp_values is not self.arr.sp_values assert arr2.sp_index is self.arr.sp_index diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index 42944f31eea8a..21ce5e999334e 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -41,10 +41,6 @@ def test_copy(self, data): # __setitem__ does not work, so we only have a smoke-test data.copy() - def test_view(self, data): - # __setitem__ does not work, so we only have a smoke-test - data.view() - class TestConstructors(BaseArrowTests, base.BaseConstructorsTests): def test_from_dtype(self, data): diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index f0fd99ac6ee89..fd47ae6f31290 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -68,17 +68,9 @@ def test_isna_extension_array(self, data_missing): assert na.dtype._is_boolean def test_copy(self, data): - # GH#27083 removing deep keyword from EA.copy, implement view + # GH#27083 removing deep keyword from EA.copy assert data[0] != data[1] result = data.copy() data[1] = data[0] assert result[1] != result[0] - - def test_view(self, data): - # GH#27083 removing deep keyword from EA.copy, implement view - assert data[0] != data[1] - result = data.view() - - data[1] = data[0] - assert result[1] == result[0] diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 96ceaf5b5d940..86ca3e230ddd5 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -102,10 +102,6 @@ def test_copy(self, data): # __setitem__ does not work, so we only have a smoke-test data.copy() - def test_view(self, data): - # __setitem__ does not work, so we only have a smoke-test - data.view() - class TestConstructors(BaseSparseTests, base.BaseConstructorsTests): pass