From c368fb864a4f78ad3cfcf8f3965eaa7ce3bb2493 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 28 Jul 2019 19:43:46 -0700 Subject: [PATCH 1/9] implement+test EA.view --- pandas/core/arrays/base.py | 20 +++++++++++++++++++- pandas/core/arrays/categorical.py | 16 +++++----------- pandas/core/arrays/datetimelike.py | 4 +++- pandas/core/arrays/interval.py | 8 ++------ pandas/core/arrays/numpy_.py | 4 ++-- pandas/core/arrays/sparse.py | 8 ++++---- pandas/tests/extension/arrow/test_bool.py | 4 ++++ pandas/tests/extension/base/interface.py | 12 ++++++++++++ pandas/tests/extension/decimal/array.py | 4 ++-- pandas/tests/extension/json/array.py | 7 +++++-- pandas/tests/extension/test_interval.py | 5 ++++- pandas/tests/extension/test_sparse.py | 4 ++++ 12 files changed, 66 insertions(+), 30 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index ee796f9896b52..11809106e7bf0 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -351,7 +351,7 @@ def ndim(self) -> int: """ Extension Arrays are only allowed to be 1-dimensional. """ - return 1 + return len(self.shape) @property def nbytes(self) -> int: @@ -867,6 +867,24 @@ def copy(self) -> ABCExtensionArray: """ raise AbstractMethodError(self) + def view(self, dtype=None) -> ABCExtensionArray: + """ + Return a view on the array. + + Returns + ------- + ExtensionArray + + Notes + ----- + - This must return a *new* object, not self. + - The only case that *must* be implemented is with dtype=None, + giving a view with the same dtype as self. + """ + if dtype is not None: + raise NotImplementedError(dtype) + return self[:] + # ------------------------------------------------------------------------ # Printing # ------------------------------------------------------------------------ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index c22f7e0429433..a06ae572b6664 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -517,19 +517,12 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: return self._set_dtype(dtype) return np.array(self, dtype=dtype, copy=copy) - @cache_readonly - def ndim(self) -> int: - """ - Number of dimensions of the Categorical - """ - return self._codes.ndim - @cache_readonly def size(self) -> int: """ return the len of myself """ - return len(self) + return self._codes.size @cache_readonly def itemsize(self) -> int: @@ -1764,7 +1757,7 @@ def ravel(self, order="C"): ) return np.array(self) - def view(self): + def view(self, dtype=None): """ Return a view of myself. @@ -1773,9 +1766,10 @@ def view(self): Returns ------- view : Categorical - Returns `self`! """ - return self + if dtype is not None: + raise NotImplementedError(dtype) + return self._constructor(values=self._codes, dtype=self.dtype, fastpath=True) def to_dense(self): """ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 932d96a37c04c..c6d57d22f9bc5 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -566,9 +566,11 @@ def view(self, dtype=None): Returns ------- - ndarray + ndarray or ExtensionArray With the specified `dtype`. """ + if dtype is None: + return type(self)(self._data, dtype=self.dtype, freq=self.freq) return self._data.view(dtype=dtype) # ------------------------------------------------------------------ diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 2a0d2c8770063..43cc8ca51401f 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -738,18 +738,14 @@ def isna(self): return isna(self.left) @property - def nbytes(self): + def nbytes(self) -> int: return self.left.nbytes + self.right.nbytes @property - def size(self): + def size(self) -> int: # Avoid materializing self.values return self.left.size - @property - def shape(self): - return self.left.shape - def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs): """ Take elements from the IntervalArray. diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 39529177b9e35..e8397341a1a1d 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -241,11 +241,11 @@ def __setitem__(self, key, value): else: self._ndarray[key] = value - def __len__(self): + def __len__(self) -> int: return len(self._ndarray) @property - def nbytes(self): + def nbytes(self) -> int: return self._ndarray.nbytes def isna(self): diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 048f6c6f5c680..7a3cb51a05fb4 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -839,7 +839,7 @@ def fill_value(self, value): self._dtype = SparseDtype(self.dtype.subtype, value) @property - def kind(self): + def kind(self) -> str: """ The kind of sparse index for this array. One of {'integer', 'block'}. """ @@ -854,7 +854,7 @@ def _valid_sp_values(self): mask = notna(sp_vals) return sp_vals[mask] - def __len__(self): + def __len__(self) -> int: return self.sp_index.length @property @@ -868,7 +868,7 @@ def _fill_value_matches(self, fill_value): return self.fill_value == fill_value @property - def nbytes(self): + def nbytes(self) -> int: return self.sp_values.nbytes + self.sp_index.nbytes @property @@ -886,7 +886,7 @@ def density(self): return r @property - def npoints(self): + def npoints(self) -> int: """ The number of non- ``fill_value`` points. diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index 205edf5da5b74..d959afc5b871f 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -41,6 +41,10 @@ def test_copy(self, data): # __setitem__ does not work, so we only have a smoke-test data.copy() + def test_view(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.view() + class TestConstructors(BaseArrowTests, base.BaseConstructorsTests): def test_from_dtype(self, data): diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index dee8021f5375f..dfbf0b1adf5ef 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -75,3 +75,15 @@ def test_copy(self, data): data[1] = data[0] assert result[1] != result[0] + + def test_view(self, data): + # view with no dtype should return a shallow copy, *not* the same + # object + assert data[1] != data[0] + + result = data.view() + assert result is not data + assert type(result) == type(data) + + result[1] = result[0] + assert data[1] == data[0] diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index c28ff956a33a4..a1988744d76a1 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -137,11 +137,11 @@ def __setitem__(self, key, value): value = decimal.Decimal(value) self._data[key] = value - def __len__(self): + def __len__(self) -> int: return len(self._data) @property - def nbytes(self): + def nbytes(self) -> int: n = len(self) if n: return n * sys.getsizeof(self[0]) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 21c4ac8f055a2..b64ddbd6ac84d 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -80,6 +80,9 @@ def __getitem__(self, item): elif isinstance(item, abc.Iterable): # fancy indexing return type(self)([self.data[i] for i in item]) + elif isinstance(item, slice) and item == slice(None): + # Make sure we get a view + return type(self)(self.data) else: # slice return type(self)(self.data[item]) @@ -103,11 +106,11 @@ def __setitem__(self, key, value): assert isinstance(v, self.dtype.type) self.data[k] = v - def __len__(self): + def __len__(self) -> int: return len(self.data) @property - def nbytes(self): + def nbytes(self) -> int: return sys.getsizeof(self.data) def isna(self): diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 1aab71286b4a6..4fdcf930d224f 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -95,7 +95,10 @@ class TestGrouping(BaseInterval, base.BaseGroupbyTests): class TestInterface(BaseInterval, base.BaseInterfaceTests): - pass + def test_view(self, data): + # __setitem__ incorrectly makes a copy (GH#27147), so we only + # have a smoke-test + data.view() class TestReduce(base.BaseNoReduceTests): diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 84d59902d2aa7..6ebe71e173ec2 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -103,6 +103,10 @@ def test_copy(self, data): # __setitem__ does not work, so we only have a smoke-test data.copy() + def test_view(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.view() + class TestConstructors(BaseSparseTests, base.BaseConstructorsTests): pass From 6636c893fa623572ed06b7d405899aa5ef045506 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 30 Jul 2019 09:57:10 -0700 Subject: [PATCH 2/9] test, comment --- pandas/core/arrays/base.py | 3 ++- pandas/tests/extension/base/interface.py | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 55b62f9f948ab..71f128bbcb0a5 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -64,6 +64,7 @@ class ExtensionArray: shift take unique + view _concat_same_type _formatter _formatting_values @@ -147,7 +148,7 @@ class ExtensionArray: If implementing NumPy's ``__array_ufunc__`` interface, pandas expects that - 1. You defer by raising ``NotImplemented`` when any Series are present + 1. You defer by returning ``NotImplemented`` when any Series are present in `inputs`. Pandas will extract the arrays and call the ufunc again. 2. You define a ``_HANDLED_TYPES`` tuple as an attribute on the class. Pandas inspect this to determine whether the ufunc is valid for the diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index dfbf0b1adf5ef..9af7aed000573 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -87,3 +87,10 @@ def test_view(self, data): result[1] = result[0] assert data[1] == data[0] + + # check that `dtype` kwarg is accepted; it is OK if it + # raises NotImplementedError + try: + data.view(data.dtype) + except NotImplementedError: + pass From f99afb509613c8ea1168da9b4481e14d9e22c862 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 30 Jul 2019 15:18:03 -0700 Subject: [PATCH 3/9] fix broken case --- pandas/core/arrays/datetimelike.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 313c1f219f4ca..44c9aa5a99115 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -569,8 +569,8 @@ def view(self, dtype=None): ndarray or ExtensionArray With the specified `dtype`. """ - if dtype is None: - return type(self)(self._data, dtype=self.dtype, freq=self.freq) + if dtype is None or dtype is self.dtype: + return type(self)(self._data, dtype=self.dtype) return self._data.view(dtype=dtype) # ------------------------------------------------------------------ From aa290bf4f843081ff0ee3326d686d53a157f7a8d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 1 Aug 2019 09:22:11 -0700 Subject: [PATCH 4/9] docstring edits --- pandas/core/arrays/base.py | 5 +++++ pandas/core/arrays/categorical.py | 9 --------- pandas/core/arrays/datetimelike.py | 12 ------------ 3 files changed, 5 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 71f128bbcb0a5..480de2ac46e77 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -867,6 +867,11 @@ def view(self, dtype=None) -> ABCExtensionArray: """ Return a view on the array. + Parameters + ---------- + dtype : str, np.dtype, or ExtensionDtype, optional + Default None + Returns ------- ExtensionArray diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index b053aa35a4a09..e56f623962fa3 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1758,15 +1758,6 @@ def ravel(self, order="C"): return np.array(self) def view(self, dtype=None): - """ - Return a view of myself. - - For internal compatibility with numpy arrays. - - Returns - ------- - view : Categorical - """ if dtype is not None: raise NotImplementedError(dtype) return self._constructor(values=self._codes, dtype=self.dtype, fastpath=True) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 44c9aa5a99115..cf836408a3aed 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -557,18 +557,6 @@ def astype(self, dtype, copy=True): return np.asarray(self, dtype=dtype) def view(self, dtype=None): - """ - New view on this array with the same data. - - Parameters - ---------- - dtype : numpy dtype, optional - - Returns - ------- - ndarray or ExtensionArray - With the specified `dtype`. - """ if dtype is None or dtype is self.dtype: return type(self)(self._data, dtype=self.dtype) return self._data.view(dtype=dtype) From 6053a509872257dcb566bbb583268024a69f2a9c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 1 Aug 2019 11:24:48 -0700 Subject: [PATCH 5/9] add view to doc --- doc/source/reference/extensions.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst index 407aab4bb1f1b..04974f05164f8 100644 --- a/doc/source/reference/extensions.rst +++ b/doc/source/reference/extensions.rst @@ -45,6 +45,7 @@ objects. api.extensions.ExtensionArray.argsort api.extensions.ExtensionArray.astype api.extensions.ExtensionArray.copy + api.extensions.ExtensionArray.view api.extensions.ExtensionArray.dropna api.extensions.ExtensionArray.factorize api.extensions.ExtensionArray.fillna From d9158ef8cd06bcb246c162c2df7c9c7e6f938a44 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 2 Aug 2019 07:36:18 -0700 Subject: [PATCH 6/9] simplify test --- pandas/tests/extension/base/interface.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 9af7aed000573..a29f6deeffae6 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -88,9 +88,5 @@ def test_view(self, data): result[1] = result[0] assert data[1] == data[0] - # check that `dtype` kwarg is accepted; it is OK if it - # raises NotImplementedError - try: - data.view(data.dtype) - except NotImplementedError: - pass + # check specifically that the `dtype` kwarg is accepted + data.view(dtype=None) From 2b5e5fa6911c2631eb7305694e3dd1c85f591207 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 2 Aug 2019 09:46:20 -0700 Subject: [PATCH 7/9] docstring/comment edits --- pandas/core/arrays/base.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 480de2ac46e77..31949de018a3d 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -875,13 +875,11 @@ def view(self, dtype=None) -> ABCExtensionArray: Returns ------- ExtensionArray - - Notes - ----- - - This must return a *new* object, not self. - - The only case that *must* be implemented is with dtype=None, - giving a view with the same dtype as self. """ + # NB: + # - This must return a *new* object referencing the same data, not self. + # - The only case that *must* be implemented is with dtype=None, + # giving a view with the same dtype as self. if dtype is not None: raise NotImplementedError(dtype) return self[:] From 24b235c761f85062091c2a8393d690d3b015e5c3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 2 Aug 2019 13:46:25 -0700 Subject: [PATCH 8/9] requested edits --- pandas/core/arrays/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 31949de018a3d..74e459622574d 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -355,7 +355,7 @@ def ndim(self) -> int: """ Extension Arrays are only allowed to be 1-dimensional. """ - return len(self.shape) + return 1 @property def nbytes(self) -> int: @@ -863,7 +863,7 @@ def copy(self) -> ABCExtensionArray: """ raise AbstractMethodError(self) - def view(self, dtype=None) -> ABCExtensionArray: + def view(self, dtype=None) -> Union[ABCExtensionArray, np.ndarray]: """ Return a view on the array. @@ -874,7 +874,7 @@ def view(self, dtype=None) -> ABCExtensionArray: Returns ------- - ExtensionArray + ExtensionArray or np.ndarray """ # NB: # - This must return a *new* object referencing the same data, not self. From c44aada5f9c45da39457b2059608852127e59e86 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 3 Aug 2019 13:08:44 -0700 Subject: [PATCH 9/9] revert return type --- pandas/core/arrays/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 74e459622574d..41d84b0ae4853 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -874,7 +874,7 @@ def view(self, dtype=None) -> Union[ABCExtensionArray, np.ndarray]: Returns ------- - ExtensionArray or np.ndarray + ExtensionArray """ # NB: # - This must return a *new* object referencing the same data, not self.