From 83a1fe2cb375a0289e3312af293f999e97279a67 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 11 Sep 2019 08:48:39 -0700 Subject: [PATCH 1/3] EA: require size instead of len --- pandas/core/arrays/base.py | 17 +++++++++++++---- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/arrays/integer.py | 5 +++-- pandas/core/arrays/numpy_.py | 5 +++-- pandas/core/arrays/sparse.py | 3 ++- pandas/tests/extension/arrow/arrays.py | 3 ++- pandas/tests/extension/decimal/array.py | 3 ++- pandas/tests/extension/json/array.py | 3 ++- 8 files changed, 28 insertions(+), 13 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 0778b6726d104..d76f009c3402c 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -83,7 +83,7 @@ class ExtensionArray: * _from_sequence * _from_factorized * __getitem__ - * __len__ + * size * dtype * nbytes * isna @@ -319,7 +319,7 @@ def __len__(self) -> int: ------- length : int """ - raise AbstractMethodError(self) + return self.shape[0] def __iter__(self): """ @@ -342,19 +342,28 @@ def dtype(self) -> ExtensionDtype: """ raise AbstractMethodError(self) + @property + def size(self) -> int: + """ + The number of elements in the array. + + Must *not* depend on self.shape or self.__len__ + """ + raise AbstractMethodError(self) + @property def shape(self) -> Tuple[int, ...]: """ Return a tuple of the array dimensions. """ - return (len(self),) + return (self.size,) @property def ndim(self) -> int: """ Extension Arrays are only allowed to be 1-dimensional. 
""" - return 1 + return len(self.shape) @property def nbytes(self) -> int: diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index bda5f8f4326f1..a307e742f1a44 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -394,7 +394,7 @@ def __array__(self, dtype=None): @property def size(self) -> int: """The number of elements in this array.""" - return np.prod(self.shape) + return self._data.size def __len__(self): return len(self._data) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 069d661e6af34..de433b8fc48fb 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -461,8 +461,9 @@ def __setitem__(self, key, value): self._data[key] = value self._mask[key] = mask - def __len__(self): - return len(self._data) + @property + def size(self) -> int: + return self._data.size @property def nbytes(self): diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 4e2e37d88eb9a..f0d6eee5ce54d 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -245,8 +245,9 @@ def __setitem__(self, key, value): else: self._ndarray[key] = value - def __len__(self) -> int: - return len(self._ndarray) + @property + def size(self) -> int: + return self._ndarray.size @property def nbytes(self) -> int: diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 201174b6b1995..f1dafd4232557 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -855,7 +855,8 @@ def _valid_sp_values(self): mask = notna(sp_vals) return sp_vals[mask] - def __len__(self) -> int: + @property + def size(self) -> int: return self.sp_index.length @property diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py index 6a28f76e474cc..ce9aba536c328 100644 --- a/pandas/tests/extension/arrow/arrays.py +++ b/pandas/tests/extension/arrow/arrays.py @@ -88,7 +88,8 @@ def 
__getitem__(self, item): vals = self._data.to_pandas()[item] return type(self).from_scalars(vals) - def __len__(self): + @property + def size(self) -> int: return len(self._data) def astype(self, dtype, copy=True): diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index a1988744d76a1..e195463fe7149 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -137,7 +137,8 @@ def __setitem__(self, key, value): value = decimal.Decimal(value) self._data[key] = value - def __len__(self) -> int: + @property + def size(self) -> int: return len(self._data) @property diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index b64ddbd6ac84d..6dd49ed73c460 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -106,7 +106,8 @@ def __setitem__(self, key, value): assert isinstance(v, self.dtype.type) self.data[k] = v - def __len__(self) -> int: + @property + def size(self) -> int: return len(self.data) @property From 684bb05b3de178361042ca68047c884a0af67daf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 11 Sep 2019 08:51:17 -0700 Subject: [PATCH 2/3] whatsnew --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 9840e4e94d28c..ef194742ec97e 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -36,6 +36,7 @@ Other enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- The :class:`ExtensionArray` interface now requires the author to implement ``size`` instead of ``__len__``. The ``size`` method must _not_ depend on either ``__len__`` or ``shape`` (:issue:`????`) - :class:`pandas.core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`). 
- :class:`pandas.core.arrays.IntervalArray` adopts a new ``__repr__`` in accordance with other array classes (:issue:`25022`) From a811e80efe431e9186e632130ac86208988fff19 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 13 Sep 2019 07:34:03 -0700 Subject: [PATCH 3/3] add gh reference --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 7719a49aac58e..cc07609b9a8e8 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -40,7 +40,7 @@ Other enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- The :class:`ExtensionArray` interface now requires the author to implement ``size`` instead of ``__len__``. The ``size`` method must _not_ depend on either ``__len__`` or ``shape`` (:issue:`????`) +- The :class:`ExtensionArray` interface now requires the author to implement ``size`` instead of ``__len__``. The ``size`` property must *not* depend on either ``__len__`` or ``shape`` (:issue:`28389`) - :class:`pandas.core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`). - :class:`pandas.core.arrays.IntervalArray` adopts a new ``__repr__`` in accordance with other array classes (:issue:`25022`)