diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index fd1c1271a5e37..0fbe5593f69fb 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -123,6 +123,7 @@ source, you should no longer need to install Cython into your build environment Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- The :class:`ExtensionArray` interface now requires the author to implement ``size`` instead of ``__len__``. The ``size`` property must *not* depend on either ``__len__`` or ``shape`` (:issue:`28389`) - :class:`pandas.core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`). - :class:`pandas.core.arrays.IntervalArray` adopts a new ``__repr__`` in accordance with other array classes (:issue:`25022`) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 0778b6726d104..d76f009c3402c 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -83,7 +83,7 @@ class ExtensionArray: * _from_sequence * _from_factorized * __getitem__ - * __len__ + * size * dtype * nbytes * isna @@ -319,7 +319,7 @@ def __len__(self) -> int: ------- length : int """ - raise AbstractMethodError(self) + return self.shape[0] def __iter__(self): """ @@ -342,19 +342,28 @@ def dtype(self) -> ExtensionDtype: """ raise AbstractMethodError(self) + @property + def size(self) -> int: + """ + The number of elements in the array. + + Must *not* depend on self.shape or self.__len__ + """ + raise AbstractMethodError(self) + @property def shape(self) -> Tuple[int, ...]: """ Return a tuple of the array dimensions. """ - return (len(self),) + return (self.size,) @property def ndim(self) -> int: """ Extension Arrays are only allowed to be 1-dimensional. 
""" - return 1 + return len(self.shape) @property def nbytes(self) -> int: diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index bda5f8f4326f1..a307e742f1a44 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -394,7 +394,7 @@ def __array__(self, dtype=None): @property def size(self) -> int: """The number of elements in this array.""" - return np.prod(self.shape) + return self._data.size def __len__(self): return len(self._data) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 7b03bf35faf25..0f5cf6bd2ce82 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -469,8 +469,9 @@ def __setitem__(self, key, value): self._data[key] = value self._mask[key] = mask - def __len__(self): - return len(self._data) + @property + def size(self) -> int: + return self._data.size @property def nbytes(self): diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index bf7404e8997c6..da70d3382fe17 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -240,8 +240,9 @@ def __setitem__(self, key, value): self._ndarray[key] = value - def __len__(self) -> int: - return len(self._ndarray) + @property + def size(self) -> int: + return self._ndarray.size @property def nbytes(self) -> int: diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 5acc922734529..18129a7dace05 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -526,7 +526,8 @@ def _valid_sp_values(self): mask = notna(sp_vals) return sp_vals[mask] - def __len__(self) -> int: + @property + def size(self) -> int: return self.sp_index.length @property diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py index 6a28f76e474cc..ce9aba536c328 100644 --- a/pandas/tests/extension/arrow/arrays.py +++ b/pandas/tests/extension/arrow/arrays.py @@ -88,7 
+88,8 @@ def __getitem__(self, item): vals = self._data.to_pandas()[item] return type(self).from_scalars(vals) - def __len__(self): + @property + def size(self) -> int: return len(self._data) def astype(self, dtype, copy=True): diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index a1988744d76a1..e195463fe7149 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -137,7 +137,8 @@ def __setitem__(self, key, value): value = decimal.Decimal(value) self._data[key] = value - def __len__(self) -> int: + @property + def size(self) -> int: return len(self._data) @property diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index b64ddbd6ac84d..6dd49ed73c460 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -106,7 +106,8 @@ def __setitem__(self, key, value): assert isinstance(v, self.dtype.type) self.data[k] = v - def __len__(self) -> int: + @property + def size(self) -> int: return len(self.data) @property