diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 97a764fa7dbe8..1a76797afd9b9 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -61,7 +61,7 @@ class ExtensionArray(object): ExtensionArrays are limited to 1 dimension. - They may be backed by none, one, or many NumPy ararys. For example, + They may be backed by none, one, or many NumPy arrays. For example, ``pandas.Categorical`` is an extension array backed by two arrays, one for codes and one for categories. An array of IPv6 address may be backed by a NumPy structured array with two fields, one for the @@ -69,6 +69,11 @@ class ExtensionArray(object): by some other storage type, like Python lists. Pandas makes no assumptions on how the data are stored, just that it can be converted to a NumPy array. + The ExtensionArray interface does not impose any rules on how this data + is stored. However, currently, the backing data cannot be stored in + attributes called ``.values`` or ``._values`` to ensure full compatibility + with pandas internals. Other names, such as ``.data``, ``._data``, + ``._items``, ... can be freely used. """ # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray. # Don't override this. 
diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 2162552e9650d..9b60652fbace3 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -50,3 +50,9 @@ def test_is_extension_array_dtype(self, data): assert is_extension_array_dtype(data.dtype) assert is_extension_array_dtype(pd.Series(data)) assert isinstance(data.dtype, ExtensionDtype) + + def test_no_values_attribute(self, data): + # GH-20735: EA's with .values attribute give problems with internal + # code, disallowing this for now until solved + assert not hasattr(data, 'values') + assert not hasattr(data, '_values') diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index a8e88365b5648..bd7d9500fdc14 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -30,10 +30,13 @@ class DecimalArray(ExtensionArray): def __init__(self, values): values = np.asarray(values, dtype=object) - self.values = values + self._data = values # Some aliases for common attribute names to ensure pandas supports # these - self._items = self._data = self.data = self.values + self._items = self.data = self._data + # those aliases are currently not working due to assumptions + # in internal code (GH-20735) + # self._values = self.values = self.data @classmethod def _constructor_from_sequence(cls, scalars): @@ -45,13 +48,13 @@ def _from_factorized(cls, values, original): def __getitem__(self, item): if isinstance(item, numbers.Integral): - return self.values[item] + return self._data[item] else: - return type(self)(self.values[item]) + return type(self)(self._data[item]) def copy(self, deep=False): if deep: - return type(self)(self.values.copy()) + return type(self)(self._data.copy()) return type(self)(self) def __setitem__(self, key, value): @@ -59,13 +62,13 @@ def __setitem__(self, key, value): value = [decimal.Decimal(v) for v in value] else: value = 
decimal.Decimal(value) - self.values[key] = value + self._data[key] = value def __len__(self): - return len(self.values) + return len(self._data) def __repr__(self): - return 'DecimalArray({!r})'.format(self.values) + return 'DecimalArray({!r})'.format(self._data) @property def nbytes(self): @@ -75,7 +78,7 @@ def nbytes(self): return 0 def isna(self): - return np.array([x.is_nan() for x in self.values]) + return np.array([x.is_nan() for x in self._data]) def take(self, indexer, allow_fill=True, fill_value=None): indexer = np.asarray(indexer) @@ -86,7 +89,7 @@ def take(self, indexer, allow_fill=True, fill_value=None): return type(self)([self._na_value] * len(indexer)) indexer = _ensure_platform_int(indexer) - out = self.values.take(indexer) + out = self._data.take(indexer) out[mask] = self._na_value return type(self)(out) @@ -97,7 +100,7 @@ def _na_value(self): @classmethod def _concat_same_type(cls, to_concat): - return cls(np.concatenate([x.values for x in to_concat])) + return cls(np.concatenate([x._data for x in to_concat])) def make_data(): diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 33843492cb706..f5290467203bb 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -33,6 +33,13 @@ def __init__(self, values): raise TypeError self.data = values + # Some aliases for common attribute names to ensure pandas supports + # these + self._items = self._data = self.data + # those aliases are currently not working due to assumptions + # in internal code (GH-20735) + # self._values = self.values = self.data + @classmethod def _constructor_from_sequence(cls, scalars): return cls(scalars)