From 1c0229bd4bbbe68c959dd827eb311fdff3e53b58 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 23 Apr 2018 13:52:00 +0200 Subject: [PATCH 1/3] Add data aliases for JSONArray to ensure tests are failing --- pandas/tests/extension/json/array.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 33843492cb706..32407f58a81fc 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -33,6 +33,10 @@ def __init__(self, values): raise TypeError self.data = values + # Some aliases for common attribute names to ensure pandas supports + # these + self._items = self._data = self.values = self.data + @classmethod def _constructor_from_sequence(cls, scalars): return cls(scalars) From 9aa32340bc921e32770ded038876236c466cd9d1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 23 Apr 2018 13:41:26 +0200 Subject: [PATCH 2/3] TST: ExtensionArrays disallow .values attribute --- pandas/tests/extension/base/interface.py | 6 ++++++ pandas/tests/extension/decimal/array.py | 25 +++++++++++++----------- pandas/tests/extension/json/array.py | 5 ++++- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 2162552e9650d..9b60652fbace3 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -50,3 +50,9 @@ def test_is_extension_array_dtype(self, data): assert is_extension_array_dtype(data.dtype) assert is_extension_array_dtype(pd.Series(data)) assert isinstance(data.dtype, ExtensionDtype) + + def test_no_values_attribute(self, data): + # GH-20735: EA's with .values attribute give problems with internal + # code, disallowing this for now until solved + assert not hasattr(data, 'values') + assert not hasattr(data, '_values') diff --git a/pandas/tests/extension/decimal/array.py 
b/pandas/tests/extension/decimal/array.py index a8e88365b5648..bd7d9500fdc14 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -30,10 +30,13 @@ class DecimalArray(ExtensionArray): def __init__(self, values): values = np.asarray(values, dtype=object) - self.values = values + self._data = values # Some aliases for common attribute names to ensure pandas supports # these - self._items = self._data = self.data = self.values + self._items = self.data = self._data + # those aliases are currently not working due to assumptions + # in internal code (GH-20735) + # self._values = self.values = self.data @classmethod def _constructor_from_sequence(cls, scalars): @@ -45,13 +48,13 @@ def _from_factorized(cls, values, original): def __getitem__(self, item): if isinstance(item, numbers.Integral): - return self.values[item] + return self._data[item] else: - return type(self)(self.values[item]) + return type(self)(self._data[item]) def copy(self, deep=False): if deep: - return type(self)(self.values.copy()) + return type(self)(self._data.copy()) return type(self)(self) def __setitem__(self, key, value): @@ -59,13 +62,13 @@ def __setitem__(self, key, value): value = [decimal.Decimal(v) for v in value] else: value = decimal.Decimal(value) - self.values[key] = value + self._data[key] = value def __len__(self): - return len(self.values) + return len(self._data) def __repr__(self): - return 'DecimalArray({!r})'.format(self.values) + return 'DecimalArray({!r})'.format(self._data) @property def nbytes(self): @@ -75,7 +78,7 @@ def nbytes(self): return 0 def isna(self): - return np.array([x.is_nan() for x in self.values]) + return np.array([x.is_nan() for x in self._data]) def take(self, indexer, allow_fill=True, fill_value=None): indexer = np.asarray(indexer) @@ -86,7 +89,7 @@ def take(self, indexer, allow_fill=True, fill_value=None): return type(self)([self._na_value] * len(indexer)) indexer = _ensure_platform_int(indexer) - out = 
self.values.take(indexer) + out = self._data.take(indexer) out[mask] = self._na_value return type(self)(out) @@ -97,7 +100,7 @@ def _na_value(self): @classmethod def _concat_same_type(cls, to_concat): - return cls(np.concatenate([x.values for x in to_concat])) + return cls(np.concatenate([x._data for x in to_concat])) def make_data(): diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 32407f58a81fc..f5290467203bb 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -35,7 +35,10 @@ def __init__(self, values): # Some aliases for common attribute names to ensure pandas supports # these - self._items = self._data = self.values = self.data + self._items = self._data = self.data + # those aliases are currently not working due to assumptions + # in internal code (GH-20735) + # self._values = self.values = self.data @classmethod def _constructor_from_sequence(cls, scalars): From a57f8e067faadc53ebfdd1b645a8a322888b4a47 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 23 Apr 2018 14:01:02 +0200 Subject: [PATCH 3/3] add doc note --- pandas/core/arrays/base.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 97a764fa7dbe8..1a76797afd9b9 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -61,7 +61,7 @@ class ExtensionArray(object): ExtensionArrays are limited to 1 dimension. - They may be backed by none, one, or many NumPy ararys. For example, + They may be backed by none, one, or many NumPy arrays. For example, ``pandas.Categorical`` is an extension array backed by two arrays, one for codes and one for categories. An array of IPv6 address may be backed by a NumPy structured array with two fields, one for the @@ -69,6 +69,11 @@ class ExtensionArray(object): by some other storage type, like Python lists. 
Pandas makes no assumptions on how the data are stored, just that it can be converted to a NumPy array. + The ExtensionArray interface does not impose any rules on how this data is stored. However, currently, the backing data cannot be stored in + attributes called ``.values`` or ``._values`` to ensure full compatibility + with pandas internals. But other names such as ``.data``, ``._data``, + ``._items``, ... can be freely used. """ # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray. # Don't override this.