Skip to content

Commit 545d2de

Browse files
jorisvandenbossche authored and jreback committed
TST: ExtensionArrays disallow .values attribute (pandas-dev#20794)
1 parent ed511e9 commit 545d2de

File tree

4 files changed

+33
-12
lines changed

4 files changed

+33
-12
lines changed

pandas/core/arrays/base.py

+6 -1
Original file line number | Diff line number | Diff line change
@@ -61,14 +61,19 @@ class ExtensionArray(object):
6161
6262
ExtensionArrays are limited to 1 dimension.
6363
64-
They may be backed by none, one, or many NumPy ararys. For example,
64+
They may be backed by none, one, or many NumPy arrays. For example,
6565
``pandas.Categorical`` is an extension array backed by two arrays,
6666
one for codes and one for categories. An array of IPv6 addresses may
6767
be backed by a NumPy structured array with two fields, one for the
6868
lower 64 bits and one for the upper 64 bits. Or they may be backed
6969
by some other storage type, like Python lists. Pandas makes no
7070
assumptions on how the data are stored, just that it can be converted
7171
to a NumPy array.
72+
The ExtensionArray interface does not impose any rules on how this data
73+
is stored. However, currently, the backing data cannot be stored in
74+
attributes called ``.values`` or ``._values`` to ensure full compatibility
75+
with pandas internals. Other names, such as ``.data``, ``._data``,
76+
``._items``, ... can be freely used.
7277
"""
7378
# '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray.
7479
# Don't override this.

pandas/tests/extension/base/interface.py

+6
Original file line number | Diff line number | Diff line change
@@ -50,3 +50,9 @@ def test_is_extension_array_dtype(self, data):
5050
assert is_extension_array_dtype(data.dtype)
5151
assert is_extension_array_dtype(pd.Series(data))
5252
assert isinstance(data.dtype, ExtensionDtype)
53+
54+
def test_no_values_attribute(self, data):
55+
# GH-20735: EA's with .values attribute give problems with internal
56+
# code, disallowing this for now until solved
57+
assert not hasattr(data, 'values')
58+
assert not hasattr(data, '_values')

pandas/tests/extension/decimal/array.py

+14 -11
Original file line number | Diff line number | Diff line change
@@ -30,10 +30,13 @@ class DecimalArray(ExtensionArray):
3030
def __init__(self, values):
3131
values = np.asarray(values, dtype=object)
3232

33-
self.values = values
33+
self._data = values
3434
# Some aliases for common attribute names to ensure pandas supports
3535
# these
36-
self._items = self._data = self.data = self.values
36+
self._items = self.data = self._data
37+
# those aliases are currently not working due to assumptions
38+
# in internal code (GH-20735)
39+
# self._values = self.values = self.data
3740

3841
@classmethod
3942
def _from_sequence(cls, scalars):
@@ -45,27 +48,27 @@ def _from_factorized(cls, values, original):
4548

4649
def __getitem__(self, item):
4750
if isinstance(item, numbers.Integral):
48-
return self.values[item]
51+
return self._data[item]
4952
else:
50-
return type(self)(self.values[item])
53+
return type(self)(self._data[item])
5154

5255
def copy(self, deep=False):
5356
if deep:
54-
return type(self)(self.values.copy())
57+
return type(self)(self._data.copy())
5558
return type(self)(self)
5659

5760
def __setitem__(self, key, value):
5861
if pd.api.types.is_list_like(value):
5962
value = [decimal.Decimal(v) for v in value]
6063
else:
6164
value = decimal.Decimal(value)
62-
self.values[key] = value
65+
self._data[key] = value
6366

6467
def __len__(self):
65-
return len(self.values)
68+
return len(self._data)
6669

6770
def __repr__(self):
68-
return 'DecimalArray({!r})'.format(self.values)
71+
return 'DecimalArray({!r})'.format(self._data)
6972

7073
@property
7174
def nbytes(self):
@@ -75,7 +78,7 @@ def nbytes(self):
7578
return 0
7679

7780
def isna(self):
78-
return np.array([x.is_nan() for x in self.values])
81+
return np.array([x.is_nan() for x in self._data])
7982

8083
def take(self, indexer, allow_fill=True, fill_value=None):
8184
indexer = np.asarray(indexer)
@@ -86,7 +89,7 @@ def take(self, indexer, allow_fill=True, fill_value=None):
8689
return type(self)([self._na_value] * len(indexer))
8790

8891
indexer = _ensure_platform_int(indexer)
89-
out = self.values.take(indexer)
92+
out = self._data.take(indexer)
9093
out[mask] = self._na_value
9194

9295
return type(self)(out)
@@ -97,7 +100,7 @@ def _na_value(self):
97100

98101
@classmethod
99102
def _concat_same_type(cls, to_concat):
100-
return cls(np.concatenate([x.values for x in to_concat]))
103+
return cls(np.concatenate([x._data for x in to_concat]))
101104

102105

103106
def make_data():

pandas/tests/extension/json/array.py

+7
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,13 @@ def __init__(self, values):
3333
raise TypeError
3434
self.data = values
3535

36+
# Some aliases for common attribute names to ensure pandas supports
37+
# these
38+
self._items = self._data = self.data
39+
# those aliases are currently not working due to assumptions
40+
# in internal code (GH-20735)
41+
# self._values = self.values = self.data
42+
3643
@classmethod
3744
def _from_sequence(cls, scalars):
3845
return cls(scalars)

0 commit comments

Comments
 (0)