From 3b567aadb09be776cb473397e985bf6bbd4cecda Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 29 Nov 2019 10:47:43 +0100 Subject: [PATCH 1/2] BUG: Index.get_value implementation for ExtensionArray --- pandas/core/indexes/base.py | 32 ++++++++++--------- .../tests/extension/decimal/test_decimal.py | 21 ++++++++++++ 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 486cc0cd9032d..35839b60cd674 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4577,21 +4577,23 @@ def get_value(self, series, key): # use this, e.g. DatetimeIndex # Things like `Series._get_value` (via .at) pass the EA directly here. s = getattr(series, "_values", series) - if isinstance(s, (ExtensionArray, Index)) and is_scalar(key): - # GH 20882, 21257 - # Unify Index and ExtensionArray treatment - # First try to convert the key to a location - # If that fails, raise a KeyError if an integer - # index, otherwise, see if key is an integer, and - # try that - try: - iloc = self.get_loc(key) - return s[iloc] - except KeyError: - if len(self) > 0 and (self.holds_integer() or self.is_boolean()): - raise - elif is_integer(key): - return s[key] + if isinstance(s, ExtensionArray): + if is_scalar(key): + # GH 20882, 21257 + # First try to convert the key to a location + # If that fails, raise a KeyError if an integer + # index, otherwise, see if key is an integer, and + # try that + try: + iloc = self.get_loc(key) + return s[iloc] + except KeyError: + if len(self) > 0 and (self.holds_integer() or self.is_boolean()): + raise + elif is_integer(key): + return s[key] + else: + raise InvalidIndexError(key) s = com.values_from_object(series) k = com.values_from_object(key) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index ce819c13c4498..b5c3abd8ce8f6 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ 
b/pandas/tests/extension/decimal/test_decimal.py @@ -478,3 +478,24 @@ def DecimalArray__my_sum(self): s = pd.Series(DecimalArray(data)) result = s.groupby(np.array([0, 0, 0, 1, 1])).agg(lambda x: x.values.my_sum()) tm.assert_series_equal(result, expected, check_names=False) + + +def test_indexing_no_materialize(monkeypatch): + # See https://github.com/pandas-dev/pandas/issues/29708 + # Ensure that indexing operations do not materialize (convert to a numpy + # array) the ExtensionArray unnecessarily + + def DecimalArray__array__(self, dtype=None): + raise Exception("tried to convert a DecimalArray to a numpy array") + + monkeypatch.setattr(DecimalArray, "__array__", DecimalArray__array__, raising=False) + + data = make_data() + s = pd.Series(DecimalArray(data)) + df = pd.DataFrame({"a": s, "b": range(len(s))}) + + # ensure the following operations do not raise an error + s[s > 0.5] + df[s > 0.5] + s.at[0] + df.at[0, "a"] From 263e31a2ce6cf332ad10698c9bd7aae1f3191768 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 3 Dec 2019 11:52:19 +0100 Subject: [PATCH 2/2] use extract_array --- pandas/core/indexes/base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 35839b60cd674..1f7bba3a4ea34 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -69,6 +69,7 @@ from pandas.core.arrays import ExtensionArray from pandas.core.base import IndexOpsMixin, PandasObject import pandas.core.common as com +from pandas.core.construction import extract_array from pandas.core.indexers import maybe_convert_indices from pandas.core.indexes.frozen import FrozenList import pandas.core.missing as missing @@ -4576,7 +4577,7 @@ def get_value(self, series, key): # if we have something that is Index-like, then # use this, e.g. DatetimeIndex # Things like `Series._get_value` (via .at) pass the EA directly here. 
- s = getattr(series, "_values", series) + s = extract_array(series, extract_numpy=True) if isinstance(s, ExtensionArray): if is_scalar(key): # GH 20882, 21257 @@ -4593,6 +4594,8 @@ def get_value(self, series, key): elif is_integer(key): return s[key] else: + # if key is not a scalar, directly raise an error (the code below + # would convert to numpy arrays and raise later anyway) - GH29926 raise InvalidIndexError(key) s = com.values_from_object(series)