From 6e63ea8162d906412db388a91d3f4f13f8f9fbc5 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 16 Mar 2023 11:46:04 -0700 Subject: [PATCH 1/2] BUG: pickling ArrowExtensionArray with _pa_array rename --- pandas/core/arrays/arrow/array.py | 8 ++++++-- pandas/tests/extension/test_arrow.py | 16 ++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index c07aee737934b..2cfed49926f15 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -433,11 +433,15 @@ def __abs__(self) -> Self: # https://issues.apache.org/jira/browse/ARROW-10739 is addressed def __getstate__(self): state = self.__dict__.copy() - state["_data"] = self._pa_array.combine_chunks() + state["_pa_array"] = self._pa_array.combine_chunks() return state def __setstate__(self, state) -> None: - state["_pa_array"] = pa.chunked_array(state["_data"]) + if "_data" in state: + data = state["_data"] + else: + data = state["_pa_array"] + state["_pa_array"] = pa.chunked_array(data) self.__dict__.update(state) def _cmp_method(self, other, op): diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index fcca2e4fa8e8e..cff9e99c9317e 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2351,3 +2351,19 @@ def test_concat_empty_arrow_backed_series(dtype): expected = ser.copy() result = pd.concat([ser[np.array([], dtype=np.bool_)]]) tm.assert_series_equal(result, expected) + + +# _data was renamed to _pa_array +class OldArrowExtensionArray(ArrowExtensionArray): + def __getstate__(self): + state = super().__getstate__() + state["_data"] = state.pop("_pa_array") + return state + + +def test_pickle_old_arrowextensionarray(): + data = pa.array([1]) + expected = OldArrowExtensionArray(data) + result = pickle.loads(pickle.dumps(expected)) + 
tm.assert_extension_array_equal(result, expected) + assert result._pa_array == pa.chunked_array(data) From 42c7fabdc5809ac12aafc5b257c25f7adeadbf48 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 16 Mar 2023 11:58:43 -0700 Subject: [PATCH 2/2] pop _data if exists --- pandas/core/arrays/arrow/array.py | 2 +- pandas/tests/extension/test_arrow.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 2cfed49926f15..551b925f42579 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -438,7 +438,7 @@ def __getstate__(self): def __setstate__(self, state) -> None: if "_data" in state: - data = state["_data"] + data = state.pop("_data") else: data = state["_pa_array"] state["_pa_array"] = pa.chunked_array(data) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index cff9e99c9317e..2378710555340 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2367,3 +2367,4 @@ def test_pickle_old_arrowextensionarray(): result = pickle.loads(pickle.dumps(expected)) tm.assert_extension_array_equal(result, expected) assert result._pa_array == pa.chunked_array(data) + assert not hasattr(result, "_data")