From ede98ef0346da9d580451d5848e4fc148d14ede5 Mon Sep 17 00:00:00 2001
From: Brock
Date: Thu, 5 Nov 2020 16:37:39 -0800
Subject: [PATCH] BUG: unpickling modifies Block.ndim

---
 doc/source/whatsnew/v1.2.0.rst    |  2 +-
 pandas/core/internals/managers.py |  9 ++++++---
 pandas/tests/io/test_pickle.py    | 12 ++++++++++++
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 690e6b8f725ad..41dc84a0a99f2 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -380,7 +380,7 @@ Categorical
 ^^^^^^^^^^^
 - :meth:`Categorical.fillna` will always return a copy, will validate a passed fill value regardless of whether there are any NAs to fill, and will disallow a ``NaT`` as a fill value for numeric categories (:issue:`36530`)
 - Bug in :meth:`Categorical.__setitem__` that incorrectly raised when trying to set a tuple value (:issue:`20439`)
--
+- Bug in indexing on a :class:`Series` with ``CategoricalDtype`` after unpickling (:issue:`37631`)
 
 Datetimelike
 ^^^^^^^^^^^^
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index a06d57e268fe2..4a0351659b052 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -278,14 +278,17 @@ def __getstate__(self):
         return axes_array, block_values, block_items, extra_state
 
     def __setstate__(self, state):
-        def unpickle_block(values, mgr_locs):
-            return make_block(values, placement=mgr_locs)
+        def unpickle_block(values, mgr_locs, ndim: int):
+            # TODO(EA2D): ndim would be unnecessary with 2D EAs
+            return make_block(values, placement=mgr_locs, ndim=ndim)
 
         if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]:
             state = state[3]["0.14.1"]
             self.axes = [ensure_index(ax) for ax in state["axes"]]
+            ndim = len(self.axes)
             self.blocks = tuple(
-                unpickle_block(b["values"], b["mgr_locs"]) for b in state["blocks"]
+                unpickle_block(b["values"], b["mgr_locs"], ndim=ndim)
+                for b in state["blocks"]
             )
         else:
             raise NotImplementedError("pre-0.14.1 pickles are no longer supported")
diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
index 925f6b5f125c7..34b36e2549b62 100644
--- a/pandas/tests/io/test_pickle.py
+++ b/pandas/tests/io/test_pickle.py
@@ -576,3 +576,15 @@ def test_pickle_datetimes(datetime_series):
 def test_pickle_strings(string_series):
     unp_series = tm.round_trip_pickle(string_series)
     tm.assert_series_equal(unp_series, string_series)
+
+
+def test_pickle_preserves_block_ndim():
+    # GH#37631
+    ser = Series(list("abc")).astype("category").iloc[[0]]
+    res = tm.round_trip_pickle(ser)
+
+    assert res._mgr.blocks[0].ndim == 1
+    assert res._mgr.blocks[0].shape == (1,)
+
+    # GH#37631 OP issue was about indexing, underlying problem was pickle
+    tm.assert_series_equal(res[[True]], ser)