diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 8b2b3a09f8c87..59f69d5e656c1 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -99,6 +99,8 @@ Removal of prior version deprecations/changes - :meth:`pandas.Series.str.cat` does not accept list-likes *within* list-likes anymore (:issue:`27611`) - Removed the previously deprecated :meth:`ExtensionArray._formatting_values`. Use :attr:`ExtensionArray._formatter` instead. (:issue:`23601`) - Removed the previously deprecated ``IntervalIndex.from_intervals`` in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) +- Ability to read pickles containing :class:`Categorical` instances created with pre-0.16 versions of pandas has been removed (:issue:`27538`) +- .. _whatsnew_1000.performance: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 5929a8d51fe43..c81bcd491ff5d 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1350,24 +1350,7 @@ def __setstate__(self, state): if not isinstance(state, dict): raise Exception("invalid pickle state") - # Provide compatibility with pre-0.15.0 Categoricals. 
- if "_categories" not in state and "_levels" in state: - state["_categories"] = self.dtype.validate_categories(state.pop("_levels")) - if "_codes" not in state and "labels" in state: - state["_codes"] = coerce_indexer_dtype( - state.pop("labels"), state["_categories"] - ) - - # 0.16.0 ordered change - if "_ordered" not in state: - - # >=15.0 < 0.16.0 - if "ordered" in state: - state["_ordered"] = state.pop("ordered") - else: - state["_ordered"] = False - - # 0.21.0 CategoricalDtype change + # compat with pre 0.21.0 CategoricalDtype change if "_dtype" not in state: state["_dtype"] = CategoricalDtype(state["_categories"], state["_ordered"]) diff --git a/pandas/tests/io/data/categorical.0.25.0.pickle b/pandas/tests/io/data/categorical.0.25.0.pickle new file mode 100644 index 0000000000000..b756060c83d94 Binary files /dev/null and b/pandas/tests/io/data/categorical.0.25.0.pickle differ diff --git a/pandas/tests/io/data/categorical_0_14_1.pickle b/pandas/tests/io/data/categorical_0_14_1.pickle deleted file mode 100644 index 94f882b2f3027..0000000000000 --- a/pandas/tests/io/data/categorical_0_14_1.pickle +++ /dev/null @@ -1,94 +0,0 @@ -ccopy_reg -_reconstructor -p0 -(cpandas.core.categorical -Categorical -p1 -c__builtin__ -object -p2 -Ntp3 -Rp4 -(dp5 -S'_levels' -p6 -cnumpy.core.multiarray -_reconstruct -p7 -(cpandas.core.index -Index -p8 -(I0 -tp9 -S'b' -p10 -tp11 -Rp12 -((I1 -(I4 -tp13 -cnumpy -dtype -p14 -(S'O8' -p15 -I0 -I1 -tp16 -Rp17 -(I3 -S'|' -p18 -NNNI-1 -I-1 -I63 -tp19 -bI00 -(lp20 -S'a' -p21 -ag10 -aS'c' -p22 -aS'd' -p23 -atp24 -(Ntp25 -tp26 -bsS'labels' -p27 -g7 -(cnumpy -ndarray -p28 -(I0 -tp29 -g10 -tp30 -Rp31 -(I1 -(I3 -tp32 -g14 -(S'i8' -p33 -I0 -I1 -tp34 -Rp35 -(I3 -S'<' -p36 -NNNI-1 -I-1 -I0 -tp37 -bI00 -S'\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00' -p38 -tp39 -bsS'name' -p40 -S'foobar' -p41 -sb. 
\ No newline at end of file diff --git a/pandas/tests/io/data/categorical_0_15_2.pickle b/pandas/tests/io/data/categorical_0_15_2.pickle deleted file mode 100644 index 25cd862976cab..0000000000000 Binary files a/pandas/tests/io/data/categorical_0_15_2.pickle and /dev/null differ diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 8e09e96fbd471..655fd9d01c1c0 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -222,7 +222,7 @@ def test_read_expands_user_home_dir( (pd.read_sas, "os", ("io", "sas", "data", "test1.sas7bdat")), (pd.read_json, "os", ("io", "json", "data", "tsframe_v012.json")), (pd.read_msgpack, "os", ("io", "msgpack", "data", "frame.mp")), - (pd.read_pickle, "os", ("io", "data", "categorical_0_14_1.pickle")), + (pd.read_pickle, "os", ("io", "data", "categorical.0.25.0.pickle")), ], ) def test_read_fspath_all(self, reader, module, path, datapath): diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 30555508f0998..9fbb4dbcb581e 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -194,38 +194,6 @@ def python_unpickler(path): compare_element(result, expected, typ) -def test_pickle_v0_14_1(datapath): - - cat = pd.Categorical( - values=["a", "b", "c"], ordered=False, categories=["a", "b", "c", "d"] - ) - pickle_path = datapath("io", "data", "categorical_0_14_1.pickle") - # This code was executed once on v0.14.1 to generate the pickle: - # - # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], - # name='foobar') - # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) - # - tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) - - -def test_pickle_v0_15_2(datapath): - # ordered -> _ordered - # GH 9347 - - cat = pd.Categorical( - values=["a", "b", "c"], ordered=False, categories=["a", "b", "c", "d"] - ) - pickle_path = datapath("io", "data", "categorical_0_15_2.pickle") - # This code was executed once on 
v0.15.2 to generate the pickle: - # - # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], - # name='foobar') - # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) - # - tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) - - def test_pickle_path_pathlib(): df = tm.makeDataFrame() result = tm.round_trip_pathlib(df.to_pickle, pd.read_pickle)