diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index d6ad5eb2003ce..a9d697adc301f 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -528,6 +528,7 @@ I/O - Bug in :func:`read_json` not handling non-numpy dtypes correctly (especially ``category``) (:issue:`21892`, :issue:`33205`) - Bug in :func:`json_normalize` where multi-character ``sep`` parameter is incorrectly prefixed to every key (:issue:`43831`) - Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`) +- Bug in dumping/loading a :class:`DataFrame` with ``yaml.dump(frame)`` (:issue:`42748`) - Period diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 78853ce6e41dc..957432df20395 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -565,6 +565,14 @@ cpdef update_blklocs_and_blknos( return new_blklocs, new_blknos +def _unpickle_block(values, placement, ndim): + # We have to do some gymnastics b/c "ndim" is keyword-only + + from pandas.core.internals.blocks import new_block + + return new_block(values, placement, ndim=ndim) + + @cython.freelist(64) cdef class SharedBlock: """ @@ -588,14 +596,8 @@ cdef class SharedBlock: self.ndim = ndim cpdef __reduce__(self): - # We have to do some gymnastics b/c "ndim" is keyword-only - from functools import partial - - from pandas.core.internals.blocks import new_block - - args = (self.values, self.mgr_locs.indexer) - func = partial(new_block, ndim=self.ndim) - return func, args + args = (self.values, self.mgr_locs.indexer, self.ndim) + return _unpickle_block, args cpdef __setstate__(self, state): from pandas.core.construction import extract_array diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 583a22d09b110..9344aea8221d5 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -170,7 +170,12 @@ def set_axis(self, axis: int, new_labels: Index) -> None: def get_dtypes(self): return np.array([arr.dtype for arr in self.arrays], dtype="object") - # TODO setstate getstate + def __getstate__(self): + return self.arrays, self._axes + + def __setstate__(self, state): + self.arrays = state[0] + self._axes = state[1] def __repr__(self) -> str: output = type(self).__name__ diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index a8b05e3178197..f927a0ec0927b 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -167,6 +167,19 @@ def test_pyarrow(df): tm.assert_frame_equal(result, df) +def test_yaml_dump(df): + # GH#42748 + yaml = import_module("yaml") + + dumped = yaml.dump(df) + + loaded = yaml.load(dumped, Loader=yaml.Loader) + tm.assert_frame_equal(df, loaded) + + loaded2 = yaml.load(dumped, Loader=yaml.UnsafeLoader) + tm.assert_frame_equal(df, loaded2) + + def test_missing_required_dependency(): # GH 23868 # To ensure proper isolation, we pass these flags