Skip to content

Commit 989f912

Browse files
authored
DEPR: deprecate msgpack support (pandas-dev#27103)
* DEPR: deprecate msgpack support closes pandas-dev#27084 * warnings in docs * review comments
1 parent d050791 commit 989f912

File tree

7 files changed

+80
-25
lines changed

7 files changed

+80
-25
lines changed

doc/source/user_guide/io.rst

+10-4
Original file line numberDiff line numberDiff line change
@@ -3393,15 +3393,15 @@ both on the writing (serialization), and reading (deserialization).
33933393

33943394
.. warning::
33953395

3396-
This is a very new feature of pandas. We intend to provide certain
3397-
optimizations in the io of the ``msgpack`` data. Since this is marked
3398-
as an EXPERIMENTAL LIBRARY, the storage format may not be stable until a future release.
3396+
The msgpack format is deprecated as of 0.25 and will be removed in a future version.
3397+
It is recommended to use pyarrow for on-the-wire transmission of pandas objects.
33993398

34003399
.. warning::
34013400

34023401
:func:`read_msgpack` is only guaranteed backwards compatible back to pandas version 0.20.3
34033402

34043403
.. ipython:: python
3404+
:okwarning:
34053405
34063406
df = pd.DataFrame(np.random.rand(5, 2), columns=list('AB'))
34073407
df.to_msgpack('foo.msg')
@@ -3411,20 +3411,23 @@ both on the writing (serialization), and reading (deserialization).
34113411
You can pass a list of objects and you will receive them back on deserialization.
34123412

34133413
.. ipython:: python
3414+
:okwarning:
34143415
34153416
pd.to_msgpack('foo.msg', df, 'foo', np.array([1, 2, 3]), s)
34163417
pd.read_msgpack('foo.msg')
34173418
34183419
You can pass ``iterator=True`` to iterate over the unpacked results:
34193420

34203421
.. ipython:: python
3422+
:okwarning:
34213423
34223424
for o in pd.read_msgpack('foo.msg', iterator=True):
34233425
print(o)
34243426
34253427
You can pass ``append=True`` to the writer to append to an existing pack:
34263428

34273429
.. ipython:: python
3430+
:okwarning:
34283431
34293432
df.to_msgpack('foo.msg', append=True)
34303433
pd.read_msgpack('foo.msg')
@@ -3435,6 +3438,7 @@ can pack arbitrary collections of Python lists, dicts, scalars, while intermixin
34353438
pandas objects.
34363439

34373440
.. ipython:: python
3441+
:okwarning:
34383442
34393443
pd.to_msgpack('foo2.msg', {'dict': [{'df': df}, {'string': 'foo'},
34403444
{'scalar': 1.}, {'s': s}]})
@@ -3453,14 +3457,16 @@ Read/write API
34533457
Msgpacks can also be read from and written to strings.
34543458

34553459
.. ipython:: python
3460+
:okwarning:
34563461
34573462
df.to_msgpack()
34583463
34593464
Furthermore you can concatenate the strings to produce a list of the original objects.
34603465

34613466
.. ipython:: python
3467+
:okwarning:
34623468
3463-
pd.read_msgpack(df.to_msgpack() + s.to_msgpack())
3469+
pd.read_msgpack(df.to_msgpack() + s.to_msgpack())
34643470
34653471
.. _io.hdf5:
34663472

doc/source/whatsnew/v0.13.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -829,6 +829,7 @@ Experimental
829829
Since this is an EXPERIMENTAL LIBRARY, the storage format may not be stable until a future release.
830830

831831
.. ipython:: python
832+
:okwarning:
832833
833834
df = pd.DataFrame(np.random.rand(5, 2), columns=list('AB'))
834835
df.to_msgpack('foo.msg')
@@ -841,6 +842,7 @@ Experimental
841842
You can pass ``iterator=True`` to iterate over the unpacked results
842843

843844
.. ipython:: python
845+
:okwarning:
844846
845847
for o in pd.read_msgpack('foo.msg', iterator=True):
846848
print(o)

doc/source/whatsnew/v0.25.0.rst

+6
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,12 @@ by a ``Series`` or ``DataFrame`` with sparse values.
590590
591591
The memory usage of the two approaches is identical. See :ref:`sparse.migration` for more (:issue:`19239`).
592592

593+
msgpack format
594+
^^^^^^^^^^^^^^
595+
596+
The msgpack format is deprecated as of 0.25 and will be removed in a future version. It is recommended to use pyarrow for on-the-wire transmission of pandas objects. (:issue:`27084`)
597+
598+
593599
Other deprecations
594600
^^^^^^^^^^^^^^^^^^
595601

pandas/core/generic.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -2418,8 +2418,11 @@ def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs):
24182418
"""
24192419
Serialize object to input file path using msgpack format.
24202420
2421-
THIS IS AN EXPERIMENTAL LIBRARY and the storage format
2422-
may not be stable until a future release.
2421+
.. deprecated:: 0.25.0
2422+
2423+
to_msgpack is deprecated and will be removed in a future version.
2424+
It is recommended to use pyarrow for on-the-wire transmission of
2425+
pandas objects.
24232426
24242427
Parameters
24252428
----------

pandas/io/packers.py

+22-4
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,11 @@ def to_msgpack(path_or_buf, *args, **kwargs):
7878
"""
7979
msgpack (serialize) object to input file path
8080
81-
THIS IS AN EXPERIMENTAL LIBRARY and the storage format
82-
may not be stable until a future release.
81+
.. deprecated:: 0.25.0
82+
83+
to_msgpack is deprecated and will be removed in a future version.
84+
It is recommended to use pyarrow for on-the-wire transmission of
85+
pandas objects.
8386
8487
Parameters
8588
----------
@@ -92,6 +95,12 @@ def to_msgpack(path_or_buf, *args, **kwargs):
9295
compress : type of compressor (zlib or blosc), default to None (no
9396
compression)
9497
"""
98+
warnings.warn("to_msgpack is deprecated and will be removed in a "
99+
"future version.\n"
100+
"It is recommended to use pyarrow for on-the-wire "
101+
"transmission of pandas objects.",
102+
FutureWarning, stacklevel=3)
103+
95104
global compressor
96105
compressor = kwargs.pop('compress', None)
97106
append = kwargs.pop('append', None)
@@ -121,8 +130,11 @@ def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs):
121130
Load msgpack pandas object from the specified
122131
file path
123132
124-
THIS IS AN EXPERIMENTAL LIBRARY and the storage format
125-
may not be stable until a future release.
133+
.. deprecated:: 0.25.0
134+
135+
read_msgpack is deprecated and will be removed in a future version.
136+
It is recommended to use pyarrow for on-the-wire transmission of
137+
pandas objects.
126138
127139
Parameters
128140
----------
@@ -140,6 +152,12 @@ def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs):
140152
read_msgpack is only guaranteed to be backwards compatible to pandas
141153
0.20.3.
142154
"""
155+
warnings.warn("The read_msgpack is deprecated and will be removed in a "
156+
"future version.\n"
157+
"It is recommended to use pyarrow for on-the-wire "
158+
"transmission of pandas objects.",
159+
FutureWarning, stacklevel=3)
160+
143161
path_or_buf, _, _, should_close = get_filepath_or_buffer(path_or_buf)
144162
if iterator:
145163
return Iterator(path_or_buf)

pandas/tests/io/test_common.py

+1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def __fspath__(self):
4545

4646
# https://github.com/cython/cython/issues/1720
4747
@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning")
48+
@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
4849
class TestCommonIOCapabilities:
4950
data1 = """index,A,B,C,D
5051
foo,2,3,4,5

pandas/tests/io/test_packers.py

+34-15
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import glob
33
from io import BytesIO
44
import os
5-
from warnings import catch_warnings
5+
from warnings import catch_warnings, filterwarnings
66

77
import numpy as np
88
import pytest
@@ -83,6 +83,7 @@ def check_arbitrary(a, b):
8383
assert(a == b)
8484

8585

86+
@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
8687
class TestPackers:
8788

8889
def setup_method(self, method):
@@ -97,6 +98,7 @@ def encode_decode(self, x, compress=None, **kwargs):
9798
return read_msgpack(p, **kwargs)
9899

99100

101+
@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
100102
class TestAPI(TestPackers):
101103

102104
def test_string_io(self):
@@ -159,6 +161,7 @@ def __init__(self):
159161
read_msgpack(path_or_buf=A())
160162

161163

164+
@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
162165
class TestNumpy(TestPackers):
163166

164167
def test_numpy_scalar_float(self):
@@ -277,6 +280,7 @@ def test_list_mixed(self):
277280
tm.assert_almost_equal(tuple(x), x_rec)
278281

279282

283+
@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
280284
class TestBasic(TestPackers):
281285

282286
def test_timestamp(self):
@@ -322,6 +326,7 @@ def test_intervals(self):
322326
assert i == i_rec
323327

324328

329+
@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
325330
class TestIndex(TestPackers):
326331

327332
def setup_method(self, method):
@@ -387,6 +392,7 @@ def categorical_index(self):
387392
tm.assert_frame_equal(result, df)
388393

389394

395+
@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
390396
class TestSeries(TestPackers):
391397

392398
def setup_method(self, method):
@@ -437,6 +443,7 @@ def test_basic(self):
437443
assert_series_equal(i, i_rec)
438444

439445

446+
@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
440447
class TestCategorical(TestPackers):
441448

442449
def setup_method(self, method):
@@ -460,6 +467,7 @@ def test_basic(self):
460467
assert_categorical_equal(i, i_rec)
461468

462469

470+
@pytest.mark.filterwarnings("ignore:msgpack:FutureWarning")
463471
class TestNDFrame(TestPackers):
464472

465473
def setup_method(self, method):
@@ -549,6 +557,7 @@ def test_dataframe_duplicate_column_names(self):
549557
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
550558
@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
551559
@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
560+
@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
552561
class TestSparse(TestPackers):
553562

554563
def _check_roundtrip(self, obj, comparator, **kwargs):
@@ -595,6 +604,7 @@ def test_sparse_frame(self):
595604
check_frame_type=True)
596605

597606

607+
@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
598608
class TestCompression(TestPackers):
599609
"""See https://github.com/pandas-dev/pandas/pull/9783
600610
"""
@@ -676,18 +686,21 @@ def decompress(ob):
676686
with monkeypatch.context() as m, \
677687
tm.assert_produces_warning(PerformanceWarning) as ws:
678688
m.setattr(compress_module, 'decompress', decompress)
679-
i_rec = self.encode_decode(self.frame, compress=compress)
680-
for k in self.frame.keys():
681-
682-
value = i_rec[k]
683-
expected = self.frame[k]
684-
assert_frame_equal(value, expected)
685-
# make sure that we can write to the new frames even though
686-
# we needed to copy the data
687-
for block in value._data.blocks:
688-
assert block.values.flags.writeable
689-
# mutate the data in some way
690-
block.values[0] += rhs[block.dtype]
689+
690+
with catch_warnings():
691+
filterwarnings('ignore', category=FutureWarning)
692+
i_rec = self.encode_decode(self.frame, compress=compress)
693+
for k in self.frame.keys():
694+
695+
value = i_rec[k]
696+
expected = self.frame[k]
697+
assert_frame_equal(value, expected)
698+
# make sure that we can write to the new frames even though
699+
# we needed to copy the data
700+
for block in value._data.blocks:
701+
assert block.values.flags.writeable
702+
# mutate the data in some way
703+
block.values[0] += rhs[block.dtype]
691704

692705
for w in ws:
693706
# check the messages from our warnings
@@ -715,14 +728,18 @@ def test_compression_warns_when_decompress_caches_blosc(self, monkeypatch):
715728
def _test_small_strings_no_warn(self, compress):
716729
empty = np.array([], dtype='uint8')
717730
with tm.assert_produces_warning(None):
718-
empty_unpacked = self.encode_decode(empty, compress=compress)
731+
with catch_warnings():
732+
filterwarnings('ignore', category=FutureWarning)
733+
empty_unpacked = self.encode_decode(empty, compress=compress)
719734

720735
tm.assert_numpy_array_equal(empty_unpacked, empty)
721736
assert empty_unpacked.flags.writeable
722737

723738
char = np.array([ord(b'a')], dtype='uint8')
724739
with tm.assert_produces_warning(None):
725-
char_unpacked = self.encode_decode(char, compress=compress)
740+
with catch_warnings():
741+
filterwarnings('ignore', category=FutureWarning)
742+
char_unpacked = self.encode_decode(char, compress=compress)
726743

727744
tm.assert_numpy_array_equal(char_unpacked, char)
728745
assert char_unpacked.flags.writeable
@@ -794,6 +811,7 @@ def test_readonly_axis_zlib_to_sql(self):
794811
assert_frame_equal(expected, result)
795812

796813

814+
@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
797815
class TestEncoding(TestPackers):
798816

799817
def setup_method(self, method):
@@ -839,6 +857,7 @@ def legacy_packer(request, datapath):
839857

840858

841859
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
860+
@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
842861
class TestMsgpack:
843862
"""
844863
How to add msgpack tests:

0 commit comments

Comments
 (0)