From 8559a24271e5ed474e423e3d518f7accf6212aa2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 4 Aug 2020 15:08:40 -0500 Subject: [PATCH 1/2] DOC: Document that read_hdf can use pickle --- doc/source/user_guide/io.rst | 9 ++++--- pandas/io/pytables.py | 51 +++++++++++++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index d4be9d802d697..cc42f952b1733 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3441,10 +3441,11 @@ for some advanced strategies .. warning:: - pandas requires ``PyTables`` >= 3.0.0. - There is a indexing bug in ``PyTables`` < 3.2 which may appear when querying stores using an index. - If you see a subset of results being returned, upgrade to ``PyTables`` >= 3.2. - Stores created previously will need to be rewritten using the updated version. + Pandas uses PyTables for reading and writing HDF5 files, which allows + serializing object-dtype data with pickle. Loading pickled data received from + untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html for more. .. ipython:: python :suppress: diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index e0df4c29e543e..b8de8a8fbfa48 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -289,7 +289,15 @@ def read_hdf( Read from the store, close it if we opened it. Retrieve pandas object stored in file, optionally based on where - criteria + criteria. + + .. warning:: + + Pandas uses PyTables for reading and writing HDF5 files, which allows + serializing object-dtype data with pickle. Loading pickled data received + from untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html for more. Parameters ---------- @@ -445,6 +453,14 @@ class HDFStore: Either Fixed or Table format. + .. warning:: + + Pandas uses PyTables for reading and writing HDF5 files, which allows + serializing object-dtype data with pickle. Loading pickled data received + from untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html for more. + Parameters ---------- path : str @@ -789,6 +805,14 @@ def select( """ Retrieve pandas object stored in file, optionally based on where criteria. + .. warning:: + + Pandas uses PyTables for reading and writing HDF5 files, which allows + serializing object-dtype data with pickle. Loading pickled data received + from untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html for more. + Parameters ---------- key : str @@ -852,6 +876,15 @@ def select_as_coordinates( """ return the selection as an Index + .. warning:: + + Pandas uses PyTables for reading and writing HDF5 files, which allows + serializing object-dtype data with pickle. Loading pickled data received + from untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html for more. + + Parameters ---------- key : str @@ -876,6 +909,14 @@ def select_column( return a single column from the table. This is generally only useful to select an indexable + .. warning:: + + Pandas uses PyTables for reading and writing HDF5 files, which allows + serializing object-dtype data with pickle. Loading pickled data received + from untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html for more. + Parameters ---------- key : str @@ -912,6 +953,14 @@ def select_as_multiple( """ Retrieve pandas objects from multiple tables. + .. warning:: + + Pandas uses PyTables for reading and writing HDF5 files, which allows + serializing object-dtype data with pickle. Loading pickled data received + from untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html for more. + Parameters ---------- keys : a list of the tables From 52ee3f096e58a1d67440bc9a7af23113b04912e4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 5 Aug 2020 08:32:43 -0500 Subject: [PATCH 2/2] fixed note --- pandas/io/pytables.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index b8de8a8fbfa48..6497067e3930c 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -294,8 +294,8 @@ def read_hdf( .. warning:: Pandas uses PyTables for reading and writing HDF5 files, which allows - serializing object-dtype data with pickle. Loading pickled data received - from untrusted sources can be unsafe. + serializing object-dtype data with pickle when using the "fixed" format. + Loading pickled data received from untrusted sources can be unsafe. See: https://docs.python.org/3/library/pickle.html for more. @@ -456,8 +456,8 @@ class HDFStore: .. warning:: Pandas uses PyTables for reading and writing HDF5 files, which allows - serializing object-dtype data with pickle. Loading pickled data received - from untrusted sources can be unsafe. + serializing object-dtype data with pickle when using the "fixed" format. + Loading pickled data received from untrusted sources can be unsafe. See: https://docs.python.org/3/library/pickle.html for more. @@ -808,8 +808,8 @@ def select( .. warning:: Pandas uses PyTables for reading and writing HDF5 files, which allows - serializing object-dtype data with pickle. Loading pickled data received - from untrusted sources can be unsafe. + serializing object-dtype data with pickle when using the "fixed" format. + Loading pickled data received from untrusted sources can be unsafe. See: https://docs.python.org/3/library/pickle.html for more. @@ -879,8 +879,8 @@ def select_as_coordinates( .. warning:: Pandas uses PyTables for reading and writing HDF5 files, which allows - serializing object-dtype data with pickle. Loading pickled data received - from untrusted sources can be unsafe. + serializing object-dtype data with pickle when using the "fixed" format. + Loading pickled data received from untrusted sources can be unsafe. See: https://docs.python.org/3/library/pickle.html for more. @@ -912,8 +912,8 @@ def select_column( .. warning:: Pandas uses PyTables for reading and writing HDF5 files, which allows - serializing object-dtype data with pickle. Loading pickled data received - from untrusted sources can be unsafe. + serializing object-dtype data with pickle when using the "fixed" format. + Loading pickled data received from untrusted sources can be unsafe. See: https://docs.python.org/3/library/pickle.html for more. @@ -956,8 +956,8 @@ def select_as_multiple( .. warning:: Pandas uses PyTables for reading and writing HDF5 files, which allows - serializing object-dtype data with pickle. Loading pickled data received - from untrusted sources can be unsafe. + serializing object-dtype data with pickle when using the "fixed" format. + Loading pickled data received from untrusted sources can be unsafe. See: https://docs.python.org/3/library/pickle.html for more.