Skip to content

Commit bee30f3

Browse files
linebp authored and pcluo committed
Unblock supported compression libs in pytables (pandas-dev#16196)
1 parent ee4810c commit bee30f3

File tree

4 files changed

+62
-18
lines changed

4 files changed

+62
-18
lines changed

doc/source/whatsnew/v0.20.2.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ Highlights include:
1919
Enhancements
2020
~~~~~~~~~~~~
2121

22-
22+
- Unblocked access to additional compression types supported in pytables: 'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd' (:issue:`14478`)
2323

2424
.. _whatsnew_0202.performance:
2525

pandas/core/generic.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -1266,12 +1266,17 @@ def to_hdf(self, path_or_buf, key, **kwargs):
12661266
<http://pandas.pydata.org/pandas-docs/stable/io.html#query-via-data-columns>`__.
12671267
12681268
Applicable only to format='table'.
1269-
complevel : int, 1-9, default 0
1270-
If a complib is specified compression will be applied
1271-
where possible
1272-
complib : {'zlib', 'bzip2', 'lzo', 'blosc', None}, default None
1273-
If complevel is > 0 apply compression to objects written
1274-
in the store wherever possible
1269+
complevel : int, 0-9, default 0
1270+
Specifies a compression level for data.
1271+
A value of 0 disables compression.
1272+
complib : {'zlib', 'lzo', 'bzip2', 'blosc', None}, default None
1273+
Specifies the compression library to be used.
1274+
As of v0.20.2 these additional compressors for Blosc are supported
1275+
(default if no compressor specified: 'blosc:blosclz'):
1276+
{'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy',
1277+
'blosc:zlib', 'blosc:zstd'}.
1278+
Specifying a compression library which is not available raises
1279+
a ValueError.
12751280
fletcher32 : bool, default False
12761281
If applying compression use the fletcher32 checksum
12771282
dropna : boolean, default False.

pandas/io/pytables.py

+15-9
Original file line numberDiff line numberDiff line change
@@ -402,12 +402,17 @@ class HDFStore(StringMixin):
402402
and if the file does not exist it is created.
403403
``'r+'``
404404
It is similar to ``'a'``, but the file must already exist.
405-
complevel : int, 1-9, default 0
406-
If a complib is specified compression will be applied
407-
where possible
408-
complib : {'zlib', 'bzip2', 'lzo', 'blosc', None}, default None
409-
If complevel is > 0 apply compression to objects written
410-
in the store wherever possible
405+
complevel : int, 0-9, default 0
406+
Specifies a compression level for data.
407+
A value of 0 disables compression.
408+
complib : {'zlib', 'lzo', 'bzip2', 'blosc', None}, default None
409+
Specifies the compression library to be used.
410+
As of v0.20.2 these additional compressors for Blosc are supported
411+
(default if no compressor specified: 'blosc:blosclz'):
412+
{'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy',
413+
'blosc:zlib', 'blosc:zstd'}.
414+
Specifying a compression library which is not available raises
415+
a ValueError.
411416
fletcher32 : bool, default False
412417
If applying compression use the fletcher32 checksum
413418
@@ -430,9 +435,10 @@ def __init__(self, path, mode=None, complevel=None, complib=None,
430435
raise ImportError('HDFStore requires PyTables, "{ex}" problem '
431436
'importing'.format(ex=str(ex)))
432437

433-
if complib not in (None, 'blosc', 'bzip2', 'lzo', 'zlib'):
434-
raise ValueError("complib only supports 'blosc', 'bzip2', lzo' "
435-
"or 'zlib' compression.")
438+
if complib is not None and complib not in tables.filters.all_complibs:
439+
raise ValueError(
440+
"complib only supports {libs} compression.".format(
441+
libs=tables.filters.all_complibs))
436442

437443
self._path = path
438444
if mode is None:

pandas/tests/io/test_pytables.py

+35-2
Original file line numberDiff line numberDiff line change
@@ -734,6 +734,39 @@ def test_put_compression_blosc(self):
734734
store.put('c', df, format='table', complib='blosc')
735735
tm.assert_frame_equal(store['c'], df)
736736

737+
def test_complibs(self):
738+
# GH14478
739+
df = tm.makeDataFrame()
740+
741+
# Building list of all complibs and complevels tuples
742+
all_complibs = tables.filters.all_complibs
743+
# Remove lzo if it's not available on this platform
744+
if not tables.which_lib_version('lzo'):
745+
all_complibs.remove('lzo')
746+
all_levels = range(0, 10)
747+
all_tests = [(lib, lvl) for lib in all_complibs for lvl in all_levels]
748+
749+
for (lib, lvl) in all_tests:
750+
with ensure_clean_path(self.path) as tmpfile:
751+
gname = 'foo'
752+
753+
# Write and read file to see if data is consistent
754+
df.to_hdf(tmpfile, gname, complib=lib, complevel=lvl)
755+
result = pd.read_hdf(tmpfile, gname)
756+
tm.assert_frame_equal(result, df)
757+
758+
# Open file and check metadata
759+
# for correct amount of compression
760+
h5table = tables.open_file(tmpfile, mode='r')
761+
for node in h5table.walk_nodes(where='/' + gname,
762+
classname='Leaf'):
763+
assert node.filters.complevel == lvl
764+
if lvl == 0:
765+
assert node.filters.complib is None
766+
else:
767+
assert node.filters.complib == lib
768+
h5table.close()
769+
737770
def test_put_integer(self):
738771
# non-date, non-string index
739772
df = DataFrame(np.random.randn(50, 100))
@@ -4939,8 +4972,8 @@ def test_invalid_complib(self):
49394972
index=list('abcd'),
49404973
columns=list('ABCDE'))
49414974
with ensure_clean_path(self.path) as path:
4942-
pytest.raises(ValueError, df.to_hdf, path,
4943-
'df', complib='blosc:zlib')
4975+
with pytest.raises(ValueError):
4976+
df.to_hdf(path, 'df', complib='foolib')
49444977
# GH10443
49454978

49464979
def test_read_nokey(self):

0 commit comments

Comments
 (0)