Skip to content

Commit 33cfaea

Browse files
committed
BUG: invalid column names in an HDF5 table format
Have DataFrame.to_hdf() raise a ValueError when writing the PyTables table format with non-string column names, instead of failing later with a seemingly unrelated error. Fixes #9057
1 parent 82387a6 commit 33cfaea

File tree

3 files changed

+44
-1
lines changed

3 files changed

+44
-1
lines changed

doc/source/whatsnew/v0.16.2.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,6 @@ Bug Fixes
9292
- Bug in ``SparseSeries`` constructor ignores input data name (:issue:`10258`)
9393

9494
- Bug where infer_freq infers timerule (WOM-5XXX) unsupported by to_offset (:issue:`9425`)
95-
95+
- Bug in ``DataFrame.to_hdf()`` where table format would raise a seemingly unrelated error for invalid (non-string) column names. This is now explicitly forbidden. (:issue:`9057`)
9696
- Bug to handle masking empty ``DataFrame``(:issue:`10126`)
9797
- Bug where MySQL interface could not handle numeric table/column names (:issue:`10255`)

pandas/io/pytables.py

+14
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ def _tables():
257257
def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None,
258258
append=None, **kwargs):
259259
""" store this object, close it if we opened it """
260+
260261
if append:
261262
f = lambda store: store.append(key, value, **kwargs)
262263
else:
@@ -1535,6 +1536,12 @@ def maybe_set_size(self, min_itemsize=None, **kwargs):
15351536
self.typ = _tables(
15361537
).StringCol(itemsize=min_itemsize, pos=self.pos)
15371538

1539+
def validate(self, handler, append, **kwargs):
1540+
self.validate_names()
1541+
1542+
def validate_names(self):
1543+
pass
1544+
15381545
def validate_and_set(self, handler, append, **kwargs):
15391546
self.set_table(handler.table)
15401547
self.validate_col()
@@ -2080,6 +2087,10 @@ class DataIndexableCol(DataCol):
20802087
""" represent a data column that can be indexed """
20812088
is_data_indexable = True
20822089

2090+
def validate_names(self):
2091+
if not Index(self.values).is_object():
2092+
raise ValueError("cannot have non-object label DataIndexableCol")
2093+
20832094
def get_atom_string(self, block, itemsize):
20842095
return _tables().StringCol(itemsize=itemsize)
20852096

@@ -3756,6 +3767,9 @@ def write(self, obj, axes=None, append=False, complib=None,
37563767
min_itemsize=min_itemsize,
37573768
**kwargs)
37583769

3770+
for a in self.axes:
3771+
a.validate(self, append)
3772+
37593773
if not self.is_exists:
37603774

37613775
# create the table

pandas/io/tests/test_pytables.py

+29
Original file line numberDiff line numberDiff line change
@@ -4640,6 +4640,35 @@ def test_colums_multiindex_modified(self):
46404640
df_loaded = read_hdf(path, 'df', columns=cols2load)
46414641
self.assertTrue(cols2load_original == cols2load)
46424642

4643+
def test_to_hdf_with_object_column_names(self):
4644+
# GH9057
4645+
# Writing HDF5 table format should only work for string-like
4646+
# column types
4647+
4648+
types_should_fail = [ tm.makeIntIndex, tm.makeFloatIndex,
4649+
tm.makeDateIndex, tm.makeTimedeltaIndex,
4650+
tm.makePeriodIndex ]
4651+
types_should_run = [ tm.makeStringIndex, tm.makeCategoricalIndex ]
4652+
4653+
if compat.PY3:
4654+
types_should_run.append(tm.makeUnicodeIndex)
4655+
else:
4656+
types_should_fail.append(tm.makeUnicodeIndex)
4657+
4658+
for index in types_should_fail:
4659+
df = DataFrame(np.random.randn(10, 2), columns=index(2))
4660+
with ensure_clean_path(self.path) as path:
4661+
with self.assertRaises(ValueError,
4662+
msg="cannot have non-object label DataIndexableCol"):
4663+
df.to_hdf(path, 'df', format='table', data_columns=True)
4664+
4665+
for index in types_should_run:
4666+
df = DataFrame(np.random.randn(10, 2), columns=index(2))
4667+
with ensure_clean_path(self.path) as path:
4668+
df.to_hdf(path, 'df', format='table', data_columns=True)
4669+
result = pd.read_hdf(path, 'df', where="index = [{0}]".format(df.index[0]))
4670+
assert(len(result))
4671+
46434672

46444673
def _test_sort(obj):
46454674
if isinstance(obj, DataFrame):

0 commit comments

Comments
 (0)