Skip to content

Commit 4c125e3

Browse files
committed
Used jreback's solution to pandas-dev#10098 and rewrote the test to be more comprehensive
1 parent bde903f commit 4c125e3

File tree

2 files changed

+38
-32
lines changed

2 files changed

+38
-32
lines changed

pandas/io/pytables.py

+13-23
Original file line number | Diff line number | Diff line change
@@ -258,29 +258,6 @@ def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None,
258258
append=None, **kwargs):
259259
""" store this object, close it if we opened it """
260260

261-
# PyTables has some limitations that we need to check for
262-
if kwargs.get('format', None) == 'table':
263-
if value.columns.dtype.kind == 'S':
264-
pass
265-
elif value.columns.dtype.kind == 'O':
266-
# PyTables doesn't support Unicode columns, but types 'O' can still
267-
# contain unicode values so check for that
268-
for col in value.columns:
269-
try:
270-
col.encode('ascii').decode('ascii')
271-
except UnicodeEncodeError:
272-
raise ValueError(
273-
"PyTables does not support UTF-8 column headers\n"
274-
"but header {} is at least not encodable to ASCII"
275-
.format(col)
276-
)
277-
else:
278-
raise ValueError(
279-
"Storing data in HDF with PyTables requires that the column\n"
280-
"types be of some string type or an object.\n"
281-
" Instead, your columns: {}\n"
282-
" are of type {}".format(value.columns, value.columns.dtype))
283-
284261
if append:
285262
f = lambda store: store.append(key, value, **kwargs)
286263
else:
@@ -1559,6 +1536,12 @@ def maybe_set_size(self, min_itemsize=None, **kwargs):
15591536
self.typ = _tables(
15601537
).StringCol(itemsize=min_itemsize, pos=self.pos)
15611538

1539+
def validate(self, handler, append, **kwargs):
1540+
self.validate_names()
1541+
1542+
def validate_names(self):
1543+
pass
1544+
15621545
def validate_and_set(self, handler, append, **kwargs):
15631546
self.set_table(handler.table)
15641547
self.validate_col()
@@ -2104,6 +2087,10 @@ class DataIndexableCol(DataCol):
21042087
""" represent a data column that can be indexed """
21052088
is_data_indexable = True
21062089

2090+
def validate_names(self):
2091+
if not Index(self.values).is_object():
2092+
raise ValueError("cannot have non-object label DataIndexableCol")
2093+
21072094
def get_atom_string(self, block, itemsize):
21082095
return _tables().StringCol(itemsize=itemsize)
21092096

@@ -3776,6 +3763,9 @@ def write(self, obj, axes=None, append=False, complib=None,
37763763
min_itemsize=min_itemsize,
37773764
**kwargs)
37783765

3766+
for a in self.axes:
3767+
a.validate(self, append)
3768+
37793769
if not self.is_exists:
37803770

37813771
# create the table

pandas/io/tests/test_pytables.py

+25-9
Original file line number | Diff line number | Diff line change
@@ -4617,19 +4617,35 @@ def test_preserve_timedeltaindex_type(self):
46174617
store['df'] = df
46184618
assert_frame_equal(store['df'], df)
46194619

4620-
def test_to_hdf_with_integer_column_names(self):
4620+
def test_to_hdf_with_object_column_names(self):
46214621
# GH9057
4622-
df = DataFrame(columns=[5, 10], data=[[1, 0]])
4622+
# Writing HDF5 table format should only work for string-like
4623+
# column types
4624+
def column_index_data_frame(index):
4625+
return DataFrame(np.random.randn(10,2), columns=index(2))
4626+
4627+
types_should_fail = [ tm.makeIntIndex, tm.makeFloatIndex,
4628+
tm.makeDateIndex, tm.makeTimedeltaIndex,
4629+
tm.makePeriodIndex ]
4630+
types_should_run = [ tm.makeStringIndex, tm.makeCategoricalIndex ]
4631+
4632+
if compat.PY3:
4633+
types_should_run.append(tm.makeUnicodeIndex)
4634+
else:
4635+
types_should_fail.append(tm.makeUnicodeIndex)
46234636

4624-
with ensure_clean_path(self.path) as path:
4625-
self.assertRaises(ValueError, df.to_hdf, path, 'df', format='table', data_columns=True)
4637+
for index in types_should_fail:
4638+
df = column_index_data_frame(index)
4639+
with ensure_clean_path(self.path) as path, \
4640+
self.assertRaises(ValueError,
4641+
msg="cannot have non-object label DataIndexableCol"):
4642+
df.to_hdf(path, 'df', format='table', data_columns=True)
46264643

4627-
def test_to_hdf_with_unicode_column_names(self):
4628-
# GH9057
4629-
df = DataFrame(columns=[u('\u1234'), u('\u2345')], data=[[1, 0]])
4644+
for index in types_should_run:
4645+
df = DataFrame(np.random.randn(10, 2), columns=index(2))
4646+
with ensure_clean_path(self.path) as path:
4647+
df.to_hdf(path, 'df', format='table', data_columns=True)
46304648

4631-
with ensure_clean_path(self.path) as path:
4632-
self.assertRaises(ValueError, df.to_hdf, path, 'df', format='table', data_columns=True)
46334649

46344650
def _test_sort(obj):
46354651
if isinstance(obj, DataFrame):

0 commit comments

Comments
 (0)