Skip to content

Commit 78a5112

Browse files
committed
Merge pull request #5387 from jreback/hdf_index_tests
ER/API: unicode indices not supported on table formats in py2 (GH5386)
2 parents 6eba2e4 + 2210bca commit 78a5112

File tree

4 files changed

+56
-8
lines changed

4 files changed

+56
-8
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ API Changes
277277
- ``numexpr`` 2.2.2 fixes incompatibility in PyTables 2.4 (:issue:`4908`)
278278
- ``flush`` now accepts an ``fsync`` parameter, which defaults to ``False``
279279
(:issue:`5364`)
280+
- ``unicode`` indices are not supported on ``table`` formats in Python 2 (:issue:`5386`)
280281
- ``JSON``
281282

282283
- added ``date_unit`` parameter to specify resolution of timestamps.

pandas/io/pytables.py

+21-7
Original file line numberDiff line numberDiff line change
@@ -1919,6 +1919,10 @@ def set_version(self):
19191919
def pandas_type(self):
19201920
return _ensure_decoded(getattr(self.group._v_attrs, 'pandas_type', None))
19211921

1922+
@property
1923+
def format_type(self):
1924+
return 'fixed'
1925+
19221926
def __unicode__(self):
19231927
""" return a pretty representation of myself """
19241928
self.infer_axes()
@@ -2146,7 +2150,8 @@ def write_index(self, key, index):
21462150
self.write_sparse_intindex(key, index)
21472151
else:
21482152
setattr(self.attrs, '%s_variety' % key, 'regular')
2149-
converted = _convert_index(index, self.encoding).set_name('index')
2153+
converted = _convert_index(index, self.encoding,
2154+
self.format_type).set_name('index')
21502155
self.write_array(key, converted.values)
21512156
node = getattr(self.group, key)
21522157
node._v_attrs.kind = converted.kind
@@ -2192,7 +2197,8 @@ def write_multi_index(self, key, index):
21922197
index.names)):
21932198
# write the level
21942199
level_key = '%s_level%d' % (key, i)
2195-
conv_level = _convert_index(lev, self.encoding).set_name(level_key)
2200+
conv_level = _convert_index(lev, self.encoding,
2201+
self.format_type).set_name(level_key)
21962202
self.write_array(level_key, conv_level.values)
21972203
node = getattr(self.group, level_key)
21982204
node._v_attrs.kind = conv_level.kind
@@ -2609,6 +2615,10 @@ def __init__(self, *args, **kwargs):
26092615
def table_type_short(self):
26102616
return self.table_type.split('_')[0]
26112617

2618+
@property
2619+
def format_type(self):
2620+
return 'table'
2621+
26122622
def __unicode__(self):
26132623
""" return a pretty representatgion of myself """
26142624
self.infer_axes()
@@ -2991,7 +3001,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
29913001
if i in axes:
29923002
name = obj._AXIS_NAMES[i]
29933003
index_axes_map[i] = _convert_index(
2994-
a, self.encoding).set_name(name).set_axis(i)
3004+
a, self.encoding, self.format_type).set_name(name).set_axis(i)
29953005
else:
29963006

29973007
# we might be able to change the axes on the appending data if
@@ -3823,7 +3833,7 @@ def _get_info(info, name):
38233833
idx = info[name] = dict()
38243834
return idx
38253835

3826-
def _convert_index(index, encoding=None):
3836+
def _convert_index(index, encoding=None, format_type=None):
38273837
index_name = getattr(index, 'name', None)
38283838

38293839
if isinstance(index, DatetimeIndex):
@@ -3870,9 +3880,13 @@ def _convert_index(index, encoding=None):
38703880
converted, 'string', _tables().StringCol(itemsize), itemsize=itemsize,
38713881
index_name=index_name)
38723882
elif inferred_type == 'unicode':
3873-
atom = _tables().ObjectAtom()
3874-
return IndexCol(np.asarray(values, dtype='O'), 'object', atom,
3875-
index_name=index_name)
3883+
if format_type == 'fixed':
3884+
atom = _tables().ObjectAtom()
3885+
return IndexCol(np.asarray(values, dtype='O'), 'object', atom,
3886+
index_name=index_name)
3887+
raise TypeError(
3888+
"[unicode] is not supported as a in index type for [{0}] formats".format(format_type))
3889+
38763890
elif inferred_type == 'integer':
38773891
# take a guess for now, hope the values fit
38783892
atom = _tables().Int64Col()

pandas/io/tests/test_pytables.py

+32
Original file line numberDiff line numberDiff line change
@@ -755,6 +755,38 @@ def test_append_series(self):
755755
store.append('mi', s)
756756
tm.assert_series_equal(store['mi'], s)
757757

758+
def test_store_index_types(self):
759+
# GH5386
760+
# test storing various index types
761+
762+
with ensure_clean(self.path) as store:
763+
764+
def check(format,index):
765+
df = DataFrame(np.random.randn(10,2),columns=list('AB'))
766+
df.index = index(len(df))
767+
768+
_maybe_remove(store, 'df')
769+
store.put('df',df,format=format)
770+
assert_frame_equal(df,store['df'])
771+
772+
for index in [ tm.makeFloatIndex, tm.makeStringIndex, tm.makeIntIndex,
773+
tm.makeDateIndex, tm.makePeriodIndex ]:
774+
775+
check('table',index)
776+
check('fixed',index)
777+
778+
# unicode
779+
index = tm.makeUnicodeIndex
780+
if compat.PY3:
781+
check('table',index)
782+
check('fixed',index)
783+
else:
784+
785+
# only support for fixed types (and they have a perf warning)
786+
self.assertRaises(TypeError, check, 'table', index)
787+
with tm.assert_produces_warning(expected_warning=PerformanceWarning):
788+
check('fixed',index)
789+
758790
def test_encoding(self):
759791

760792
if sys.byteorder != 'little':

pandas/util/testing.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,8 @@ def ensure_clean(filename=None, return_filelike=False):
336336
yield filename
337337
finally:
338338
try:
339-
os.remove(filename)
339+
if os.path.exists(filename):
340+
os.remove(filename)
340341
except Exception as e:
341342
print(e)
342343

0 commit comments

Comments
 (0)