Skip to content

Commit 57c2c93

Browse files
committed
TST/ENH: more helpful exceptions on trying to store invalid data
TST: added better exception for incompatible types when appending tables
1 parent 4a04fae commit 57c2c93

File tree

2 files changed

+81
-10
lines changed

2 files changed

+81
-10
lines changed

pandas/io/pytables.py

+43-10
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel
1919
from pandas.sparse.array import BlockIndex, IntIndex
2020
from pandas.tseries.api import PeriodIndex, DatetimeIndex
21-
from pandas.core.common import adjoin
21+
from pandas.core.common import adjoin, isnull
2222
from pandas.core.algorithms import match, unique, factorize
2323
from pandas.core.categorical import Categorical
2424
from pandas.core.common import _asarray_tuplesafe, _try_sort
@@ -727,8 +727,8 @@ def _create_storer(self, group, value = None, table = False, append = False, **k
727727
""" return a suitable Storer class to operate """
728728

729729
def error(t):
730-
raise Exception("cannot properly create the storer for: [%s] [group->%s,value->%s,table->%s,append->%s,kwargs->%s]" %
731-
(t,group,type(value),table,append,kwargs))
730+
raise NotImplementedError("cannot properly create the storer for: [%s] [group->%s,value->%s,table->%s,append->%s,kwargs->%s]" %
731+
(t,group,type(value),table,append,kwargs))
732732

733733
pt = getattr(group._v_attrs,'pandas_type',None)
734734
tt = getattr(group._v_attrs,'table_type',None)
@@ -768,7 +768,12 @@ def error(t):
768768
if value is not None:
769769

770770
if pt == 'frame_table':
771-
tt = 'appendable_frame' if value.index.nlevels == 1 else 'appendable_multiframe'
771+
index = getattr(value,'index',None)
772+
if index is not None:
773+
if index.nlevels == 1:
774+
tt = 'appendable_frame'
775+
elif index.nlevels > 1:
776+
tt = 'appendable_multiframe'
772777
elif pt == 'wide_table':
773778
tt = 'appendable_panel'
774779
elif pt == 'ndim_table':
@@ -1187,7 +1192,23 @@ def get_atom_string(self, block, itemsize):
11871192

11881193
def set_atom_string(self, block, existing_col, min_itemsize, nan_rep):
11891194
# fill nan items with myself
1190-
data = block.fillna(nan_rep).values
1195+
block = block.fillna(nan_rep)
1196+
data = block.values
1197+
1198+
# see if we have a valid string type
1199+
inferred_type = lib.infer_dtype(data.ravel())
1200+
if inferred_type != 'string':
1201+
1202+
# we cannot serialize this data, so report an exception on a column by column basis
1203+
for item in block.items:
1204+
1205+
col = block.get(item)
1206+
inferred_type = lib.infer_dtype(col.ravel())
1207+
if inferred_type != 'string':
1208+
raise NotImplementedError("cannot serialize the column [%s] because "
1209+
"its data contents are [%s] object dtype" %
1210+
(item,inferred_type))
1211+
11911212

11921213
# itemsize is the maximum length of a string (along any dimension)
11931214
itemsize = lib.max_len_string_array(data.ravel())
@@ -2234,7 +2255,11 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
22342255

22352256
# set the default axes if needed
22362257
if axes is None:
2237-
axes = _AXES_MAP[type(obj)]
2258+
try:
2259+
axes = _AXES_MAP[type(obj)]
2260+
except:
2261+
raise NotImplementedError("cannot properly create the storer for: [group->%s,value->%s]" %
2262+
(self.group._v_name,type(obj)))
22382263

22392264
# map axes to numbers
22402265
axes = [obj._get_axis_number(a) for a in axes]
@@ -2251,7 +2276,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
22512276

22522277
# currently only support ndim-1 axes
22532278
if len(axes) != self.ndim - 1:
2254-
raise Exception("currenctly only support ndim-1 indexers in an AppendableTable")
2279+
raise Exception("currently only support ndim-1 indexers in an AppendableTable")
22552280

22562281
# create according to the new data
22572282
self.non_index_axes = []
@@ -2335,10 +2360,18 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
23352360
name = b.items[0]
23362361
self.data_columns.append(name)
23372362

2338-
try:
2339-
existing_col = existing_table.values_axes[
2340-
i] if existing_table is not None and validate else None
2363+
# make sure that we match up the existing columns
2364+
# if we have an existing table
2365+
if existing_table is not None and validate:
2366+
try:
2367+
existing_col = existing_table.values_axes[i]
2368+
except:
2369+
raise Exception("Incompatible appended table [%s] with existing table [%s]" %
2370+
(blocks,existing_table.values_axes))
2371+
else:
2372+
existing_col = None
23412373

2374+
try:
23422375
col = klass.create_for_block(
23432376
i=i, name=name, version=self.version)
23442377
col.set_atom(block=b,

pandas/io/tests/test_pytables.py

+38
Original file line number | Diff line number | Diff line change
@@ -1002,6 +1002,44 @@ def test_append_misc(self):
10021002
result = store.select('df1')
10031003
tm.assert_frame_equal(result, df)
10041004

1005+
def test_append_raise(self):
1006+
1007+
with ensure_clean(self.path) as store:
1008+
1009+
# test append with invalid input to get good error messages
1010+
1011+
# list in column
1012+
df = tm.makeDataFrame()
1013+
df['invalid'] = [['a']] * len(df)
1014+
self.assert_(df.dtypes['invalid'] == np.object_)
1015+
self.assertRaises(NotImplementedError, store.append,'df',df)
1016+
1017+
# multiple invalid columns
1018+
df['invalid2'] = [['a']] * len(df)
1019+
df['invalid3'] = [['a']] * len(df)
1020+
self.assertRaises(NotImplementedError, store.append,'df',df)
1021+
1022+
# datetime with embedded nans as object
1023+
df = tm.makeDataFrame()
1024+
s = Series(datetime.datetime(2001,1,2),index=df.index,dtype=object)
1025+
s[0:5] = np.nan
1026+
df['invalid'] = s
1027+
self.assert_(df.dtypes['invalid'] == np.object_)
1028+
self.assertRaises(NotImplementedError, store.append,'df', df)
1029+
1030+
# directly ndarray
1031+
self.assertRaises(NotImplementedError, store.append,'df',np.arange(10))
1032+
1033+
# series directly
1034+
self.assertRaises(NotImplementedError, store.append,'df',Series(np.arange(10)))
1035+
1036+
# appending an incompatible table
1037+
df = tm.makeDataFrame()
1038+
store.append('df',df)
1039+
1040+
df['foo'] = 'foo'
1041+
self.assertRaises(Exception, store.append,'df',df)
1042+
10051043
def test_table_index_incompatible_dtypes(self):
10061044
df1 = DataFrame({'a': [1, 2, 3]})
10071045
df2 = DataFrame({'a': [4, 5, 6]},

0 commit comments

Comments
 (0)