Skip to content

ENH: better error messages on storage failure in HDFStore #3088

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 19, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions doc/source/cookbook.rst
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,12 @@ Grouping
`Create multiple aggregated columns
<http://stackoverflow.com/questions/14897100/create-multiple-columns-in-pandas-aggregation-function>`__

Expanding Data
~~~~~~~~~~~~~~

`Alignment and to-date
<http://stackoverflow.com/questions/15489011/python-time-series-alignment-and-to-date-functions>`__

Splitting
~~~~~~~~~

Expand Down Expand Up @@ -171,8 +177,8 @@ SQL
`Reading from databases with SQL
<http://stackoverflow.com/questions/10065051/python-pandas-and-databases-like-mysql>`__

HDF5
~~~~
HDFStore
~~~~~~~~

`Simple Queries with a Timestamp Index
<http://stackoverflow.com/questions/13926089/selecting-columns-from-pandas-hdfstore-table>`__
Expand All @@ -186,6 +192,9 @@ HDF5
`Large Data work flows
<http://stackoverflow.com/questions/14262433/large-data-work-flows-using-pandas>`__

`Troubleshoot HDFStore exceptions
<http://stackoverflow.com/questions/15488809/how-to-trouble-shoot-hdfstore-exception-cannot-find-the-correct-atom-type>`__

Storing Attributes to a group node

.. ipython:: python
Expand Down
53 changes: 43 additions & 10 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel
from pandas.sparse.array import BlockIndex, IntIndex
from pandas.tseries.api import PeriodIndex, DatetimeIndex
from pandas.core.common import adjoin
from pandas.core.common import adjoin, isnull
from pandas.core.algorithms import match, unique, factorize
from pandas.core.categorical import Categorical
from pandas.core.common import _asarray_tuplesafe, _try_sort
Expand Down Expand Up @@ -727,8 +727,8 @@ def _create_storer(self, group, value = None, table = False, append = False, **k
""" return a suitable Storer class to operate """

def error(t):
raise Exception("cannot properly create the storer for: [%s] [group->%s,value->%s,table->%s,append->%s,kwargs->%s]" %
(t,group,type(value),table,append,kwargs))
raise NotImplementedError("cannot properly create the storer for: [%s] [group->%s,value->%s,table->%s,append->%s,kwargs->%s]" %
(t,group,type(value),table,append,kwargs))

pt = getattr(group._v_attrs,'pandas_type',None)
tt = getattr(group._v_attrs,'table_type',None)
Expand Down Expand Up @@ -768,7 +768,12 @@ def error(t):
if value is not None:

if pt == 'frame_table':
tt = 'appendable_frame' if value.index.nlevels == 1 else 'appendable_multiframe'
index = getattr(value,'index',None)
if index is not None:
if index.nlevels == 1:
tt = 'appendable_frame'
elif index.nlevels > 1:
tt = 'appendable_multiframe'
elif pt == 'wide_table':
tt = 'appendable_panel'
elif pt == 'ndim_table':
Expand Down Expand Up @@ -1187,7 +1192,23 @@ def get_atom_string(self, block, itemsize):

def set_atom_string(self, block, existing_col, min_itemsize, nan_rep):
# fill nan items with myself
data = block.fillna(nan_rep).values
block = block.fillna(nan_rep)
data = block.values

# see if we have a valid string type
inferred_type = lib.infer_dtype(data.ravel())
if inferred_type != 'string':

# we cannot serialize this data, so report an exception on a column by column basis
for item in block.items:

col = block.get(item)
inferred_type = lib.infer_dtype(col.ravel())
if inferred_type != 'string':
raise NotImplementedError("cannot serialize the column [%s] because "
"its data contents are [%s] object dtype" %
(item,inferred_type))


# itemsize is the maximum length of a string (along any dimension)
itemsize = lib.max_len_string_array(data.ravel())
Expand Down Expand Up @@ -2234,7 +2255,11 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,

# set the default axes if needed
if axes is None:
axes = _AXES_MAP[type(obj)]
try:
axes = _AXES_MAP[type(obj)]
except:
raise NotImplementedError("cannot properly create the storer for: [group->%s,value->%s]" %
(self.group._v_name,type(obj)))

# map axes to numbers
axes = [obj._get_axis_number(a) for a in axes]
Expand All @@ -2251,7 +2276,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,

# currently support on ndim-1 axes
if len(axes) != self.ndim - 1:
raise Exception("currenctly only support ndim-1 indexers in an AppendableTable")
raise Exception("currently only support ndim-1 indexers in an AppendableTable")

# create according to the new data
self.non_index_axes = []
Expand Down Expand Up @@ -2335,10 +2360,18 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
name = b.items[0]
self.data_columns.append(name)

try:
existing_col = existing_table.values_axes[
i] if existing_table is not None and validate else None
# make sure that we match up the existing columns
# if we have an existing table
if existing_table is not None and validate:
try:
existing_col = existing_table.values_axes[i]
except:
raise Exception("Incompatible appended table [%s] with existing table [%s]" %
(blocks,existing_table.values_axes))
else:
existing_col = None

try:
col = klass.create_for_block(
i=i, name=name, version=self.version)
col.set_atom(block=b,
Expand Down
38 changes: 38 additions & 0 deletions pandas/io/tests/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -1002,6 +1002,44 @@ def test_append_misc(self):
result = store.select('df1')
tm.assert_frame_equal(result, df)

def test_append_raise(self):
    """Appending unsupported inputs to an HDFStore should raise
    informative errors (NotImplementedError for unserializable
    dtypes/objects, Exception for an incompatible table append)
    rather than an obscure PyTables failure."""

    with ensure_clean(self.path) as store:

        # test append with invalid input to get good error messages

        # a column holding list objects (object dtype) is not serializable
        df = tm.makeDataFrame()
        df['invalid'] = [['a']] * len(df)
        self.assert_(df.dtypes['invalid'] == np.object_)
        self.assertRaises(NotImplementedError, store.append,'df',df)

        # multiple invalid columns should still raise (checked column-by-column)
        df['invalid2'] = [['a']] * len(df)
        df['invalid3'] = [['a']] * len(df)
        self.assertRaises(NotImplementedError, store.append,'df',df)

        # datetime with embedded nans as object dtype cannot be stored
        df = tm.makeDataFrame()
        s = Series(datetime.datetime(2001,1,2),index=df.index,dtype=object)
        s[0:5] = np.nan
        df['invalid'] = s
        self.assert_(df.dtypes['invalid'] == np.object_)
        self.assertRaises(NotImplementedError, store.append,'df', df)

        # directly appending a raw ndarray is unsupported
        self.assertRaises(NotImplementedError, store.append,'df',np.arange(10))

        # appending a Series directly is likewise unsupported
        self.assertRaises(NotImplementedError, store.append,'df',Series(np.arange(10)))

        # appending an incompatible table (extra column vs. existing table)
        df = tm.makeDataFrame()
        store.append('df',df)

        df['foo'] = 'foo'
        self.assertRaises(Exception, store.append,'df',df)

def test_table_index_incompatible_dtypes(self):
df1 = DataFrame({'a': [1, 2, 3]})
df2 = DataFrame({'a': [4, 5, 6]},
Expand Down