diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 4d99ebdce86dc..0a55d78dd24c3 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -99,6 +99,12 @@ Grouping `Create multiple aggregated columns `__ +Expanding Data +~~~~~~~~~~~~~~ + +`Alignment and to-date +`__ + Splitting ~~~~~~~~~ @@ -171,8 +177,8 @@ SQL `Reading from databases with SQL `__ -HDF5 -~~~~ +HDFStore +~~~~~~~~ `Simple Queries with a Timestamp Index `__ @@ -186,6 +192,9 @@ HDF5 `Large Data work flows `__ +`Troubleshoot HDFStore exceptions +`__ + Storing Attributes to a group node .. ipython:: python diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index ca2e3b6e04f19..929d9182f35a9 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -18,7 +18,7 @@ from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel from pandas.sparse.array import BlockIndex, IntIndex from pandas.tseries.api import PeriodIndex, DatetimeIndex -from pandas.core.common import adjoin +from pandas.core.common import adjoin, isnull from pandas.core.algorithms import match, unique, factorize from pandas.core.categorical import Categorical from pandas.core.common import _asarray_tuplesafe, _try_sort @@ -727,8 +727,8 @@ def _create_storer(self, group, value = None, table = False, append = False, **k """ return a suitable Storer class to operate """ def error(t): - raise Exception("cannot properly create the storer for: [%s] [group->%s,value->%s,table->%s,append->%s,kwargs->%s]" % - (t,group,type(value),table,append,kwargs)) + raise NotImplementedError("cannot properly create the storer for: [%s] [group->%s,value->%s,table->%s,append->%s,kwargs->%s]" % + (t,group,type(value),table,append,kwargs)) pt = getattr(group._v_attrs,'pandas_type',None) tt = getattr(group._v_attrs,'table_type',None) @@ -768,7 +768,12 @@ def error(t): if value is not None: if pt == 'frame_table': - tt = 'appendable_frame' if value.index.nlevels == 1 else 'appendable_multiframe' + index = 
getattr(value,'index',None) + if index is not None: + if index.nlevels == 1: + tt = 'appendable_frame' + elif index.nlevels > 1: + tt = 'appendable_multiframe' elif pt == 'wide_table': tt = 'appendable_panel' elif pt == 'ndim_table': @@ -1187,7 +1192,23 @@ def get_atom_string(self, block, itemsize): def set_atom_string(self, block, existing_col, min_itemsize, nan_rep): # fill nan items with myself - data = block.fillna(nan_rep).values + block = block.fillna(nan_rep) + data = block.values + + # see if we have a valid string type + inferred_type = lib.infer_dtype(data.ravel()) + if inferred_type != 'string': + + # we cannot serialize this data, so report an exception on a column by column basis + for item in block.items: + + col = block.get(item) + inferred_type = lib.infer_dtype(col.ravel()) + if inferred_type != 'string': + raise NotImplementedError("cannot serialize the column [%s] because " + "its data contents are [%s] object dtype" % + (item,inferred_type)) + # itemsize is the maximum length of a string (along any dimension) itemsize = lib.max_len_string_array(data.ravel()) @@ -2234,7 +2255,11 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, # set the default axes if needed if axes is None: - axes = _AXES_MAP[type(obj)] + try: + axes = _AXES_MAP[type(obj)] + except: + raise NotImplementedError("cannot properly create the storer for: [group->%s,value->%s]" % + (self.group._v_name,type(obj))) # map axes to numbers axes = [obj._get_axis_number(a) for a in axes] @@ -2251,7 +2276,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, # currently support on ndim-1 axes if len(axes) != self.ndim - 1: - raise Exception("currenctly only support ndim-1 indexers in an AppendableTable") + raise Exception("currently only support ndim-1 indexers in an AppendableTable") # create according to the new data self.non_index_axes = [] @@ -2335,10 +2360,18 @@ def create_axes(self, axes, obj, validate=True, 
nan_rep=None, data_columns=None, name = b.items[0] self.data_columns.append(name) - try: - existing_col = existing_table.values_axes[ - i] if existing_table is not None and validate else None + # make sure that we match up the existing columns + # if we have an existing table + if existing_table is not None and validate: + try: + existing_col = existing_table.values_axes[i] + except: + raise Exception("Incompatible appended table [%s] with existing table [%s]" % + (blocks,existing_table.values_axes)) + else: + existing_col = None + try: col = klass.create_for_block( i=i, name=name, version=self.version) col.set_atom(block=b, diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 8cf40a77d639f..e2d1d75e69329 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1002,6 +1002,44 @@ def test_append_misc(self): result = store.select('df1') tm.assert_frame_equal(result, df) + def test_append_raise(self): + + with ensure_clean(self.path) as store: + + # test append with invalid input to get good error messages + + # list in column + df = tm.makeDataFrame() + df['invalid'] = [['a']] * len(df) + self.assert_(df.dtypes['invalid'] == np.object_) + self.assertRaises(NotImplementedError, store.append,'df',df) + + # multiple invalid columns + df['invalid2'] = [['a']] * len(df) + df['invalid3'] = [['a']] * len(df) + self.assertRaises(NotImplementedError, store.append,'df',df) + + # datetime with embedded nans as object + df = tm.makeDataFrame() + s = Series(datetime.datetime(2001,1,2),index=df.index,dtype=object) + s[0:5] = np.nan + df['invalid'] = s + self.assert_(df.dtypes['invalid'] == np.object_) + self.assertRaises(NotImplementedError, store.append,'df', df) + + # directly ndarray + self.assertRaises(NotImplementedError, store.append,'df',np.arange(10)) + + # series directly + self.assertRaises(NotImplementedError, store.append,'df',Series(np.arange(10))) + + # appending an incompatible table + df = 
tm.makeDataFrame() + store.append('df',df) + + df['foo'] = 'foo' + self.assertRaises(Exception, store.append,'df',df) + def test_table_index_incompatible_dtypes(self): df1 = DataFrame({'a': [1, 2, 3]}) df2 = DataFrame({'a': [4, 5, 6]},