From 50a6ef7384c33f2a0fdb311f7e54be44cf4f1d89 Mon Sep 17 00:00:00 2001 From: jreback Date: Thu, 21 Mar 2013 16:44:05 -0400 Subject: [PATCH] DOC: better error message on ndarray construction, GH3105 CLN: refactor to put all block manager creation calls into internals.py so to wrap with invalid constructions TST: now py3 compatible --- RELEASE.rst | 2 ++ pandas/core/frame.py | 21 +++++++--------- pandas/core/internals.py | 43 +++++++++++++++++++++++++++++---- pandas/core/panel.py | 13 ++++------ pandas/io/pytables.py | 2 +- pandas/tests/test_frame.py | 49 ++++++++++++++++++++++++++++++++++++-- pandas/tests/test_panel.py | 22 ++++++++++++++--- 7 files changed, 120 insertions(+), 32 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index d71fce70dd5d8..45477610cabb2 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -147,6 +147,8 @@ pandas 0.11.0 - arguments to DataFrame.clip were inconsistent to numpy and Series clipping (GH2747_) - util.testing.assert_frame_equal now checks the column and index names (GH2964_) + - Constructors will now return a more informative ValueError on failures + when invalid shapes are passed **Bug Fixes** diff --git a/pandas/core/frame.py b/pandas/core/frame.py index afb698221c48b..b1241d7fc12a4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -30,7 +30,9 @@ from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels, _convert_to_index_sliceable, _check_bool_indexer, _maybe_convert_indices) -from pandas.core.internals import BlockManager, make_block, form_blocks +from pandas.core.internals import (BlockManager, + create_block_manager_from_arrays, + create_block_manager_from_blocks) from pandas.core.series import Series, _radd_compat import pandas.core.expressions as expressions from pandas.compat.scipy import scoreatpercentile as _quantile @@ -553,9 +555,8 @@ def _init_ndarray(self, values, index, columns, dtype=None, else: columns = _ensure_index(columns) - block = make_block(values.T, columns, columns) - return BlockManager([block], [columns, index]) - + return create_block_manager_from_blocks([ values.T ], [ columns, index ]) + def _wrap_array(self, arr, axes, copy=False): index, columns = axes return self._constructor(arr, index=index, columns=columns, copy=copy) @@ -1283,7 +1284,7 @@ def to_panel(self): minor_axis.name = self.index.names[1] new_axes = [selfsorted.columns, major_axis, minor_axis] - new_mgr = BlockManager(new_blocks, new_axes) + new_mgr = create_block_manager_from_blocks(new_blocks, new_axes) return Panel(new_mgr) @@ -5300,13 +5301,7 @@ def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None): # from BlockManager perspective axes = [_ensure_index(columns), _ensure_index(index)] - # segregates dtypes and forms blocks matching to columns - blocks = form_blocks(arrays, arr_names, axes) - - # consolidate for now - mgr = BlockManager(blocks, axes) - return mgr.consolidate() - + return create_block_manager_from_arrays(arrays, arr_names, axes) def extract_index(data): from pandas.core.index import _union_indexes @@ -5384,7 +5379,7 @@ def convert(v): if values.ndim == 1: values = values.reshape((values.shape[0], 1)) elif values.ndim != 2: - raise Exception('Must pass 2-d input') + raise ValueError('Must pass 2-d input') return values diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 385695ec6cc50..6bbb1d9ce979b 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -33,11 +33,11 @@ def __init__(self, values, items, ref_items, ndim=2): values = np.array(values, dtype=object) if values.ndim != ndim: - raise AssertionError('Wrong number of dimensions') + raise ValueError('Wrong number of dimensions') if len(items) != len(values): - raise AssertionError('Wrong number of items passed (%d vs %d)' - % (len(items), len(values))) + raise ValueError('Wrong number of items passed %d, indices imply %d' + % (len(items), len(values))) self._ref_locs = None self.values = values @@ -911,13 +911,14 @@ def shape(self): def _verify_integrity(self): mgr_shape = self.shape + tot_items = sum(len(x.items) for x in self.blocks) for block in self.blocks: if block.ref_items is not self.items: raise AssertionError("Block ref_items must be BlockManager " "items") if block.values.shape[1:] != mgr_shape[1:]: - raise AssertionError('Block shape incompatible with manager') - tot_items = sum(len(x.items) for x in self.blocks) + construction_error(tot_items,block.values.shape[1:],self.axes) + if len(self.items) != tot_items: raise AssertionError('Number of manager items must equal union of ' 'block items') @@ -1704,7 +1705,39 @@ def item_dtypes(self): return result +def construction_error(tot_items, block_shape, axes): + """ raise a helpful message about our construction """ + raise ValueError("Shape of passed values is %s, indices imply %s" % ( + tuple([tot_items] + list(block_shape)),tuple(len(ax) for ax in axes))) + + +def create_block_manager_from_blocks(blocks, axes): + try: + + # if we are passed values, make the blocks + if len(blocks) == 1 and not isinstance(blocks[0], Block): + blocks = [ make_block(blocks[0], axes[0], axes[0]) ] + + mgr = BlockManager(blocks, axes) + mgr._consolidate_inplace() + return mgr + + except (ValueError): + blocks = [ getattr(b,'values',b) for b in blocks ] + tot_items = sum(b.shape[0] for b in blocks) + construction_error(tot_items,blocks[0].shape[1:],axes) + +def create_block_manager_from_arrays(arrays, names, axes): + try: + blocks = form_blocks(arrays, names, axes) + mgr = BlockManager(blocks, axes) + mgr._consolidate_inplace() + return mgr + except (ValueError): + construction_error(len(arrays),arrays[0].shape[1:],axes) + def form_blocks(arrays, names, axes): + # pre-filter out items if we passed it items = axes[0] diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 9f91d8add1eac..d33ce4c90244b 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -13,7 +13,9 @@ from pandas.core.index import (Index, MultiIndex, _ensure_index, _get_combined_index) from pandas.core.indexing import _maybe_droplevels, _is_list_like -from pandas.core.internals import BlockManager, make_block, form_blocks +from pandas.core.internals import (BlockManager, + create_block_manager_from_arrays, + create_block_manager_from_blocks) from pandas.core.series import Series from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame @@ -310,10 +312,7 @@ def _init_dict(self, data, axes, dtype=None): return self._init_arrays(arrays, haxis, [haxis] + raxes) def _init_arrays(self, arrays, arr_names, axes): - # segregates dtypes and forms blocks matching to columns - blocks = form_blocks(arrays, arr_names, axes) - mgr = BlockManager(blocks, axes).consolidate() - return mgr + return create_block_manager_from_arrays(arrays, arr_names, axes) @property def shape(self): @@ -398,9 +397,7 @@ def _init_matrix(self, data, axes, dtype=None, copy=False): ax = _ensure_index(ax) fixed_axes.append(ax) - items = fixed_axes[0] - block = make_block(values, items, items) - return BlockManager([block], fixed_axes) + return create_block_manager_from_blocks([ values ], fixed_axes) #---------------------------------------------------------------------- # Array interface diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 84a4121387964..f9cc850cc6d27 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -22,7 +22,7 @@ from pandas.core.algorithms import match, unique, factorize from pandas.core.categorical import Categorical from pandas.core.common import _asarray_tuplesafe, _try_sort -from pandas.core.internals import BlockManager, make_block, form_blocks +from pandas.core.internals import BlockManager, make_block from pandas.core.reshape import block2d_to_blocknd, factor_indexer from pandas.core.index import Int64Index, _ensure_index import pandas.core.common as com diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index b363a276723da..93477073d2a8e 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1976,7 +1976,7 @@ def test_constructor_cast_failure(self): df['foo'] = np.ones((4,2)).tolist() # this is not ok - self.assertRaises(AssertionError, df.__setitem__, tuple(['test']), np.ones((4,2))) + self.assertRaises(ValueError, df.__setitem__, tuple(['test']), np.ones((4,2))) # this is ok df['foo2'] = np.ones((4,2)).tolist() @@ -2135,6 +2135,51 @@ def test_constructor_dict(self): frame = DataFrame({'A': [], 'B': []}, columns=['A', 'B']) self.assert_(frame.index.equals(Index([]))) + def test_constructor_error_msgs(self): + + # mix dict and array, wrong size + try: + DataFrame({'A': {'a': 'a', 'b': 'b'}, + 'B': ['a', 'b', 'c']}) + except (Exception), detail: + self.assert_(type(detail) == ValueError) + self.assert_("Mixing dicts with non-Series may lead to ambiguous ordering." in str(detail)) + + # wrong size ndarray, GH 3105 + from pandas import date_range + try: + DataFrame(np.arange(12).reshape((4, 3)), columns=['foo', 'bar', 'baz'], + index=date_range('2000-01-01', periods=3)) + except (Exception), detail: + self.assert_(type(detail) == ValueError) + self.assert_(str(detail).startswith("Shape of passed values is (3, 4), indices imply (3, 3)")) + + # higher dim raise exception + try: + DataFrame(np.zeros((3, 3, 3)), columns=['A', 'B', 'C'], index=[1]) + except (Exception), detail: + self.assert_(type(detail) == ValueError) + self.assert_("Must pass 2-d input" in str(detail)) + + # wrong size axis labels + try: + DataFrame(np.random.rand(2,3), columns=['A', 'B', 'C'], index=[1]) + except (Exception), detail: + self.assert_(type(detail) == ValueError) + self.assert_(str(detail).startswith("Shape of passed values is (3, 2), indices imply (3, 1)")) + + try: + DataFrame(np.random.rand(2,3), columns=['A', 'B'], index=[1, 2]) + except (Exception), detail: + self.assert_(type(detail) == ValueError) + self.assert_(str(detail).startswith("Shape of passed values is (3, 2), indices imply (2, 2)")) + + try: + DataFrame({'a': False, 'b': True}) + except (Exception), detail: + self.assert_(type(detail) == ValueError) + self.assert_("If use all scalar values, must pass index" in str(detail)) + def test_constructor_subclass_dict(self): # Test for passing dict subclass to constructor data = {'col1': tm.TestSubDict((x, 10.0 * x) for x in xrange(10)), @@ -3545,7 +3590,7 @@ def test_from_records_bad_index_column(self): assert(df1.index.equals(Index(df.C))) # should fail - self.assertRaises(Exception, DataFrame.from_records, df, index=[2]) + self.assertRaises(ValueError, DataFrame.from_records, df, index=[2]) self.assertRaises(KeyError, DataFrame.from_records, df, index=2) def test_from_records_non_tuple(self): diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 84f5f3afab6db..921097e3408fd 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -939,9 +939,25 @@ def test_from_dict_mixed_orient(self): self.assert_(panel['foo'].values.dtype == np.object_) self.assert_(panel['A'].values.dtype == np.float64) - def test_values(self): - self.assertRaises(Exception, Panel, np.random.randn(5, 5, 5), - range(5), range(5), range(4)) + def test_constructor_error_msgs(self): + + try: + Panel(np.random.randn(3,4,5), range(4), range(5), range(5)) + except (Exception), detail: + self.assert_(type(detail) == ValueError) + self.assert_(str(detail).startswith("Shape of passed values is (3, 4, 5), indices imply (4, 5, 5)")) + + try: + Panel(np.random.randn(3,4,5), range(5), range(4), range(5)) + except (Exception), detail: + self.assert_(type(detail) == ValueError) + self.assert_(str(detail).startswith("Shape of passed values is (3, 4, 5), indices imply (5, 4, 5)")) + + try: + Panel(np.random.randn(3,4,5), range(5), range(5), range(4)) + except (Exception), detail: + self.assert_(type(detail) == ValueError) + self.assert_(str(detail).startswith("Shape of passed values is (3, 4, 5), indices imply (5, 5, 4)")) def test_conform(self): df = self.panel['ItemA'][:-5].filter(items=['A', 'B'])