Skip to content

DOC: GH3105 better error message on ndarray construction #3131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 25, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ pandas 0.11.0
- arguments to DataFrame.clip were inconsistent to numpy and Series clipping
(GH2747_)
- util.testing.assert_frame_equal now checks the column and index names (GH2964_)
- Constructors will now raise a more informative ValueError on failures
when invalid shapes are passed

**Bug Fixes**

Expand Down
21 changes: 8 additions & 13 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@
from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels,
_convert_to_index_sliceable, _check_bool_indexer,
_maybe_convert_indices)
from pandas.core.internals import BlockManager, make_block, form_blocks
from pandas.core.internals import (BlockManager,
create_block_manager_from_arrays,
create_block_manager_from_blocks)
from pandas.core.series import Series, _radd_compat
import pandas.core.expressions as expressions
from pandas.compat.scipy import scoreatpercentile as _quantile
Expand Down Expand Up @@ -553,9 +555,8 @@ def _init_ndarray(self, values, index, columns, dtype=None,
else:
columns = _ensure_index(columns)

block = make_block(values.T, columns, columns)
return BlockManager([block], [columns, index])

return create_block_manager_from_blocks([ values.T ], [ columns, index ])

def _wrap_array(self, arr, axes, copy=False):
index, columns = axes
return self._constructor(arr, index=index, columns=columns, copy=copy)
Expand Down Expand Up @@ -1283,7 +1284,7 @@ def to_panel(self):
minor_axis.name = self.index.names[1]

new_axes = [selfsorted.columns, major_axis, minor_axis]
new_mgr = BlockManager(new_blocks, new_axes)
new_mgr = create_block_manager_from_blocks(new_blocks, new_axes)

return Panel(new_mgr)

Expand Down Expand Up @@ -5300,13 +5301,7 @@ def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None):
# from BlockManager perspective
axes = [_ensure_index(columns), _ensure_index(index)]

# segregates dtypes and forms blocks matching to columns
blocks = form_blocks(arrays, arr_names, axes)

# consolidate for now
mgr = BlockManager(blocks, axes)
return mgr.consolidate()

return create_block_manager_from_arrays(arrays, arr_names, axes)

def extract_index(data):
from pandas.core.index import _union_indexes
Expand Down Expand Up @@ -5384,7 +5379,7 @@ def convert(v):
if values.ndim == 1:
values = values.reshape((values.shape[0], 1))
elif values.ndim != 2:
raise Exception('Must pass 2-d input')
raise ValueError('Must pass 2-d input')

return values

Expand Down
43 changes: 38 additions & 5 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ def __init__(self, values, items, ref_items, ndim=2):
values = np.array(values, dtype=object)

if values.ndim != ndim:
raise AssertionError('Wrong number of dimensions')
raise ValueError('Wrong number of dimensions')

if len(items) != len(values):
raise AssertionError('Wrong number of items passed (%d vs %d)'
% (len(items), len(values)))
raise ValueError('Wrong number of items passed %d, indices imply %d'
% (len(items), len(values)))

self._ref_locs = None
self.values = values
Expand Down Expand Up @@ -911,13 +911,14 @@ def shape(self):

def _verify_integrity(self):
mgr_shape = self.shape
tot_items = sum(len(x.items) for x in self.blocks)
for block in self.blocks:
if block.ref_items is not self.items:
raise AssertionError("Block ref_items must be BlockManager "
"items")
if block.values.shape[1:] != mgr_shape[1:]:
raise AssertionError('Block shape incompatible with manager')
tot_items = sum(len(x.items) for x in self.blocks)
construction_error(tot_items,block.values.shape[1:],self.axes)

if len(self.items) != tot_items:
raise AssertionError('Number of manager items must equal union of '
'block items')
Expand Down Expand Up @@ -1704,7 +1705,39 @@ def item_dtypes(self):
return result


def construction_error(tot_items, block_shape, axes):
    """Raise a ValueError contrasting the passed shape with the axes' shape.

    The "passed" shape is ``tot_items`` followed by the entries of
    ``block_shape``; the "implied" shape is the length of each entry in
    ``axes``. This function never returns normally.
    """
    passed_shape = (tot_items,) + tuple(block_shape)
    implied_shape = tuple(map(len, axes))
    message = "Shape of passed values is %s, indices imply %s" % (
        passed_shape, implied_shape)
    raise ValueError(message)


def create_block_manager_from_blocks(blocks, axes):
    """Build a consolidated BlockManager from blocks or from raw values.

    ``blocks`` is either a list of ``Block`` objects or a one-element list
    holding raw values; in the latter case the values are wrapped in a
    single block whose items and ref_items are both ``axes[0]``.

    Any ValueError raised while building is replaced, via
    ``construction_error``, by a ValueError that reports the shape of the
    passed values against the shape implied by ``axes``.
    """
    try:
        # a lone non-Block entry is raw values: wrap it before construction
        if len(blocks) == 1 and not isinstance(blocks[0], Block):
            items = axes[0]
            blocks = [make_block(blocks[0], items, items)]

        manager = BlockManager(blocks, axes)
        manager._consolidate_inplace()
        return manager

    except ValueError:
        # entries may be Blocks or still raw values depending on where the
        # failure happened; read the underlying array in either case
        arrays = [getattr(blk, 'values', blk) for blk in blocks]
        n_items = sum(arr.shape[0] for arr in arrays)
        construction_error(n_items, arrays[0].shape[1:], axes)

def create_block_manager_from_arrays(arrays, names, axes):
    """Build a consolidated BlockManager from one array per item.

    Parameters
    ----------
    arrays : sequence of arrays
        Handed to ``form_blocks`` together with ``names`` and ``axes``.
    names : sequence
        Forwarded to ``form_blocks`` alongside ``arrays``.
    axes : list
        Axes of the resulting manager; also used to compute the shape
        reported on failure.

    Returns
    -------
    BlockManager
        Consolidated in place before being returned.

    Raises
    ------
    ValueError
        Raised through ``construction_error`` whenever block formation or
        manager construction raises ValueError; the message compares the
        shape of the passed arrays with the shape implied by ``axes``.
    """
    try:
        blocks = form_blocks(arrays, names, axes)
        mgr = BlockManager(blocks, axes)
        mgr._consolidate_inplace()
        return mgr
    except ValueError:
        # Each array contributes exactly one item, so the passed shape is
        # (number of arrays,) + the FULL shape of a single array. Dropping
        # the array's leading dimension (shape[1:]) would under-report the
        # dimensionality, e.g. (n,) instead of (n, length) for 1-d arrays.
        construction_error(len(arrays), arrays[0].shape, axes)

def form_blocks(arrays, names, axes):

# pre-filter out items if we passed it
items = axes[0]

Expand Down
13 changes: 5 additions & 8 deletions pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
from pandas.core.index import (Index, MultiIndex, _ensure_index,
_get_combined_index)
from pandas.core.indexing import _maybe_droplevels, _is_list_like
from pandas.core.internals import BlockManager, make_block, form_blocks
from pandas.core.internals import (BlockManager,
create_block_manager_from_arrays,
create_block_manager_from_blocks)
from pandas.core.series import Series
from pandas.core.frame import DataFrame
from pandas.core.generic import NDFrame
Expand Down Expand Up @@ -310,10 +312,7 @@ def _init_dict(self, data, axes, dtype=None):
return self._init_arrays(arrays, haxis, [haxis] + raxes)

def _init_arrays(self, arrays, arr_names, axes):
# segregates dtypes and forms blocks matching to columns
blocks = form_blocks(arrays, arr_names, axes)
mgr = BlockManager(blocks, axes).consolidate()
return mgr
return create_block_manager_from_arrays(arrays, arr_names, axes)

@property
def shape(self):
Expand Down Expand Up @@ -398,9 +397,7 @@ def _init_matrix(self, data, axes, dtype=None, copy=False):
ax = _ensure_index(ax)
fixed_axes.append(ax)

items = fixed_axes[0]
block = make_block(values, items, items)
return BlockManager([block], fixed_axes)
return create_block_manager_from_blocks([ values ], fixed_axes)

#----------------------------------------------------------------------
# Array interface
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from pandas.core.algorithms import match, unique, factorize
from pandas.core.categorical import Categorical
from pandas.core.common import _asarray_tuplesafe, _try_sort
from pandas.core.internals import BlockManager, make_block, form_blocks
from pandas.core.internals import BlockManager, make_block
from pandas.core.reshape import block2d_to_blocknd, factor_indexer
from pandas.core.index import Int64Index, _ensure_index
import pandas.core.common as com
Expand Down
49 changes: 47 additions & 2 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1976,7 +1976,7 @@ def test_constructor_cast_failure(self):
df['foo'] = np.ones((4,2)).tolist()

# this is not ok
self.assertRaises(AssertionError, df.__setitem__, tuple(['test']), np.ones((4,2)))
self.assertRaises(ValueError, df.__setitem__, tuple(['test']), np.ones((4,2)))

# this is ok
df['foo2'] = np.ones((4,2)).tolist()
Expand Down Expand Up @@ -2135,6 +2135,51 @@ def test_constructor_dict(self):
frame = DataFrame({'A': [], 'B': []}, columns=['A', 'B'])
self.assert_(frame.index.equals(Index([])))

def test_constructor_error_msgs(self):
    """Check type and message of errors raised by invalid DataFrame input.

    Every case fails explicitly when the constructor does not raise, so a
    regression that silently accepts bad input cannot pass unnoticed.
    """

    # mix dict and array, wrong size
    try:
        DataFrame({'A': {'a': 'a', 'b': 'b'},
                   'B': ['a', 'b', 'c']})
    except Exception as detail:
        self.assert_(type(detail) == ValueError)
        self.assert_("Mixing dicts with non-Series may lead to ambiguous ordering." in str(detail))
    else:
        self.fail("mixing a dict with a list did not raise")

    # wrong size ndarray, GH 3105
    from pandas import date_range
    try:
        DataFrame(np.arange(12).reshape((4, 3)), columns=['foo', 'bar', 'baz'],
                  index=date_range('2000-01-01', periods=3))
    except Exception as detail:
        self.assert_(type(detail) == ValueError)
        self.assert_(str(detail).startswith("Shape of passed values is (3, 4), indices imply (3, 3)"))
    else:
        self.fail("ndarray with mismatched index length did not raise")

    # higher dim raise exception
    try:
        DataFrame(np.zeros((3, 3, 3)), columns=['A', 'B', 'C'], index=[1])
    except Exception as detail:
        self.assert_(type(detail) == ValueError)
        self.assert_("Must pass 2-d input" in str(detail))
    else:
        self.fail("3-d input did not raise")

    # wrong size axis labels
    try:
        DataFrame(np.random.rand(2,3), columns=['A', 'B', 'C'], index=[1])
    except Exception as detail:
        self.assert_(type(detail) == ValueError)
        self.assert_(str(detail).startswith("Shape of passed values is (3, 2), indices imply (3, 1)"))
    else:
        self.fail("index of wrong length did not raise")

    try:
        DataFrame(np.random.rand(2,3), columns=['A', 'B'], index=[1, 2])
    except Exception as detail:
        self.assert_(type(detail) == ValueError)
        self.assert_(str(detail).startswith("Shape of passed values is (3, 2), indices imply (2, 2)"))
    else:
        self.fail("columns of wrong length did not raise")

    # all-scalar dict without an index
    try:
        DataFrame({'a': False, 'b': True})
    except Exception as detail:
        self.assert_(type(detail) == ValueError)
        self.assert_("If use all scalar values, must pass index" in str(detail))
    else:
        self.fail("all-scalar dict without index did not raise")

def test_constructor_subclass_dict(self):
# Test for passing dict subclass to constructor
data = {'col1': tm.TestSubDict((x, 10.0 * x) for x in xrange(10)),
Expand Down Expand Up @@ -3545,7 +3590,7 @@ def test_from_records_bad_index_column(self):
assert(df1.index.equals(Index(df.C)))

# should fail
self.assertRaises(Exception, DataFrame.from_records, df, index=[2])
self.assertRaises(ValueError, DataFrame.from_records, df, index=[2])
self.assertRaises(KeyError, DataFrame.from_records, df, index=2)

def test_from_records_non_tuple(self):
Expand Down
22 changes: 19 additions & 3 deletions pandas/tests/test_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -939,9 +939,25 @@ def test_from_dict_mixed_orient(self):
self.assert_(panel['foo'].values.dtype == np.object_)
self.assert_(panel['A'].values.dtype == np.float64)

def test_values(self):
self.assertRaises(Exception, Panel, np.random.randn(5, 5, 5),
range(5), range(5), range(4))
def test_constructor_error_msgs(self):
    """Check type and message of errors raised for mis-sized Panel axes.

    Values of shape (3, 4, 5) are paired with axes whose lengths do not
    match. Each case fails explicitly when the constructor does not raise,
    so the test cannot pass vacuously.
    """

    try:
        Panel(np.random.randn(3,4,5), range(4), range(5), range(5))
    except Exception as detail:
        self.assert_(type(detail) == ValueError)
        self.assert_(str(detail).startswith("Shape of passed values is (3, 4, 5), indices imply (4, 5, 5)"))
    else:
        self.fail("Panel with axes of lengths (4, 5, 5) did not raise")

    try:
        Panel(np.random.randn(3,4,5), range(5), range(4), range(5))
    except Exception as detail:
        self.assert_(type(detail) == ValueError)
        self.assert_(str(detail).startswith("Shape of passed values is (3, 4, 5), indices imply (5, 4, 5)"))
    else:
        self.fail("Panel with axes of lengths (5, 4, 5) did not raise")

    try:
        Panel(np.random.randn(3,4,5), range(5), range(5), range(4))
    except Exception as detail:
        self.assert_(type(detail) == ValueError)
        self.assert_(str(detail).startswith("Shape of passed values is (3, 4, 5), indices imply (5, 5, 4)"))
    else:
        self.fail("Panel with axes of lengths (5, 5, 4) did not raise")

def test_conform(self):
df = self.panel['ItemA'][:-5].filter(items=['A', 'B'])
Expand Down