Skip to content

Commit e6140e9

Browse files
committed
Merge pull request #3131 from jreback/GH3105
DOC: GH3105 better error message on ndarray construction
2 parents e1e14a4 + 50a6ef7 commit e6140e9

File tree

7 files changed

+120
-32
lines changed

7 files changed

+120
-32
lines changed

RELEASE.rst

+2
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@ pandas 0.11.0
147147
- arguments to DataFrame.clip were inconsistent to numpy and Series clipping
148148
(GH2747_)
149149
- util.testing.assert_frame_equal now checks the column and index names (GH2964_)
150+
- Constructors will now raise a more informative ValueError on failures
151+
when invalid shapes are passed
150152

151153
**Bug Fixes**
152154

pandas/core/frame.py

+8-13
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@
3030
from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels,
3131
_convert_to_index_sliceable, _check_bool_indexer,
3232
_maybe_convert_indices)
33-
from pandas.core.internals import BlockManager, make_block, form_blocks
33+
from pandas.core.internals import (BlockManager,
34+
create_block_manager_from_arrays,
35+
create_block_manager_from_blocks)
3436
from pandas.core.series import Series, _radd_compat
3537
import pandas.core.expressions as expressions
3638
from pandas.compat.scipy import scoreatpercentile as _quantile
@@ -553,9 +555,8 @@ def _init_ndarray(self, values, index, columns, dtype=None,
553555
else:
554556
columns = _ensure_index(columns)
555557

556-
block = make_block(values.T, columns, columns)
557-
return BlockManager([block], [columns, index])
558-
558+
return create_block_manager_from_blocks([ values.T ], [ columns, index ])
559+
559560
def _wrap_array(self, arr, axes, copy=False):
560561
index, columns = axes
561562
return self._constructor(arr, index=index, columns=columns, copy=copy)
@@ -1283,7 +1284,7 @@ def to_panel(self):
12831284
minor_axis.name = self.index.names[1]
12841285

12851286
new_axes = [selfsorted.columns, major_axis, minor_axis]
1286-
new_mgr = BlockManager(new_blocks, new_axes)
1287+
new_mgr = create_block_manager_from_blocks(new_blocks, new_axes)
12871288

12881289
return Panel(new_mgr)
12891290

@@ -5300,13 +5301,7 @@ def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None):
53005301
# from BlockManager perspective
53015302
axes = [_ensure_index(columns), _ensure_index(index)]
53025303

5303-
# segregates dtypes and forms blocks matching to columns
5304-
blocks = form_blocks(arrays, arr_names, axes)
5305-
5306-
# consolidate for now
5307-
mgr = BlockManager(blocks, axes)
5308-
return mgr.consolidate()
5309-
5304+
return create_block_manager_from_arrays(arrays, arr_names, axes)
53105305

53115306
def extract_index(data):
53125307
from pandas.core.index import _union_indexes
@@ -5384,7 +5379,7 @@ def convert(v):
53845379
if values.ndim == 1:
53855380
values = values.reshape((values.shape[0], 1))
53865381
elif values.ndim != 2:
5387-
raise Exception('Must pass 2-d input')
5382+
raise ValueError('Must pass 2-d input')
53885383

53895384
return values
53905385

pandas/core/internals.py

+38-5
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ def __init__(self, values, items, ref_items, ndim=2):
3333
values = np.array(values, dtype=object)
3434

3535
if values.ndim != ndim:
36-
raise AssertionError('Wrong number of dimensions')
36+
raise ValueError('Wrong number of dimensions')
3737

3838
if len(items) != len(values):
39-
raise AssertionError('Wrong number of items passed (%d vs %d)'
40-
% (len(items), len(values)))
39+
raise ValueError('Wrong number of items passed %d, indices imply %d'
40+
% (len(items), len(values)))
4141

4242
self._ref_locs = None
4343
self.values = values
@@ -911,13 +911,14 @@ def shape(self):
911911

912912
def _verify_integrity(self):
913913
mgr_shape = self.shape
914+
tot_items = sum(len(x.items) for x in self.blocks)
914915
for block in self.blocks:
915916
if block.ref_items is not self.items:
916917
raise AssertionError("Block ref_items must be BlockManager "
917918
"items")
918919
if block.values.shape[1:] != mgr_shape[1:]:
919-
raise AssertionError('Block shape incompatible with manager')
920-
tot_items = sum(len(x.items) for x in self.blocks)
920+
construction_error(tot_items,block.values.shape[1:],self.axes)
921+
921922
if len(self.items) != tot_items:
922923
raise AssertionError('Number of manager items must equal union of '
923924
'block items')
@@ -1710,7 +1711,39 @@ def item_dtypes(self):
17101711
return result
17111712

17121713

1714+
def construction_error(tot_items, block_shape, axes):
    """
    Raise a ValueError describing a shape mismatch found during construction.

    Parameters
    ----------
    tot_items : int
        Number of items (length along the first axis) in the passed values.
    block_shape : sequence of int
        Remaining dimensions of the passed values, i.e. everything after
        the items axis.
    axes : sequence
        The axes the caller requested; only their lengths are used.

    Raises
    ------
    ValueError
        Always. The message contrasts the shape of the passed values with
        the shape implied by ``axes``.
    """
    # Coerce every dimension to a plain int so the message always reads like
    # "(3, 4)", even when a dimension arrives as a long or a numpy integer
    # scalar (whose repr inside a tuple is not a bare number).
    passed = tuple(int(n) for n in [tot_items] + list(block_shape))
    implied = tuple(int(len(ax)) for ax in axes)
    raise ValueError("Shape of passed values is %s, indices imply %s"
                     % (passed, implied))
1718+
1719+
1720+
def create_block_manager_from_blocks(blocks, axes):
    """
    Build a consolidated BlockManager from blocks, or from one raw array.

    Parameters
    ----------
    blocks : list
        Either a list of Block objects, or a single-element list holding
        raw values. Raw values are wrapped with ``make_block`` using
        ``axes[0]`` as both the items and the ref_items.
    axes : list
        Axes for the manager; ``axes[0]`` is the items axis.

    Returns
    -------
    BlockManager
        Consolidated in place before it is returned.

    Raises
    ------
    ValueError
        With a "Shape of passed values ..." message when the passed values
        do not fit ``axes``. A ValueError that is not explained by a shape
        mismatch is re-raised unchanged.
    """
    try:
        # if we are passed values, make the blocks
        if len(blocks) == 1 and not isinstance(blocks[0], Block):
            blocks = [make_block(blocks[0], axes[0], axes[0])]

        mgr = BlockManager(blocks, axes)
        mgr._consolidate_inplace()
        return mgr

    except ValueError:
        if not blocks:
            # nothing to measure a shape from: keep the original error
            raise

        # at this point ``blocks`` may hold Block objects or raw values
        values = [getattr(b, 'values', b) for b in blocks]
        tot_items = sum(v.shape[0] for v in values)
        block_shape = tuple(values[0].shape[1:])

        passed = (tot_items,) + block_shape
        implied = tuple(len(ax) for ax in axes)
        if passed == implied:
            # the shapes agree, so the failure has another cause; do not
            # replace it with a misleading "X vs X" shape message
            raise

        construction_error(tot_items, block_shape, axes)
1735+
1736+
def create_block_manager_from_arrays(arrays, names, axes):
    """
    Build a consolidated BlockManager from a sequence of per-item arrays.

    Parameters
    ----------
    arrays : sequence
        One array per item; handed to ``form_blocks`` to be grouped into
        blocks.
    names : sequence
        Names matching ``arrays``; handed to ``form_blocks``.
    axes : list
        Axes for the manager; ``axes[0]`` is the items axis.

    Returns
    -------
    BlockManager
        Consolidated in place before it is returned.

    Raises
    ------
    ValueError
        With a "Shape of passed values ..." message when the arrays do not
        fit ``axes``. A ValueError that is not explained by a shape
        mismatch is re-raised unchanged.
    """
    try:
        blocks = form_blocks(arrays, names, axes)
        mgr = BlockManager(blocks, axes)
        mgr._consolidate_inplace()
        return mgr
    except ValueError:
        if len(arrays) == 0:
            # nothing to measure a shape from: keep the original error
            raise

        # Each array is a single item, so its *entire* shape follows the
        # item count. Slicing off its first dimension would report a shape
        # of the wrong rank compared with ``axes``.
        item_shape = tuple(arrays[0].shape)

        passed = (len(arrays),) + item_shape
        implied = tuple(len(ax) for ax in axes)
        if passed == implied:
            # the shapes agree, so the failure has another cause; do not
            # replace it with a misleading "X vs X" shape message
            raise

        construction_error(len(arrays), item_shape, axes)
1744+
17131745
def form_blocks(arrays, names, axes):
1746+
17141747
# pre-filter out items if we passed it
17151748
items = axes[0]
17161749

pandas/core/panel.py

+5-8
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
from pandas.core.index import (Index, MultiIndex, _ensure_index,
1414
_get_combined_index)
1515
from pandas.core.indexing import _maybe_droplevels, _is_list_like
16-
from pandas.core.internals import BlockManager, make_block, form_blocks
16+
from pandas.core.internals import (BlockManager,
17+
create_block_manager_from_arrays,
18+
create_block_manager_from_blocks)
1719
from pandas.core.series import Series
1820
from pandas.core.frame import DataFrame
1921
from pandas.core.generic import NDFrame
@@ -310,10 +312,7 @@ def _init_dict(self, data, axes, dtype=None):
310312
return self._init_arrays(arrays, haxis, [haxis] + raxes)
311313

312314
def _init_arrays(self, arrays, arr_names, axes):
313-
# segregates dtypes and forms blocks matching to columns
314-
blocks = form_blocks(arrays, arr_names, axes)
315-
mgr = BlockManager(blocks, axes).consolidate()
316-
return mgr
315+
return create_block_manager_from_arrays(arrays, arr_names, axes)
317316

318317
@property
319318
def shape(self):
@@ -398,9 +397,7 @@ def _init_matrix(self, data, axes, dtype=None, copy=False):
398397
ax = _ensure_index(ax)
399398
fixed_axes.append(ax)
400399

401-
items = fixed_axes[0]
402-
block = make_block(values, items, items)
403-
return BlockManager([block], fixed_axes)
400+
return create_block_manager_from_blocks([ values ], fixed_axes)
404401

405402
#----------------------------------------------------------------------
406403
# Array interface

pandas/io/pytables.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from pandas.core.algorithms import match, unique, factorize
2323
from pandas.core.categorical import Categorical
2424
from pandas.core.common import _asarray_tuplesafe, _try_sort
25-
from pandas.core.internals import BlockManager, make_block, form_blocks
25+
from pandas.core.internals import BlockManager, make_block
2626
from pandas.core.reshape import block2d_to_blocknd, factor_indexer
2727
from pandas.core.index import Int64Index, _ensure_index
2828
import pandas.core.common as com

pandas/tests/test_frame.py

+47-2
Original file line numberDiff line numberDiff line change
@@ -1976,7 +1976,7 @@ def test_constructor_cast_failure(self):
19761976
df['foo'] = np.ones((4,2)).tolist()
19771977

19781978
# this is not ok
1979-
self.assertRaises(AssertionError, df.__setitem__, tuple(['test']), np.ones((4,2)))
1979+
self.assertRaises(ValueError, df.__setitem__, tuple(['test']), np.ones((4,2)))
19801980

19811981
# this is ok
19821982
df['foo2'] = np.ones((4,2)).tolist()
@@ -2135,6 +2135,51 @@ def test_constructor_dict(self):
21352135
frame = DataFrame({'A': [], 'B': []}, columns=['A', 'B'])
21362136
self.assert_(frame.index.equals(Index([])))
21372137

2138+
def test_constructor_error_msgs(self):
    """DataFrame construction failures raise ValueError with a clear message."""
    from pandas import date_range

    def check(build, message, prefix=False):
        # Run ``build`` and require a ValueError whose text contains
        # ``message`` (or starts with it when ``prefix`` is True).  The
        # ``else`` branch matters: without it the test would pass silently
        # if the constructor stopped raising.
        try:
            build()
        except ValueError as detail:
            text = str(detail)
            if prefix:
                self.assert_(text.startswith(message))
            else:
                self.assert_(message in text)
        else:
            self.fail("expected ValueError: %s" % message)

    # mix dict and array, wrong size
    check(lambda: DataFrame({'A': {'a': 'a', 'b': 'b'},
                             'B': ['a', 'b', 'c']}),
          "Mixing dicts with non-Series may lead to ambiguous ordering.")

    # wrong size ndarray, GH 3105
    check(lambda: DataFrame(np.arange(12).reshape((4, 3)),
                            columns=['foo', 'bar', 'baz'],
                            index=date_range('2000-01-01', periods=3)),
          "Shape of passed values is (3, 4), indices imply (3, 3)",
          prefix=True)

    # higher dim raise exception
    check(lambda: DataFrame(np.zeros((3, 3, 3)),
                            columns=['A', 'B', 'C'], index=[1]),
          "Must pass 2-d input")

    # wrong size axis labels
    check(lambda: DataFrame(np.random.rand(2, 3),
                            columns=['A', 'B', 'C'], index=[1]),
          "Shape of passed values is (3, 2), indices imply (3, 1)",
          prefix=True)

    check(lambda: DataFrame(np.random.rand(2, 3),
                            columns=['A', 'B'], index=[1, 2]),
          "Shape of passed values is (3, 2), indices imply (2, 2)",
          prefix=True)

    # all-scalar dict without an index
    check(lambda: DataFrame({'a': False, 'b': True}),
          "If use all scalar values, must pass index")
2182+
21382183
def test_constructor_subclass_dict(self):
21392184
# Test for passing dict subclass to constructor
21402185
data = {'col1': tm.TestSubDict((x, 10.0 * x) for x in xrange(10)),
@@ -3545,7 +3590,7 @@ def test_from_records_bad_index_column(self):
35453590
assert(df1.index.equals(Index(df.C)))
35463591

35473592
# should fail
3548-
self.assertRaises(Exception, DataFrame.from_records, df, index=[2])
3593+
self.assertRaises(ValueError, DataFrame.from_records, df, index=[2])
35493594
self.assertRaises(KeyError, DataFrame.from_records, df, index=2)
35503595

35513596
def test_from_records_non_tuple(self):

pandas/tests/test_panel.py

+19-3
Original file line numberDiff line numberDiff line change
@@ -939,9 +939,25 @@ def test_from_dict_mixed_orient(self):
939939
self.assert_(panel['foo'].values.dtype == np.object_)
940940
self.assert_(panel['A'].values.dtype == np.float64)
941941

942-
def test_values(self):
943-
self.assertRaises(Exception, Panel, np.random.randn(5, 5, 5),
944-
range(5), range(5), range(4))
942+
def test_constructor_error_msgs(self):
    """Panel construction with mismatched axes raises a clear ValueError."""

    def check(items, major, minor, message):
        # Build a Panel from (3, 4, 5) values with the given axis lengths
        # and require a ValueError whose text starts with ``message``.  The
        # ``else`` branch matters: without it the test would pass silently
        # if the constructor stopped raising.
        try:
            Panel(np.random.randn(3, 4, 5),
                  range(items), range(major), range(minor))
        except ValueError as detail:
            self.assert_(str(detail).startswith(message))
        else:
            self.fail("expected ValueError: %s" % message)

    check(4, 5, 5,
          "Shape of passed values is (3, 4, 5), indices imply (4, 5, 5)")
    check(5, 4, 5,
          "Shape of passed values is (3, 4, 5), indices imply (5, 4, 5)")
    check(5, 5, 4,
          "Shape of passed values is (3, 4, 5), indices imply (5, 5, 4)")
945961

946962
def test_conform(self):
947963
df = self.panel['ItemA'][:-5].filter(items=['A', 'B'])

0 commit comments

Comments
 (0)