Skip to content

Commit 6b7b030

Browse files
anhqlesweb
authored and committed
ENH catch the int32 overflow error earlier and in two separate places: in pivot_table and unstack
1 parent db2319e commit 6b7b030

File tree

4 files changed

+21
-6
lines changed

4 files changed

+21
-6
lines changed

pandas/core/reshape/pivot.py

+5
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,11 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
3131
index = _convert_by(index)
3232
columns = _convert_by(columns)
3333

34+
num_rows = data.reindex(index, axis='columns').shape[0]
35+
num_columns = data.reindex(columns, axis='columns').shape[0]
36+
if num_rows * num_columns > (2 ** 31 - 1):
37+
raise ValueError('Pivot table is too big, causing int32 overflow')
38+
3439
if isinstance(aggfunc, list):
3540
pieces = []
3641
keys = []

pandas/core/reshape/reshape.py

+5-2
Original file line number | Diff line number | Diff line change
@@ -127,6 +127,11 @@ def __init__(self, values, index, level=-1, value_columns=None,
127127
self.removed_level = self.new_index_levels.pop(self.level)
128128
self.removed_level_full = index.levels[self.level]
129129

130+
num_rows = np.max([index_level.size for index_level in self.new_index_levels])
131+
num_columns = self.removed_level.size
132+
if num_rows * num_columns > (2 ** 31 - 1):
133+
raise ValueError('Unstacked data frame is too big, causing int32 overflow')
134+
130135
self._make_sorted_values_labels()
131136
self._make_selectors()
132137

@@ -162,8 +167,6 @@ def _make_selectors(self):
162167
self.full_shape = ngroups, stride
163168

164169
selector = self.sorted_labels[-1] + stride * comp_index + self.lift
165-
if np.prod(self.full_shape) > (2 ** 31 - 1):
166-
raise ValueError('Pivot table is too big, causing int32 overflow')
167170
mask = np.zeros(np.prod(self.full_shape), dtype=bool)
168171
mask.put(selector, True)
169172

pandas/tests/reshape/test_pivot.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -1279,11 +1279,11 @@ def test_pivot_string_func_vs_func(self, f, f_numpy):
12791279
@pytest.mark.slow
12801280
def test_pivot_number_of_levels_larger_than_int32(self):
12811281
# GH 20601
1282-
data = DataFrame({'ind1': list(range(1337600)) * 2,
1283-
'ind2': list(range(3040)) * 2 * 440,
1284-
'count': [1] * 2 * 1337600})
1282+
df = DataFrame({'ind1': np.arange(2 ** 16),
1283+
'ind2': np.arange(2 ** 16),
1284+
'count': np.arange(2 ** 16)})
12851285
with tm.assert_raises_regex(ValueError, 'int32 overflow'):
1286-
data.pivot_table(index='ind1', columns='ind2',
1286+
df.pivot_table(index='ind1', columns='ind2',
12871287
values='count', aggfunc='count')
12881288

12891289

pandas/tests/test_multilevel.py

+7
Original file line number | Diff line number | Diff line change
@@ -1212,6 +1212,13 @@ def test_unstack_unobserved_keys(self):
12121212
recons = result.stack()
12131213
tm.assert_frame_equal(recons, df)
12141214

1215+
@pytest.mark.slow
1216+
def test_unstack_number_of_levels_larger_than_int32(self):
1217+
# GH 20601
1218+
df = DataFrame(np.random.randn(2 ** 16, 2), index=[np.arange(2 ** 16), np.arange(2 ** 16)])
1219+
with tm.assert_raises_regex(ValueError, 'int32 overflow'):
1220+
df.unstack()
1221+
12151222
def test_stack_order_with_unsorted_levels(self):
12161223
# GH 16323
12171224

0 commit comments

Comments
 (0)