Skip to content

Commit a69438f

Browse files
anhqlesweb
authored and committed
ENH calculate size of the resulting pivot table and raise error if it's too big
1 parent 23dae93 commit a69438f

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

pandas/core/reshape/pivot.py

+8 -8
Original file line number | Diff line number | Diff line change
@@ -31,11 +31,6 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
3131
index = _convert_by(index)
3232
columns = _convert_by(columns)
3333

34-
num_rows = data.reindex(index, axis='columns').shape[0]
35-
num_columns = data.reindex(columns, axis='columns').shape[0]
36-
if num_rows * num_columns > (2 ** 31 - 1):
37-
raise ValueError('Pivot table is too big, causing int32 overflow')
38-
3934
if isinstance(aggfunc, list):
4035
pieces = []
4136
keys = []
@@ -86,9 +81,14 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
8681
pass
8782
values = list(values)
8883

89-
# group by the cartesian product of the grouper
90-
# if we have a categorical
91-
grouped = data.groupby(keys, observed=False)
84+
num_rows = (data.reindex(columns=index).drop_duplicates().shape[0]
85+
if index else 1)
86+
num_cols = (data.reindex(columns=columns).drop_duplicates().shape[0]
87+
if columns else 1)
88+
if num_rows * num_cols * len(values) > (2 ** 31 - 1):
89+
raise ValueError('Pivot table is too big, causing int32 overflow')
90+
91+
grouped = data.groupby(keys)
9292
agged = grouped.agg(aggfunc)
9393
if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
9494
agged = agged.dropna(how='all')

0 commit comments

Comments
 (0)