Skip to content

Commit ba8172b

Browse files
committed
BUG: detect/raise exception if possible int64 overflow #851
1 parent de81cc1 commit ba8172b

File tree

2 files changed

+30
-5
lines changed

2 files changed

+30
-5
lines changed

pandas/core/groupby.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1468,16 +1468,27 @@ def get_group_index(label_list, shape):
14681468
return label_list[0]
14691469

14701470
n = len(label_list[0])
1471-
group_index = np.zeros(n, dtype=int)
1471+
group_index = np.zeros(n, dtype=np.int64)
14721472
mask = np.zeros(n, dtype=bool)
1473-
for i in xrange(len(shape)):
1474-
stride = np.prod([x for x in shape[i+1:]], dtype=int)
1475-
group_index += com._ensure_int64(label_list[i]) * stride
1476-
mask |= label_list[i] < 0
1473+
1474+
if _int64_overflow_possible(shape):
1475+
raise Exception('Possible int64 overflow, raise exception for now')
1476+
else:
1477+
for i in xrange(len(shape)):
1478+
stride = np.prod([x for x in shape[i+1:]], dtype=np.int64)
1479+
group_index += com._ensure_int64(label_list[i]) * stride
1480+
mask |= label_list[i] < 0
14771481

14781482
np.putmask(group_index, mask, -1)
14791483
return group_index
14801484

1485+
def _int64_overflow_possible(shape):
1486+
the_prod = 1L
1487+
for x in shape:
1488+
the_prod *= long(x)
1489+
1490+
return the_prod >= 2**63
1491+
14811492
def decons_group_index(comp_labels, shape):
14821493
# reconstruct labels
14831494
label_list = []

pandas/tests/test_groupby.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1467,6 +1467,20 @@ def test_int32_overflow(self):
14671467
right = df.groupby(['B', 'A']).sum()
14681468
self.assert_(len(left) == len(right))
14691469

1470+
def test_int64_overflow(self):
    """Summing a four-key groupby over 250000 rows is expected to raise."""
    n_rows = 250000
    # B cycles through 0..99999 twice and then 0..49999, giving n_rows values.
    B = np.concatenate((np.arange(100000), np.arange(100000),
                        np.arange(50000)))
    A = np.arange(n_rows)
    frame = DataFrame({'A': A, 'B': B,
                       'C': A, 'D': B,
                       'values': np.random.randn(n_rows)})

    grouped = frame.groupby(['A', 'B', 'C', 'D'])
    self.assertRaises(Exception, grouped.sum)

    # Disabled for now; compares group counts across key orderings:
    # left = df.groupby(['A', 'B', 'C', 'D']).sum()
    # right = df.groupby(['D', 'C', 'B', 'A']).sum()
    # self.assert_(len(left) == len(right))
1483+
14701484
def test_decons():
14711485
from pandas.core.groupby import decons_group_index, get_group_index
14721486

0 commit comments

Comments
 (0)