Skip to content

Commit de81cc1

Browse files
committed
BUG: fix int32 overflow when computing group_index, GH #850
1 parent 89bdb1e commit de81cc1

File tree

4 files changed

+28
-12
lines changed

4 files changed

+28
-12
lines changed

pandas/core/groupby.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1472,7 +1472,7 @@ def get_group_index(label_list, shape):
14721472
mask = np.zeros(n, dtype=bool)
14731473
for i in xrange(len(shape)):
14741474
stride = np.prod([x for x in shape[i+1:]], dtype=int)
1475-
group_index += label_list[i] * stride
1475+
group_index += com._ensure_int64(label_list[i]) * stride
14761476
mask |= label_list[i] < 0
14771477

14781478
np.putmask(group_index, mask, -1)

pandas/tests/test_groupby.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1457,6 +1457,15 @@ def test_groupby_2d_malformed(self):
14571457
self.assert_(np.array_equal(tmp.columns, ['zeros', 'ones']))
14581458
self.assert_(np.array_equal(tmp.values, res_values))
14591459

1460+
def test_int32_overflow(self):
1461+
B = np.concatenate((np.arange(100000), np.arange(100000),
1462+
np.arange(50000)))
1463+
A = np.arange(250000)
1464+
df = DataFrame({'A' : A, 'B' : B, 'C' : np.random.randn(250000)})
1465+
1466+
left = df.groupby(['A', 'B']).sum()
1467+
right = df.groupby(['B', 'A']).sum()
1468+
self.assert_(len(left) == len(right))
14601469

14611470
def test_decons():
14621471
from pandas.core.groupby import decons_group_index, get_group_index

pandas/tools/plotting.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,21 +22,24 @@ def hist(data, column, by=None, ax=None, fontsize=None):
2222
return ax
2323

2424
def grouped_hist(data, column=None, by=None, ax=None, bins=50, log=False,
25-
figsize=None, layout=None):
25+
figsize=None, layout=None, sharex=False, sharey=False,
26+
rot=90):
2627
"""
2728
2829
Returns
2930
-------
3031
fig : matplotlib.Figure
3132
"""
32-
if isinstance(data, DataFrame):
33-
data = data[column]
33+
# if isinstance(data, DataFrame):
34+
# data = data[column]
3435

3536
def plot_group(group, ax):
3637
ax.hist(group.dropna(), bins=bins)
37-
fig, axes = _grouped_plot(plot_group, data, by=by, sharex=False,
38-
sharey=False, figsize=figsize,
39-
layout=layout)
38+
39+
fig, axes = _grouped_plot(plot_group, data, column=column,
40+
by=by, sharex=sharex, sharey=sharey,
41+
figsize=figsize, layout=layout,
42+
rot=rot)
4043
fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9,
4144
hspace=0.3, wspace=0.2)
4245
return fig
@@ -45,7 +48,7 @@ def plot_group(group, ax):
4548
def boxplot(data, column=None, by=None, ax=None, fontsize=None,
4649
rot=0, grid=True, figsize=None):
4750
"""
48-
Make a box plot from DataFrame column optionally grouped by some columns or
51+
Make a box plot from DataFrame column optionally grouped by some columns or
4952
other inputs
5053
5154
Parameters
@@ -142,15 +145,19 @@ def plot_group(group, ax):
142145

143146
return fig
144147

145-
def _grouped_plot(plotf, data, by=None, numeric_only=True, figsize=None,
146-
sharex=True, sharey=True, layout=None):
148+
def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True,
149+
figsize=None, sharex=True, sharey=True, layout=None,
150+
rot=0):
147151
import matplotlib.pyplot as plt
148152

149153
# allow to specify mpl default with 'default'
150154
if figsize is None or figsize == 'default':
151155
figsize = (10, 5) # our default
152156

153157
grouped = data.groupby(by)
158+
if column is not None:
159+
grouped = grouped[column]
160+
154161
ngroups = len(grouped)
155162

156163
nrows, ncols = layout or _get_layout(ngroups)

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,8 @@
164164

165165
MAJOR = 0
166166
MINOR = 7
167-
MICRO = 1
168-
ISRELEASED = True
167+
MICRO = 2
168+
ISRELEASED = False
169169
VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO)
170170
QUALIFIER = ''
171171

0 commit comments

Comments
 (0)