Skip to content

Commit 2b6bb03

Browse files
lodagro authored and wesm committed
ENH: implement DataFrameGroupBy.boxplot(), close #1507
1 parent 53ae1d5 commit 2b6bb03

File tree

3 files changed

+99
-0
lines changed

3 files changed

+99
-0
lines changed

pandas/core/groupby.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1816,6 +1816,9 @@ def _wrap_agged_blocks(self, blocks):
18161816

18171817
return result
18181818

1819+
from pandas.tools.plotting import boxplot_frame_groupby
1820+
# Expose the plotting helper imported above as a method, so that a grouped
# frame can be plotted with ``grouped.boxplot(...)``.
DataFrameGroupBy.boxplot = boxplot_frame_groupby
1821+
18191822
class PanelGroupBy(NDFrameGroupBy):
18201823

18211824
def _iterate_slices(self):

pandas/tests/test_graphics.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,38 @@ def test_legend_name(self):
333333
def _check_plot_fails(self, f, *args, **kwargs):
334334
self.assertRaises(Exception, f, *args, **kwargs)
335335

336+
class TestDataFrameGroupByPlots(unittest.TestCase):
    """Smoke tests for ``DataFrameGroupBy.boxplot``."""

    @classmethod
    def setUpClass(cls):
        """Skip the whole class when plotting cannot be exercised.

        The tests are skipped if IPython is already loaded in this process
        or if matplotlib cannot be imported; otherwise the 'Agg' backend is
        selected before any plotting happens.
        """
        import sys
        if 'IPython' in sys.modules:
            raise nose.SkipTest

        try:
            import matplotlib as mpl
            mpl.use('Agg', warn=False)
        except ImportError:
            raise nose.SkipTest

    @slow
    def test_boxplot(self):
        """Box-plot grouped frames with and without per-group subplots."""
        # Grouping by a column of the frame.
        df = DataFrame(np.random.rand(10, 2), columns=['Col1', 'Col2'])
        df['X'] = Series(['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B'])
        grouped = df.groupby(by='X')
        _check_plot_works(grouped.boxplot)
        _check_plot_works(grouped.boxplot, subplots=False)

        # Materialize the pairs: on Python 3 ``zip`` returns a one-shot
        # iterator, whereas a concrete list of tuples is wanted here.
        tuples = list(zip(string.ascii_letters[:10], range(10)))
        df = DataFrame(np.random.rand(10, 3),
                       index=MultiIndex.from_tuples(tuples))

        # Grouping by an index level (rows).
        grouped = df.groupby(level=1)
        _check_plot_works(grouped.boxplot)
        _check_plot_works(grouped.boxplot, subplots=False)

        # Grouping by a column level (axis=1).
        grouped = df.unstack(level=1).groupby(level=0, axis=1)
        _check_plot_works(grouped.boxplot)
        _check_plot_works(grouped.boxplot, subplots=False)
367+
336368
# File name of a PNG image; presumably where the plot-checking helper below
# writes rendered figures -- TODO confirm against _check_plot_works.
PNG_PATH = 'tmp.png'
337369

338370
def _check_plot_works(f, *args, **kwargs):

pandas/tools/plotting.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1226,6 +1226,70 @@ def hist_series(self, ax=None, grid=True, xlabelsize=None, xrot=None,
12261226

12271227
return ax
12281228

1229+
def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None,
                          rot=0, grid=True, figsize=None, **kwds):
    """
    Make box plots from DataFrameGroupBy data.

    Parameters
    ----------
    grouped : DataFrameGroupBy
        Grouped data; iterated as ``(key, group)`` pairs
    subplots :
        * ``False`` - no subplots will be used
        * ``True`` - create a subplot for each group
    column : column name or list of names, or vector
        Can be any valid input to groupby
    fontsize : int or string
    rot : label rotation angle
    grid : passed through to the ``boxplot`` call of each frame
    figsize : passed through to the ``boxplot`` call of each frame
    kwds : other plotting keyword arguments to be passed to matplotlib boxplot
           function

    Returns
    -------
    dict of key/value = group key/DataFrame.boxplot return value
    or DataFrame.boxplot return value in case subplots=False

    Examples
    --------
    >>> import pandas
    >>> import numpy as np
    >>> import itertools
    >>>
    >>> tuples = [t for t in itertools.product(range(1000), range(4))]
    >>> index = pandas.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
    >>> data = np.random.randn(len(index),4)
    >>> df = pandas.DataFrame(data, columns=list('ABCD'), index=index)
    >>>
    >>> grouped = df.groupby(level='lvl1')
    >>> boxplot_frame_groupby(grouped)
    >>>
    >>> grouped = df.unstack(level='lvl1').groupby(level=0, axis=1)
    >>> boxplot_frame_groupby(grouped, subplots=False)
    """
    # Plain truthiness rather than ``is True`` so that values such as ``1`` or
    # ``numpy.True_`` are not silently treated as "no subplots".
    if subplots:
        nrows, ncols = _get_layout(len(grouped))
        _, axes = _subplots(nrows=nrows, ncols=ncols, squeeze=False,
                            sharex=False, sharey=True)
        # squeeze=False is requested, so ``axes`` is expected to be a 2-D
        # (nrows, ncols) array even for a single group.  Flatten it
        # unconditionally so zip() pairs every group with one Axes instead of
        # with a row of the grid.
        axes = axes.reshape(-1)

        ret = {}
        # zip() stops at the shorter input, so surplus axes in the layout
        # simply receive no plot.
        for (key, group), ax in zip(grouped, axes):
            d = group.boxplot(ax=ax, column=column, fontsize=fontsize,
                              rot=rot, grid=grid, figsize=figsize, **kwds)
            ax.set_title(_stringify(key))
            ret[key] = d
    else:
        from pandas.tools.merge import concat
        keys, frames = zip(*grouped)
        if grouped.axis == 0:
            # Row-wise groups: put the groups side by side, keyed by group.
            df = concat(frames, keys=keys, axis=1)
        else:
            # Column-wise groups: join the pieces back into a single frame.
            if len(frames) > 1:
                df = frames[0].join(frames[1::])
            else:
                df = frames[0]
        ret = df.boxplot(column=column, fontsize=fontsize, rot=rot,
                         grid=grid, figsize=figsize, **kwds)
    return ret
12291293

12301294
def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True,
12311295
figsize=None, sharex=True, sharey=True, layout=None,

0 commit comments

Comments
 (0)