Skip to content

CLN: Simplify boxplot and tests #7351

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 17, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 70 additions & 75 deletions pandas/tests/test_graphics.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,54 @@ def _check_has_errorbars(self, axes, xerr=0, yerr=0):
self.assertEqual(xerr, xerr_count)
self.assertEqual(yerr, yerr_count)

def _check_box_return_type(self, returned, return_type, expected_keys=None):
    """
    Check box returned type is correct

    Parameters
    ----------
    returned : object to be tested, returned from boxplot
    return_type : str or None
        return_type passed to boxplot. One of 'dict', 'axes', 'both',
        or None for the current default behaviour.
    expected_keys : list-like, optional
        group labels in subplot case. If not passed,
        the function checks assuming boxplot uses single ax

    Notes
    -----
    An unrecognised ``return_type`` is reported as a test failure up
    front, rather than surfacing as a ``KeyError`` from the type lookup.
    """
    from matplotlib.axes import Axes
    types = {'dict': dict, 'axes': Axes, 'both': tuple}

    # should be fixed when the returning default is changed
    if return_type is None:
        if expected_keys is None:
            # single-ax case: the default is checked as a dict
            return_type = 'dict'
        else:
            # subplot case: the default result is only checked to be
            # made of Axes (after passing through _flatten_visible)
            for r in self._flatten_visible(returned):
                self.assertIsInstance(r, Axes)
            return

    # Validate before any lookup so that a bad value fails the test with
    # a readable message instead of erroring with KeyError.
    if return_type not in types:
        self.fail("unexpected return_type: %r" % (return_type,))
    expected_type = types[return_type]

    if expected_keys is None:
        self.assertIsInstance(returned, expected_type)
        if return_type == 'both':
            self.assertIsInstance(returned.ax, Axes)
            self.assertIsInstance(returned.lines, dict)
        return

    self.assertIsInstance(returned, OrderedDict)
    self.assertEqual(sorted(returned.keys()), sorted(expected_keys))
    for key, value in iteritems(returned):
        self.assertIsInstance(value, expected_type)
        # check returned dict has correct mapping
        if return_type == 'axes':
            self.assertEqual(value.get_title(), key)
        elif return_type == 'both':
            self.assertEqual(value.ax.get_title(), key)
            self.assertIsInstance(value.ax, Axes)
            self.assertIsInstance(value.lines, dict)
        else:
            # return_type == 'dict' (validated above): locate the owning
            # axes through the first median line of the boxplot dict
            line = value['medians'][0]
            self.assertEqual(line.get_axes().get_title(), key)


@tm.mplskip
class TestSeriesPlots(TestPlotBase):
Expand Down Expand Up @@ -1421,65 +1469,20 @@ def test_boxplot_return_type(self):

with tm.assert_produces_warning(FutureWarning):
result = df.boxplot()
self.assertIsInstance(result, dict) # change to Axes in future
# change to Axes in future
self._check_box_return_type(result, 'dict')

with tm.assert_produces_warning(False):
result = df.boxplot(return_type='dict')
self.assertIsInstance(result, dict)
self._check_box_return_type(result, 'dict')

with tm.assert_produces_warning(False):
result = df.boxplot(return_type='axes')
self.assertIsInstance(result, mpl.axes.Axes)
self._check_box_return_type(result, 'axes')

with tm.assert_produces_warning(False):
result = df.boxplot(return_type='both')
self.assertIsInstance(result, tuple)

@slow
def test_boxplot_return_type_by(self):
import matplotlib as mpl

df = DataFrame(np.random.randn(10, 2))
df['g'] = ['a'] * 5 + ['b'] * 5

# old style: return_type=None
result = df.boxplot(by='g')
self.assertIsInstance(result, np.ndarray)
self.assertIsInstance(result[0], mpl.axes.Axes)

result = df.boxplot(by='g', return_type='dict')
self.assertIsInstance(result, dict)
self.assertIsInstance(result[0], dict)

result = df.boxplot(by='g', return_type='axes')
self.assertIsInstance(result, dict)
self.assertIsInstance(result[0], mpl.axes.Axes)

result = df.boxplot(by='g', return_type='both')
self.assertIsInstance(result, dict)
self.assertIsInstance(result[0], tuple)
self.assertIsInstance(result[0][0], mpl.axes.Axes)
self.assertIsInstance(result[0][1], dict)

# now for groupby
with tm.assert_produces_warning(FutureWarning):
result = df.groupby('g').boxplot()
self.assertIsInstance(result, dict)
self.assertIsInstance(result['a'], dict)

result = df.groupby('g').boxplot(return_type='dict')
self.assertIsInstance(result, dict)
self.assertIsInstance(result['a'], dict)

result = df.groupby('g').boxplot(return_type='axes')
self.assertIsInstance(result, dict)
self.assertIsInstance(result['a'], mpl.axes.Axes)

result = df.groupby('g').boxplot(return_type='both')
self.assertIsInstance(result, dict)
self.assertIsInstance(result['a'], tuple)
self.assertIsInstance(result['a'][0], mpl.axes.Axes)
self.assertIsInstance(result['a'][1], dict)
self._check_box_return_type(result, 'both')

@slow
def test_kde(self):
Expand Down Expand Up @@ -2278,47 +2281,39 @@ def test_grouped_hist(self):
with tm.assertRaises(AttributeError):
plotting.grouped_hist(df.A, by=df.C, foo='bar')

def _check_box_dict(self, returned, return_type,
                    expected_klass, expected_keys):
    """
    Check a grouped boxplot result maps each expected key to its own axes

    Parameters
    ----------
    returned : object to be tested, must be an OrderedDict
    return_type : str
        return_type passed to boxplot: 'axes', 'both' or 'dict'
    expected_klass : type
        class every value of ``returned`` must be an instance of
    expected_keys : list-like
        keys ``returned`` must contain (compared after sorting)
    """
    is_ordered = isinstance(returned, OrderedDict)
    self.assertTrue(is_ordered)

    actual_keys = sorted(returned.keys())
    self.assertEqual(actual_keys, sorted(expected_keys))

    for label, item in iteritems(returned):
        self.assertTrue(isinstance(item, expected_klass))
        # resolve the title of the axes this entry was drawn on,
        # then check it matches the key it is stored under
        if return_type == 'axes':
            title = item.get_title()
        elif return_type == 'both':
            title = item.ax.get_title()
        elif return_type == 'dict':
            median = item['medians'][0]
            title = median.get_axes().get_title()
        else:
            raise AssertionError
        self.assertEqual(title, label)

@slow
def test_grouped_box_return_type(self):
import matplotlib.axes

df = self.hist_df

# old style: return_type=None
result = df.boxplot(by='gender')
self.assertIsInstance(result, np.ndarray)
self._check_box_return_type(result, None,
expected_keys=['height', 'weight', 'category'])

# now for groupby
with tm.assert_produces_warning(FutureWarning):
result = df.groupby('gender').boxplot()
self._check_box_return_type(result, 'dict', expected_keys=['Male', 'Female'])

columns2 = 'X B C D A G Y N Q O'.split()
df2 = DataFrame(random.randn(50, 10), columns=columns2)
categories2 = 'A B C D E F G H I J'.split()
df2['category'] = categories2 * 5

types = {'dict': dict, 'axes': matplotlib.axes.Axes, 'both': tuple}
for t, klass in iteritems(types):
for t in ['dict', 'axes', 'both']:
returned = df.groupby('classroom').boxplot(return_type=t)
self._check_box_dict(returned, t, klass, ['A', 'B', 'C'])
self._check_box_return_type(returned, t, expected_keys=['A', 'B', 'C'])

returned = df.boxplot(by='classroom', return_type=t)
self._check_box_dict(returned, t, klass, ['height', 'weight', 'category'])
self._check_box_return_type(returned, t,
expected_keys=['height', 'weight', 'category'])

returned = df2.groupby('category').boxplot(return_type=t)
self._check_box_dict(returned, t, klass, categories2)
self._check_box_return_type(returned, t, expected_keys=categories2)

returned = df2.boxplot(by='category', return_type=t)
self._check_box_dict(returned, t, klass, columns2)
self._check_box_return_type(returned, t, expected_keys=columns2)

@slow
def test_grouped_box_layout(self):
Expand Down
103 changes: 39 additions & 64 deletions pandas/tools/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -2323,13 +2323,11 @@ def boxplot(data, column=None, by=None, ax=None, fontsize=None,
if return_type not in valid_types:
raise ValueError("return_type")


from pandas import Series, DataFrame
if isinstance(data, Series):
data = DataFrame({'x': data})
column = 'x'


def _get_colors():
return _get_standard_colors(color=kwds.get('color'), num_colors=1)

Expand All @@ -2340,8 +2338,9 @@ def maybe_color_bp(bp):
setp(bp['whiskers'],color=colors[0],alpha=1)
setp(bp['medians'],color=colors[2],alpha=1)

def plot_group(grouped, ax):
keys, values = zip(*grouped)
BP = namedtuple("Boxplot", ['ax', 'lines']) # namedtuple to hold results

def plot_group(keys, values, ax):
keys = [com.pprint_thing(x) for x in keys]
values = [remove_na(v) for v in values]
bp = ax.boxplot(values, **kwds)
Expand All @@ -2350,7 +2349,14 @@ def plot_group(grouped, ax):
else:
ax.set_yticklabels(keys, rotation=rot, fontsize=fontsize)
maybe_color_bp(bp)
return bp

# Return axes in multiplot case, maybe revisit later # 985
if return_type == 'dict':
return bp
elif return_type == 'both':
return BP(ax=ax, lines=bp)
else:
return ax

colors = _get_colors()
if column is None:
Expand All @@ -2361,56 +2367,14 @@ def plot_group(grouped, ax):
else:
columns = [column]

BP = namedtuple("Boxplot", ['ax', 'lines']) # namedtuple to hold results

if by is not None:
fig, axes, d = _grouped_plot_by_column(plot_group, data, columns=columns,
by=by, grid=grid, figsize=figsize,
ax=ax, layout=layout)

# Return axes in multiplot case, maybe revisit later # 985
if return_type is None:
ret = axes
if return_type == 'axes':
ret = compat.OrderedDict()
axes = _flatten(axes)[:len(d)]
for k, ax in zip(d.keys(), axes):
ret[k] = ax
elif return_type == 'dict':
ret = d
elif return_type == 'both':
ret = compat.OrderedDict()
axes = _flatten(axes)[:len(d)]
for (k, line), ax in zip(d.items(), axes):
ret[k] = BP(ax=ax, lines=line)
result = _grouped_plot_by_column(plot_group, data, columns=columns,
by=by, grid=grid, figsize=figsize,
ax=ax, layout=layout, return_type=return_type)
else:
if layout is not None:
raise ValueError("The 'layout' keyword is not supported when "
"'by' is None")
if ax is None:
ax = _gca()
fig = ax.get_figure()
data = data._get_numeric_data()
if columns:
cols = columns
else:
cols = data.columns
keys = [com.pprint_thing(x) for x in cols]

# Return boxplot dict in single plot case

clean_values = [remove_na(x) for x in data[cols].values.T]

bp = ax.boxplot(clean_values, **kwds)
maybe_color_bp(bp)

if kwds.get('vert', 1):
ax.set_xticklabels(keys, rotation=rot, fontsize=fontsize)
else:
ax.set_yticklabels(keys, rotation=rot, fontsize=fontsize)
ax.grid(grid)

ret = ax

if return_type is None:
msg = ("\nThe default value for 'return_type' will change to "
Expand All @@ -2420,13 +2384,18 @@ def plot_group(grouped, ax):
"return_type='dict'.")
warnings.warn(msg, FutureWarning)
return_type = 'dict'
if return_type == 'dict':
ret = bp
elif return_type == 'both':
ret = BP(ax=ret, lines=bp)
if ax is None:
ax = _gca()
data = data._get_numeric_data()
if columns is None:
columns = data.columns
else:
data = data[columns]

fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2)
return ret
result = plot_group(columns, data.values.T, ax)
ax.grid(grid)

return result


def format_date_labels(ax, rot):
Expand Down Expand Up @@ -2734,7 +2703,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None,
if subplots is True:
naxes = len(grouped)
nrows, ncols = _get_layout(naxes, layout=layout)
_, axes = _subplots(nrows=nrows, ncols=ncols, naxes=naxes, squeeze=False,
fig, axes = _subplots(nrows=nrows, ncols=ncols, naxes=naxes, squeeze=False,
sharex=False, sharey=True)
axes = _flatten(axes)

Expand All @@ -2744,6 +2713,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None,
rot=rot, grid=grid, **kwds)
ax.set_title(com.pprint_thing(key))
ret[key] = d
fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2)
else:
from pandas.tools.merge import concat
keys, frames = zip(*grouped)
Expand Down Expand Up @@ -2795,9 +2765,8 @@ def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True,

def _grouped_plot_by_column(plotf, data, columns=None, by=None,
numeric_only=True, grid=False,
figsize=None, ax=None, layout=None, **kwargs):
from pandas.core.frame import DataFrame

figsize=None, ax=None, layout=None, return_type=None,
**kwargs):
grouped = data.groupby(by)
if columns is None:
if not isinstance(by, (list, tuple)):
Expand All @@ -2818,20 +2787,26 @@ def _grouped_plot_by_column(plotf, data, columns=None, by=None,

ravel_axes = _flatten(axes)

out_dict = compat.OrderedDict()
result = compat.OrderedDict()
for i, col in enumerate(columns):
ax = ravel_axes[i]
gp_col = grouped[col]
re_plotf = plotf(gp_col, ax, **kwargs)
keys, values = zip(*gp_col)
re_plotf = plotf(keys, values, ax, **kwargs)
ax.set_title(col)
ax.set_xlabel(com.pprint_thing(by))
result[col] = re_plotf
ax.grid(grid)
out_dict[col] = re_plotf

# Return axes in multiplot case, maybe revisit later # 985
if return_type is None:
result = axes

byline = by[0] if len(by) == 1 else by
fig.suptitle('Boxplot grouped by %s' % byline)
fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2)

return fig, axes, out_dict
return result


def table(ax, data, rowLabels=None, colLabels=None,
Expand Down