Skip to content

BUG: Boxplots ignore xlabel and boxplot() doesn't support horizontal layout #45465

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jan 24, 2022
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,9 @@ Period
Plotting
^^^^^^^^
- Bug in :meth:`DataFrame.plot.barh` that prevented labeling the x-axis and caused ``xlabel`` to update the y-axis label instead (:issue:`45144`)
- Bug in :meth:`DataFrame.plot.box` that prevented labeling the x-axis (:issue:`45463`)
- Bug in :meth:`DataFrame.boxplot` that prevented passing in ``xlabel`` and ``ylabel`` (:issue:`45463`)
- Bug in :meth:`DataFrame.boxplot` that prevented specifying ``vert=False`` (:issue:`36918`)
-

Groupby/resample/rolling
Expand Down
46 changes: 34 additions & 12 deletions pandas/plotting/_matplotlib/boxplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,11 @@ def _make_legend(self):
pass

def _post_plot_logic(self, ax, data):
pass
# GH 45465: make sure that the boxplot doesn't ignore xlabel/ylabel
if self.xlabel:
ax.set_xlabel(pprint_thing(self.xlabel))
if self.ylabel:
ax.set_ylabel(pprint_thing(self.ylabel))

@property
def orientation(self):
Expand Down Expand Up @@ -237,20 +241,27 @@ def _grouped_plot_by_column(
columns = data._get_numeric_data().columns.difference(by)
naxes = len(columns)
fig, axes = create_subplots(
naxes=naxes, sharex=True, sharey=True, figsize=figsize, ax=ax, layout=layout
naxes=naxes, sharex=kwargs.pop("sharex", True),
sharey=kwargs.pop("sharey", True), figsize=figsize, ax=ax, layout=layout
)

_axes = flatten_axes(axes)

# GH 45465: move the "by" label based on "vert"
xlabel, ylabel = kwargs.pop("xlabel", None), kwargs.pop("ylabel", None)
if kwargs.get("vert", 1):
xlabel = xlabel or by
else:
ylabel = ylabel or by

ax_values = []

for i, col in enumerate(columns):
ax = _axes[i]
gp_col = grouped[col]
keys, values = zip(*gp_col)
re_plotf = plotf(keys, values, ax, **kwargs)
re_plotf = plotf(keys, values, ax, xlabel=xlabel, ylabel=ylabel, **kwargs)
ax.set_title(col)
ax.set_xlabel(pprint_thing(by))
ax_values.append(re_plotf)
ax.grid(grid)

Expand Down Expand Up @@ -332,18 +343,28 @@ def maybe_color_bp(bp, **kwds):
if not kwds.get("capprops"):
setp(bp["caps"], color=colors[3], alpha=1)

def plot_group(keys, values, ax: Axes):
def plot_group(keys, values, ax: Axes, **kwds):
# GH 45465: xlabel/ylabel need to be popped out before plotting happens
xlabel, ylabel = kwds.pop('xlabel', None), kwds.pop('ylabel', None)
if xlabel:
ax.set_xlabel(pprint_thing(xlabel))
if ylabel:
ax.set_ylabel(pprint_thing(ylabel))

keys = [pprint_thing(x) for x in keys]
values = [np.asarray(remove_na_arraylike(v), dtype=object) for v in values]
bp = ax.boxplot(values, **kwds)
if fontsize is not None:
ax.tick_params(axis="both", labelsize=fontsize)
if kwds.get("vert", 1):
ticks = ax.get_xticks()
if len(ticks) != len(keys):
i, remainder = divmod(len(ticks), len(keys))
assert remainder == 0, remainder
keys *= i

# GH 45465: x/y are flipped when "vert" changes
is_vertical = kwds.get("vert", 1)
ticks = ax.get_xticks() if is_vertical else ax.get_yticks()
if len(ticks) != len(keys):
i, remainder = divmod(len(ticks), len(keys))
assert remainder == 0, remainder
keys *= i
if is_vertical:
ax.set_xticklabels(keys, rotation=rot)
else:
ax.set_yticklabels(keys, rotation=rot)
Expand Down Expand Up @@ -379,6 +400,7 @@ def plot_group(keys, values, ax: Axes):
ax=ax,
layout=layout,
return_type=return_type,
**kwds
)
else:
if return_type is None:
Expand All @@ -401,7 +423,7 @@ def plot_group(keys, values, ax: Axes):
else:
data = data[columns]

result = plot_group(columns, data.values.T, ax)
result = plot_group(columns, data.values.T, ax, **kwds)
ax.grid(grid)

return result
Expand Down
49 changes: 49 additions & 0 deletions pandas/tests/plotting/test_boxplot_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import itertools
import string

from pandas.io.formats.printing import pprint_thing

import numpy as np
import pytest

Expand Down Expand Up @@ -254,6 +256,53 @@ def test_specified_props_kwd(self, props, expected):

assert result[expected][0].get_color() == "C1"

def test_xlabel_ylabel(self):
    """Check that box plots honour ``xlabel``/``ylabel`` in every entry point.

    Covers ``DataFrame.plot(kind="box")``, ``DataFrame.boxplot`` and the
    grouped ``DataFrame.boxplot(by=...)`` variant, each in the default
    orientation and with ``vert=False``.  Also checks that, when no labels
    are passed, the grouped plot puts the ``by`` label on the x-axis by
    default and on the y-axis when ``vert=False``.
    """
    # The values are irrelevant here; only the axis labels are inspected.
    df = DataFrame(
        {
            "a": np.random.randn(100),
            "b": np.random.randn(100),
            "group": np.random.choice(["group1", "group2"], 100),
        }
    )
    xlabel, ylabel = "x", "y"

    # DataFrame.plot(kind="box"), default orientation
    ax = df.plot(kind="box", xlabel=xlabel, ylabel=ylabel)
    assert ax.get_xlabel() == xlabel
    assert ax.get_ylabel() == ylabel
    self.plt.close()

    # DataFrame.plot(kind="box"), vert=False
    ax = df.plot(kind="box", vert=False, xlabel=xlabel, ylabel=ylabel)
    assert ax.get_xlabel() == xlabel
    assert ax.get_ylabel() == ylabel
    self.plt.close()

    # DataFrame.boxplot, default orientation
    ax = df.boxplot(xlabel=xlabel, ylabel=ylabel)
    assert ax.get_xlabel() == xlabel
    assert ax.get_ylabel() == ylabel
    self.plt.close()

    # DataFrame.boxplot, vert=False
    ax = df.boxplot(vert=False, xlabel=xlabel, ylabel=ylabel)
    assert ax.get_xlabel() == xlabel
    assert ax.get_ylabel() == ylabel
    self.plt.close()

    # Grouped boxplot: explicit labels must be applied to every subplot.
    ax = df.boxplot(by="group", xlabel=xlabel, ylabel=ylabel)
    for subplot in ax:
        assert subplot.get_xlabel() == xlabel
        assert subplot.get_ylabel() == ylabel
    # Close this figure too, so it does not stay open during the next case
    # (every other case in this test closes its figure).
    self.plt.close()

    ax = df.boxplot(by="group", vert=False, xlabel=xlabel, ylabel=ylabel)
    for subplot in ax:
        assert subplot.get_xlabel() == xlabel
        assert subplot.get_ylabel() == ylabel
    self.plt.close()

    # Grouped boxplot without explicit labels: the "by" label is expected
    # on the x-axis in the default orientation ...
    ax = df.boxplot(by="group")
    for subplot in ax:
        assert subplot.get_xlabel() == pprint_thing(["group"])
    self.plt.close()

    # ... and on the y-axis when vert=False.
    ax = df.boxplot(by="group", vert=False)
    for subplot in ax:
        assert subplot.get_ylabel() == pprint_thing(["group"])
    self.plt.close()


@td.skip_if_no_mpl
class TestDataFrameGroupByPlots(TestPlotBase):
Expand Down