Skip to content

BUG: Boxplots ignore xlabel and boxplot() doesn't support horizontal layout #45465

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jan 24, 2022
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,9 @@ Period
Plotting
^^^^^^^^
- Bug in :meth:`DataFrame.plot.barh` that prevented labeling the x-axis and caused ``xlabel`` to update the y-axis label instead (:issue:`45144`)
- Bug in :meth:`DataFrame.plot.box` that prevented labeling the x-axis (:issue:`45463`)
- Bug in :meth:`DataFrame.boxplot` that prevented passing in ``xlabel`` and ``ylabel`` (:issue:`45463`)
- Bug in :meth:`DataFrame.boxplot` that prevented specifying ``vert=False`` (:issue:`36918`)
-

Groupby/resample/rolling
Expand Down
50 changes: 38 additions & 12 deletions pandas/plotting/_matplotlib/boxplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,11 @@ def _make_legend(self):
pass

def _post_plot_logic(self, ax, data):
pass
# GH 45465: make sure that the boxplot doesn't ignore xlabel/ylabel
if self.xlabel:
ax.set_xlabel(pprint_thing(self.xlabel))
if self.ylabel:
ax.set_ylabel(pprint_thing(self.ylabel))

@property
def orientation(self):
Expand Down Expand Up @@ -237,20 +241,31 @@ def _grouped_plot_by_column(
columns = data._get_numeric_data().columns.difference(by)
naxes = len(columns)
fig, axes = create_subplots(
naxes=naxes, sharex=True, sharey=True, figsize=figsize, ax=ax, layout=layout
naxes=naxes,
sharex=kwargs.pop("sharex", True),
sharey=kwargs.pop("sharey", True),
figsize=figsize,
ax=ax,
layout=layout,
)

_axes = flatten_axes(axes)

# GH 45465: move the "by" label based on "vert"
xlabel, ylabel = kwargs.pop("xlabel", None), kwargs.pop("ylabel", None)
if kwargs.get("vert", True):
xlabel = xlabel or by
else:
ylabel = ylabel or by

ax_values = []

for i, col in enumerate(columns):
ax = _axes[i]
gp_col = grouped[col]
keys, values = zip(*gp_col)
re_plotf = plotf(keys, values, ax, **kwargs)
re_plotf = plotf(keys, values, ax, xlabel=xlabel, ylabel=ylabel, **kwargs)
ax.set_title(col)
ax.set_xlabel(pprint_thing(by))
ax_values.append(re_plotf)
ax.grid(grid)

Expand Down Expand Up @@ -332,18 +347,28 @@ def maybe_color_bp(bp, **kwds):
if not kwds.get("capprops"):
setp(bp["caps"], color=colors[3], alpha=1)

def plot_group(keys, values, ax: Axes):
def plot_group(keys, values, ax: Axes, **kwds):
# GH 45465: xlabel/ylabel need to be popped out before plotting happens
xlabel, ylabel = kwds.pop("xlabel", None), kwds.pop("ylabel", None)
if xlabel:
ax.set_xlabel(pprint_thing(xlabel))
if ylabel:
ax.set_ylabel(pprint_thing(ylabel))

keys = [pprint_thing(x) for x in keys]
values = [np.asarray(remove_na_arraylike(v), dtype=object) for v in values]
bp = ax.boxplot(values, **kwds)
if fontsize is not None:
ax.tick_params(axis="both", labelsize=fontsize)
if kwds.get("vert", 1):
ticks = ax.get_xticks()
if len(ticks) != len(keys):
i, remainder = divmod(len(ticks), len(keys))
assert remainder == 0, remainder
keys *= i

# GH 45465: x/y are flipped when "vert" changes
is_vertical = kwds.get("vert", True)
ticks = ax.get_xticks() if is_vertical else ax.get_yticks()
if len(ticks) != len(keys):
i, remainder = divmod(len(ticks), len(keys))
assert remainder == 0, remainder
keys *= i
if is_vertical:
ax.set_xticklabels(keys, rotation=rot)
else:
ax.set_yticklabels(keys, rotation=rot)
Expand Down Expand Up @@ -379,6 +404,7 @@ def plot_group(keys, values, ax: Axes):
ax=ax,
layout=layout,
return_type=return_type,
**kwds,
)
else:
if return_type is None:
Expand All @@ -401,7 +427,7 @@ def plot_group(keys, values, ax: Axes):
else:
data = data[columns]

result = plot_group(columns, data.values.T, ax)
result = plot_group(columns, data.values.T, ax, **kwds)
ax.grid(grid)

return result
Expand Down
51 changes: 51 additions & 0 deletions pandas/tests/plotting/test_boxplot_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
_check_plot_works,
)

from pandas.io.formats.printing import pprint_thing
import pandas.plotting as plotting

pytestmark = pytest.mark.slow
Expand Down Expand Up @@ -254,6 +255,56 @@ def test_specified_props_kwd(self, props, expected):

assert result[expected][0].get_color() == "C1"

@pytest.mark.parametrize("vert", [True, False])
def test_plot_xlabel_ylabel(self, vert):
    # DataFrame.plot(kind="box") must keep the explicit axis labels for
    # both vertical and horizontal boxes.
    frame = DataFrame(
        {
            "a": np.random.randn(100),
            "b": np.random.randn(100),
            "group": np.random.choice(["group1", "group2"], 100),
        }
    )
    expected_x = "x"
    expected_y = "y"

    axes = frame.plot(kind="box", vert=vert, xlabel=expected_x, ylabel=expected_y)

    assert axes.get_xlabel() == expected_x
    assert axes.get_ylabel() == expected_y

@pytest.mark.parametrize("vert", [True, False])
def test_boxplot_xlabel_ylabel(self, vert):
    # DataFrame.boxplot must accept xlabel/ylabel and put them on the
    # returned axes for both values of ``vert``.
    frame = DataFrame(
        {
            "a": np.random.randn(100),
            "b": np.random.randn(100),
            "group": np.random.choice(["group1", "group2"], 100),
        }
    )
    expected_x = "x"
    expected_y = "y"

    axes = frame.boxplot(vert=vert, xlabel=expected_x, ylabel=expected_y)

    assert axes.get_xlabel() == expected_x
    assert axes.get_ylabel() == expected_y

@pytest.mark.parametrize("vert", [True, False])
def test_boxplot_group_xlabel_ylabel(self, vert):
    # Grouped boxplots: explicit labels must appear on every subplot;
    # without them, the "by" label is expected on the x-axis when the
    # boxes are vertical and on the y-axis when they are horizontal.
    frame = DataFrame(
        {
            "a": np.random.randn(100),
            "b": np.random.randn(100),
            "group": np.random.choice(["group1", "group2"], 100),
        }
    )
    expected_x = "x"
    expected_y = "y"

    labelled = frame.boxplot(
        by="group", vert=vert, xlabel=expected_x, ylabel=expected_y
    )
    for sub_ax in labelled:
        assert sub_ax.get_xlabel() == expected_x
        assert sub_ax.get_ylabel() == expected_y
    self.plt.close()

    defaulted = frame.boxplot(by="group", vert=vert)
    expected_by = pprint_thing(["group"])
    for sub_ax in defaulted:
        if vert:
            by_label = sub_ax.get_xlabel()
        else:
            by_label = sub_ax.get_ylabel()
        assert by_label == expected_by
    self.plt.close()


@td.skip_if_no_mpl
class TestDataFrameGroupByPlots(TestPlotBase):
Expand Down