Skip to content

BUG: groupby.hist legend should use group keys #33493

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Jun 22, 2020
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,8 @@ Other enhancements
- :meth:`HDFStore.put` now accepts `track_times` parameter. Parameter is passed to ``create_table`` method of ``PyTables`` (:issue:`32682`).
- Make :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Expanding` iterable(:issue:`11704`)
- Make ``option_context`` a :class:`contextlib.ContextDecorator`, which allows it to be used as a decorator over an entire function (:issue:`34253`).
- :meth:`DataFrame.hist`, :meth:`Series.hist`, :meth:`core.groupby.DataFrameGroupBy.hist`, and :meth:`core.groupby.SeriesGroupBy.hist` have gained the ``legend`` argument. Set to True to show a legend in the histogram. (:issue:`6279`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DataFrameGroupBy and SeriesGroupBy aren't in the top-level namespace. It should be something like core.groupby.DataFrameGroupBy. See doc/source/reference/groupby.rst for the right path.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah - thanks!



.. ---------------------------------------------------------------------------

Expand Down
61 changes: 40 additions & 21 deletions pandas/plotting/_core.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,33 @@
import importlib
from typing import TYPE_CHECKING, Optional, Sequence, Tuple, Union

from pandas._config import get_option

from pandas._typing import Label
from pandas.util._decorators import Appender, Substitution

from pandas.core.dtypes.common import is_integer, is_list_like
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries

from pandas.core.base import PandasObject

if TYPE_CHECKING:
from pandas import DataFrame


def hist_series(
self,
by=None,
ax=None,
grid=True,
xlabelsize=None,
xrot=None,
ylabelsize=None,
yrot=None,
figsize=None,
bins=10,
backend=None,
grid: bool = True,
xlabelsize: Optional[int] = None,
xrot: Optional[float] = None,
ylabelsize: Optional[int] = None,
yrot: Optional[float] = None,
figsize: Optional[Tuple[int, int]] = None,
bins: Union[int, Sequence[int]] = 10,
legend: bool = False,
backend: Optional[str] = None,
**kwargs,
):
"""
Expand Down Expand Up @@ -50,6 +56,11 @@ def hist_series(
bin edges are calculated and returned. If bins is a sequence, gives
bin edges, including left edge of first bin and right edge of last
bin. In this case, bins is returned unmodified.
legend : bool, default False
Whether to show the legend.

.. versionadded:: 1.1.0

backend : str, default None
Backend to use instead of the backend specified in the option
``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
Expand Down Expand Up @@ -82,26 +93,28 @@ def hist_series(
yrot=yrot,
figsize=figsize,
bins=bins,
legend=legend,
**kwargs,
)


def hist_frame(
data,
column=None,
data: "DataFrame",
column: Union[Label, Sequence[Label]] = None,
by=None,
grid=True,
xlabelsize=None,
xrot=None,
ylabelsize=None,
yrot=None,
grid: bool = True,
xlabelsize: Optional[int] = None,
xrot: Optional[float] = None,
ylabelsize: Optional[int] = None,
yrot: Optional[float] = None,
ax=None,
sharex=False,
sharey=False,
figsize=None,
layout=None,
bins=10,
backend=None,
sharex: bool = False,
sharey: bool = False,
figsize: Optional[Tuple[int, int]] = None,
layout: Optional[Tuple[int, int]] = None,
bins: Union[int, Sequence[int]] = 10,
legend: bool = False,
backend: Optional[str] = None,
**kwargs,
):
"""
Expand Down Expand Up @@ -154,6 +167,11 @@ def hist_frame(
bin edges are calculated and returned. If bins is a sequence, gives
bin edges, including left edge of first bin and right edge of last
bin. In this case, bins is returned unmodified.
legend : bool, default False
Whether to show the legend.

.. versionadded:: 1.1.0

backend : str, default None
Backend to use instead of the backend specified in the option
``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
Expand Down Expand Up @@ -203,6 +221,7 @@ def hist_frame(
sharey=sharey,
figsize=figsize,
layout=layout,
legend=legend,
bins=bins,
**kwargs,
)
Expand Down
32 changes: 31 additions & 1 deletion pandas/plotting/_matplotlib/hist.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ def _grouped_hist(
xrot=None,
ylabelsize=None,
yrot=None,
legend=False,
**kwargs,
):
"""
Expand All @@ -243,15 +244,26 @@ def _grouped_hist(
sharey : bool, default False
rot : int, default 90
grid : bool, default True
legend : bool, default False
kwargs : dict, keyword arguments passed to matplotlib.Axes.hist

Returns
-------
collection of Matplotlib Axes
"""
if legend:
if isinstance(data, ABCDataFrame):
if column is None:
kwargs["label"] = data.columns
else:
kwargs["label"] = column
else:
kwargs["label"] = data.name

def plot_group(group, ax):
ax.hist(group.dropna().values, bins=bins, **kwargs)
if legend:
ax.legend()

if xrot is None:
xrot = rot
Expand Down Expand Up @@ -290,10 +302,14 @@ def hist_series(
yrot=None,
figsize=None,
bins=10,
legend: bool = False,
**kwds,
):
import matplotlib.pyplot as plt

if legend and "label" in kwds:
raise ValueError("Cannot use both legend and label")

if by is None:
if kwds.get("layout", None) is not None:
raise ValueError("The 'layout' keyword is not supported when 'by' is None")
Expand All @@ -308,8 +324,11 @@ def hist_series(
elif ax.get_figure() != fig:
raise AssertionError("passed axis not bound to passed figure")
values = self.dropna().values

if legend:
kwds["label"] = self.name
ax.hist(values, bins=bins, **kwds)
if legend:
ax.legend()
ax.grid(grid)
axes = np.array([ax])

Expand All @@ -334,6 +353,7 @@ def hist_series(
xrot=xrot,
ylabelsize=ylabelsize,
yrot=yrot,
legend=legend,
**kwds,
)

Expand All @@ -358,8 +378,11 @@ def hist_frame(
figsize=None,
layout=None,
bins=10,
legend: bool = False,
**kwds,
):
if legend and "label" in kwds:
raise ValueError("Cannot use both legend and label")
if by is not None:
axes = _grouped_hist(
data,
Expand All @@ -376,6 +399,7 @@ def hist_frame(
xrot=xrot,
ylabelsize=ylabelsize,
yrot=yrot,
legend=legend,
**kwds,
)
return axes
Expand All @@ -401,11 +425,17 @@ def hist_frame(
)
_axes = _flatten(axes)

can_set_label = "label" not in kwds

for i, col in enumerate(data.columns):
ax = _axes[i]
if legend and can_set_label:
kwds["label"] = col
ax.hist(data[col].dropna().values, bins=bins, **kwds)
ax.set_title(col)
ax.grid(grid)
if legend:
ax.legend()

_set_ticks_props(
axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot
Expand Down
51 changes: 50 additions & 1 deletion pandas/tests/plotting/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@


import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas import DataFrame, Series
from pandas import DataFrame, Index, Series
import pandas._testing as tm
from pandas.tests.plotting.common import TestPlotBase

Expand Down Expand Up @@ -65,3 +66,51 @@ def test_plot_kwargs(self):

res = df.groupby("z").plot.scatter(x="x", y="y")
assert len(res["a"].collections) == 1

@pytest.mark.parametrize("column, expected_axes_num", [(None, 2), ("b", 1)])
@pytest.mark.parametrize("label", [None, "d"])
def test_groupby_hist_frame_with_legend(self, column, expected_axes_num, label):
    """
    Check ``DataFrameGroupBy.hist`` called with ``legend=True`` (GH 6279).

    When ``label`` is also supplied the call must raise ``ValueError``;
    otherwise every group's axes must have the expected shape and each
    axis must carry the expected legend labels.
    """
    group_keys = Index(15 * ["1"] + 15 * ["2"], name="c")
    frame = DataFrame(np.random.randn(30, 2), index=group_keys, columns=["a", "b"])
    grouped = frame.groupby("c")

    hist_kwargs = {"legend": True, "column": column}

    if label is not None:
        # An explicit label conflicts with the automatically generated legend.
        hist_kwargs["label"] = label
        with pytest.raises(ValueError, match="Cannot use both legend and label"):
            grouped.hist(**hist_kwargs)
        return

    layout = (1, expected_axes_num)
    # No label was given on this path, so the legend falls back to the
    # selected column or, without a selection, to one entry per column.
    labels_per_axis = column or [["a"], ["b"]]

    for group_axes in grouped.hist(**hist_kwargs):
        self._check_axes_shape(group_axes, axes_num=expected_axes_num, layout=layout)
        for ax, wanted in zip(group_axes[0], labels_per_axis):
            self._check_legend_labels(ax, wanted)

@pytest.mark.parametrize(
    "label, expected_label", [(None, ["1", "2"]), ("d", ["d", "d"])]
)
def test_groupby_hist_series_with_legend(self, label, expected_label):
    """
    Check groupby ``hist`` on a single column with ``legend=True`` (GH 6279).

    Without a label, each axis returned by ``g["a"].hist`` must be a single
    1x1 plot whose legend matches ``expected_label``. With a label the call
    must raise ``ValueError``.
    """
    group_keys = Index(15 * ["1"] + 15 * ["2"], name="c")
    frame = DataFrame(np.random.randn(30, 2), index=group_keys, columns=["a", "b"])
    grouped = frame.groupby("c")

    # "label" is only added when it is set: passing "label": None
    # through kwargs triggers warnings.
    hist_kwargs = {"legend": True}

    if label is None:
        for ax in grouped["a"].hist(**hist_kwargs):
            self._check_axes_shape(ax, axes_num=1, layout=(1, 1))
            self._check_legend_labels(ax, expected_label)
        return

    hist_kwargs["label"] = label
    # NOTE(review): this branch calls hist on the frame-level groupby, not on
    # grouped["a"] as the success branch does -- confirm that is intended.
    with pytest.raises(ValueError, match="Cannot use both legend and label"):
        grouped.hist(**hist_kwargs)
59 changes: 58 additions & 1 deletion pandas/tests/plotting/test_hist_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import pandas.util._test_decorators as td

from pandas import DataFrame, Series
from pandas import DataFrame, Index, Series
import pandas._testing as tm
from pandas.tests.plotting.common import TestPlotBase, _check_plot_works

Expand Down Expand Up @@ -129,6 +129,32 @@ def test_plot_fails_when_ax_differs_from_figure(self):
with pytest.raises(AssertionError):
self.ts.hist(ax=ax1, figure=fig2)

@pytest.mark.parametrize(
    "by, expected_axes_num, expected_layout", [(None, 1, (1, 1)), ("b", 2, (1, 2))]
)
@pytest.mark.parametrize("label, expected_label", [(None, "a"), ("c", "c")])
def test_hist_with_legend(
    self, by, expected_axes_num, expected_layout, label, expected_label
):
    """
    Check ``Series.hist`` called with ``legend=True`` (GH 6279).

    Supplying ``label`` together with ``legend=True`` must raise
    ``ValueError``. Otherwise the plot must have the expected number of
    axes and layout, and the legend must match ``expected_label``.
    """
    group_keys = 15 * ["1"] + 15 * ["2"]
    ser = Series(np.random.randn(30), index=group_keys, name="a")
    ser.index.name = "b"

    hist_kwargs = {"legend": True, "by": by}

    if label is not None:
        hist_kwargs["label"] = label
        with pytest.raises(ValueError, match="Cannot use both legend and label"):
            ser.hist(**hist_kwargs)
        return

    plotted = _check_plot_works(ser.hist, **hist_kwargs)
    self._check_axes_shape(
        plotted, axes_num=expected_axes_num, layout=expected_layout
    )
    self._check_legend_labels(plotted, expected_label)


@td.skip_if_no_mpl
class TestDataFramePlots(TestPlotBase):
Expand Down Expand Up @@ -293,6 +319,37 @@ def test_hist_column_order_unchanged(self, column, expected):

assert result == expected

@pytest.mark.parametrize("by", [None, "c"])
@pytest.mark.parametrize("column", [None, "b"])
@pytest.mark.parametrize("label", [None, "d"])
def test_hist_with_legend(self, by, column, label):
    """
    Check ``DataFrame.hist`` called with ``legend=True`` (GH 6279).

    Supplying ``label`` together with ``legend=True`` must raise
    ``ValueError``. Otherwise the axes must have the expected shape and
    each axis must carry the expected legend labels.
    """
    group_keys = Index(15 * ["1"] + 15 * ["2"], name="c")
    frame = DataFrame(np.random.randn(30, 2), index=group_keys, columns=["a", "b"])

    hist_kwargs = {"legend": True, "by": by, "column": column}

    if label is not None:
        hist_kwargs["label"] = label
        with pytest.raises(ValueError, match="Cannot use both legend and label"):
            frame.hist(**hist_kwargs)
        return

    # A single axis is expected only for one selected column without grouping.
    if by is None and column is not None:
        n_axes = 1
    else:
        n_axes = 2
    layout = (1, n_axes)

    # No label was given on this path, so the legend shows the selected
    # column or, without a selection, both column names.
    wanted_labels = column or ["a", "b"]
    if by is not None:
        # With grouping, the same labels are expected on both axes.
        wanted_labels = [wanted_labels] * 2

    plotted = _check_plot_works(frame.hist, **hist_kwargs)
    self._check_axes_shape(plotted, axes_num=n_axes, layout=layout)

    if by is None and column is None:
        # Without grouping or column selection, iterate the first row of axes.
        plotted = plotted[0]
    for wanted, ax in zip(wanted_labels, plotted):
        self._check_legend_labels(ax, wanted)


@td.skip_if_no_mpl
class TestDataFrameGroupByPlots(TestPlotBase):
Expand Down