pandas-dev · WillAyd · Jun 22, 2020 · Apr 12, 2020 · May 17, 2020 · May 18, 2020
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -292,6 +292,7 @@ Other enhancements
 - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now accept an ``errors`` argument (:issue:`22610`)
 - :meth:`groupby.transform` now allows ``func`` to be ``pad``, ``backfill`` and ``cumcount`` (:issue:`31269`).
 - :meth:`~pandas.io.json.read_json` now accepts `nrows` parameter. (:issue:`33916`).
+- :meth:`DataFrame.hist`, :meth:`Series.hist`, :meth:`core.groupby.DataFrameGroupBy.hist`, and :meth:`core.groupby.SeriesGroupBy.hist` have gained the ``legend`` argument. Set to True to show a legend in the histogram. (:issue:`6279`)
 - :func:`concat` and :meth:`~DataFrame.append` now preserve extension dtypes, for example
   combining a nullable integer column with a numpy integer column will no longer
   result in object dtype but preserve the integer dtype (:issue:`33607`, :issue:`34339`).

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
@@ -1,27 +1,33 @@
 import importlib
+from typing import TYPE_CHECKING, Optional, Sequence, Tuple, Union
 
 from pandas._config import get_option
 
+from pandas._typing import Label
 from pandas.util._decorators import Appender, Substitution
 
 from pandas.core.dtypes.common import is_integer, is_list_like
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
 
 from pandas.core.base import PandasObject
 
+if TYPE_CHECKING:
+    from pandas import DataFrame
+
 
 def hist_series(
     self,
     by=None,
     ax=None,
-    grid=True,
-    xlabelsize=None,
-    xrot=None,
-    ylabelsize=None,
-    yrot=None,
-    figsize=None,
-    bins=10,
-    backend=None,
+    grid: bool = True,
+    xlabelsize: Optional[int] = None,
+    xrot: Optional[float] = None,
+    ylabelsize: Optional[int] = None,
+    yrot: Optional[float] = None,
+    figsize: Optional[Tuple[int, int]] = None,
+    bins: Union[int, Sequence[int]] = 10,
+    backend: Optional[str] = None,
+    legend: bool = False,
     **kwargs,
 ):
     """
@@ -58,6 +64,11 @@ def hist_series(
 
         .. versionadded:: 1.0.0
 
+    legend : bool, default False
+        Whether to show the legend.
+
+        ..versionadded:: 1.1.0
+
     **kwargs
         To be passed to the actual plotting function.
 
@@ -82,26 +93,28 @@ def hist_series(
         yrot=yrot,
         figsize=figsize,
         bins=bins,
+        legend=legend,
         **kwargs,
     )
 
 
 def hist_frame(
-    data,
-    column=None,
+    data: "DataFrame",
+    column: Union[Label, Sequence[Label]] = None,
     by=None,
-    grid=True,
-    xlabelsize=None,
-    xrot=None,
-    ylabelsize=None,
-    yrot=None,
+    grid: bool = True,
+    xlabelsize: Optional[int] = None,
+    xrot: Optional[float] = None,
+    ylabelsize: Optional[int] = None,
+    yrot: Optional[float] = None,
     ax=None,
-    sharex=False,
-    sharey=False,
-    figsize=None,
-    layout=None,
-    bins=10,
-    backend=None,
+    sharex: bool = False,
+    sharey: bool = False,
+    figsize: Optional[Tuple[int, int]] = None,
+    layout: Optional[Tuple[int, int]] = None,
+    bins: Union[int, Sequence[int]] = 10,
+    backend: Optional[str] = None,
+    legend: bool = False,
     **kwargs,
 ):
     """
@@ -154,6 +167,7 @@ def hist_frame(
         bin edges are calculated and returned. If bins is a sequence, gives
         bin edges, including left edge of first bin and right edge of last
         bin. In this case, bins is returned unmodified.
+
     backend : str, default None
         Backend to use instead of the backend specified in the option
         ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
@@ -162,6 +176,11 @@ def hist_frame(
 
         .. versionadded:: 1.0.0
 
+    legend : bool, default False
+        Whether to show the legend.
+
+        ..versionadded:: 1.1.0
+
     **kwargs
         All other plotting keyword arguments to be passed to
         :meth:`matplotlib.pyplot.hist`.
@@ -203,6 +222,7 @@ def hist_frame(
         sharey=sharey,
         figsize=figsize,
         layout=layout,
+        legend=legend,
         bins=bins,
         **kwargs,
     )

diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
@@ -225,6 +225,7 @@ def _grouped_hist(
     xrot=None,
     ylabelsize=None,
     yrot=None,
+    legend=False,
     **kwargs,
 ):
     """
@@ -243,15 +244,26 @@ def _grouped_hist(
     sharey : bool, default False
     rot : int, default 90
     grid : bool, default True
+    legend: : bool, default False
     kwargs : dict, keyword arguments passed to matplotlib.Axes.hist
 
     Returns
     -------
     collection of Matplotlib Axes
     """
+    if legend:
+        assert "label" not in kwargs
+        if data.ndim == 1:
+            kwargs["label"] = data.name
+        elif column is None:
+            kwargs["label"] = data.columns
+        else:
+            kwargs["label"] = column
 
     def plot_group(group, ax):
         ax.hist(group.dropna().values, bins=bins, **kwargs)
+        if legend:
+            ax.legend()
 
     if xrot is None:
         xrot = rot
@@ -290,10 +302,14 @@ def hist_series(
     yrot=None,
     figsize=None,
     bins=10,
+    legend: bool = False,
     **kwds,
 ):
     import matplotlib.pyplot as plt
 
+    if legend and "label" in kwds:
+        raise ValueError("Cannot use both legend and label")
+
     if by is None:
         if kwds.get("layout", None) is not None:
             raise ValueError("The 'layout' keyword is not supported when 'by' is None")
@@ -308,8 +324,11 @@ def hist_series(
         elif ax.get_figure() != fig:
             raise AssertionError("passed axis not bound to passed figure")
         values = self.dropna().values
-
+        if legend:
+            kwds["label"] = self.name
         ax.hist(values, bins=bins, **kwds)
+        if legend:
+            ax.legend()
         ax.grid(grid)
         axes = np.array([ax])
 
@@ -334,6 +353,7 @@ def hist_series(
             xrot=xrot,
             ylabelsize=ylabelsize,
             yrot=yrot,
+            legend=legend,
             **kwds,
         )
 
@@ -358,8 +378,11 @@ def hist_frame(
     figsize=None,
     layout=None,
     bins=10,
+    legend: bool = False,
     **kwds,
 ):
+    if legend and "label" in kwds:
+        raise ValueError("Cannot use both legend and label")
     if by is not None:
         axes = _grouped_hist(
             data,
@@ -376,6 +399,7 @@ def hist_frame(
             xrot=xrot,
             ylabelsize=ylabelsize,
             yrot=yrot,
+            legend=legend,
             **kwds,
         )
         return axes
@@ -401,11 +425,17 @@ def hist_frame(
     )
     _axes = _flatten(axes)
 
+    can_set_label = "label" not in kwds
+
     for i, col in enumerate(data.columns):
         ax = _axes[i]
+        if legend and can_set_label:
+            kwds["label"] = col
         ax.hist(data[col].dropna().values, bins=bins, **kwds)
         ax.set_title(col)
         ax.grid(grid)
+        if legend:
+            ax.legend()
 
     _set_ticks_props(
         axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot

diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py
@@ -2,10 +2,11 @@
 
 
 import numpy as np
+import pytest
 
 import pandas.util._test_decorators as td
 
-from pandas import DataFrame, Series
+from pandas import DataFrame, Index, Series
 import pandas._testing as tm
 from pandas.tests.plotting.common import TestPlotBase
 
@@ -65,3 +66,49 @@ def test_plot_kwargs(self):
 
         res = df.groupby("z").plot.scatter(x="x", y="y")
         assert len(res["a"].collections) == 1
+
+    @pytest.mark.parametrize("column, expected_axes_num", [(None, 2), ("b", 1)])
+    def test_groupby_hist_frame_with_legend(self, column, expected_axes_num):
+        # GH 6279 - DataFrameGroupBy histogram can have a legend
+        expected_layout = (1, expected_axes_num)
+        expected_labels = column or [["a"], ["b"]]
+
+        index = Index(15 * ["1"] + 15 * ["2"], name="c")
+        df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"])
+        g = df.groupby("c")
+
+        for axes in g.hist(legend=True, column=column):
+            self._check_axes_shape(
+                axes, axes_num=expected_axes_num, layout=expected_layout
+            )
+            for ax, expected_label in zip(axes[0], expected_labels):
+                self._check_legend_labels(ax, expected_label)
+
+    @pytest.mark.parametrize("column", [None, "b"])
+    def test_groupby_hist_frame_with_legend_raises(self, column):
+        # GH 6279 - DataFrameGroupBy histogram with legend and label raises
+        index = Index(15 * ["1"] + 15 * ["2"], name="c")
+        df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"])
+        g = df.groupby("c")
+
+        with pytest.raises(ValueError, match="Cannot use both legend and label"):
+            g.hist(legend=True, column=column, label="d")
+
+    def test_groupby_hist_series_with_legend(self):
+        # GH 6279 - SeriesGroupBy histogram can have a legend
+        index = Index(15 * ["1"] + 15 * ["2"], name="c")
+        df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"])
+        g = df.groupby("c")
+
+        for ax in g["a"].hist(legend=True):
+            self._check_axes_shape(ax, axes_num=1, layout=(1, 1))
+            self._check_legend_labels(ax, ["1", "2"])
+
+    def test_groupby_hist_series_with_legend_raises(self):
+        # GH 6279 - SeriesGroupBy histogram with legend and label raises
+        index = Index(15 * ["1"] + 15 * ["2"], name="c")
+        df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"])
+        g = df.groupby("c")
+
+        with pytest.raises(ValueError, match="Cannot use both legend and label"):
+            g.hist(legend=True, label="d")
diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py
@@ -6,7 +6,7 @@
 
 import pandas.util._test_decorators as td
 
-from pandas import DataFrame, Series
+from pandas import DataFrame, Index, Series
 import pandas._testing as tm
 from pandas.tests.plotting.common import TestPlotBase, _check_plot_works
 
@@ -129,6 +129,29 @@ def test_plot_fails_when_ax_differs_from_figure(self):
         with pytest.raises(AssertionError):
             self.ts.hist(ax=ax1, figure=fig2)
 
+    @pytest.mark.parametrize(
+        "by, expected_axes_num, expected_layout", [(None, 1, (1, 1)), ("b", 2, (1, 2))]
+    )
+    def test_hist_with_legend(self, by, expected_axes_num, expected_layout):
+        # GH 6279 - Series histogram can have a legend
+        index = 15 * ["1"] + 15 * ["2"]
+        s = Series(np.random.randn(30), index=index, name="a")
+        s.index.name = "b"
+
+        axes = _check_plot_works(s.hist, legend=True, by=by)
+        self._check_axes_shape(axes, axes_num=expected_axes_num, layout=expected_layout)
+        self._check_legend_labels(axes, "a")
+
+    @pytest.mark.parametrize("by", [None, "b"])
+    def test_hist_with_legend_raises(self, by):
+        # GH 6279 - Series histogram with legend and label raises
+        index = 15 * ["1"] + 15 * ["2"]
+        s = Series(np.random.randn(30), index=index, name="a")
+        s.index.name = "b"
+
+        with pytest.raises(ValueError, match="Cannot use both legend and label"):
+            s.hist(legend=True, by=by, label="c")
+
 
 @td.skip_if_no_mpl
 class TestDataFramePlots(TestPlotBase):
@@ -293,6 +316,36 @@ def test_hist_column_order_unchanged(self, column, expected):
 
         assert result == expected
 
+    @pytest.mark.parametrize("by", [None, "c"])
+    @pytest.mark.parametrize("column", [None, "b"])
+    def test_hist_with_legend(self, by, column):
+        # GH 6279 - DataFrame histogram can have a legend
+        expected_axes_num = 1 if by is None and column is not None else 2
+        expected_layout = (1, expected_axes_num)
+        expected_labels = column or ["a", "b"]
+        if by is not None:
+            expected_labels = [expected_labels] * 2
+
+        index = Index(15 * ["1"] + 15 * ["2"], name="c")
+        df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"])
+
+        axes = _check_plot_works(df.hist, legend=True, by=by, column=column)
+        self._check_axes_shape(axes, axes_num=expected_axes_num, layout=expected_layout)
+        if by is None and column is None:
+            axes = axes[0]
+        for expected_label, ax in zip(expected_labels, axes):
+            self._check_legend_labels(ax, expected_label)
+
+    @pytest.mark.parametrize("by", [None, "c"])
+    @pytest.mark.parametrize("column", [None, "b"])
+    def test_hist_with_legend_raises(self, by, column):
+        # GH 6279 - DataFrame histogram with legend and label raises
+        index = Index(15 * ["1"] + 15 * ["2"], name="c")
+        df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"])
+
+        with pytest.raises(ValueError, match="Cannot use both legend and label"):
+            df.hist(legend=True, by=by, column=column, label="d")
+
 
 @td.skip_if_no_mpl
 class TestDataFrameGroupByPlots(TestPlotBase):