Skip to content

Commit 506eb54

Browse files
authored
BUG: groupby.hist legend should use group keys (#33493)
1 parent 8ba9c62 commit 506eb54

File tree

5 files changed

+175
-24
lines changed

5 files changed

+175
-24
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,7 @@ Other enhancements
292292
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now accept an ``errors`` argument (:issue:`22610`)
293293
- :meth:`groupby.transform` now allows ``func`` to be ``pad``, ``backfill`` and ``cumcount`` (:issue:`31269`).
294294
- :meth:`~pandas.io.json.read_json` now accepts `nrows` parameter. (:issue:`33916`).
295+
- :meth:`DataFrame.hist`, :meth:`Series.hist`, :meth:`core.groupby.DataFrameGroupBy.hist`, and :meth:`core.groupby.SeriesGroupBy.hist` have gained the ``legend`` argument. Set to True to show a legend in the histogram. (:issue:`6279`)
295296
- :func:`concat` and :meth:`~DataFrame.append` now preserve extension dtypes, for example
296297
combining a nullable integer column with a numpy integer column will no longer
297298
result in object dtype but preserve the integer dtype (:issue:`33607`, :issue:`34339`).

pandas/plotting/_core.py

+41-21
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,33 @@
11
import importlib
2+
from typing import TYPE_CHECKING, Optional, Sequence, Tuple, Union
23

34
from pandas._config import get_option
45

6+
from pandas._typing import Label
57
from pandas.util._decorators import Appender, Substitution
68

79
from pandas.core.dtypes.common import is_integer, is_list_like
810
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
911

1012
from pandas.core.base import PandasObject
1113

14+
if TYPE_CHECKING:
15+
from pandas import DataFrame
16+
1217

1318
def hist_series(
1419
self,
1520
by=None,
1621
ax=None,
17-
grid=True,
18-
xlabelsize=None,
19-
xrot=None,
20-
ylabelsize=None,
21-
yrot=None,
22-
figsize=None,
23-
bins=10,
24-
backend=None,
22+
grid: bool = True,
23+
xlabelsize: Optional[int] = None,
24+
xrot: Optional[float] = None,
25+
ylabelsize: Optional[int] = None,
26+
yrot: Optional[float] = None,
27+
figsize: Optional[Tuple[int, int]] = None,
28+
bins: Union[int, Sequence[int]] = 10,
29+
backend: Optional[str] = None,
30+
legend: bool = False,
2531
**kwargs,
2632
):
2733
"""
@@ -58,6 +64,11 @@ def hist_series(
5864
5965
.. versionadded:: 1.0.0
6066
67+
legend : bool, default False
68+
Whether to show the legend.
69+
70+
.. versionadded:: 1.1.0
71+
6172
**kwargs
6273
To be passed to the actual plotting function.
6374
@@ -82,26 +93,28 @@ def hist_series(
8293
yrot=yrot,
8394
figsize=figsize,
8495
bins=bins,
96+
legend=legend,
8597
**kwargs,
8698
)
8799

88100

89101
def hist_frame(
90-
data,
91-
column=None,
102+
data: "DataFrame",
103+
column: Union[Label, Sequence[Label]] = None,
92104
by=None,
93-
grid=True,
94-
xlabelsize=None,
95-
xrot=None,
96-
ylabelsize=None,
97-
yrot=None,
105+
grid: bool = True,
106+
xlabelsize: Optional[int] = None,
107+
xrot: Optional[float] = None,
108+
ylabelsize: Optional[int] = None,
109+
yrot: Optional[float] = None,
98110
ax=None,
99-
sharex=False,
100-
sharey=False,
101-
figsize=None,
102-
layout=None,
103-
bins=10,
104-
backend=None,
111+
sharex: bool = False,
112+
sharey: bool = False,
113+
figsize: Optional[Tuple[int, int]] = None,
114+
layout: Optional[Tuple[int, int]] = None,
115+
bins: Union[int, Sequence[int]] = 10,
116+
backend: Optional[str] = None,
117+
legend: bool = False,
105118
**kwargs,
106119
):
107120
"""
@@ -154,6 +167,7 @@ def hist_frame(
154167
bin edges are calculated and returned. If bins is a sequence, gives
155168
bin edges, including left edge of first bin and right edge of last
156169
bin. In this case, bins is returned unmodified.
170+
157171
backend : str, default None
158172
Backend to use instead of the backend specified in the option
159173
``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
@@ -162,6 +176,11 @@ def hist_frame(
162176
163177
.. versionadded:: 1.0.0
164178
179+
legend : bool, default False
180+
Whether to show the legend.
181+
182+
.. versionadded:: 1.1.0
183+
165184
**kwargs
166185
All other plotting keyword arguments to be passed to
167186
:meth:`matplotlib.pyplot.hist`.
@@ -203,6 +222,7 @@ def hist_frame(
203222
sharey=sharey,
204223
figsize=figsize,
205224
layout=layout,
225+
legend=legend,
206226
bins=bins,
207227
**kwargs,
208228
)

pandas/plotting/_matplotlib/hist.py

+31-1
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ def _grouped_hist(
225225
xrot=None,
226226
ylabelsize=None,
227227
yrot=None,
228+
legend=False,
228229
**kwargs,
229230
):
230231
"""
@@ -243,15 +244,26 @@ def _grouped_hist(
243244
sharey : bool, default False
244245
rot : int, default 90
245246
grid : bool, default True
247+
legend : bool, default False
246248
kwargs : dict, keyword arguments passed to matplotlib.Axes.hist
247249
248250
Returns
249251
-------
250252
collection of Matplotlib Axes
251253
"""
254+
if legend:
255+
assert "label" not in kwargs
256+
if data.ndim == 1:
257+
kwargs["label"] = data.name
258+
elif column is None:
259+
kwargs["label"] = data.columns
260+
else:
261+
kwargs["label"] = column
252262

253263
def plot_group(group, ax):
254264
ax.hist(group.dropna().values, bins=bins, **kwargs)
265+
if legend:
266+
ax.legend()
255267

256268
if xrot is None:
257269
xrot = rot
@@ -290,10 +302,14 @@ def hist_series(
290302
yrot=None,
291303
figsize=None,
292304
bins=10,
305+
legend: bool = False,
293306
**kwds,
294307
):
295308
import matplotlib.pyplot as plt
296309

310+
if legend and "label" in kwds:
311+
raise ValueError("Cannot use both legend and label")
312+
297313
if by is None:
298314
if kwds.get("layout", None) is not None:
299315
raise ValueError("The 'layout' keyword is not supported when 'by' is None")
@@ -308,8 +324,11 @@ def hist_series(
308324
elif ax.get_figure() != fig:
309325
raise AssertionError("passed axis not bound to passed figure")
310326
values = self.dropna().values
311-
327+
if legend:
328+
kwds["label"] = self.name
312329
ax.hist(values, bins=bins, **kwds)
330+
if legend:
331+
ax.legend()
313332
ax.grid(grid)
314333
axes = np.array([ax])
315334

@@ -334,6 +353,7 @@ def hist_series(
334353
xrot=xrot,
335354
ylabelsize=ylabelsize,
336355
yrot=yrot,
356+
legend=legend,
337357
**kwds,
338358
)
339359

@@ -358,8 +378,11 @@ def hist_frame(
358378
figsize=None,
359379
layout=None,
360380
bins=10,
381+
legend: bool = False,
361382
**kwds,
362383
):
384+
if legend and "label" in kwds:
385+
raise ValueError("Cannot use both legend and label")
363386
if by is not None:
364387
axes = _grouped_hist(
365388
data,
@@ -376,6 +399,7 @@ def hist_frame(
376399
xrot=xrot,
377400
ylabelsize=ylabelsize,
378401
yrot=yrot,
402+
legend=legend,
379403
**kwds,
380404
)
381405
return axes
@@ -401,11 +425,17 @@ def hist_frame(
401425
)
402426
_axes = _flatten(axes)
403427

428+
can_set_label = "label" not in kwds
429+
404430
for i, col in enumerate(data.columns):
405431
ax = _axes[i]
432+
if legend and can_set_label:
433+
kwds["label"] = col
406434
ax.hist(data[col].dropna().values, bins=bins, **kwds)
407435
ax.set_title(col)
408436
ax.grid(grid)
437+
if legend:
438+
ax.legend()
409439

410440
_set_ticks_props(
411441
axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot

pandas/tests/plotting/test_groupby.py

+48-1
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@
22

33

44
import numpy as np
5+
import pytest
56

67
import pandas.util._test_decorators as td
78

8-
from pandas import DataFrame, Series
9+
from pandas import DataFrame, Index, Series
910
import pandas._testing as tm
1011
from pandas.tests.plotting.common import TestPlotBase
1112

@@ -65,3 +66,49 @@ def test_plot_kwargs(self):
6566

6667
res = df.groupby("z").plot.scatter(x="x", y="y")
6768
assert len(res["a"].collections) == 1
69+
70+
@pytest.mark.parametrize("column, expected_axes_num", [(None, 2), ("b", 1)])
71+
def test_groupby_hist_frame_with_legend(self, column, expected_axes_num):
72+
# GH 6279 - DataFrameGroupBy histogram can have a legend
73+
expected_layout = (1, expected_axes_num)
74+
expected_labels = column or [["a"], ["b"]]
75+
76+
index = Index(15 * ["1"] + 15 * ["2"], name="c")
77+
df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"])
78+
g = df.groupby("c")
79+
80+
for axes in g.hist(legend=True, column=column):
81+
self._check_axes_shape(
82+
axes, axes_num=expected_axes_num, layout=expected_layout
83+
)
84+
for ax, expected_label in zip(axes[0], expected_labels):
85+
self._check_legend_labels(ax, expected_label)
86+
87+
@pytest.mark.parametrize("column", [None, "b"])
88+
def test_groupby_hist_frame_with_legend_raises(self, column):
89+
# GH 6279 - DataFrameGroupBy histogram with legend and label raises
90+
index = Index(15 * ["1"] + 15 * ["2"], name="c")
91+
df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"])
92+
g = df.groupby("c")
93+
94+
with pytest.raises(ValueError, match="Cannot use both legend and label"):
95+
g.hist(legend=True, column=column, label="d")
96+
97+
def test_groupby_hist_series_with_legend(self):
98+
# GH 6279 - SeriesGroupBy histogram can have a legend
99+
index = Index(15 * ["1"] + 15 * ["2"], name="c")
100+
df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"])
101+
g = df.groupby("c")
102+
103+
for ax in g["a"].hist(legend=True):
104+
self._check_axes_shape(ax, axes_num=1, layout=(1, 1))
105+
self._check_legend_labels(ax, ["1", "2"])
106+
107+
def test_groupby_hist_series_with_legend_raises(self):
108+
# GH 6279 - SeriesGroupBy histogram with legend and label raises
109+
index = Index(15 * ["1"] + 15 * ["2"], name="c")
110+
df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"])
111+
g = df.groupby("c")
112+
113+
with pytest.raises(ValueError, match="Cannot use both legend and label"):
114+
g.hist(legend=True, label="d")

pandas/tests/plotting/test_hist_method.py

+54-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import pandas.util._test_decorators as td
88

9-
from pandas import DataFrame, Series
9+
from pandas import DataFrame, Index, Series
1010
import pandas._testing as tm
1111
from pandas.tests.plotting.common import TestPlotBase, _check_plot_works
1212

@@ -129,6 +129,29 @@ def test_plot_fails_when_ax_differs_from_figure(self):
129129
with pytest.raises(AssertionError):
130130
self.ts.hist(ax=ax1, figure=fig2)
131131

132+
@pytest.mark.parametrize(
133+
"by, expected_axes_num, expected_layout", [(None, 1, (1, 1)), ("b", 2, (1, 2))]
134+
)
135+
def test_hist_with_legend(self, by, expected_axes_num, expected_layout):
136+
# GH 6279 - Series histogram can have a legend
137+
index = 15 * ["1"] + 15 * ["2"]
138+
s = Series(np.random.randn(30), index=index, name="a")
139+
s.index.name = "b"
140+
141+
axes = _check_plot_works(s.hist, legend=True, by=by)
142+
self._check_axes_shape(axes, axes_num=expected_axes_num, layout=expected_layout)
143+
self._check_legend_labels(axes, "a")
144+
145+
@pytest.mark.parametrize("by", [None, "b"])
146+
def test_hist_with_legend_raises(self, by):
147+
# GH 6279 - Series histogram with legend and label raises
148+
index = 15 * ["1"] + 15 * ["2"]
149+
s = Series(np.random.randn(30), index=index, name="a")
150+
s.index.name = "b"
151+
152+
with pytest.raises(ValueError, match="Cannot use both legend and label"):
153+
s.hist(legend=True, by=by, label="c")
154+
132155

133156
@td.skip_if_no_mpl
134157
class TestDataFramePlots(TestPlotBase):
@@ -293,6 +316,36 @@ def test_hist_column_order_unchanged(self, column, expected):
293316

294317
assert result == expected
295318

319+
@pytest.mark.parametrize("by", [None, "c"])
320+
@pytest.mark.parametrize("column", [None, "b"])
321+
def test_hist_with_legend(self, by, column):
322+
# GH 6279 - DataFrame histogram can have a legend
323+
expected_axes_num = 1 if by is None and column is not None else 2
324+
expected_layout = (1, expected_axes_num)
325+
expected_labels = column or ["a", "b"]
326+
if by is not None:
327+
expected_labels = [expected_labels] * 2
328+
329+
index = Index(15 * ["1"] + 15 * ["2"], name="c")
330+
df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"])
331+
332+
axes = _check_plot_works(df.hist, legend=True, by=by, column=column)
333+
self._check_axes_shape(axes, axes_num=expected_axes_num, layout=expected_layout)
334+
if by is None and column is None:
335+
axes = axes[0]
336+
for expected_label, ax in zip(expected_labels, axes):
337+
self._check_legend_labels(ax, expected_label)
338+
339+
@pytest.mark.parametrize("by", [None, "c"])
340+
@pytest.mark.parametrize("column", [None, "b"])
341+
def test_hist_with_legend_raises(self, by, column):
342+
# GH 6279 - DataFrame histogram with legend and label raises
343+
index = Index(15 * ["1"] + 15 * ["2"], name="c")
344+
df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"])
345+
346+
with pytest.raises(ValueError, match="Cannot use both legend and label"):
347+
df.hist(legend=True, by=by, column=column, label="d")
348+
296349

297350
@td.skip_if_no_mpl
298351
class TestDataFrameGroupByPlots(TestPlotBase):

0 commit comments

Comments
 (0)