Skip to content

Commit cd7f09e

Browse files
committed
Merge pull request #7809 from sinhrks/histplot
ENH/CLN: add HistPlot class inheriting MPLPlot
2 parents f400014 + 4a23a84 commit cd7f09e

File tree

4 files changed

+374
-68
lines changed

4 files changed

+374
-68
lines changed

doc/source/v0.15.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,8 @@ previously results in ``Exception`` or ``TypeError`` (:issue:`7812`)
170170
- ``merge``, ``DataFrame.merge``, and ``ordered_merge`` now return the same type
171171
as the ``left`` argument. (:issue:`7737`)
172172

173+
- Histogram from ``DataFrame.plot`` with ``kind='hist'`` (:issue:`7809`). See :ref:`the docs<visualization.hist>`.
174+
173175
.. _whatsnew_0150.dt:
174176

175177
.dt accessor

doc/source/visualization.rst

+41
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ a handful of values for plots other than the default Line plot.
123123
These include:
124124

125125
* :ref:`'bar' <visualization.barplot>` or :ref:`'barh' <visualization.barplot>` for bar plots
126+
* :ref:`'hist' <visualization.hist>` for histograms
126127
* :ref:`'kde' <visualization.kde>` or ``'density'`` for density plots
127128
* :ref:`'area' <visualization.area_plot>` for area plots
128129
* :ref:`'scatter' <visualization.scatter_matrix>` for scatter plots
@@ -205,6 +206,46 @@ To get horizontal bar plots, pass ``kind='barh'``:
205206

206207
Histograms
207208
~~~~~~~~~~
209+
210+
.. versionadded:: 0.15.0
211+
212+
Histograms can be drawn by specifying ``kind='hist'``.
213+
214+
.. ipython:: python
215+
216+
df4 = DataFrame({'a': randn(1000) + 1, 'b': randn(1000),
217+
'c': randn(1000) - 1}, columns=['a', 'b', 'c'])
218+
219+
plt.figure();
220+
221+
@savefig hist_new.png
222+
df4.plot(kind='hist', alpha=0.5)
223+
224+
Histograms can be stacked by passing ``stacked=True``. Bin size can be changed using the ``bins`` keyword.
225+
226+
.. ipython:: python
227+
228+
plt.figure();
229+
230+
@savefig hist_new_stacked.png
231+
df4.plot(kind='hist', stacked=True, bins=20)
232+
233+
You can pass other keywords supported by matplotlib ``hist``. For example, horizontal and cumulative histograms can be drawn by passing ``orientation='horizontal'`` and ``cumulative=True``.
234+
235+
.. ipython:: python
236+
237+
plt.figure();
238+
239+
@savefig hist_new_kwargs.png
240+
df4['a'].plot(kind='hist', orientation='horizontal', cumulative=True)
241+
242+
243+
See the :meth:`hist <matplotlib.axes.Axes.hist>` method and the
244+
`matplotlib hist documentation <http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.hist>`__ for more.
245+
246+
247+
The previous interface ``DataFrame.hist`` for plotting histograms can still be used.
248+
208249
.. ipython:: python
209250
210251
plt.figure();

pandas/tests/test_graphics.py

+212-9
Original file line numberDiff line numberDiff line change
@@ -452,7 +452,7 @@ def test_plot(self):
452452
_check_plot_works(self.ts.plot, kind='area', stacked=False)
453453
_check_plot_works(self.iseries.plot)
454454

455-
for kind in ['line', 'bar', 'barh', 'kde']:
455+
for kind in ['line', 'bar', 'barh', 'kde', 'hist']:
456456
if not _ok_for_gaussian_kde(kind):
457457
continue
458458
_check_plot_works(self.series[:5].plot, kind=kind)
@@ -616,7 +616,13 @@ def test_pie_series(self):
616616
self._check_text_labels(ax.texts, series.index)
617617

618618
@slow
619-
def test_hist(self):
619+
def test_hist_df_kwargs(self):
620+
df = DataFrame(np.random.randn(10, 2))
621+
ax = df.plot(kind='hist', bins=5)
622+
self.assertEqual(len(ax.patches), 10)
623+
624+
@slow
625+
def test_hist_legacy(self):
620626
_check_plot_works(self.ts.hist)
621627
_check_plot_works(self.ts.hist, grid=False)
622628
_check_plot_works(self.ts.hist, figsize=(8, 10))
@@ -637,7 +643,7 @@ def test_hist(self):
637643
self.ts.hist(by=self.ts.index, figure=fig)
638644

639645
@slow
640-
def test_hist_bins(self):
646+
def test_hist_bins_legacy(self):
641647
df = DataFrame(np.random.randn(10, 2))
642648
ax = df.hist(bins=2)[0][0]
643649
self.assertEqual(len(ax.patches), 2)
@@ -701,13 +707,25 @@ def test_plot_fails_when_ax_differs_from_figure(self):
701707
self.ts.hist(ax=ax1, figure=fig2)
702708

703709
@slow
704-
def test_kde(self):
710+
def test_hist_kde(self):
711+
ax = self.ts.plot(kind='hist', logy=True)
712+
self._check_ax_scales(ax, yaxis='log')
713+
xlabels = ax.get_xticklabels()
714+
# ticks are values, thus ticklabels are blank
715+
self._check_text_labels(xlabels, [''] * len(xlabels))
716+
ylabels = ax.get_yticklabels()
717+
self._check_text_labels(ylabels, [''] * len(ylabels))
718+
705719
tm._skip_if_no_scipy()
706720
_skip_if_no_scipy_gaussian_kde()
707721
_check_plot_works(self.ts.plot, kind='kde')
708722
_check_plot_works(self.ts.plot, kind='density')
709723
ax = self.ts.plot(kind='kde', logy=True)
710724
self._check_ax_scales(ax, yaxis='log')
725+
xlabels = ax.get_xticklabels()
726+
self._check_text_labels(xlabels, [''] * len(xlabels))
727+
ylabels = ax.get_yticklabels()
728+
self._check_text_labels(ylabels, [''] * len(ylabels))
711729

712730
@slow
713731
def test_kde_kwargs(self):
@@ -718,9 +736,29 @@ def test_kde_kwargs(self):
718736
_check_plot_works(self.ts.plot, kind='density', bw_method=.5, ind=linspace(-100,100,20))
719737
ax = self.ts.plot(kind='kde', logy=True, bw_method=.5, ind=linspace(-100,100,20))
720738
self._check_ax_scales(ax, yaxis='log')
739+
self._check_text_labels(ax.yaxis.get_label(), 'Density')
721740

722741
@slow
723-
def test_kde_color(self):
742+
def test_hist_kwargs(self):
743+
ax = self.ts.plot(kind='hist', bins=5)
744+
self.assertEqual(len(ax.patches), 5)
745+
self._check_text_labels(ax.yaxis.get_label(), 'Degree')
746+
tm.close()
747+
748+
ax = self.ts.plot(kind='hist', orientation='horizontal')
749+
self._check_text_labels(ax.xaxis.get_label(), 'Degree')
750+
tm.close()
751+
752+
ax = self.ts.plot(kind='hist', align='left', stacked=True)
753+
tm.close()
754+
755+
@slow
756+
def test_hist_kde_color(self):
757+
ax = self.ts.plot(kind='hist', logy=True, bins=10, color='b')
758+
self._check_ax_scales(ax, yaxis='log')
759+
self.assertEqual(len(ax.patches), 10)
760+
self._check_colors(ax.patches, facecolors=['b'] * 10)
761+
724762
tm._skip_if_no_scipy()
725763
_skip_if_no_scipy_gaussian_kde()
726764
ax = self.ts.plot(kind='kde', logy=True, color='r')
@@ -1611,7 +1649,7 @@ def test_boxplot_return_type(self):
16111649
self._check_box_return_type(result, 'both')
16121650

16131651
@slow
1614-
def test_kde(self):
1652+
def test_kde_df(self):
16151653
tm._skip_if_no_scipy()
16161654
_skip_if_no_scipy_gaussian_kde()
16171655
df = DataFrame(randn(100, 4))
@@ -1630,7 +1668,122 @@ def test_kde(self):
16301668
self._check_ax_scales(axes, yaxis='log')
16311669

16321670
@slow
1633-
def test_hist(self):
1671+
def test_hist_df(self):
1672+
df = DataFrame(randn(100, 4))
1673+
series = df[0]
1674+
1675+
ax = _check_plot_works(df.plot, kind='hist')
1676+
expected = [com.pprint_thing(c) for c in df.columns]
1677+
self._check_legend_labels(ax, labels=expected)
1678+
1679+
axes = _check_plot_works(df.plot, kind='hist', subplots=True, logy=True)
1680+
self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
1681+
self._check_ax_scales(axes, yaxis='log')
1682+
1683+
axes = series.plot(kind='hist', rot=40)
1684+
self._check_ticks_props(axes, xrot=40, yrot=0)
1685+
tm.close()
1686+
1687+
ax = series.plot(kind='hist', normed=True, cumulative=True, bins=4)
1688+
# height of last bin (index 5) must be 1.0
1689+
self.assertAlmostEqual(ax.get_children()[5].get_height(), 1.0)
1690+
tm.close()
1691+
1692+
ax = series.plot(kind='hist', cumulative=True, bins=4)
1693+
self.assertAlmostEqual(ax.get_children()[5].get_height(), 100.0)
1694+
tm.close()
1695+
1696+
# if horizontal, yticklabels are rotated
1697+
axes = df.plot(kind='hist', rot=50, fontsize=8, orientation='horizontal')
1698+
self._check_ticks_props(axes, xrot=0, yrot=50, ylabelsize=8)
1699+
1700+
def _check_box_coord(self, patches, expected_y=None, expected_h=None,
1701+
expected_x=None, expected_w=None):
1702+
result_y = np.array([p.get_y() for p in patches])
1703+
result_height = np.array([p.get_height() for p in patches])
1704+
result_x = np.array([p.get_x() for p in patches])
1705+
result_width = np.array([p.get_width() for p in patches])
1706+
1707+
if expected_y is not None:
1708+
self.assert_numpy_array_equal(result_y, expected_y)
1709+
if expected_h is not None:
1710+
self.assert_numpy_array_equal(result_height, expected_h)
1711+
if expected_x is not None:
1712+
self.assert_numpy_array_equal(result_x, expected_x)
1713+
if expected_w is not None:
1714+
self.assert_numpy_array_equal(result_width, expected_w)
1715+
1716+
@slow
1717+
def test_hist_df_coord(self):
1718+
normal_df = DataFrame({'A': np.repeat(np.array([1, 2, 3, 4, 5]),
1719+
np.array([10, 9, 8, 7, 6])),
1720+
'B': np.repeat(np.array([1, 2, 3, 4, 5]),
1721+
np.array([8, 8, 8, 8, 8])),
1722+
'C': np.repeat(np.array([1, 2, 3, 4, 5]),
1723+
np.array([6, 7, 8, 9, 10]))},
1724+
columns=['A', 'B', 'C'])
1725+
1726+
nan_df = DataFrame({'A': np.repeat(np.array([np.nan, 1, 2, 3, 4, 5]),
1727+
np.array([3, 10, 9, 8, 7, 6])),
1728+
'B': np.repeat(np.array([1, np.nan, 2, 3, 4, 5]),
1729+
np.array([8, 3, 8, 8, 8, 8])),
1730+
'C': np.repeat(np.array([1, 2, 3, np.nan, 4, 5]),
1731+
np.array([6, 7, 8, 3, 9, 10]))},
1732+
columns=['A', 'B', 'C'])
1733+
1734+
for df in [normal_df, nan_df]:
1735+
ax = df.plot(kind='hist', bins=5)
1736+
self._check_box_coord(ax.patches[:5], expected_y=np.array([0, 0, 0, 0, 0]),
1737+
expected_h=np.array([10, 9, 8, 7, 6]))
1738+
self._check_box_coord(ax.patches[5:10], expected_y=np.array([0, 0, 0, 0, 0]),
1739+
expected_h=np.array([8, 8, 8, 8, 8]))
1740+
self._check_box_coord(ax.patches[10:], expected_y=np.array([0, 0, 0, 0, 0]),
1741+
expected_h=np.array([6, 7, 8, 9, 10]))
1742+
1743+
ax = df.plot(kind='hist', bins=5, stacked=True)
1744+
self._check_box_coord(ax.patches[:5], expected_y=np.array([0, 0, 0, 0, 0]),
1745+
expected_h=np.array([10, 9, 8, 7, 6]))
1746+
self._check_box_coord(ax.patches[5:10], expected_y=np.array([10, 9, 8, 7, 6]),
1747+
expected_h=np.array([8, 8, 8, 8, 8]))
1748+
self._check_box_coord(ax.patches[10:], expected_y=np.array([18, 17, 16, 15, 14]),
1749+
expected_h=np.array([6, 7, 8, 9, 10]))
1750+
1751+
axes = df.plot(kind='hist', bins=5, stacked=True, subplots=True)
1752+
self._check_box_coord(axes[0].patches, expected_y=np.array([0, 0, 0, 0, 0]),
1753+
expected_h=np.array([10, 9, 8, 7, 6]))
1754+
self._check_box_coord(axes[1].patches, expected_y=np.array([0, 0, 0, 0, 0]),
1755+
expected_h=np.array([8, 8, 8, 8, 8]))
1756+
self._check_box_coord(axes[2].patches, expected_y=np.array([0, 0, 0, 0, 0]),
1757+
expected_h=np.array([6, 7, 8, 9, 10]))
1758+
1759+
# horizontal
1760+
ax = df.plot(kind='hist', bins=5, orientation='horizontal')
1761+
self._check_box_coord(ax.patches[:5], expected_x=np.array([0, 0, 0, 0, 0]),
1762+
expected_w=np.array([10, 9, 8, 7, 6]))
1763+
self._check_box_coord(ax.patches[5:10], expected_x=np.array([0, 0, 0, 0, 0]),
1764+
expected_w=np.array([8, 8, 8, 8, 8]))
1765+
self._check_box_coord(ax.patches[10:], expected_x=np.array([0, 0, 0, 0, 0]),
1766+
expected_w=np.array([6, 7, 8, 9, 10]))
1767+
1768+
ax = df.plot(kind='hist', bins=5, stacked=True, orientation='horizontal')
1769+
self._check_box_coord(ax.patches[:5], expected_x=np.array([0, 0, 0, 0, 0]),
1770+
expected_w=np.array([10, 9, 8, 7, 6]))
1771+
self._check_box_coord(ax.patches[5:10], expected_x=np.array([10, 9, 8, 7, 6]),
1772+
expected_w=np.array([8, 8, 8, 8, 8]))
1773+
self._check_box_coord(ax.patches[10:], expected_x=np.array([18, 17, 16, 15, 14]),
1774+
expected_w=np.array([6, 7, 8, 9, 10]))
1775+
1776+
axes = df.plot(kind='hist', bins=5, stacked=True,
1777+
subplots=True, orientation='horizontal')
1778+
self._check_box_coord(axes[0].patches, expected_x=np.array([0, 0, 0, 0, 0]),
1779+
expected_w=np.array([10, 9, 8, 7, 6]))
1780+
self._check_box_coord(axes[1].patches, expected_x=np.array([0, 0, 0, 0, 0]),
1781+
expected_w=np.array([8, 8, 8, 8, 8]))
1782+
self._check_box_coord(axes[2].patches, expected_x=np.array([0, 0, 0, 0, 0]),
1783+
expected_w=np.array([6, 7, 8, 9, 10]))
1784+
1785+
@slow
1786+
def test_hist_df_legacy(self):
16341787
_check_plot_works(self.hist_df.hist)
16351788

16361789
# make sure layout is handled
@@ -1849,7 +2002,7 @@ def test_plot_int_columns(self):
18492002

18502003
@slow
18512004
def test_df_legend_labels(self):
1852-
kinds = 'line', 'bar', 'barh', 'kde', 'area'
2005+
kinds = ['line', 'bar', 'barh', 'kde', 'area', 'hist']
18532006
df = DataFrame(rand(3, 3), columns=['a', 'b', 'c'])
18542007
df2 = DataFrame(rand(3, 3), columns=['d', 'e', 'f'])
18552008
df3 = DataFrame(rand(3, 3), columns=['g', 'h', 'i'])
@@ -1927,7 +2080,7 @@ def test_legend_name(self):
19272080

19282081
@slow
19292082
def test_no_legend(self):
1930-
kinds = 'line', 'bar', 'barh', 'kde', 'area'
2083+
kinds = ['line', 'bar', 'barh', 'kde', 'area', 'hist']
19312084
df = DataFrame(rand(3, 3), columns=['a', 'b', 'c'])
19322085

19332086
for kind in kinds:
@@ -2019,6 +2172,56 @@ def test_area_colors(self):
20192172
poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)]
20202173
self._check_colors(poly, facecolors=rgba_colors)
20212174

2175+
@slow
2176+
def test_hist_colors(self):
2177+
default_colors = self.plt.rcParams.get('axes.color_cycle')
2178+
2179+
df = DataFrame(randn(5, 5))
2180+
ax = df.plot(kind='hist')
2181+
self._check_colors(ax.patches[::10], facecolors=default_colors[:5])
2182+
tm.close()
2183+
2184+
custom_colors = 'rgcby'
2185+
ax = df.plot(kind='hist', color=custom_colors)
2186+
self._check_colors(ax.patches[::10], facecolors=custom_colors)
2187+
tm.close()
2188+
2189+
from matplotlib import cm
2190+
# Test str -> colormap functionality
2191+
ax = df.plot(kind='hist', colormap='jet')
2192+
rgba_colors = lmap(cm.jet, np.linspace(0, 1, 5))
2193+
self._check_colors(ax.patches[::10], facecolors=rgba_colors)
2194+
tm.close()
2195+
2196+
# Test colormap functionality
2197+
ax = df.plot(kind='hist', colormap=cm.jet)
2198+
rgba_colors = lmap(cm.jet, np.linspace(0, 1, 5))
2199+
self._check_colors(ax.patches[::10], facecolors=rgba_colors)
2200+
tm.close()
2201+
2202+
ax = df.ix[:, [0]].plot(kind='hist', color='DodgerBlue')
2203+
self._check_colors([ax.patches[0]], facecolors=['DodgerBlue'])
2204+
2205+
@slow
2206+
def test_kde_colors(self):
2207+
from matplotlib import cm
2208+
2209+
custom_colors = 'rgcby'
2210+
df = DataFrame(rand(5, 5))
2211+
2212+
ax = df.plot(kind='kde', color=custom_colors)
2213+
self._check_colors(ax.get_lines(), linecolors=custom_colors)
2214+
tm.close()
2215+
2216+
ax = df.plot(kind='kde', colormap='jet')
2217+
rgba_colors = lmap(cm.jet, np.linspace(0, 1, len(df)))
2218+
self._check_colors(ax.get_lines(), linecolors=rgba_colors)
2219+
tm.close()
2220+
2221+
ax = df.plot(kind='kde', colormap=cm.jet)
2222+
rgba_colors = lmap(cm.jet, np.linspace(0, 1, len(df)))
2223+
self._check_colors(ax.get_lines(), linecolors=rgba_colors)
2224+
20222225
def test_default_color_cycle(self):
20232226
import matplotlib.pyplot as plt
20242227
plt.rcParams['axes.color_cycle'] = list('rgbk')

0 commit comments

Comments
 (0)