Skip to content

Commit cab2a93

Browse files
committed
Merge pull request pandas-dev#5478 from TomAugspurger/hexbin-plot
VIS/ENH Hexbin plot
2 parents e56411b + 0b48464 commit cab2a93

File tree

5 files changed

+194
-2
lines changed

5 files changed

+194
-2
lines changed

doc/source/release.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ pandas 0.14.0
5353
New features
5454
~~~~~~~~~~~~
5555

56+
- Hexagonal bin plots from ``DataFrame.plot`` with ``kind='hexbin'`` (:issue:`5478`)
57+
5658
API Changes
5759
~~~~~~~~~~~
5860

doc/source/v0.14.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ Enhancements
154154
- ``plot(legend='reverse')`` will now reverse the order of legend labels for
155155
most plot kinds. (:issue:`6014`)
156156
- improve performance of slice indexing on Series with string keys (:issue:`6341`)
157+
- Hexagonal bin plots from ``DataFrame.plot`` with ``kind='hexbin'`` (:issue:`5478`)
157158

158159
Performance
159160
~~~~~~~~~~~

doc/source/visualization.rst

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,59 @@ setting `kind='kde'`:
414414
@savefig kde_plot.png
415415
ser.plot(kind='kde')
416416
417+
.. _visualization.hexbin:
418+
419+
Hexagonal Bin plot
420+
~~~~~~~~~~~~~~~~~~
421+
*New in 0.14.0:* You can create hexagonal bin plots with ``DataFrame.plot`` and
422+
``kind='hexbin'``.
423+
Hexbin plots can be a useful alternative to scatter plots if your data are
424+
too dense to plot each point individually.
425+
426+
.. ipython:: python
427+
:suppress:
428+
429+
plt.figure();
430+
431+
.. ipython:: python
432+
433+
df = DataFrame(randn(1000, 2), columns=['a', 'b'])
434+
df['b'] = df['b'] + np.arange(1000)
435+
436+
@savefig hexbin_plot.png
437+
df.plot(kind='hexbin', x='a', y='b', gridsize=25)
438+
439+
440+
A useful keyword argument is ``gridsize``; it controls the number of hexagons
441+
in the x-direction, and defaults to 100. A larger ``gridsize`` means more, smaller
442+
bins.
443+
444+
By default, a histogram of the counts around each ``(x, y)`` point is computed.
445+
You can specify alternative aggregations by passing values to the ``C`` and
446+
``reduce_C_function`` arguments. ``C`` specifies the value at each ``(x, y)`` point
447+
and ``reduce_C_function`` is a function of one argument that reduces all the
448+
values in a bin to a single number (e.g. ``mean``, ``max``, ``sum``, ``std``). In this
449+
example the positions are given by columns ``a`` and ``b``, while the value is
450+
given by column ``z``. The bins are aggregated with numpy's ``max`` function.
451+
452+
.. ipython:: python
453+
:suppress:
454+
455+
plt.figure();
456+
457+
.. ipython:: python
458+
459+
df = DataFrame(randn(1000, 2), columns=['a', 'b'])
460+
df['b'] = df['b'] + np.arange(1000)
461+
df['z'] = np.random.uniform(0, 3, 1000)
462+
463+
@savefig hexbin_plot_agg.png
464+
df.plot(kind='hexbin', x='a', y='b', C='z', reduce_C_function=np.max,
465+
gridsize=25)
466+
467+
468+
See the `matplotlib hexbin documentation <http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.hexbin>`__ for more.
469+
417470
.. _visualization.andrews_curves:
418471

419472
Andrews Curves

pandas/tests/test_graphics.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -956,6 +956,65 @@ def test_invalid_kind(self):
956956
with tm.assertRaises(ValueError):
957957
df.plot(kind='aasdf')
958958

959+
@slow
960+
def test_hexbin_basic(self):
    # Smoke test: a plain hexbin plot (no ``C`` column) should add exactly
    # one collection to the returned axes.
    df = DataFrame({"A": np.random.uniform(size=20),
                    "B": np.random.uniform(size=20),
                    "C": np.arange(20) + np.random.uniform(size=20)})

    ax = df.plot(kind='hexbin', x='A', y='B', gridsize=10)
    # TODO: need better way to test. This just does existence.
    # assertEqual (not the deprecated ``assert_``) reports both values on
    # failure.
    self.assertEqual(len(ax.collections), 1)
968+
969+
@slow
970+
def test_hexbin_with_c(self):
    # Hexbin with an explicit value column ``C``, first with the default
    # reduction and then with a user-supplied ``reduce_C_function``.
    df = DataFrame({"A": np.random.uniform(size=20),
                    "B": np.random.uniform(size=20),
                    "C": np.arange(20) + np.random.uniform(size=20)})

    ax = df.plot(kind='hexbin', x='A', y='B', C='C')
    self.assertEqual(len(ax.collections), 1)

    ax = df.plot(kind='hexbin', x='A', y='B', C='C',
                 reduce_C_function=np.std)
    self.assertEqual(len(ax.collections), 1)
981+
982+
@slow
983+
def test_hexbin_cmap(self):
    # The hexbin collection should use 'BuGn' when no colormap is given and
    # honour an explicit ``colormap`` argument otherwise.
    df = DataFrame({"A": np.random.uniform(size=20),
                    "B": np.random.uniform(size=20),
                    "C": np.arange(20) + np.random.uniform(size=20)})

    # Default to BuGn
    ax = df.plot(kind='hexbin', x='A', y='B')
    self.assertEqual(ax.collections[0].cmap.name, 'BuGn')

    cm = 'cubehelix'
    ax = df.plot(kind='hexbin', x='A', y='B', colormap=cm)
    self.assertEqual(ax.collections[0].cmap.name, cm)
995+
996+
@slow
997+
def test_no_color_bar(self):
    # With ``colorbar=None`` the hexbin collection must not have a colorbar
    # attached to it.
    frame = DataFrame({"A": np.random.uniform(size=20),
                       "B": np.random.uniform(size=20),
                       "C": np.arange(20) + np.random.uniform(size=20)})

    axes = frame.plot(kind='hexbin', x='A', y='B', colorbar=None)
    hexbin_collection = axes.collections[0]
    self.assertIs(hexbin_collection.colorbar, None)
1004+
1005+
@slow
1006+
def test_allow_cmap(self):
    # matplotlib's ``cmap`` spelling is accepted as an alternative to
    # pandas' ``colormap``; supplying both at once raises TypeError.
    df = DataFrame({"A": np.random.uniform(size=20),
                    "B": np.random.uniform(size=20),
                    "C": np.arange(20) + np.random.uniform(size=20)})

    ax = df.plot(kind='hexbin', x='A', y='B', cmap='YlGn')
    self.assertEqual(ax.collections[0].cmap.name, 'YlGn')

    with tm.assertRaises(TypeError):
        df.plot(kind='hexbin', x='A', y='B', cmap='YlGn',
                colormap='BuGn')
1017+
9591018

9601019
@tm.mplskip
9611020
class TestDataFrameGroupByPlots(tm.TestCase):

pandas/tools/plotting.py

Lines changed: 79 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -835,7 +835,14 @@ def __init__(self, data, kind=None, by=None, subplots=False, sharex=True,
835835
secondary_y = [secondary_y]
836836
self.secondary_y = secondary_y
837837

838-
self.colormap = colormap
838+
# ugly TypeError if user passes matplotlib's `cmap` name.
839+
# Probably better to accept either.
840+
if 'cmap' in kwds and colormap:
841+
raise TypeError("Only specify one of `cmap` and `colormap`.")
842+
elif 'cmap' in kwds:
843+
self.colormap = kwds.pop('cmap')
844+
else:
845+
self.colormap = colormap
839846

840847
self.kwds = kwds
841848

@@ -1263,6 +1270,52 @@ def _post_plot_logic(self):
12631270
ax.set_xlabel(com.pprint_thing(x))
12641271

12651272

1273+
class HexBinPlot(MPLPlot):
    """Hexagonal bin plot of two columns of a DataFrame.

    Parameters
    ----------
    data : DataFrame
        Passed through to ``MPLPlot.__init__``.
    x, y : column label or integer position
        Columns supplying the point coordinates. Both are required.
    C : column label or integer position, optional
        Column supplying a value for each ``(x, y)`` point. When omitted,
        ``C=None`` is handed to matplotlib's ``hexbin``.
    **kwargs
        Forwarded to ``MPLPlot.__init__``. A ``colorbar`` entry left in
        ``self.kwds`` (default True) controls whether a colorbar is drawn;
        the remaining entries of ``self.kwds`` are passed to ``ax.hexbin``.

    Raises
    ------
    ValueError
        If ``x`` or ``y`` is None.
    """

    def __init__(self, data, x, y, C=None, **kwargs):
        MPLPlot.__init__(self, data, **kwargs)

        if x is None or y is None:
            raise ValueError('hexbin requires an x and y column')

        # An integer is treated as a position unless the column index itself
        # holds integers, in which case it is already a label.
        if com.is_integer(x) and not self.data.columns.holds_integer():
            x = self.data.columns[x]
        if com.is_integer(y) and not self.data.columns.holds_integer():
            y = self.data.columns[y]
        if com.is_integer(C) and not self.data.columns.holds_integer():
            C = self.data.columns[C]

        self.x = x
        self.y = y
        self.C = C

    def _make_plot(self):
        """Draw the hexbin on the first axes and optionally add a colorbar."""
        import matplotlib.pyplot as plt

        x, y, data, C = self.x, self.y, self.data, self.C
        ax = self.axes[0]
        # pandas uses colormap, matplotlib uses cmap.
        cmap = plt.cm.get_cmap(self.colormap or 'BuGn')
        # ``colorbar`` is a pandas-level option; remove it before the
        # remaining keywords are forwarded to matplotlib.
        cb = self.kwds.pop('colorbar', True)

        c_values = None if C is None else data[C].values

        # Keep the collection returned by hexbin rather than looking up
        # ``ax.collections[0]``: a user-supplied axes may already contain
        # other collections, and the colorbar must describe this one.
        img = ax.hexbin(data[x].values, data[y].values, C=c_values,
                        cmap=cmap, **self.kwds)
        if cb:
            self.fig.colorbar(img, ax=ax)

    def _post_plot_logic(self):
        """Label the axes with the names of the x and y columns."""
        ax = self.axes[0]
        x, y = self.x, self.y
        ax.set_ylabel(com.pprint_thing(y))
        ax.set_xlabel(com.pprint_thing(x))
1317+
1318+
12661319
class LinePlot(MPLPlot):
12671320

12681321
def __init__(self, data, **kwargs):
@@ -1663,11 +1716,12 @@ def plot_frame(frame=None, x=None, y=None, subplots=False, sharex=True,
16631716
ax : matplotlib axis object, default None
16641717
style : list or dict
16651718
matplotlib line style per column
1666-
kind : {'line', 'bar', 'barh', 'kde', 'density', 'scatter'}
1719+
kind : {'line', 'bar', 'barh', 'kde', 'density', 'scatter', 'hexbin'}
16671720
bar : vertical bar plot
16681721
barh : horizontal bar plot
16691722
kde/density : Kernel Density Estimation plot
16701723
scatter: scatter plot
1724+
hexbin: hexbin plot
16711725
logx : boolean, default False
16721726
For line plots, use log scaling on x axis
16731727
logy : boolean, default False
@@ -1695,6 +1749,17 @@ def plot_frame(frame=None, x=None, y=None, subplots=False, sharex=True,
16951749
Returns
16961750
-------
16971751
ax_or_axes : matplotlib.AxesSubplot or list of them
1752+
1753+
Notes
1754+
-----
1755+
1756+
If `kind`='hexbin', you can control the size of the bins with the
1757+
`gridsize` argument. By default, a histogram of the counts around each
1758+
`(x, y)` point is computed. You can specify alternative aggregations
1759+
by passing values to the `C` and `reduce_C_function` arguments.
1760+
`C` specifies the value at each `(x, y)` point and `reduce_C_function`
1761+
is a function of one argument that reduces all the values in a bin to
1762+
a single number (e.g. `mean`, `max`, `sum`, `std`).
16981763
"""
16991764
kind = _get_standard_kind(kind.lower().strip())
17001765
if kind == 'line':
@@ -1705,6 +1770,8 @@ def plot_frame(frame=None, x=None, y=None, subplots=False, sharex=True,
17051770
klass = KdePlot
17061771
elif kind == 'scatter':
17071772
klass = ScatterPlot
1773+
elif kind == 'hexbin':
1774+
klass = HexBinPlot
17081775
else:
17091776
raise ValueError('Invalid chart type given %s' % kind)
17101777

@@ -1717,6 +1784,16 @@ def plot_frame(frame=None, x=None, y=None, subplots=False, sharex=True,
17171784
figsize=figsize, logx=logx, logy=logy,
17181785
sort_columns=sort_columns, secondary_y=secondary_y,
17191786
**kwds)
1787+
elif kind == 'hexbin':
1788+
C = kwds.pop('C', None) # remove from kwargs so we can set default
1789+
plot_obj = klass(frame, x=x, y=y, kind=kind, subplots=subplots,
1790+
rot=rot,legend=legend, ax=ax, style=style,
1791+
fontsize=fontsize, use_index=use_index, sharex=sharex,
1792+
sharey=sharey, xticks=xticks, yticks=yticks,
1793+
xlim=xlim, ylim=ylim, title=title, grid=grid,
1794+
figsize=figsize, logx=logx, logy=logy,
1795+
sort_columns=sort_columns, secondary_y=secondary_y,
1796+
C=C, **kwds)
17201797
else:
17211798
if x is not None:
17221799
if com.is_integer(x) and not frame.columns.holds_integer():

0 commit comments

Comments
 (0)