Skip to content

Commit 08e0a96

Browse files
author
Tom Augspurger
committed
Merge pull request #5638 from gibbonorbiter/master
VIS: added ability to plot DataFrames and Series with errorbars
2 parents 4b9baed + 765b3e0 commit 08e0a96

File tree

5 files changed

+344
-17
lines changed

5 files changed

+344
-17
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ New features
5959
Date is used primarily in astronomy and represents the number of days from
6060
noon, January 1, 4713 BC. Because nanoseconds are used to define the time
6161
in pandas the actual range of dates that you can use is 1678 AD to 2262 AD. (:issue:`4041`)
62+
- Added error bar support to the ``.plot`` method of ``DataFrame`` and ``Series`` (:issue:`3796`)
63+
6264

6365
API Changes
6466
~~~~~~~~~~~

doc/source/v0.14.0.txt

+14
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,20 @@ You can use a right-hand-side of an alignable object as well.
286286
df2.loc[idx[:,:,['C1','C3']],:] = df2*1000
287287
df2
288288

289+
Plotting With Errorbars
290+
~~~~~~~~~~~~~~~~~~~~~~~
291+
292+
Plotting with error bars is now supported in the ``.plot`` method of ``DataFrame`` and ``Series`` objects (:issue:`3796`).
293+
294+
x and y errorbars are supported and can be supplied using the ``xerr`` and ``yerr`` keyword arguments to ``.plot()``. The error values can be specified using a variety of formats.
295+
296+
- As a ``DataFrame`` or ``dict`` of errors with one or more of the column names (or dictionary keys) matching one or more of the column names of the plotting ``DataFrame`` or matching the ``name`` attribute of the ``Series``
297+
- As a ``str`` indicating which of the columns of plotting ``DataFrame`` contain the error values
298+
- As raw values (``list``, ``tuple``, or ``np.ndarray``). Must be the same length as the plotting ``DataFrame``/``Series``
299+
300+
Asymmetrical error bars are also supported, however raw error values must be provided in this case. For a ``M`` length ``Series``, a ``Mx2`` array should be provided indicating lower and upper (or left and right) errors. For a ``MxN`` ``DataFrame`` (``M`` rows, ``N`` columns), asymmetrical errors should be in a ``Nx2xM`` array.
301+
302+
289303
Prior Version Deprecations/Changes
290304
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
291305

doc/source/visualization.rst

+34
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,40 @@ columns:
381381
382382
plt.close('all')
383383
384+
.. _visualization.errorbars:
385+
386+
Plotting With Error Bars
387+
~~~~~~~~~~~~~~~~~~~~~~~~
388+
Plotting with error bars is now supported in the ``.plot`` method of ``DataFrame`` and ``Series`` objects.
389+
390+
x and y errorbars are supported and can be supplied using the ``xerr`` and ``yerr`` keyword arguments to ``.plot()``. The error values can be specified using a variety of formats.
391+
392+
- As a ``DataFrame`` or ``dict`` of errors with column names matching the ``columns`` attribute of the plotting ``DataFrame`` or matching the ``name`` attribute of the ``Series``
393+
- As a ``str`` indicating which of the columns of plotting ``DataFrame`` contain the error values
394+
- As raw values (``list``, ``tuple``, or ``np.ndarray``). Must be the same length as the plotting ``DataFrame``/``Series``
395+
396+
Asymmetrical error bars are also supported, however raw error values must be provided in this case. For a ``M`` length ``Series``, a ``Mx2`` array should be provided indicating lower and upper (or left and right) errors. For a ``MxN`` ``DataFrame`` (``M`` rows, ``N`` columns), asymmetrical errors should be in a ``Nx2xM`` array.
397+
398+
Here is an example of one way to easily plot group means with standard deviations from the raw data.
399+
400+
.. ipython:: python
401+
402+
# Generate the data
403+
ix3 = pd.MultiIndex.from_arrays([['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'], ['foo', 'foo', 'bar', 'bar', 'foo', 'foo', 'bar', 'bar']], names=['letter', 'word'])
404+
df3 = pd.DataFrame({'data1': [3, 2, 4, 3, 2, 4, 3, 2], 'data2': [6, 5, 7, 5, 4, 5, 6, 5]}, index=ix3)
405+
406+
# Group by index labels and take the means and standard deviations for each group
407+
gp3 = df3.groupby(level=('letter', 'word'))
408+
means = gp3.mean()
409+
errors = gp3.std()
410+
means
411+
errors
412+
413+
# Plot
414+
fig, ax = plt.subplots()
415+
@savefig errorbar_example.png
416+
means.plot(yerr=errors, ax=ax, kind='bar')
417+
384418
.. _visualization.scatter_matrix:
385419

386420
Scatter plot matrix

pandas/tests/test_graphics.py

+127
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,35 @@ def test_dup_datetime_index_plot(self):
360360
s = Series(values, index=index)
361361
_check_plot_works(s.plot)
362362

363+
@slow
def test_errorbar_plot(self):
    # Smoke-test Series.plot with error bars supplied as a Series, an
    # ndarray and a list, then check that badly sized or non-numeric
    # error values are rejected.

    s = Series(np.arange(10))
    # Error bar magnitudes must be non-negative, so take the absolute
    # value of the random draws instead of using them raw.
    s_err = np.abs(np.random.randn(10))

    # test line and bar plots
    for kind in ['line', 'bar']:
        _check_plot_works(s.plot, yerr=Series(s_err), kind=kind)
        _check_plot_works(s.plot, yerr=s_err, kind=kind)
        _check_plot_works(s.plot, yerr=s_err.tolist(), kind=kind)

    # horizontal error bars, using the default plot kind
    _check_plot_works(s.plot, xerr=s_err)

    # test time series plotting
    ix = date_range('1/1/2000', '1/1/2001', freq='M')
    ts = Series(np.arange(12), index=ix)
    ts_err = Series(np.abs(np.random.randn(12)), index=ix)

    _check_plot_works(ts.plot, yerr=ts_err)

    # check incorrect lengths and types
    with tm.assertRaises(ValueError):
        # 11 error values for 10 data points
        s.plot(yerr=np.arange(11))

    s_err = ['zzz']*10
    with tm.assertRaises(TypeError):
        # right length, but the values are strings
        s.plot(yerr=s_err)
363392

364393
@tm.mplskip
365394
class TestDataFramePlots(tm.TestCase):
@@ -1015,6 +1044,104 @@ def test_allow_cmap(self):
10151044
df.plot(kind='hexbin', x='A', y='B', cmap='YlGn',
10161045
colormap='BuGn')
10171046

1047+
@slow
def test_errorbar_plot(self):
    # Smoke-test DataFrame.plot with error bars given as a DataFrame, a
    # dict, a Series, raw arrays and a column name, then check that badly
    # sized or non-numeric error values are rejected.
    # Marked @slow like the sibling errorbar tests: it renders many figures.

    d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)}
    df = DataFrame(d)
    d_err = {'x': np.ones(12)*0.2, 'y': np.ones(12)*0.4}
    df_err = DataFrame(d_err)

    # check line plots
    _check_plot_works(df.plot, yerr=df_err, logy=True)
    _check_plot_works(df.plot, yerr=df_err, logx=True, logy=True)

    for kind in ['line', 'bar', 'barh']:
        _check_plot_works(df.plot, yerr=df_err['x'], kind=kind)
        _check_plot_works(df.plot, yerr=d_err, kind=kind)
        _check_plot_works(df.plot, yerr=df_err, xerr=df_err, kind=kind)
        _check_plot_works(df.plot, yerr=df_err['x'], xerr=df_err['x'], kind=kind)
        _check_plot_works(df.plot, yerr=df_err, xerr=df_err, subplots=True, kind=kind)

    # df+1 keeps every value strictly positive for the log-scaled bar plot
    _check_plot_works((df+1).plot, yerr=df_err, xerr=df_err, kind='bar', log=True)

    # yerr is raw error values
    _check_plot_works(df['y'].plot, yerr=np.ones(12)*0.4)
    _check_plot_works(df.plot, yerr=np.ones((2, 12))*0.4)

    # yerr is column name
    df['yerr'] = np.ones(12)*0.2
    _check_plot_works(df.plot, y='y', x='x', yerr='yerr')

    # Wrong length: 11 error values for 12 rows.  Constant values are used
    # so the expected ValueError can only come from the length mismatch.
    with tm.assertRaises(ValueError):
        df.plot(yerr=np.ones(11))

    # right shape, but the values are strings
    df_err = DataFrame({'x': ['zzz']*12, 'y': ['zzz']*12})
    with tm.assertRaises(TypeError):
        df.plot(yerr=df_err)
1082+
1083+
@slow
def test_errorbar_with_integer_column_names(self):
    # test with integer column names
    # Column 1 of ``df`` is reused as error values below (``yerr=1``), so
    # both frames are made non-negative: error magnitudes may not be
    # negative.
    df = DataFrame(np.abs(np.random.randn(10, 2)))
    df_err = DataFrame(np.abs(np.random.randn(10, 2)))

    # errors matched to the data by (integer) column label
    _check_plot_works(df.plot, yerr=df_err)
    # errors taken from a column of the plotted frame, named by integer
    _check_plot_works(df.plot, y=0, yerr=1)
1090+
1091+
@slow
def test_errorbar_with_partial_columns(self):
    # Errors are supplied for only two (0 and 2) of the three data columns.
    df = DataFrame(np.random.randn(10, 3))
    # error magnitudes must be non-negative
    df_err = DataFrame(np.abs(np.random.randn(10, 2)), columns=[0, 2])
    for kind in ['line', 'bar']:
        _check_plot_works(df.plot, yerr=df_err, kind=kind)

    # same check again with both frames on a shared datetime index
    ix = date_range('1/1/2000', periods=10, freq='M')
    df.set_index(ix, inplace=True)
    df_err.set_index(ix, inplace=True)
    _check_plot_works(df.plot, yerr=df_err, kind='line')
1103+
1104+
@slow
def test_errorbar_timeseries(self):
    # Smoke-test error bars on DataFrames with a datetime index.

    d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)}
    d_err = {'x': np.ones(12)*0.2, 'y': np.ones(12)*0.4}

    # check time-series plots
    ix = date_range('1/1/2000', '1/1/2001', freq='M')
    tdf = DataFrame(d, index=ix)
    tdf_err = DataFrame(d_err, index=ix)

    for kind in ['line', 'bar', 'barh']:
        # errors as a label-matched DataFrame / dict
        _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind)
        _check_plot_works(tdf.plot, yerr=d_err, kind=kind)
        # single plotted column with errors as a Series / column name.
        # Every call in this test now passes error values; previously two
        # of them exercised no error bars and one was a duplicate.
        _check_plot_works(tdf.plot, y='y', yerr=tdf_err['x'], kind=kind)
        _check_plot_works(tdf.plot, y='y', yerr='x', kind=kind)
        # one subplot per column, each with its own error bars
        _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind, subplots=True)
1123+
1124+
1125+
def test_errorbar_asymmetrical(self):
    # Check that asymmetrical (lower/upper) raw error values end up at the
    # expected coordinates, and that a wrongly shaped array is rejected.

    np.random.seed(0)
    # one (2, 5) block of errors per data column: 3 columns, 5 rows
    err = np.random.rand(3, 2, 5)

    data = np.random.randn(5, 3)
    df = DataFrame(data)

    # try/finally so tm.close() still runs when an assertion below fails
    try:
        ax = df.plot(yerr=err, xerr=err/2)

        # NOTE(review): the ax.lines indices depend on the order in which
        # matplotlib adds the error bar artists -- confirm when upgrading.
        self.assertEqual(ax.lines[7].get_ydata()[0], data[0,1]-err[1,0,0])
        self.assertEqual(ax.lines[8].get_ydata()[0], data[0,1]+err[1,1,0])

        self.assertEqual(ax.lines[5].get_xdata()[0], -err[1,0,0]/2)
        self.assertEqual(ax.lines[6].get_xdata()[0], err[1,1,0]/2)

        # transposed array has shape (5, 2, 3), which must be rejected
        with tm.assertRaises(ValueError):
            df.plot(yerr=err.T)
    finally:
        tm.close()
10181145

10191146
@tm.mplskip
10201147
class TestDataFrameGroupByPlots(tm.TestCase):

0 commit comments

Comments
 (0)