Merge pull request #5638 from gibbonorbiter/master

Tom Augspurger · Tom Augspurger · commit 08e0a96ddb7d · 2014-03-18T12:42:12.000-05:00
VIS: added ability to plot DataFrames and Series with errorbars
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -59,6 +59,8 @@ New features
   Date is used primarily in astronomy and represents the number of days from
   noon, January 1, 4713 BC.  Because nanoseconds are used to define the time
   in pandas the actual range of dates that you can use is 1678 AD to 2262 AD. (:issue:`4041`)
+- Added error bar support to the ``.plot`` method of ``DataFrame`` and ``Series`` (:issue:`3796`)
+
 
 API Changes
 ~~~~~~~~~~~
diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt
@@ -286,6 +286,20 @@ You can use a right-hand-side of an alignable object as well.
    df2.loc[idx[:,:,['C1','C3']],:] = df2*1000
    df2
 
+Plotting With Errorbars
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Plotting with error bars is now supported in the ``.plot`` method of ``DataFrame`` and ``Series`` objects (:issue:`3796`).
+
+x and y errorbars are supported and can be supplied using the ``xerr`` and ``yerr`` keyword arguments to ``.plot()``. The error values can be specified using a variety of formats.
+
+- As a ``DataFrame`` or ``dict`` of errors with one or more of the column names (or dictionary keys) matching one or more of the column names of the plotting ``DataFrame`` or matching the ``name`` attribute of the ``Series``
+- As a ``str`` indicating which of the columns of the plotting ``DataFrame`` contain the error values
+- As raw values (``list``, ``tuple``, or ``np.ndarray``). Must be the same length as the plotting ``DataFrame``/``Series``
+
+Asymmetrical error bars are also supported; however, raw error values must be provided in this case. For an ``M``-length ``Series``, a ``Mx2`` array should be provided indicating lower and upper (or left and right) errors. For a ``MxN`` ``DataFrame`` (``M`` rows, ``N`` columns), asymmetrical errors should be in an ``Nx2xM`` array.
+
+
 Prior Version Deprecations/Changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst
@@ -381,6 +381,40 @@ columns:
 
     plt.close('all')
 
+.. _visualization.errorbars:
+
+Plotting With Error Bars
+~~~~~~~~~~~~~~~~~~~~~~~~
+Plotting with error bars is now supported in the ``.plot`` method of ``DataFrame`` and ``Series`` objects.
+
+x and y errorbars are supported and can be supplied using the ``xerr`` and ``yerr`` keyword arguments to ``.plot()``. The error values can be specified using a variety of formats.
+
+- As a ``DataFrame`` or ``dict`` of errors with column names matching the ``columns`` attribute of the plotting ``DataFrame`` or matching the ``name`` attribute of the ``Series``
+- As a ``str`` indicating which of the columns of the plotting ``DataFrame`` contain the error values
+- As raw values (``list``, ``tuple``, or ``np.ndarray``). Must be the same length as the plotting ``DataFrame``/``Series``
+
+Asymmetrical error bars are also supported; however, raw error values must be provided in this case. For an ``M``-length ``Series``, a ``Mx2`` array should be provided indicating lower and upper (or left and right) errors. For a ``MxN`` ``DataFrame`` (``M`` rows, ``N`` columns), asymmetrical errors should be in an ``Nx2xM`` array.
+
+Here is an example of one way to easily plot group means with standard deviations from the raw data.
+
+.. ipython:: python
+
+   # Generate the data
+   ix3 = pd.MultiIndex.from_arrays([['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'], ['foo', 'foo', 'bar', 'bar', 'foo', 'foo', 'bar', 'bar']], names=['letter', 'word'])
+   df3 = pd.DataFrame({'data1': [3, 2, 4, 3, 2, 4, 3, 2], 'data2': [6, 5, 7, 5, 4, 5, 6, 5]}, index=ix3)
+
+   # Group by index labels and take the means and standard deviations for each group
+   gp3 = df3.groupby(level=('letter', 'word'))
+   means = gp3.mean()
+   errors = gp3.std()
+   means
+   errors
+
+   # Plot
+   fig, ax = plt.subplots()
+   @savefig errorbar_example.png
+   means.plot(yerr=errors, ax=ax, kind='bar')
+
 .. _visualization.scatter_matrix:
 
 Scatter plot matrix
diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py
@@ -360,6 +360,35 @@ def test_dup_datetime_index_plot(self):
         s = Series(values, index=index)
         _check_plot_works(s.plot)
 
+    @slow
+    def test_errorbar_plot(self):
+
+        s = Series(np.arange(10))
+        s_err = np.random.randn(10)
+
+        # test line and bar plots
+        kinds = ['line', 'bar']
+        for kind in kinds:
+            _check_plot_works(s.plot, yerr=Series(s_err), kind=kind)
+            _check_plot_works(s.plot, yerr=s_err, kind=kind)
+            _check_plot_works(s.plot, yerr=s_err.tolist(), kind=kind)
+
+        _check_plot_works(s.plot, xerr=s_err)
+
+        # test time series plotting
+        ix = date_range('1/1/2000', '1/1/2001', freq='M')
+        ts = Series(np.arange(12), index=ix)
+        ts_err = Series(np.random.randn(12), index=ix)
+
+        _check_plot_works(ts.plot, yerr=ts_err)
+
+        # check incorrect lengths and types
+        with tm.assertRaises(ValueError):
+            s.plot(yerr=np.arange(11))
+
+        s_err = ['zzz']*10
+        with tm.assertRaises(TypeError):
+            s.plot(yerr=s_err)
 
 @tm.mplskip
 class TestDataFramePlots(tm.TestCase):
@@ -1015,6 +1044,104 @@ def test_allow_cmap(self):
             df.plot(kind='hexbin', x='A', y='B', cmap='YlGn',
                          colormap='BuGn')
 
+    def test_errorbar_plot(self):
+
+        d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)}
+        df = DataFrame(d)
+        d_err = {'x': np.ones(12)*0.2, 'y': np.ones(12)*0.4}
+        df_err = DataFrame(d_err)
+
+        # check line plots
+        _check_plot_works(df.plot, yerr=df_err, logy=True)
+        _check_plot_works(df.plot, yerr=df_err, logx=True, logy=True)
+
+        kinds = ['line', 'bar', 'barh']
+        for kind in kinds:
+            _check_plot_works(df.plot, yerr=df_err['x'], kind=kind)
+            _check_plot_works(df.plot, yerr=d_err, kind=kind)
+            _check_plot_works(df.plot, yerr=df_err, xerr=df_err, kind=kind)
+            _check_plot_works(df.plot, yerr=df_err['x'], xerr=df_err['x'], kind=kind)
+            _check_plot_works(df.plot, yerr=df_err, xerr=df_err, subplots=True, kind=kind)
+
+        _check_plot_works((df+1).plot, yerr=df_err, xerr=df_err, kind='bar', log=True)
+
+        # yerr is raw error values
+        _check_plot_works(df['y'].plot, yerr=np.ones(12)*0.4)
+        _check_plot_works(df.plot, yerr=np.ones((2, 12))*0.4)
+
+        # yerr is column name
+        df['yerr'] = np.ones(12)*0.2
+        _check_plot_works(df.plot, y='y', x='x', yerr='yerr')
+
+        with tm.assertRaises(ValueError):
+            df.plot(yerr=np.random.randn(11))
+
+        df_err = DataFrame({'x': ['zzz']*12, 'y': ['zzz']*12})
+        with tm.assertRaises(TypeError):
+            df.plot(yerr=df_err)
+
+    @slow
+    def test_errorbar_with_integer_column_names(self):
+        # test with integer column names
+        df = DataFrame(np.random.randn(10, 2))
+        df_err = DataFrame(np.random.randn(10, 2))
+        _check_plot_works(df.plot, yerr=df_err)
+        _check_plot_works(df.plot, y=0, yerr=1)
+
+    @slow
+    def test_errorbar_with_partial_columns(self):
+        df = DataFrame(np.random.randn(10, 3))
+        df_err = DataFrame(np.random.randn(10, 2), columns=[0, 2])
+        kinds = ['line', 'bar']
+        for kind in kinds:
+            _check_plot_works(df.plot, yerr=df_err, kind=kind)
+
+        ix = date_range('1/1/2000', periods=10, freq='M')
+        df.set_index(ix, inplace=True)
+        df_err.set_index(ix, inplace=True)
+        _check_plot_works(df.plot, yerr=df_err, kind='line')
+
+    @slow
+    def test_errorbar_timeseries(self):
+
+        d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)}
+        d_err = {'x': np.ones(12)*0.2, 'y': np.ones(12)*0.4}
+
+        # check time-series plots
+        ix = date_range('1/1/2000', '1/1/2001', freq='M')
+        tdf = DataFrame(d, index=ix)
+        tdf_err = DataFrame(d_err, index=ix)
+
+        kinds = ['line', 'bar', 'barh']
+        for kind in kinds:
+            _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind)
+            _check_plot_works(tdf.plot, yerr=d_err, kind=kind)
+            _check_plot_works(tdf.plot, y='y', kind=kind)
+            _check_plot_works(tdf.plot, y='y', yerr='x', kind=kind)
+            _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind)
+            _check_plot_works(tdf.plot, kind=kind, subplots=True)
+
+
+    def test_errorbar_asymmetrical(self):
+
+        np.random.seed(0)
+        err = np.random.rand(3, 2, 5)
+
+        data = np.random.randn(5, 3)
+        df = DataFrame(data)
+
+        ax = df.plot(yerr=err, xerr=err/2)
+
+        self.assertEqual(ax.lines[7].get_ydata()[0], data[0,1]-err[1,0,0])
+        self.assertEqual(ax.lines[8].get_ydata()[0], data[0,1]+err[1,1,0])
+
+        self.assertEqual(ax.lines[5].get_xdata()[0], -err[1,0,0]/2)
+        self.assertEqual(ax.lines[6].get_xdata()[0], err[1,1,0]/2)
+
+        with tm.assertRaises(ValueError):
+            df.plot(yerr=err.T)
+
+        tm.close()
 
 @tm.mplskip
 class TestDataFrameGroupByPlots(tm.TestCase):
diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py