From 36a6a35d6e7ea08e5cb5c12f0db791a294c6da4d Mon Sep 17 00:00:00 2001 From: Jacob Deppen Date: Tue, 5 May 2020 01:17:12 -0700 Subject: [PATCH 1/3] DOC: Add plotting examples and fix broken examples --- pandas/plotting/_misc.py | 112 +++++++++++++++++++++++++++++---------- 1 file changed, 85 insertions(+), 27 deletions(-) diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 30c5ba0ed94b6..a1c716220dbbc 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -117,8 +117,13 @@ def scatter_matrix( Examples -------- - >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D']) - >>> scatter_matrix(df, alpha=0.2) + + .. plot:: + :context: close-figs + + >>> np.random.seed(5) + >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D']) + >>> pd.plotting.scatter_matrix(df, alpha=0.2) """ plot_backend = _get_plot_backend("matplotlib") return plot_backend.scatter_matrix( @@ -179,24 +184,31 @@ def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): Examples -------- + .. plot:: :context: close-figs - >>> df = pd.DataFrame({ - ... 'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6, - ... 6.7, 4.6], - ... 'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2, - ... 3.3, 3.6], - ... 'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4, - ... 5.7, 1.0], - ... 'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2, - ... 2.1, 0.2], - ... 'Category': ['virginica', 'virginica', 'setosa', - ... 'virginica', 'virginica', 'versicolor', - ... 'versicolor', 'setosa', 'virginica', - ... 'setosa'] - ... }) - >>> rad_viz = pd.plotting.radviz(df, 'Category') # doctest: +SKIP + >>> df = pd.DataFrame( + ... { + ... 'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6, 6.7, 4.6], + ... 'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2, 3.3, 3.6], + ... 'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4, 5.7, 1.0], + ... 'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2, 2.1, 0.2], + ... 'Category': [ + ... 'virginica', + ... 'virginica', + ... 'setosa', + ... 'virginica', + ... 'virginica', + ... 'versicolor', + ... 'versicolor', + ... 'setosa', + ... 'virginica', + ... 'setosa' + ... ] + ... } + ... ) + >>> pd.plotting.radviz(df, 'Category') """ plot_backend = _get_plot_backend("matplotlib") return plot_backend.radviz( @@ -243,6 +255,15 @@ def andrews_curves( Returns ------- class:`matplotlip.axis.Axes` + + Examples + -------- + + .. plot:: + :context: close-figs + + >>> df = pd.read_csv('https://raw.github.com/pandas-dev/pandas/master/pandas/tests/data/iris.csv') + >>> pd.plotting.andrews_curves(df, 'Name') """ plot_backend = _get_plot_backend("matplotlib") return plot_backend.andrews_curves( @@ -298,10 +319,11 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): This example draws a basic bootstap plot for a Series. .. plot:: - :context: close-figs + :context: close-figs - >>> s = pd.Series(np.random.uniform(size=100)) - >>> fig = pd.plotting.bootstrap_plot(s) # doctest: +SKIP + >>> np.random.seed(5) + >>> s = pd.Series(np.random.uniform(size=100)) + >>> pd.plotting.bootstrap_plot(s) """ plot_backend = _get_plot_backend("matplotlib") return plot_backend.bootstrap_plot( @@ -358,13 +380,12 @@ def parallel_coordinates( Examples -------- - >>> from matplotlib import pyplot as plt - >>> df = pd.read_csv('https://raw.github.com/pandas-dev/pandas/master' - '/pandas/tests/data/iris.csv') - >>> pd.plotting.parallel_coordinates( - df, 'Name', - color=('#556270', '#4ECDC4', '#C7F464')) - >>> plt.show() + + .. plot:: + :context: close-figs + + >>> df = pd.read_csv('https://raw.github.com/pandas-dev/pandas/master/pandas/tests/data/iris.csv') + >>> pd.plotting.parallel_coordinates(df, 'Name', color=('#556270', '#4ECDC4', '#C7F464')) """ plot_backend = _get_plot_backend("matplotlib") return plot_backend.parallel_coordinates( @@ -398,6 +419,28 @@ def lag_plot(series, lag=1, ax=None, **kwds): Returns ------- class:`matplotlib.axis.Axes` + + Examples + -------- + + Lag plots are most commonly used to look for patterns in time series data. + + Given the following time series + + .. plot:: + :context: close-figs + + >>> np.random.seed(5) + >>> x = np.cumsum(np.random.normal(loc=1, scale=5, size=50)) + >>> s = pd.Series(x) + >>> s.plot() + + A lag plot with ``lag=1`` returns + + .. plot:: + :context: close-figs + + >>> pd.plotting.lag_plot(s, lag=1) """ plot_backend = _get_plot_backend("matplotlib") return plot_backend.lag_plot(series=series, lag=lag, ax=ax, **kwds) @@ -417,6 +460,21 @@ def autocorrelation_plot(series, ax=None, **kwargs): Returns ------- class:`matplotlib.axis.Axes` + + Examples + -------- + + The horizontal lines displayed in the plot correspond to 95% and 99% confidence bands. + + The dashed line is 99% confidence band. + + .. plot:: + :context: close-figs + + >>> np.random.seed(5) + >>> spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000) + >>> s = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing)) + >>> pd.plotting.autocorrelation_plot(s) """ plot_backend = _get_plot_backend("matplotlib") return plot_backend.autocorrelation_plot(series=series, ax=ax, **kwargs) From 8f3a036e8a0b8d16fe3fd76fc413a698b3269b31 Mon Sep 17 00:00:00 2001 From: Jacob Deppen Date: Tue, 5 May 2020 01:30:26 -0700 Subject: [PATCH 2/3] Fix E501 errors --- pandas/plotting/_misc.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index a1c716220dbbc..358ecdac32829 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -262,7 +262,10 @@ def andrews_curves( .. plot:: :context: close-figs - >>> df = pd.read_csv('https://raw.github.com/pandas-dev/pandas/master/pandas/tests/data/iris.csv') + >>> df = pd.read_csv( + ... 'https://raw.github.com/pandas-dev/' + ... 'pandas/master/pandas/tests/data/iris.csv' + ... ) >>> pd.plotting.andrews_curves(df, 'Name') """ plot_backend = _get_plot_backend("matplotlib") @@ -384,8 +387,13 @@ def parallel_coordinates( .. plot:: :context: close-figs - >>> df = pd.read_csv('https://raw.github.com/pandas-dev/pandas/master/pandas/tests/data/iris.csv') - >>> pd.plotting.parallel_coordinates(df, 'Name', color=('#556270', '#4ECDC4', '#C7F464')) + >>> df = pd.read_csv( + ... 'https://raw.github.com/pandas-dev/' + ... 'pandas/master/pandas/tests/data/iris.csv' + ... ) + >>> pd.plotting.parallel_coordinates( + ... df, 'Name', color=('#556270', '#4ECDC4', '#C7F464') + ... ) """ plot_backend = _get_plot_backend("matplotlib") return plot_backend.parallel_coordinates( @@ -464,7 +472,7 @@ def autocorrelation_plot(series, ax=None, **kwargs): Examples -------- - The horizontal lines displayed in the plot correspond to 95% and 99% confidence bands. + The horizontal lines in the plot correspond to 95% and 99% confidence bands. The dashed line is 99% confidence band. From 92103c6692ab038eba7494b74e6a6e501520fd4b Mon Sep 17 00:00:00 2001 From: Jacob Deppen Date: Tue, 5 May 2020 14:05:10 -0700 Subject: [PATCH 3/3] Remove random seed except where it impacts interpretability --- pandas/plotting/_misc.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 358ecdac32829..594b95d1937ea 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -121,7 +121,6 @@ def scatter_matrix( .. plot:: :context: close-figs - >>> np.random.seed(5) >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D']) >>> pd.plotting.scatter_matrix(df, alpha=0.2) """ @@ -324,7 +323,6 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): .. plot:: :context: close-figs - >>> np.random.seed(5) >>> s = pd.Series(np.random.uniform(size=100)) >>> pd.plotting.bootstrap_plot(s) """ @@ -479,7 +477,6 @@ def autocorrelation_plot(series, ax=None, **kwargs): .. plot:: :context: close-figs - >>> np.random.seed(5) >>> spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000) >>> s = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing)) >>> pd.plotting.autocorrelation_plot(s)