diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index 34bd803516468..f2676e6423b2f 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -325,3 +325,28 @@ confidence band. @savefig autocorrelation_plot.png width=6in autocorrelation_plot(data) + +Probability Plot +~~~~~~~~~~~~~~~~ + +Probability plots are used to check whether given data follow some probability +distribution. With the default parameters, the data are compared against the +normal distribution. The data are plotted against the theoretical distribution +in such a way that, if they follow it, the points fall along a straight line. + +.. ipython:: python + + from pandas.tools.plotting import probability_plot + + plt.figure() + + u_data = Series(np.random.random(1000)) + n_data = Series(np.random.randn(1000)) + + @savefig probability_plot_u.png width=6in + probability_plot(u_data, dist='norm', marker='+', color='black') + + plt.figure() + + @savefig probability_plot_n.png width=6in + probability_plot(n_data, dist='norm', marker='+', color='black') \ No newline at end of file diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 8326445f28cb0..aabcb3d8642cd 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -86,6 +86,13 @@ def test_lag_plot(self): from pandas.tools.plotting import lag_plot _check_plot_works(lag_plot, self.ts) + @slow + def test_probability_plot(self): + from pandas.tools.plotting import probability_plot + _check_plot_works(probability_plot, self.ts) + _check_plot_works(probability_plot, self.ts, marker='+', color='black') + _check_plot_works(probability_plot, self.ts, dist='cauchy', distargs=(1.0, 0.01), marker='+', color='black') + class TestDataFramePlots(unittest.TestCase): @classmethod diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 515434edda6b0..5ac153df71966 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -202,6 +202,36 @@ def lag_plot(series, 
def probability_plot(series, ax=None, dist='norm', distargs=(), **kwds):
    """Probability plot for uni-variate data.

    The ordered sample values are scattered against the quantiles of a
    theoretical distribution, as computed by ``scipy.stats.probplot``, and
    the least-squares line fitted by ``probplot`` is overlaid in grey.

    Parameters
    ----------
    series : Series
        Sample to plot; its ``values`` attribute is passed to ``probplot``.
    ax : Matplotlib axis object, optional
        Axes to draw on. When omitted, the current pyplot axes
        (``plt.gca()``) are used.
    dist : str, optional
        Distribution name, one supported by scipy
        http://docs.scipy.org/doc/scipy/reference/stats.html#continuous-distributions
        Defaults to ``'norm'``.
    distargs : tuple, optional
        Distribution specific parameters, usually location and scale.
        Forwarded to ``probplot`` as ``sparams``.
    kwds : optional
        Matplotlib scatter method keyword arguments.

    Returns
    -------
    fig : Matplotlib figure object
        The figure that owns ``ax``.
    """
    from scipy.stats import probplot

    if ax is None:
        # pyplot is only needed to look up the current axes, so it is not
        # imported when the caller supplies the axes explicitly.
        import matplotlib.pyplot as plt
        ax = plt.gca()

    (theoretical, ordered), (slope, intercept, _) = probplot(
        series.values, dist=dist, sparams=distargs)
    ax.scatter(theoretical, ordered, **kwds)

    # Draw the fitted line between the axes' current y-limits: invert
    # y = slope * x + intercept to find the x value at each limit.
    y_low, y_high = ax.get_ylim()
    x_low = (y_low - intercept) / slope
    x_high = (y_high - intercept) / slope
    ax.plot([x_low, x_high], [y_low, y_high], color='grey')

    ax.set_xlabel("Theoretical Quantiles")
    ax.set_ylabel("Sample Quantiles")
    return ax.get_figure()