diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index 12af80f1bce80..b492a4edd70a4 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -416,3 +416,30 @@ Below is an example to define two original properties, "internal_cache" as a tem # properties defined in _metadata are retained >>> df[['A', 'B']].added_property property + +.. _extending.plotting-backends: + +Plotting backends +----------------- + +Starting in 0.25 pandas can be extended with third-party plotting backends. The +main idea is letting users select a plotting backend different than the provided +one based on Matplotlib. For example: + +.. code-block:: python + + >>> pd.set_option('plotting.backend', 'backend.module') + >>> pd.Series([1, 2, 3]).plot() + +This would be more or less equivalent to: + +.. code-block:: python + + >>> import backend.module + >>> backend.module.plot(pd.Series([1, 2, 3])) + +The backend module can then use other visualization tools (Bokeh, Altair,...) +to generate the plots. + +More information on how to implement a third-party plotting backend can be found at +https://github.com/pandas-dev/pandas/blob/master/pandas/plotting/__init__.py#L1. 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3ff3fff22f4f0..aabf12cce2c16 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7946,7 +7946,7 @@ def isin(self, values): # ---------------------------------------------------------------------- # Add plotting methods to DataFrame - plot = CachedAccessor("plot", pandas.plotting.FramePlotMethods) + plot = CachedAccessor("plot", pandas.plotting.PlotAccessor) hist = pandas.plotting.hist_frame boxplot = pandas.plotting.boxplot_frame sparse = CachedAccessor("sparse", SparseFrameAccessor) diff --git a/pandas/core/series.py b/pandas/core/series.py index 9179099562832..36d8b12b73648 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4518,7 +4518,7 @@ def to_period(self, freq=None, copy=True): str = CachedAccessor("str", StringMethods) dt = CachedAccessor("dt", CombinedDatetimelikeProperties) cat = CachedAccessor("cat", CategoricalAccessor) - plot = CachedAccessor("plot", pandas.plotting.SeriesPlotMethods) + plot = CachedAccessor("plot", pandas.plotting.PlotAccessor) sparse = CachedAccessor("sparse", SparseAccessor) # ---------------------------------------------------------------------- diff --git a/pandas/plotting/__init__.py b/pandas/plotting/__init__.py index ac983e7efd618..57a45f0f18d90 100644 --- a/pandas/plotting/__init__.py +++ b/pandas/plotting/__init__.py @@ -1,18 +1,73 @@ """ -Plotting public API +Plotting public API. + +Authors of third-party plotting backends should implement a module with a +public ``plot(data, kind, **kwargs)``. The parameter `data` will contain +the data structure and can be a `Series` or a `DataFrame`. For example, +for ``df.plot()`` the parameter `data` will contain the DataFrame `df`. +In some cases, the data structure is transformed before being sent to +the backend (see PlotAccessor.__call__ in pandas/plotting/_core.py for +the exact transformations). 
+ +The parameter `kind` will be one of: + +- line +- bar +- barh +- box +- hist +- kde +- area +- pie +- scatter +- hexbin + +See the pandas API reference for documentation on each kind of plot. + +Any other keyword argument is currently assumed to be backend specific, +but some parameters may be unified and added to the signature in the +future (e.g. `title` which should be useful for any backend). + +Currently, all the Matplotlib functions in pandas are accessed through +the selected backend. For example, `pandas.plotting.boxplot` (equivalent +to `DataFrame.boxplot`) is also accessed in the selected backend. This +is expected to change, and the exact API is under discussion. But with +the current version, backends are expected to implement the next functions: + +- plot (described above, used for `Series.plot` and `DataFrame.plot`) +- hist_series and hist_frame (for `Series.hist` and `DataFrame.hist`) +- boxplot (`pandas.plotting.boxplot(df)` equivalent to `DataFrame.boxplot`) +- boxplot_frame and boxplot_frame_groupby +- tsplot (deprecated) +- register and deregister (register converters for the tick formats) +- Plots not called as `Series` and `DataFrame` methods: + - table + - andrews_curves + - autocorrelation_plot + - bootstrap_plot + - lag_plot + - parallel_coordinates + - radviz + - scatter_matrix + +Use the code in pandas/plotting/_matplotlib.py and +https://github.com/pyviz/hvplot as a reference on how to write a backend. + +For the discussion about the API see +https://github.com/pandas-dev/pandas/issues/26747. 
""" from pandas.plotting._core import ( - FramePlotMethods, SeriesPlotMethods, boxplot, boxplot_frame, - boxplot_frame_groupby, hist_frame, hist_series) + PlotAccessor, boxplot, boxplot_frame, boxplot_frame_groupby, hist_frame, + hist_series) from pandas.plotting._misc import ( andrews_curves, autocorrelation_plot, bootstrap_plot, deregister as deregister_matplotlib_converters, lag_plot, parallel_coordinates, plot_params, radviz, register as register_matplotlib_converters, scatter_matrix, table) -__all__ = ['boxplot', 'boxplot_frame', 'boxplot_frame_groupby', 'hist_frame', - 'hist_series', 'FramePlotMethods', 'SeriesPlotMethods', - 'scatter_matrix', 'radviz', 'andrews_curves', 'bootstrap_plot', - 'parallel_coordinates', 'lag_plot', 'autocorrelation_plot', - 'table', 'plot_params', 'register_matplotlib_converters', +__all__ = ['PlotAccessor', 'boxplot', 'boxplot_frame', 'boxplot_frame_groupby', + 'hist_frame', 'hist_series', 'scatter_matrix', 'radviz', + 'andrews_curves', 'bootstrap_plot', 'parallel_coordinates', + 'lag_plot', 'autocorrelation_plot', 'table', 'plot_params', + 'register_matplotlib_converters', 'deregister_matplotlib_converters'] diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index b0e928fa8022b..2f46df2985703 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1,5 +1,6 @@ import importlib from typing import List, Type # noqa +import warnings from pandas.util._decorators import Appender @@ -8,7 +9,6 @@ import pandas from pandas.core.base import PandasObject -from pandas.core.generic import _shared_doc_kwargs, _shared_docs # Trigger matplotlib import, which implicitly registers our # converts. 
Implicit registration is deprecated, and when enforced @@ -18,160 +18,148 @@ except ImportError: pass -df_kind = """- 'scatter' : scatter plot - - 'hexbin' : hexbin plot""" -series_kind = "" -df_coord = """x : label or position, default None - y : label, position or list of label, positions, default None - Allows plotting of one column versus another""" -series_coord = "" - -df_unique = """stacked : bool, default False in line and - bar plots, and True in area plot. If True, create stacked plot. - sort_columns : bool, default False - Sort column names to determine plot ordering - secondary_y : bool or sequence, default False - Whether to plot on the secondary y-axis - If a list/tuple, which columns to plot on secondary y-axis""" -series_unique = """label : label argument to provide to plot - secondary_y : bool or sequence of ints, default False - If True then y-axis will be on the right""" - -df_ax = """ax : matplotlib axes object, default None - subplots : bool, default False - Make separate subplots for each column - sharex : bool, default True if ax is None else False - In case subplots=True, share x axis and set some x axis labels to - invisible; defaults to True if ax is None otherwise False if an ax - is passed in; Be aware, that passing in both an ax and sharex=True - will alter all x axis labels for all axis in a figure! - sharey : bool, default False - In case subplots=True, share y axis and set some y axis labels to - invisible - layout : tuple (optional) - (rows, columns) for the layout of subplots""" -series_ax = """ax : matplotlib axes object - If not passed, uses gca()""" - -df_note = """- If `kind` = 'scatter' and the argument `c` is the name of a dataframe - column, the values of that column are used to color each point. - - If `kind` = 'hexbin', you can control the size of the bins with the - `gridsize` argument. By default, a histogram of the counts around each - `(x, y)` point is computed. 
You can specify alternative aggregations - by passing values to the `C` and `reduce_C_function` arguments. - `C` specifies the value at each `(x, y)` point and `reduce_C_function` - is a function of one argument that reduces all the values in a bin to - a single number (e.g. `mean`, `max`, `sum`, `std`).""" -series_note = "" - -_shared_doc_df_kwargs = dict(klass='DataFrame', klass_obj='df', - klass_kind=df_kind, klass_coord=df_coord, - klass_ax=df_ax, klass_unique=df_unique, - klass_note=df_note) -_shared_doc_series_kwargs = dict(klass='Series', klass_obj='s', - klass_kind=series_kind, - klass_coord=series_coord, klass_ax=series_ax, - klass_unique=series_unique, - klass_note=series_note) - -_shared_docs['plot'] = """ - Make plots of %(klass)s using matplotlib / pylab. - - *New in version 0.17.0:* Each plot kind has a corresponding method on the - ``%(klass)s.plot`` accessor: - ``%(klass_obj)s.plot(kind='line')`` is equivalent to - ``%(klass_obj)s.plot.line()``. +def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, + xrot=None, ylabelsize=None, yrot=None, figsize=None, + bins=10, **kwds): + """ + Draw histogram of the input series using matplotlib. Parameters ---------- - data : %(klass)s - %(klass_coord)s - kind : str - - 'line' : line plot (default) - - 'bar' : vertical bar plot - - 'barh' : horizontal bar plot - - 'hist' : histogram - - 'box' : boxplot - - 'kde' : Kernel Density Estimation plot - - 'density' : same as 'kde' - - 'area' : area plot - - 'pie' : pie plot - %(klass_kind)s - %(klass_ax)s - figsize : a tuple (width, height) in inches - use_index : bool, default True - Use index as ticks for x axis - title : string or list - Title to use for the plot. If a string is passed, print the string at - the top of the figure. If a list is passed and `subplots` is True, - print each item in the list above the corresponding subplot. 
- grid : bool, default None (matlab style default) - Axis grid lines - legend : False/True/'reverse' - Place legend on axis subplots - style : list or dict - matplotlib line style per column - logx : bool or 'sym', default False - Use log scaling or symlog scaling on x axis - .. versionchanged:: 0.25.0 + by : object, optional + If passed, then used to form histograms for separate groups + ax : matplotlib axis object + If not passed, uses gca() + grid : bool, default True + Whether to show axis grid lines + xlabelsize : int, default None + If specified changes the x-axis label size + xrot : float, default None + rotation of x axis labels + ylabelsize : int, default None + If specified changes the y-axis label size + yrot : float, default None + rotation of y axis labels + figsize : tuple, default None + figure size in inches by default + bins : integer or sequence, default 10 + Number of histogram bins to be used. If an integer is given, bins + 1 + bin edges are calculated and returned. If bins is a sequence, gives + bin edges, including left edge of first bin and right edge of last + bin. In this case, bins is returned unmodified. + `**kwds` : keywords + To be passed to the actual plotting function - logy : bool or 'sym' default False - Use log scaling or symlog scaling on y axis - .. versionchanged:: 0.25.0 + Returns + ------- + matplotlib.AxesSubplot + A histogram plot. - loglog : bool or 'sym', default False - Use log scaling or symlog scaling on both x and y axes - .. versionchanged:: 0.25.0 + See Also + -------- + matplotlib.axes.Axes.hist : Plot a histogram using matplotlib. 
+ """ + plot_backend = _get_plot_backend() + return plot_backend.hist_series(self, by=by, ax=ax, grid=grid, + xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot, + figsize=figsize, bins=bins, **kwds) - xticks : sequence - Values to use for the xticks - yticks : sequence - Values to use for the yticks - xlim : 2-tuple/list - ylim : 2-tuple/list - rot : int, default None - Rotation for ticks (xticks for vertical, yticks for horizontal plots) - fontsize : int, default None - Font size for xticks and yticks - colormap : str or matplotlib colormap object, default None - Colormap to select colors from. If string, load colormap with that name - from matplotlib. - colorbar : bool, optional - If True, plot colorbar (only relevant for 'scatter' and 'hexbin' plots) - position : float - Specify relative alignments for bar plot layout. - From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center) - table : bool, Series or DataFrame, default False - If True, draw a table using the data in the DataFrame and the data will - be transposed to meet matplotlib's default layout. - If a Series or DataFrame is passed, use passed data to draw a table. - yerr : DataFrame, Series, array-like, dict and str - See :ref:`Plotting with Error Bars ` for - detail. - xerr : same types as yerr. - %(klass_unique)s - mark_right : bool, default True - When using a secondary_y axis, automatically mark the column - labels with "(right)" in the legend - `**kwds` : keywords - Options to pass to matplotlib plotting method + +def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, + xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, + sharey=False, figsize=None, layout=None, bins=10, **kwds): + """ + Make a histogram of the DataFrame's. + + A `histogram`_ is a representation of the distribution of data. + This function calls :meth:`matplotlib.pyplot.hist`, on each series in + the DataFrame, resulting in one histogram per column. + + .. 
_histogram: https://en.wikipedia.org/wiki/Histogram + + Parameters + ---------- + data : DataFrame + The pandas object holding the data. + column : string or sequence + If passed, will be used to limit data to a subset of columns. + by : object, optional + If passed, then used to form histograms for separate groups. + grid : bool, default True + Whether to show axis grid lines. + xlabelsize : int, default None + If specified changes the x-axis label size. + xrot : float, default None + Rotation of x axis labels. For example, a value of 90 displays the + x labels rotated 90 degrees clockwise. + ylabelsize : int, default None + If specified changes the y-axis label size. + yrot : float, default None + Rotation of y axis labels. For example, a value of 90 displays the + y labels rotated 90 degrees clockwise. + ax : Matplotlib axes object, default None + The axes to plot the histogram on. + sharex : bool, default True if ax is None else False + In case subplots=True, share x axis and set some x axis labels to + invisible; defaults to True if ax is None otherwise False if an ax + is passed in. + Note that passing in both an ax and sharex=True will alter all x axis + labels for all subplots in a figure. + sharey : bool, default False + In case subplots=True, share y axis and set some y axis labels to + invisible. + figsize : tuple + The size in inches of the figure to create. Uses the value in + `matplotlib.rcParams` by default. + layout : tuple, optional + Tuple of (rows, columns) for the layout of the histograms. + bins : integer or sequence, default 10 + Number of histogram bins to be used. If an integer is given, bins + 1 + bin edges are calculated and returned. If bins is a sequence, gives + bin edges, including left edge of first bin and right edge of last + bin. In this case, bins is returned unmodified. + **kwds + All other plotting keyword arguments to be passed to + :meth:`matplotlib.pyplot.hist`. 
Returns ------- - :class:`matplotlib.axes.Axes` or numpy.ndarray of them + matplotlib.AxesSubplot or numpy.ndarray of them - Notes - ----- + See Also + -------- + matplotlib.pyplot.hist : Plot a histogram using matplotlib. - - See matplotlib documentation online for more on this subject - - If `kind` = 'bar' or 'barh', you can specify relative alignments - for bar plot layout by `position` keyword. - From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center) - %(klass_note)s + Examples + -------- + + .. plot:: + :context: close-figs + + This example draws a histogram based on the length and width of + some animals, displayed in three bins + + >>> df = pd.DataFrame({ + ... 'length': [1.5, 0.5, 1.2, 0.9, 3], + ... 'width': [0.7, 0.2, 0.15, 0.2, 1.1] + ... }, index= ['pig', 'rabbit', 'duck', 'chicken', 'horse']) + >>> hist = df.hist(bins=3) """ + plot_backend = _get_plot_backend() + return plot_backend.hist_frame(data, column=column, by=by, grid=grid, + xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot, + ax=ax, sharex=sharex, sharey=sharey, + figsize=figsize, layout=layout, bins=bins, + **kwds) + -_shared_docs['boxplot'] = """ +def boxplot(data, column=None, by=None, ax=None, fontsize=None, + rot=0, grid=True, figsize=None, layout=None, return_type=None, + **kwds): + """ Make a box plot from DataFrame columns. Make a box-and-whisker plot from DataFrame columns, optionally grouped @@ -333,193 +321,6 @@ >>> type(boxplot) """ - -_shared_docs['kde'] = """ - Generate Kernel Density Estimate plot using Gaussian kernels. - - In statistics, `kernel density estimation`_ (KDE) is a non-parametric - way to estimate the probability density function (PDF) of a random - variable. This function uses Gaussian kernels and includes automatic - bandwidth determination. - - .. 
_kernel density estimation: - https://en.wikipedia.org/wiki/Kernel_density_estimation - - Parameters - ---------- - bw_method : str, scalar or callable, optional - The method used to calculate the estimator bandwidth. This can be - 'scott', 'silverman', a scalar constant or a callable. - If None (default), 'scott' is used. - See :class:`scipy.stats.gaussian_kde` for more information. - ind : NumPy array or integer, optional - Evaluation points for the estimated PDF. If None (default), - 1000 equally spaced points are used. If `ind` is a NumPy array, the - KDE is evaluated at the points passed. If `ind` is an integer, - `ind` number of equally spaced points are used. - **kwds : optional - Additional keyword arguments are documented in - :meth:`pandas.%(this-datatype)s.plot`. - - Returns - ------- - matplotlib.axes.Axes or numpy.ndarray of them - - See Also - -------- - scipy.stats.gaussian_kde : Representation of a kernel-density - estimate using Gaussian kernels. This is the function used - internally to estimate the PDF. - %(sibling-datatype)s.plot.kde : Generate a KDE plot for a - %(sibling-datatype)s. - - Examples - -------- - %(examples)s - """ - - -def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, - xrot=None, ylabelsize=None, yrot=None, figsize=None, - bins=10, **kwds): - """ - Draw histogram of the input series using matplotlib. 
- - Parameters - ---------- - by : object, optional - If passed, then used to form histograms for separate groups - ax : matplotlib axis object - If not passed, uses gca() - grid : bool, default True - Whether to show axis grid lines - xlabelsize : int, default None - If specified changes the x-axis label size - xrot : float, default None - rotation of x axis labels - ylabelsize : int, default None - If specified changes the y-axis label size - yrot : float, default None - rotation of y axis labels - figsize : tuple, default None - figure size in inches by default - bins : integer or sequence, default 10 - Number of histogram bins to be used. If an integer is given, bins + 1 - bin edges are calculated and returned. If bins is a sequence, gives - bin edges, including left edge of first bin and right edge of last - bin. In this case, bins is returned unmodified. - `**kwds` : keywords - To be passed to the actual plotting function - - Returns - ------- - matplotlib.AxesSubplot - A histogram plot. - - See Also - -------- - matplotlib.axes.Axes.hist : Plot a histogram using matplotlib. - """ - plot_backend = _get_plot_backend() - return plot_backend.hist_series(self, by=by, ax=ax, grid=grid, - xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot, - figsize=figsize, bins=bins, **kwds) - - -def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, - xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, - sharey=False, figsize=None, layout=None, bins=10, **kwds): - """ - Make a histogram of the DataFrame's. - - A `histogram`_ is a representation of the distribution of data. - This function calls :meth:`matplotlib.pyplot.hist`, on each series in - the DataFrame, resulting in one histogram per column. - - .. _histogram: https://en.wikipedia.org/wiki/Histogram - - Parameters - ---------- - data : DataFrame - The pandas object holding the data. - column : string or sequence - If passed, will be used to limit data to a subset of columns. 
- by : object, optional - If passed, then used to form histograms for separate groups. - grid : bool, default True - Whether to show axis grid lines. - xlabelsize : int, default None - If specified changes the x-axis label size. - xrot : float, default None - Rotation of x axis labels. For example, a value of 90 displays the - x labels rotated 90 degrees clockwise. - ylabelsize : int, default None - If specified changes the y-axis label size. - yrot : float, default None - Rotation of y axis labels. For example, a value of 90 displays the - y labels rotated 90 degrees clockwise. - ax : Matplotlib axes object, default None - The axes to plot the histogram on. - sharex : bool, default True if ax is None else False - In case subplots=True, share x axis and set some x axis labels to - invisible; defaults to True if ax is None otherwise False if an ax - is passed in. - Note that passing in both an ax and sharex=True will alter all x axis - labels for all subplots in a figure. - sharey : bool, default False - In case subplots=True, share y axis and set some y axis labels to - invisible. - figsize : tuple - The size in inches of the figure to create. Uses the value in - `matplotlib.rcParams` by default. - layout : tuple, optional - Tuple of (rows, columns) for the layout of the histograms. - bins : integer or sequence, default 10 - Number of histogram bins to be used. If an integer is given, bins + 1 - bin edges are calculated and returned. If bins is a sequence, gives - bin edges, including left edge of first bin and right edge of last - bin. In this case, bins is returned unmodified. - **kwds - All other plotting keyword arguments to be passed to - :meth:`matplotlib.pyplot.hist`. - - Returns - ------- - matplotlib.AxesSubplot or numpy.ndarray of them - - See Also - -------- - matplotlib.pyplot.hist : Plot a histogram using matplotlib. - - Examples - -------- - - .. 
plot:: - :context: close-figs - - This example draws a histogram based on the length and width of - some animals, displayed in three bins - - >>> df = pd.DataFrame({ - ... 'length': [1.5, 0.5, 1.2, 0.9, 3], - ... 'width': [0.7, 0.2, 0.15, 0.2, 1.1] - ... }, index= ['pig', 'rabbit', 'duck', 'chicken', 'horse']) - >>> hist = df.hist(bins=3) - """ - plot_backend = _get_plot_backend() - return plot_backend.hist_frame(data, column=column, by=by, grid=grid, - xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot, - ax=ax, sharex=sharex, sharey=sharey, - figsize=figsize, layout=layout, bins=bins, - **kwds) - - -@Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) -def boxplot(data, column=None, by=None, ax=None, fontsize=None, - rot=0, grid=True, figsize=None, layout=None, return_type=None, - **kwds): plot_backend = _get_plot_backend() return plot_backend.boxplot(data, column=column, by=by, ax=ax, fontsize=fontsize, rot=rot, grid=grid, @@ -527,7 +328,7 @@ def boxplot(data, column=None, by=None, ax=None, fontsize=None, return_type=return_type, **kwds) -@Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) +@Appender(boxplot.__doc__) def boxplot_frame(self, column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, **kwds): @@ -597,81 +398,214 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, sharey=sharey, **kwds) -# kinds supported by both dataframe and series -_common_kinds = ['line', 'bar', 'barh', - 'kde', 'density', 'area', 'hist', 'box'] -# kinds supported by dataframe -_dataframe_kinds = ['scatter', 'hexbin'] -# kinds supported only by series or dataframe single column -_series_kinds = ['pie'] -_all_kinds = _common_kinds + _dataframe_kinds + _series_kinds +class PlotAccessor(PandasObject): + """ + Make plots of Series or DataFrame using the backend specified by the + option ``plotting.backend``. By default, matplotlib is used. 
+ Parameters + ---------- + data : Series or DataFrame + The object for which the method is called + x : label or position, default None + Only used if data is a DataFrame. + y : label, position or list of label, positions, default None + Allows plotting of one column versus another. Only used if data is a + DataFrame. + kind : str + - 'line' : line plot (default) + - 'bar' : vertical bar plot + - 'barh' : horizontal bar plot + - 'hist' : histogram + - 'box' : boxplot + - 'kde' : Kernel Density Estimation plot + - 'density' : same as 'kde' + - 'area' : area plot + - 'pie' : pie plot + - 'scatter' : scatter plot + - 'hexbin' : hexbin plot + figsize : a tuple (width, height) in inches + use_index : bool, default True + Use index as ticks for x axis + title : string or list + Title to use for the plot. If a string is passed, print the string + at the top of the figure. If a list is passed and `subplots` is + True, print each item in the list above the corresponding subplot. + grid : bool, default None (matlab style default) + Axis grid lines + legend : False/True/'reverse' + Place legend on axis subplots + style : list or dict + matplotlib line style per column + logx : bool or 'sym', default False + Use log scaling or symlog scaling on x axis + .. versionchanged:: 0.25.0 -def _get_standard_kind(kind): - return {'density': 'kde'}.get(kind, kind) + logy : bool or 'sym' default False + Use log scaling or symlog scaling on y axis + .. versionchanged:: 0.25.0 + loglog : bool or 'sym', default False + Use log scaling or symlog scaling on both x and y axes + .. versionchanged:: 0.25.0 -def _get_plot_backend(): - """ - Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`). 
+ xticks : sequence + Values to use for the xticks + yticks : sequence + Values to use for the yticks + xlim : 2-tuple/list + ylim : 2-tuple/list + rot : int, default None + Rotation for ticks (xticks for vertical, yticks for horizontal + plots) + fontsize : int, default None + Font size for xticks and yticks + colormap : str or matplotlib colormap object, default None + Colormap to select colors from. If string, load colormap with that + name from matplotlib. + colorbar : bool, optional + If True, plot colorbar (only relevant for 'scatter' and 'hexbin' + plots) + position : float + Specify relative alignments for bar plot layout. + From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 + (center) + table : bool, Series or DataFrame, default False + If True, draw a table using the data in the DataFrame and the data + will be transposed to meet matplotlib's default layout. + If a Series or DataFrame is passed, use passed data to draw a + table. + yerr : DataFrame, Series, array-like, dict and str + See :ref:`Plotting with Error Bars ` for + detail. + xerr : DataFrame, Series, array-like, dict and str + Equivalent to yerr. + mark_right : bool, default True + When using a secondary_y axis, automatically mark the column + labels with "(right)" in the legend + `**kwds` : keywords + Options to pass to matplotlib plotting method - The plotting system of pandas has been using matplotlib, but the idea here - is that it can also work with other third-party backends. In the future, - this function will return the backend from a pandas option, and all the - rest of the code in this file will use the backend specified there for the - plotting. + Returns + ------- + :class:`matplotlib.axes.Axes` or numpy.ndarray of them + If the backend is not the default matplotlib one, the return value + will be the object returned by the backend. - The backend is imported lazily, as matplotlib is a soft dependency, and - pandas can be used without it being installed. 
+ Notes + ----- + - See matplotlib documentation online for more on this subject + - If `kind` = 'bar' or 'barh', you can specify relative alignments + for bar plot layout by `position` keyword. + From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 + (center) """ - backend_str = pandas.get_option('plotting.backend') - if backend_str == 'matplotlib': - backend_str = 'pandas.plotting._matplotlib' - return importlib.import_module(backend_str) + _common_kinds = ('line', 'bar', 'barh', 'kde', 'density', 'area', 'hist', + 'box') + _series_kinds = ('pie',) + _dataframe_kinds = ('scatter', 'hexbin') + _kind_aliases = {'density': 'kde'} + _all_kinds = _common_kinds + _series_kinds + _dataframe_kinds + def __init__(self, data): + self._parent = data -def _plot_classes(): - plot_backend = _get_plot_backend() - # TODO restore type annotations if we create a base class for plot classes - # (a parent of MPLPlot, and classes of other backends) - classes = [plot_backend.LinePlot, plot_backend.BarPlot, - plot_backend.BarhPlot, plot_backend.AreaPlot, - plot_backend.HistPlot, plot_backend.BoxPlot, - plot_backend.ScatterPlot, plot_backend.HexBinPlot, - plot_backend.KdePlot, plot_backend.PiePlot] - return {class_._kind: class_ for class_ in classes} - - -def _plot(data, x=None, y=None, subplots=False, - ax=None, kind='line', **kwds): - kind = _get_standard_kind(kind.lower().strip()) - if kind in _all_kinds: - klass = _plot_classes()[kind] - else: - raise ValueError("%r is not a valid plot kind" % kind) - - if kind in _dataframe_kinds: - if isinstance(data, ABCDataFrame): - plot_obj = klass(data, x=x, y=y, subplots=subplots, ax=ax, - kind=kind, **kwds) + @staticmethod + def _get_call_args(backend_name, data, args, kwargs): + """ + This function makes calls to this accessor `__call__` method compatible + with the previous `SeriesPlotMethods.__call__` and + `FramePlotMethods.__call__`. 
Those had slightly different + signatures, since `FramePlotMethods` accepted `x` and `y` + parameters. + """ + if isinstance(data, ABCSeries): + arg_def = [ + ('kind', 'line'), ('ax', None), ('figsize', None), + ('use_index', True), ('title', None), ('grid', None), + ('legend', False), ('style', None), ('logx', False), + ('logy', False), ('loglog', False), ('xticks', None), + ('yticks', None), ('xlim', None), ('ylim', None), + ('rot', None), ('fontsize', None), ('colormap', None), + ('table', False), ('yerr', None), ('xerr', None), + ('label', None), ('secondary_y', False)] + elif isinstance(data, ABCDataFrame): + arg_def = [ + ('x', None), ('y', None), ('kind', 'line'), ('ax', None), + ('subplots', False), ('sharex', None), ('sharey', False), + ('layout', None), ('figsize', None), ('use_index', True), + ('title', None), ('grid', None), ('legend', True), + ('style', None), ('logx', False), ('logy', False), + ('loglog', False), ('xticks', None), ('yticks', None), + ('xlim', None), ('ylim', None), ('rot', None), + ('fontsize', None), ('colormap', None), ('table', False), + ('yerr', None), ('xerr', None), ('secondary_y', False), + ('sort_columns', False)] else: - raise ValueError("plot kind %r can only be used for data frames" - % kind) - - elif kind in _series_kinds: - if isinstance(data, ABCDataFrame): - if y is None and subplots is False: - msg = "{0} requires either y column or 'subplots=True'" - raise ValueError(msg.format(kind)) - elif y is not None: - if is_integer(y) and not data.columns.holds_integer(): - y = data.columns[y] - # converted to series actually. 
copy to not modify - data = data[y].copy() - data.index.name = y - plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) - else: - if isinstance(data, ABCDataFrame): + raise TypeError(('Called plot accessor for type {}, expected ' + 'Series or DataFrame').format( + type(data).__name__)) + + if args and isinstance(data, ABCSeries): + msg = ('`Series.plot()` should not be called with positional ' + 'arguments, only keyword arguments. The order of ' + 'positional arguments will change in the future. ' + 'Use `Series.plot({})` instead of `Series.plot({})`.') + positional_args = str(args)[1:-1] + keyword_args = ', '.join('{}={!r}'.format(name, value) + for (name, default), value + in zip(arg_def, args)) + warnings.warn(msg.format(keyword_args, positional_args), + FutureWarning, stacklevel=3) + + pos_args = {name: value for value, (name, _) in zip(args, arg_def)} + if backend_name == 'pandas.plotting._matplotlib': + kwargs = dict(arg_def, **pos_args, **kwargs) + else: + kwargs = dict(pos_args, **kwargs) + + x = kwargs.pop('x', None) + y = kwargs.pop('y', None) + kind = kwargs.pop('kind', 'line') + return x, y, kind, kwargs + + def __call__(self, *args, **kwargs): + plot_backend = _get_plot_backend() + + x, y, kind, kwargs = self._get_call_args(plot_backend.__name__, + self._parent, args, kwargs) + + kind = self._kind_aliases.get(kind, kind) + if kind not in self._all_kinds: + raise ValueError('{} is not a valid plot kind'.format(kind)) + + # The original data structure can be transformed before being passed to + # the backend. For example, for a DataFrame it is common to set `x` as + # the index, and to return a Series with the parameter `y` as values. 
+ data = self._parent.copy() + + if isinstance(data, pandas.core.dtypes.generic.ABCSeries): + kwargs['reuse_plot'] = True + + if kind in self._dataframe_kinds: + if isinstance(data, ABCDataFrame): + return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs) + else: + raise ValueError(("plot kind {} can only be used for " + "data frames").format(kind)) + elif kind in self._series_kinds: + if isinstance(data, ABCDataFrame): + if y is None and kwargs.get('subplots') is False: + msg = "{} requires either y column or 'subplots=True'" + raise ValueError(msg.format(kind)) + elif y is not None: + if (is_integer(y) + and not data.columns.holds_integer()): + y = data.columns[y] + # converted to series actually. copy to not modify + data = data[y].copy() + data.index.name = y + elif isinstance(data, ABCDataFrame): data_cols = data.columns if x is not None: if is_integer(x) and not data.columns.holds_integer(): @@ -679,7 +613,6 @@ def _plot(data, x=None, y=None, subplots=False, elif not isinstance(data[x], ABCSeries): raise ValueError("x must be a label or position") data = data.set_index(x) - if y is not None: # check if we have y as int or list of ints int_ylist = is_list_like(y) and all(is_integer(c) for c in y) @@ -687,13 +620,13 @@ def _plot(data, x=None, y=None, subplots=False, if int_y_arg and not data.columns.holds_integer(): y = data_cols[y] - label_kw = kwds['label'] if 'label' in kwds else False + label_kw = kwargs['label'] if 'label' in kwargs else False for kw in ['xerr', 'yerr']: - if (kw in kwds) and \ - (isinstance(kwds[kw], str) or - is_integer(kwds[kw])): + if (kw in kwargs and + (isinstance(kwargs[kw], str) + or is_integer(kwargs[kw]))): try: - kwds[kw] = data[kwds[kw]] + kwargs[kw] = data[kwargs[kw]] except (IndexError, KeyError, TypeError): pass @@ -707,312 +640,15 @@ def _plot(data, x=None, y=None, subplots=False, match = is_list_like(label_kw) and len(label_kw) == len(y) if label_kw and not match: raise ValueError( - "label should be list-like and 
same length as y" - ) + "label should be list-like and same length as y") label_name = label_kw or data.columns data.columns = label_name - plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) - - plot_obj.generate() - plot_obj.draw() - return plot_obj.result - - -@Appender(_shared_docs['plot'] % _shared_doc_df_kwargs) -def plot_frame(data, x=None, y=None, kind='line', ax=None, - subplots=False, sharex=None, sharey=False, layout=None, - figsize=None, use_index=True, title=None, grid=None, - legend=True, style=None, logx=False, logy=False, loglog=False, - xticks=None, yticks=None, xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - secondary_y=False, sort_columns=False, - **kwds): - return _plot(data, kind=kind, x=x, y=y, ax=ax, - subplots=subplots, sharex=sharex, sharey=sharey, - layout=layout, figsize=figsize, use_index=use_index, - title=title, grid=grid, legend=legend, - style=style, logx=logx, logy=logy, loglog=loglog, - xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim, - rot=rot, fontsize=fontsize, colormap=colormap, table=table, - yerr=yerr, xerr=xerr, - secondary_y=secondary_y, sort_columns=sort_columns, - **kwds) - - -@Appender(_shared_docs['plot'] % _shared_doc_series_kwargs) -def plot_series(data, kind='line', ax=None, # Series unique - figsize=None, use_index=True, title=None, grid=None, - legend=False, style=None, logx=False, logy=False, loglog=False, - xticks=None, yticks=None, xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - label=None, secondary_y=False, # Series unique - **kwds): - - # FIXME move this into _matplotlib - import matplotlib.pyplot as plt - if ax is None and len(plt.get_fignums()) > 0: - with plt.rc_context(): - ax = plt.gca() - ax = getattr(ax, 'left_ax', ax) - - return _plot(data, kind=kind, ax=ax, - figsize=figsize, use_index=use_index, title=title, - grid=grid, legend=legend, - style=style, logx=logx, logy=logy, 
loglog=loglog, - xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim, - rot=rot, fontsize=fontsize, colormap=colormap, table=table, - yerr=yerr, xerr=xerr, - label=label, secondary_y=secondary_y, - **kwds) - - -class BasePlotMethods(PandasObject): - - def __init__(self, data): - self._parent = data # can be Series or DataFrame - - def __call__(self, *args, **kwargs): - raise NotImplementedError - - -class SeriesPlotMethods(BasePlotMethods): - """ - Series plotting accessor and method. - - Examples - -------- - >>> s.plot.line() - >>> s.plot.bar() - >>> s.plot.hist() - - Plotting methods can also be accessed by calling the accessor as a method - with the ``kind`` argument: - ``s.plot(kind='line')`` is equivalent to ``s.plot.line()`` - """ - - def __call__(self, kind='line', ax=None, - figsize=None, use_index=True, title=None, grid=None, - legend=False, style=None, logx=False, logy=False, - loglog=False, xticks=None, yticks=None, - xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - label=None, secondary_y=False, **kwds): - return plot_series(self._parent, kind=kind, ax=ax, figsize=figsize, - use_index=use_index, title=title, grid=grid, - legend=legend, style=style, logx=logx, logy=logy, - loglog=loglog, xticks=xticks, yticks=yticks, - xlim=xlim, ylim=ylim, rot=rot, fontsize=fontsize, - colormap=colormap, table=table, yerr=yerr, - xerr=xerr, label=label, secondary_y=secondary_y, - **kwds) - __call__.__doc__ = plot_series.__doc__ - - def line(self, **kwds): - """ - Line plot. - - Parameters - ---------- - `**kwds` : optional - Additional keyword arguments are documented in - :meth:`pandas.Series.plot`. - - Returns - ------- - :class:`matplotlib.axes.Axes` or numpy.ndarray of them - - Examples - -------- - - .. plot:: - :context: close-figs - - >>> s = pd.Series([1, 3, 2]) - >>> s.plot.line() - """ - return self(kind='line', **kwds) - - def bar(self, **kwds): - """ - Vertical bar plot. 
- - Parameters - ---------- - `**kwds` : optional - Additional keyword arguments are documented in - :meth:`pandas.Series.plot`. - - Returns - ------- - :class:`matplotlib.axes.Axes` or numpy.ndarray of them - """ - return self(kind='bar', **kwds) - - def barh(self, **kwds): - """ - Horizontal bar plot. - - Parameters - ---------- - `**kwds` : optional - Additional keyword arguments are documented in - :meth:`pandas.Series.plot`. - - Returns - ------- - :class:`matplotlib.axes.Axes` or numpy.ndarray of them - """ - return self(kind='barh', **kwds) - - def box(self, **kwds): - """ - Boxplot. - - Parameters - ---------- - `**kwds` : optional - Additional keyword arguments are documented in - :meth:`pandas.Series.plot`. - - Returns - ------- - :class:`matplotlib.axes.Axes` or numpy.ndarray of them - """ - return self(kind='box', **kwds) - - def hist(self, bins=10, **kwds): - """ - Histogram. - - Parameters - ---------- - bins : integer, default 10 - Number of histogram bins to be used - `**kwds` : optional - Additional keyword arguments are documented in - :meth:`pandas.Series.plot`. - - Returns - ------- - :class:`matplotlib.axes.Axes` or numpy.ndarray of them - """ - return self(kind='hist', bins=bins, **kwds) - - @Appender(_shared_docs['kde'] % { - 'this-datatype': 'Series', - 'sibling-datatype': 'DataFrame', - 'examples': """ - Given a Series of points randomly sampled from an unknown - distribution, estimate its PDF using KDE with automatic - bandwidth determination and plot the results, evaluating them at - 1000 equally spaced points (default): - - .. plot:: - :context: close-figs - - >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5]) - >>> ax = s.plot.kde() - - A scalar bandwidth can be specified. Using a small bandwidth value can - lead to over-fitting, while using a large bandwidth value may result - in under-fitting: - - .. plot:: - :context: close-figs - - >>> ax = s.plot.kde(bw_method=0.3) - - .. 
plot:: - :context: close-figs - - >>> ax = s.plot.kde(bw_method=3) - - Finally, the `ind` parameter determines the evaluation points for the - plot of the estimated PDF: - - .. plot:: - :context: close-figs - - >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5]) - """.strip() - }) - def kde(self, bw_method=None, ind=None, **kwds): - return self(kind='kde', bw_method=bw_method, ind=ind, **kwds) - - density = kde - - def area(self, **kwds): - """ - Area plot. - - Parameters - ---------- - `**kwds` : optional - Additional keyword arguments are documented in - :meth:`pandas.Series.plot`. - - Returns - ------- - :class:`matplotlib.axes.Axes` or numpy.ndarray of them - """ - return self(kind='area', **kwds) - - def pie(self, **kwds): - """ - Pie chart. - - Parameters - ---------- - `**kwds` : optional - Additional keyword arguments are documented in - :meth:`pandas.Series.plot`. - - Returns - ------- - :class:`matplotlib.axes.Axes` or numpy.ndarray of them - """ - return self(kind='pie', **kwds) + return plot_backend.plot(data, kind=kind, **kwargs) -class FramePlotMethods(BasePlotMethods): - """DataFrame plotting accessor and method - - Examples - -------- - >>> df.plot.line() - >>> df.plot.scatter('x', 'y') - >>> df.plot.hexbin() - - These plotting methods can also be accessed by calling the accessor as a - method with the ``kind`` argument: - ``df.plot(kind='line')`` is equivalent to ``df.plot.line()`` - """ - - def __call__(self, x=None, y=None, kind='line', ax=None, - subplots=False, sharex=None, sharey=False, layout=None, - figsize=None, use_index=True, title=None, grid=None, - legend=True, style=None, logx=False, logy=False, loglog=False, - xticks=None, yticks=None, xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - secondary_y=False, sort_columns=False, **kwds): - return plot_frame(self._parent, kind=kind, x=x, y=y, ax=ax, - subplots=subplots, sharex=sharex, sharey=sharey, - layout=layout, figsize=figsize, 
use_index=use_index, - title=title, grid=grid, legend=legend, style=style, - logx=logx, logy=logy, loglog=loglog, xticks=xticks, - yticks=yticks, xlim=xlim, ylim=ylim, rot=rot, - fontsize=fontsize, colormap=colormap, table=table, - yerr=yerr, xerr=xerr, secondary_y=secondary_y, - sort_columns=sort_columns, **kwds) - __call__.__doc__ = plot_frame.__doc__ - - def line(self, x=None, y=None, **kwds): + def line(self, x=None, y=None, **kwargs): """ - Plot DataFrame columns as lines. + Plot Series or DataFrame as lines. This function is useful to plot lines using DataFrame's values as coordinates. @@ -1042,6 +678,12 @@ def line(self, x=None, y=None, **kwds): Examples -------- + .. plot:: + :context: close-figs + + >>> s = pd.Series([1, 3, 2]) + >>> s.plot.line() + .. plot:: :context: close-figs @@ -1071,9 +713,9 @@ def line(self, x=None, y=None, **kwds): >>> lines = df.plot.line(x='pig', y='horse') """ - return self(kind='line', x=x, y=y, **kwds) + return self(kind='line', x=x, y=y, **kwargs) - def bar(self, x=None, y=None, **kwds): + def bar(self, x=None, y=None, **kwargs): """ Vertical bar plot. @@ -1156,9 +798,9 @@ def bar(self, x=None, y=None, **kwds): >>> ax = df.plot.bar(x='lifespan', rot=0) """ - return self(kind='bar', x=x, y=y, **kwds) + return self(kind='bar', x=x, y=y, **kwargs) - def barh(self, x=None, y=None, **kwds): + def barh(self, x=None, y=None, **kwargs): """ Make a horizontal bar plot. @@ -1236,9 +878,9 @@ def barh(self, x=None, y=None, **kwds): ... 'lifespan': lifespan}, index=index) >>> ax = df.plot.barh(x='lifespan') """ - return self(kind='barh', x=x, y=y, **kwds) + return self(kind='barh', x=x, y=y, **kwargs) - def box(self, by=None, **kwds): + def box(self, by=None, **kwargs): r""" Make a box plot of the DataFrame columns. 
@@ -1286,9 +928,9 @@ def box(self, by=None, **kwds): >>> df = pd.DataFrame(data, columns=list('ABCD')) >>> ax = df.plot.box() """ - return self(kind='box', by=by, **kwds) + return self(kind='box', by=by, **kwargs) - def hist(self, by=None, bins=10, **kwds): + def hist(self, by=None, bins=10, **kwargs): """ Draw one histogram of the DataFrame's columns. @@ -1333,17 +975,83 @@ def hist(self, by=None, bins=10, **kwds): >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000) >>> ax = df.plot.hist(bins=12, alpha=0.5) """ - return self(kind='hist', by=by, bins=bins, **kwds) - - @Appender(_shared_docs['kde'] % { - 'this-datatype': 'DataFrame', - 'sibling-datatype': 'Series', - 'examples': """ - Given several Series of points randomly sampled from unknown - distributions, estimate their PDFs using KDE with automatic + return self(kind='hist', by=by, bins=bins, **kwargs) + + def kde(self, bw_method=None, ind=None, **kwargs): + """ + Generate Kernel Density Estimate plot using Gaussian kernels. + + In statistics, `kernel density estimation`_ (KDE) is a non-parametric + way to estimate the probability density function (PDF) of a random + variable. This function uses Gaussian kernels and includes automatic + bandwidth determination. + + .. _kernel density estimation: + https://en.wikipedia.org/wiki/Kernel_density_estimation + + Parameters + ---------- + bw_method : str, scalar or callable, optional + The method used to calculate the estimator bandwidth. This can be + 'scott', 'silverman', a scalar constant or a callable. + If None (default), 'scott' is used. + See :class:`scipy.stats.gaussian_kde` for more information. + ind : NumPy array or integer, optional + Evaluation points for the estimated PDF. If None (default), + 1000 equally spaced points are used. If `ind` is a NumPy array, the + KDE is evaluated at the points passed. If `ind` is an integer, + `ind` number of equally spaced points are used. 
+ **kwargs : optional + Additional keyword arguments are documented in + :meth:`pandas.Series.plot` or :meth:`pandas.DataFrame.plot`. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + See Also + -------- + scipy.stats.gaussian_kde : Representation of a kernel-density + estimate using Gaussian kernels. This is the function used + internally to estimate the PDF. + + Examples + -------- + Given a Series of points randomly sampled from an unknown + distribution, estimate its PDF using KDE with automatic bandwidth determination and plot the results, evaluating them at 1000 equally spaced points (default): + .. plot:: + :context: close-figs + + >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5]) + >>> ax = s.plot.kde() + + A scalar bandwidth can be specified. Using a small bandwidth value can + lead to over-fitting, while using a large bandwidth value may result + in under-fitting: + + .. plot:: + :context: close-figs + + >>> ax = s.plot.kde(bw_method=0.3) + + .. plot:: + :context: close-figs + + >>> ax = s.plot.kde(bw_method=3) + + Finally, the `ind` parameter determines the evaluation points for the + plot of the estimated PDF: + + .. plot:: + :context: close-figs + + >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5]) + + For DataFrame, it works in the same way: + .. plot:: :context: close-figs @@ -1374,14 +1082,12 @@ def hist(self, by=None, bins=10, **kwds): :context: close-figs >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6]) - """.strip() - }) - def kde(self, bw_method=None, ind=None, **kwds): - return self(kind='kde', bw_method=bw_method, ind=ind, **kwds) + """ + return self(kind='kde', bw_method=bw_method, ind=ind, **kwargs) density = kde - def area(self, x=None, y=None, **kwds): + def area(self, x=None, y=None, **kwargs): """ Draw a stacked area plot. @@ -1452,9 +1158,9 @@ def area(self, x=None, y=None, **kwds): ...
}) >>> ax = df.plot.area(x='day') """ - return self(kind='area', x=x, y=y, **kwds) + return self(kind='area', x=x, y=y, **kwargs) - def pie(self, y=None, **kwds): + def pie(self, **kwargs): """ Generate a pie plot. @@ -1501,9 +1207,13 @@ def pie(self, y=None, **kwds): >>> plot = df.plot.pie(subplots=True, figsize=(6, 3)) """ - return self(kind='pie', y=y, **kwds) + if (isinstance(self._parent, ABCDataFrame) + and kwargs.get('y', None) is None + and not kwargs.get('subplots', False)): + raise ValueError("pie requires either y column or 'subplots=True'") + return self(kind='pie', **kwargs) - def scatter(self, x, y, s=None, c=None, **kwds): + def scatter(self, x, y, s=None, c=None, **kwargs): """ Create a scatter plot with varying marker point size and color. @@ -1582,10 +1292,10 @@ def scatter(self, x, y, s=None, c=None, **kwds): ... c='species', ... colormap='viridis') """ - return self(kind='scatter', x=x, y=y, c=c, s=s, **kwds) + return self(kind='scatter', x=x, y=y, s=s, c=c, **kwargs) def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, - **kwds): + **kwargs): """ Generate a hexagonal binning plot. @@ -1668,7 +1378,27 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, ... cmap="viridis") """ if reduce_C_function is not None: - kwds['reduce_C_function'] = reduce_C_function + kwargs['reduce_C_function'] = reduce_C_function if gridsize is not None: - kwds['gridsize'] = gridsize - return self(kind='hexbin', x=x, y=y, C=C, **kwds) + kwargs['gridsize'] = gridsize + + return self(kind='hexbin', x=x, y=y, C=C, **kwargs) + + +def _get_plot_backend(): + """ + Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`). + + The plotting system of pandas has been using matplotlib, but the idea here + is that it can also work with other third-party backends. 
To that end, + this function returns the backend selected by the `plotting.backend` + pandas option, so that the rest of the code in this file can use it for + plotting. + + The backend is imported lazily, as matplotlib is a soft dependency, and + pandas can be used without it being installed. + """ + backend_str = pandas.get_option('plotting.backend') + if backend_str == 'matplotlib': + backend_str = 'pandas.plotting._matplotlib' + return importlib.import_module(backend_str) diff --git a/pandas/plotting/_matplotlib/__init__.py b/pandas/plotting/_matplotlib/__init__.py index 1b775d03349d0..8eac6897add0e 100644 --- a/pandas/plotting/_matplotlib/__init__.py +++ b/pandas/plotting/_matplotlib/__init__.py @@ -13,13 +13,40 @@ from pandas.plotting._matplotlib.timeseries import tsplot from pandas.plotting._matplotlib.tools import table +PLOT_CLASSES = {'line': LinePlot, + 'bar': BarPlot, + 'barh': BarhPlot, + 'box': BoxPlot, + 'hist': HistPlot, + 'kde': KdePlot, + 'area': AreaPlot, + 'pie': PiePlot, + 'scatter': ScatterPlot, + 'hexbin': HexBinPlot} + if get_option("plotting.matplotlib.register_converters"): register(explicit=False) -__all__ = ['LinePlot', 'BarPlot', 'BarhPlot', 'HistPlot', 'BoxPlot', 'KdePlot', - 'AreaPlot', 'PiePlot', 'ScatterPlot', 'HexBinPlot', 'hist_series', - 'hist_frame', 'boxplot', 'boxplot_frame', 'boxplot_frame_groupby', - 'tsplot', 'table', 'andrews_curves', 'autocorrelation_plot', - 'bootstrap_plot', 'lag_plot', 'parallel_coordinates', 'radviz', - 'scatter_matrix', 'register', 'deregister'] +def plot(data, kind, **kwargs): + # Importing pyplot at the top of the file (before the converters are + # registered) causes problems in matplotlib 2 (converters seem to not + # work) + import matplotlib.pyplot as plt + if kwargs.pop('reuse_plot', False): + ax = kwargs.get('ax') + if ax is None and len(plt.get_fignums()) > 0: + with plt.rc_context(): + ax = plt.gca() + kwargs['ax'] = getattr(ax, 'left_ax', ax) + plot_obj = PLOT_CLASSES[kind](data,
**kwargs) + plot_obj.generate() + plot_obj.draw() + return plot_obj.result + + +__all__ = ['plot', 'hist_series', 'hist_frame', 'boxplot', 'boxplot_frame', + 'boxplot_frame_groupby', 'tsplot', 'table', 'andrews_curves', + 'autocorrelation_plot', 'bootstrap_plot', 'lag_plot', + 'parallel_coordinates', 'radviz', 'scatter_matrix', 'register', + 'deregister'] diff --git a/pandas/plotting/_matplotlib/timeseries.py b/pandas/plotting/_matplotlib/timeseries.py index e36ffed10d94f..c3b548a6dfa85 100644 --- a/pandas/plotting/_matplotlib/timeseries.py +++ b/pandas/plotting/_matplotlib/timeseries.py @@ -143,12 +143,8 @@ def _replot_ax(ax, freq, kwargs): # for tsplot if isinstance(plotf, str): - # XXX _plot_classes is private and shouldn't be imported - # here. But as tsplot is deprecated, and we'll remove this - # code soon, it's probably better to not overcomplicate - # things, and just leave this the way it was implemented - from pandas.plotting._core import _plot_classes - plotf = _plot_classes()[plotf]._plot + from pandas.plotting._matplotlib import PLOT_CLASSES + plotf = PLOT_CLASSES[plotf]._plot lines.append(plotf(ax, series.index._mpl_repr(), series.values, **kwds)[0]) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 06c753d1b8e21..272f01a12156b 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -2225,7 +2225,7 @@ def test_unordered_ts(self): @td.skip_if_no_scipy def test_kind_both_ways(self): df = DataFrame({'x': [1, 2, 3]}) - for kind in plotting._core._common_kinds: + for kind in plotting.PlotAccessor._common_kinds: df.plot(kind=kind) getattr(df.plot, kind)() @@ -2235,7 +2235,7 @@ def test_kind_both_ways(self): def test_all_invalid_plot_data(self): df = DataFrame(list('abcd')) - for kind in plotting._core._common_kinds: + for kind in plotting.PlotAccessor._common_kinds: msg = "no numeric data to plot" with pytest.raises(TypeError, match=msg): @@ -2246,7 +2246,7 @@ def 
test_partially_invalid_plot_data(self): with tm.RNGContext(42): df = DataFrame(randn(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = 'a' - for kind in plotting._core._common_kinds: + for kind in plotting.PlotAccessor._common_kinds: msg = "no numeric data to plot" with pytest.raises(TypeError, match=msg): @@ -2738,7 +2738,7 @@ def test_memory_leak(self): import gc results = {} - for kind in plotting._core._plot_classes().keys(): + for kind in plotting.PlotAccessor._all_kinds: args = {} if kind in ['hexbin', 'scatter', 'pie']: @@ -2936,7 +2936,7 @@ def test_df_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 self._check_grid_settings( DataFrame({'a': [1, 2, 3], 'b': [2, 3, 4]}), - plotting._core._dataframe_kinds, kws={'x': 'a', 'y': 'b'}) + plotting.PlotAccessor._dataframe_kinds, kws={'x': 'a', 'y': 'b'}) def test_invalid_colormap(self): df = DataFrame(randn(3, 2), columns=['A', 'B']) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index b58854743a42d..b27df946aeacf 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -9,7 +9,7 @@ import pandas.util._test_decorators as td -from pandas import DataFrame +from pandas import DataFrame, Series from pandas.tests.plotting.common import TestPlotBase, _check_plot_works import pandas.util.testing as tm @@ -25,6 +25,39 @@ def test_import_error_message(): df.plot() +def test_get_accessor_args(): + func = plotting._core.PlotAccessor._get_call_args + + msg = 'Called plot accessor for type list, expected Series or DataFrame' + with pytest.raises(TypeError, match=msg): + func(backend_name='', data=[], args=[], kwargs={}) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + x, y, kind, kwargs = func(backend_name='', data=Series(), + args=['line', None], kwargs={}) + assert x is None + assert y is None + assert kind == 'line' + assert kwargs == {'ax': None} + + x, y, kind, kwargs 
= func(backend_name='', data=DataFrame(), + args=['x'], kwargs={'y': 'y', + 'kind': 'bar', + 'grid': False}) + assert x == 'x' + assert y == 'y' + assert kind == 'bar' + assert kwargs == {'grid': False} + + x, y, kind, kwargs = func(backend_name='pandas.plotting._matplotlib', + data=Series(), args=[], kwargs={}) + assert x is None + assert y is None + assert kind == 'line' + assert len(kwargs) == 22 + + @td.skip_if_no_mpl class TestSeriesPlots(TestPlotBase): diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 9a954b522333d..d10620b4e7547 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -684,8 +684,8 @@ def test_boxplot_series(self): @pytest.mark.slow def test_kind_both_ways(self): s = Series(range(3)) - kinds = (plotting._core._common_kinds + - plotting._core._series_kinds) + kinds = (plotting.PlotAccessor._common_kinds + + plotting.PlotAccessor._series_kinds) _, ax = self.plt.subplots() for kind in kinds: @@ -696,7 +696,7 @@ def test_kind_both_ways(self): def test_invalid_plot_data(self): s = Series(list('abcd')) _, ax = self.plt.subplots() - for kind in plotting._core._common_kinds: + for kind in plotting.PlotAccessor._common_kinds: msg = "no numeric data to plot" with pytest.raises(TypeError, match=msg): @@ -705,13 +705,13 @@ def test_invalid_plot_data(self): @pytest.mark.slow def test_valid_object_plot(self): s = Series(range(10), dtype=object) - for kind in plotting._core._common_kinds: + for kind in plotting.PlotAccessor._common_kinds: _check_plot_works(s.plot, kind=kind) def test_partially_invalid_plot_data(self): s = Series(['a', 'b', 1.0, 2]) _, ax = self.plt.subplots() - for kind in plotting._core._common_kinds: + for kind in plotting.PlotAccessor._common_kinds: msg = "no numeric data to plot" with pytest.raises(TypeError, match=msg): @@ -781,8 +781,8 @@ def test_table(self): def test_series_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] 
setting, GH 9792 self._check_grid_settings(Series([1, 2, 3]), - plotting._core._series_kinds + - plotting._core._common_kinds) + plotting.PlotAccessor._series_kinds + + plotting.PlotAccessor._common_kinds) @pytest.mark.slow def test_standard_colors(self):