From f279d168666765a52d0714b305e32027e9139390 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Fri, 21 Jun 2019 22:46:46 +0100 Subject: [PATCH 01/26] Some experiments so far --- pandas/plotting/_core.py | 2 +- pandas/plotting/_matplotlib/__init__.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index b0e928fa8022b..78ceec674b199 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -627,7 +627,7 @@ def _get_plot_backend(): backend_str = pandas.get_option('plotting.backend') if backend_str == 'matplotlib': backend_str = 'pandas.plotting._matplotlib' - return importlib.import_module(backend_str) + return getattr(importlib.import_module(backend_str), 'PlotBackend') def _plot_classes(): diff --git a/pandas/plotting/_matplotlib/__init__.py b/pandas/plotting/_matplotlib/__init__.py index 1b775d03349d0..184ad070a31bb 100644 --- a/pandas/plotting/_matplotlib/__init__.py +++ b/pandas/plotting/_matplotlib/__init__.py @@ -1,5 +1,6 @@ from pandas._config import get_option +import pandas from pandas.plotting._matplotlib.boxplot import ( BoxPlot, boxplot, boxplot_frame, boxplot_frame_groupby) from pandas.plotting._matplotlib.converter import deregister, register @@ -17,6 +18,20 @@ register(explicit=False) +class PlotBackend(pandas.plotting.BasePlotBackend): + def line(self, data, x=None, y=None, **kwargs): + return LinePlot(data, x=x, y=y) + + def bar(self, data, x=None, y=None, **kwargs): + return BarPlot(data, x=x, y=y) + + def barh(self, data, x=None, y=None, **kwargs): + return BarhPlot(data, x=x, y=y) + + def box(self, data, by=None, **kwargs): + return BoxPlot(data, by=by) + + __all__ = ['LinePlot', 'BarPlot', 'BarhPlot', 'HistPlot', 'BoxPlot', 'KdePlot', 'AreaPlot', 'PiePlot', 'ScatterPlot', 'HexBinPlot', 'hist_series', 'hist_frame', 'boxplot', 'boxplot_frame', 'boxplot_frame_groupby', From ca5671c96e56aca83643a06636574bb5b422588e Mon Sep 17 00:00:00 2001 From: Marc 
Garcia Date: Sun, 23 Jun 2019 13:02:47 +0100 Subject: [PATCH 02/26] Refactoring of pandas plotting to make the API clearer --- pandas/core/frame.py | 2 +- pandas/core/series.py | 2 +- pandas/plotting/__init__.py | 4 +- pandas/plotting/_core.py | 1375 +++++++---------------- pandas/plotting/_matplotlib/__init__.py | 50 +- 5 files changed, 419 insertions(+), 1014 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6746844f4b1fa..9eb3f315454d8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8051,7 +8051,7 @@ def isin(self, values): # ---------------------------------------------------------------------- # Add plotting methods to DataFrame - plot = CachedAccessor("plot", pandas.plotting.FramePlotMethods) + plot = CachedAccessor("plot", pandas.plotting.PlotAccessor) hist = pandas.plotting.hist_frame boxplot = pandas.plotting.boxplot_frame sparse = CachedAccessor("sparse", SparseFrameAccessor) diff --git a/pandas/core/series.py b/pandas/core/series.py index 11e578e74f6e7..cff39492b99aa 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4530,7 +4530,7 @@ def to_period(self, freq=None, copy=True): str = CachedAccessor("str", StringMethods) dt = CachedAccessor("dt", CombinedDatetimelikeProperties) cat = CachedAccessor("cat", CategoricalAccessor) - plot = CachedAccessor("plot", pandas.plotting.SeriesPlotMethods) + plot = CachedAccessor("plot", pandas.plotting.PlotAccessor) sparse = CachedAccessor("sparse", SparseAccessor) # ---------------------------------------------------------------------- diff --git a/pandas/plotting/__init__.py b/pandas/plotting/__init__.py index ac983e7efd618..47e71541cdaef 100644 --- a/pandas/plotting/__init__.py +++ b/pandas/plotting/__init__.py @@ -2,7 +2,7 @@ Plotting public API """ from pandas.plotting._core import ( - FramePlotMethods, SeriesPlotMethods, boxplot, boxplot_frame, + PlotAccessor, boxplot, boxplot_frame, boxplot_frame_groupby, hist_frame, hist_series) from 
pandas.plotting._misc import ( andrews_curves, autocorrelation_plot, bootstrap_plot, @@ -11,7 +11,7 @@ register as register_matplotlib_converters, scatter_matrix, table) __all__ = ['boxplot', 'boxplot_frame', 'boxplot_frame_groupby', 'hist_frame', - 'hist_series', 'FramePlotMethods', 'SeriesPlotMethods', + 'hist_series', 'PlotAccessor', 'scatter_matrix', 'radviz', 'andrews_curves', 'bootstrap_plot', 'parallel_coordinates', 'lag_plot', 'autocorrelation_plot', 'table', 'plot_params', 'register_matplotlib_converters', diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 78ceec674b199..7cabd79b8605b 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1,14 +1,7 @@ import importlib from typing import List, Type # noqa -from pandas.util._decorators import Appender - -from pandas.core.dtypes.common import is_integer, is_list_like -from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries - import pandas -from pandas.core.base import PandasObject -from pandas.core.generic import _shared_doc_kwargs, _shared_docs # Trigger matplotlib import, which implicitly registers our # converts. Implicit registration is deprecated, and when enforced @@ -18,598 +11,6 @@ except ImportError: pass -df_kind = """- 'scatter' : scatter plot - - 'hexbin' : hexbin plot""" -series_kind = "" - -df_coord = """x : label or position, default None - y : label, position or list of label, positions, default None - Allows plotting of one column versus another""" -series_coord = "" - -df_unique = """stacked : bool, default False in line and - bar plots, and True in area plot. If True, create stacked plot. 
- sort_columns : bool, default False - Sort column names to determine plot ordering - secondary_y : bool or sequence, default False - Whether to plot on the secondary y-axis - If a list/tuple, which columns to plot on secondary y-axis""" -series_unique = """label : label argument to provide to plot - secondary_y : bool or sequence of ints, default False - If True then y-axis will be on the right""" - -df_ax = """ax : matplotlib axes object, default None - subplots : bool, default False - Make separate subplots for each column - sharex : bool, default True if ax is None else False - In case subplots=True, share x axis and set some x axis labels to - invisible; defaults to True if ax is None otherwise False if an ax - is passed in; Be aware, that passing in both an ax and sharex=True - will alter all x axis labels for all axis in a figure! - sharey : bool, default False - In case subplots=True, share y axis and set some y axis labels to - invisible - layout : tuple (optional) - (rows, columns) for the layout of subplots""" -series_ax = """ax : matplotlib axes object - If not passed, uses gca()""" - -df_note = """- If `kind` = 'scatter' and the argument `c` is the name of a dataframe - column, the values of that column are used to color each point. - - If `kind` = 'hexbin', you can control the size of the bins with the - `gridsize` argument. By default, a histogram of the counts around each - `(x, y)` point is computed. You can specify alternative aggregations - by passing values to the `C` and `reduce_C_function` arguments. - `C` specifies the value at each `(x, y)` point and `reduce_C_function` - is a function of one argument that reduces all the values in a bin to - a single number (e.g. 
`mean`, `max`, `sum`, `std`).""" -series_note = "" - -_shared_doc_df_kwargs = dict(klass='DataFrame', klass_obj='df', - klass_kind=df_kind, klass_coord=df_coord, - klass_ax=df_ax, klass_unique=df_unique, - klass_note=df_note) -_shared_doc_series_kwargs = dict(klass='Series', klass_obj='s', - klass_kind=series_kind, - klass_coord=series_coord, klass_ax=series_ax, - klass_unique=series_unique, - klass_note=series_note) - -_shared_docs['plot'] = """ - Make plots of %(klass)s using matplotlib / pylab. - - *New in version 0.17.0:* Each plot kind has a corresponding method on the - ``%(klass)s.plot`` accessor: - ``%(klass_obj)s.plot(kind='line')`` is equivalent to - ``%(klass_obj)s.plot.line()``. - - Parameters - ---------- - data : %(klass)s - %(klass_coord)s - kind : str - - 'line' : line plot (default) - - 'bar' : vertical bar plot - - 'barh' : horizontal bar plot - - 'hist' : histogram - - 'box' : boxplot - - 'kde' : Kernel Density Estimation plot - - 'density' : same as 'kde' - - 'area' : area plot - - 'pie' : pie plot - %(klass_kind)s - %(klass_ax)s - figsize : a tuple (width, height) in inches - use_index : bool, default True - Use index as ticks for x axis - title : string or list - Title to use for the plot. If a string is passed, print the string at - the top of the figure. If a list is passed and `subplots` is True, - print each item in the list above the corresponding subplot. - grid : bool, default None (matlab style default) - Axis grid lines - legend : False/True/'reverse' - Place legend on axis subplots - style : list or dict - matplotlib line style per column - logx : bool or 'sym', default False - Use log scaling or symlog scaling on x axis - .. versionchanged:: 0.25.0 - - logy : bool or 'sym' default False - Use log scaling or symlog scaling on y axis - .. versionchanged:: 0.25.0 - - loglog : bool or 'sym', default False - Use log scaling or symlog scaling on both x and y axes - .. 
versionchanged:: 0.25.0 - - xticks : sequence - Values to use for the xticks - yticks : sequence - Values to use for the yticks - xlim : 2-tuple/list - ylim : 2-tuple/list - rot : int, default None - Rotation for ticks (xticks for vertical, yticks for horizontal plots) - fontsize : int, default None - Font size for xticks and yticks - colormap : str or matplotlib colormap object, default None - Colormap to select colors from. If string, load colormap with that name - from matplotlib. - colorbar : bool, optional - If True, plot colorbar (only relevant for 'scatter' and 'hexbin' plots) - position : float - Specify relative alignments for bar plot layout. - From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center) - table : bool, Series or DataFrame, default False - If True, draw a table using the data in the DataFrame and the data will - be transposed to meet matplotlib's default layout. - If a Series or DataFrame is passed, use passed data to draw a table. - yerr : DataFrame, Series, array-like, dict and str - See :ref:`Plotting with Error Bars ` for - detail. - xerr : same types as yerr. - %(klass_unique)s - mark_right : bool, default True - When using a secondary_y axis, automatically mark the column - labels with "(right)" in the legend - `**kwds` : keywords - Options to pass to matplotlib plotting method - - Returns - ------- - :class:`matplotlib.axes.Axes` or numpy.ndarray of them - - Notes - ----- - - - See matplotlib documentation online for more on this subject - - If `kind` = 'bar' or 'barh', you can specify relative alignments - for bar plot layout by `position` keyword. - From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center) - %(klass_note)s - """ - -_shared_docs['boxplot'] = """ - Make a box plot from DataFrame columns. - - Make a box-and-whisker plot from DataFrame columns, optionally grouped - by some other columns. A box plot is a method for graphically depicting - groups of numerical data through their quartiles. 
- The box extends from the Q1 to Q3 quartile values of the data, - with a line at the median (Q2). The whiskers extend from the edges - of box to show the range of the data. The position of the whiskers - is set by default to `1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box. - Outlier points are those past the end of the whiskers. - - For further details see - Wikipedia's entry for `boxplot `_. - - Parameters - ---------- - column : str or list of str, optional - Column name or list of names, or vector. - Can be any valid input to :meth:`pandas.DataFrame.groupby`. - by : str or array-like, optional - Column in the DataFrame to :meth:`pandas.DataFrame.groupby`. - One box-plot will be done per value of columns in `by`. - ax : object of class matplotlib.axes.Axes, optional - The matplotlib axes to be used by boxplot. - fontsize : float or str - Tick label font size in points or as a string (e.g., `large`). - rot : int or float, default 0 - The rotation angle of labels (in degrees) - with respect to the screen coordinate system. - grid : bool, default True - Setting this to True will show the grid. - figsize : A tuple (width, height) in inches - The size of the figure to create in matplotlib. - layout : tuple (rows, columns), optional - For example, (3, 5) will display the subplots - using 3 columns and 5 rows, starting from the top-left. - return_type : {'axes', 'dict', 'both'} or None, default 'axes' - The kind of object to return. The default is ``axes``. - - * 'axes' returns the matplotlib axes the boxplot is drawn on. - * 'dict' returns a dictionary whose values are the matplotlib - Lines of the boxplot. - * 'both' returns a namedtuple with the axes and dict. - * when grouping with ``by``, a Series mapping columns to - ``return_type`` is returned. - - If ``return_type`` is `None`, a NumPy array - of axes with the same shape as ``layout`` is returned. - **kwds - All other plotting keyword arguments to be passed to - :func:`matplotlib.pyplot.boxplot`. 
- - Returns - ------- - result - See Notes. - - See Also - -------- - Series.plot.hist: Make a histogram. - matplotlib.pyplot.boxplot : Matplotlib equivalent plot. - - Notes - ----- - The return type depends on the `return_type` parameter: - - * 'axes' : object of class matplotlib.axes.Axes - * 'dict' : dict of matplotlib.lines.Line2D objects - * 'both' : a namedtuple with structure (ax, lines) - - For data grouped with ``by``, return a Series of the above or a numpy - array: - - * :class:`~pandas.Series` - * :class:`~numpy.array` (for ``return_type = None``) - - Use ``return_type='dict'`` when you want to tweak the appearance - of the lines after plotting. In this case a dict containing the Lines - making up the boxes, caps, fliers, medians, and whiskers is returned. - - Examples - -------- - - Boxplots can be created for every column in the dataframe - by ``df.boxplot()`` or indicating the columns to be used: - - .. plot:: - :context: close-figs - - >>> np.random.seed(1234) - >>> df = pd.DataFrame(np.random.randn(10,4), - ... columns=['Col1', 'Col2', 'Col3', 'Col4']) - >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3']) - - Boxplots of variables distributions grouped by the values of a third - variable can be created using the option ``by``. For instance: - - .. plot:: - :context: close-figs - - >>> df = pd.DataFrame(np.random.randn(10, 2), - ... columns=['Col1', 'Col2']) - >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A', - ... 'B', 'B', 'B', 'B', 'B']) - >>> boxplot = df.boxplot(by='X') - - A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot - in order to group the data by combination of the variables in the x-axis: - - .. plot:: - :context: close-figs - - >>> df = pd.DataFrame(np.random.randn(10,3), - ... columns=['Col1', 'Col2', 'Col3']) - >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A', - ... 'B', 'B', 'B', 'B', 'B']) - >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A', - ... 
'B', 'A', 'B', 'A', 'B']) - >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y']) - - The layout of boxplot can be adjusted giving a tuple to ``layout``: - - .. plot:: - :context: close-figs - - >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', - ... layout=(2, 1)) - - Additional formatting can be done to the boxplot, like suppressing the grid - (``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``) - or changing the fontsize (i.e. ``fontsize=15``): - - .. plot:: - :context: close-figs - - >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15) - - The parameter ``return_type`` can be used to select the type of element - returned by `boxplot`. When ``return_type='axes'`` is selected, - the matplotlib axes on which the boxplot is drawn are returned: - - >>> boxplot = df.boxplot(column=['Col1','Col2'], return_type='axes') - >>> type(boxplot) - - - When grouping with ``by``, a Series mapping columns to ``return_type`` - is returned: - - >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', - ... return_type='axes') - >>> type(boxplot) - - - If ``return_type`` is `None`, a NumPy array of axes with the same shape - as ``layout`` is returned: - - >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', - ... return_type=None) - >>> type(boxplot) - - """ - -_shared_docs['kde'] = """ - Generate Kernel Density Estimate plot using Gaussian kernels. - - In statistics, `kernel density estimation`_ (KDE) is a non-parametric - way to estimate the probability density function (PDF) of a random - variable. This function uses Gaussian kernels and includes automatic - bandwidth determination. - - .. _kernel density estimation: - https://en.wikipedia.org/wiki/Kernel_density_estimation - - Parameters - ---------- - bw_method : str, scalar or callable, optional - The method used to calculate the estimator bandwidth. This can be - 'scott', 'silverman', a scalar constant or a callable. - If None (default), 'scott' is used. 
- See :class:`scipy.stats.gaussian_kde` for more information. - ind : NumPy array or integer, optional - Evaluation points for the estimated PDF. If None (default), - 1000 equally spaced points are used. If `ind` is a NumPy array, the - KDE is evaluated at the points passed. If `ind` is an integer, - `ind` number of equally spaced points are used. - **kwds : optional - Additional keyword arguments are documented in - :meth:`pandas.%(this-datatype)s.plot`. - - Returns - ------- - matplotlib.axes.Axes or numpy.ndarray of them - - See Also - -------- - scipy.stats.gaussian_kde : Representation of a kernel-density - estimate using Gaussian kernels. This is the function used - internally to estimate the PDF. - %(sibling-datatype)s.plot.kde : Generate a KDE plot for a - %(sibling-datatype)s. - - Examples - -------- - %(examples)s - """ - - -def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, - xrot=None, ylabelsize=None, yrot=None, figsize=None, - bins=10, **kwds): - """ - Draw histogram of the input series using matplotlib. - - Parameters - ---------- - by : object, optional - If passed, then used to form histograms for separate groups - ax : matplotlib axis object - If not passed, uses gca() - grid : bool, default True - Whether to show axis grid lines - xlabelsize : int, default None - If specified changes the x-axis label size - xrot : float, default None - rotation of x axis labels - ylabelsize : int, default None - If specified changes the y-axis label size - yrot : float, default None - rotation of y axis labels - figsize : tuple, default None - figure size in inches by default - bins : integer or sequence, default 10 - Number of histogram bins to be used. If an integer is given, bins + 1 - bin edges are calculated and returned. If bins is a sequence, gives - bin edges, including left edge of first bin and right edge of last - bin. In this case, bins is returned unmodified. 
- `**kwds` : keywords - To be passed to the actual plotting function - - Returns - ------- - matplotlib.AxesSubplot - A histogram plot. - - See Also - -------- - matplotlib.axes.Axes.hist : Plot a histogram using matplotlib. - """ - plot_backend = _get_plot_backend() - return plot_backend.hist_series(self, by=by, ax=ax, grid=grid, - xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot, - figsize=figsize, bins=bins, **kwds) - - -def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, - xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, - sharey=False, figsize=None, layout=None, bins=10, **kwds): - """ - Make a histogram of the DataFrame's. - - A `histogram`_ is a representation of the distribution of data. - This function calls :meth:`matplotlib.pyplot.hist`, on each series in - the DataFrame, resulting in one histogram per column. - - .. _histogram: https://en.wikipedia.org/wiki/Histogram - - Parameters - ---------- - data : DataFrame - The pandas object holding the data. - column : string or sequence - If passed, will be used to limit data to a subset of columns. - by : object, optional - If passed, then used to form histograms for separate groups. - grid : bool, default True - Whether to show axis grid lines. - xlabelsize : int, default None - If specified changes the x-axis label size. - xrot : float, default None - Rotation of x axis labels. For example, a value of 90 displays the - x labels rotated 90 degrees clockwise. - ylabelsize : int, default None - If specified changes the y-axis label size. - yrot : float, default None - Rotation of y axis labels. For example, a value of 90 displays the - y labels rotated 90 degrees clockwise. - ax : Matplotlib axes object, default None - The axes to plot the histogram on. - sharex : bool, default True if ax is None else False - In case subplots=True, share x axis and set some x axis labels to - invisible; defaults to True if ax is None otherwise False if an ax - is passed in. 
- Note that passing in both an ax and sharex=True will alter all x axis - labels for all subplots in a figure. - sharey : bool, default False - In case subplots=True, share y axis and set some y axis labels to - invisible. - figsize : tuple - The size in inches of the figure to create. Uses the value in - `matplotlib.rcParams` by default. - layout : tuple, optional - Tuple of (rows, columns) for the layout of the histograms. - bins : integer or sequence, default 10 - Number of histogram bins to be used. If an integer is given, bins + 1 - bin edges are calculated and returned. If bins is a sequence, gives - bin edges, including left edge of first bin and right edge of last - bin. In this case, bins is returned unmodified. - **kwds - All other plotting keyword arguments to be passed to - :meth:`matplotlib.pyplot.hist`. - - Returns - ------- - matplotlib.AxesSubplot or numpy.ndarray of them - - See Also - -------- - matplotlib.pyplot.hist : Plot a histogram using matplotlib. - - Examples - -------- - - .. plot:: - :context: close-figs - - This example draws a histogram based on the length and width of - some animals, displayed in three bins - - >>> df = pd.DataFrame({ - ... 'length': [1.5, 0.5, 1.2, 0.9, 3], - ... 'width': [0.7, 0.2, 0.15, 0.2, 1.1] - ... 
}, index= ['pig', 'rabbit', 'duck', 'chicken', 'horse']) - >>> hist = df.hist(bins=3) - """ - plot_backend = _get_plot_backend() - return plot_backend.hist_frame(data, column=column, by=by, grid=grid, - xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot, - ax=ax, sharex=sharex, sharey=sharey, - figsize=figsize, layout=layout, bins=bins, - **kwds) - - -@Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) -def boxplot(data, column=None, by=None, ax=None, fontsize=None, - rot=0, grid=True, figsize=None, layout=None, return_type=None, - **kwds): - plot_backend = _get_plot_backend() - return plot_backend.boxplot(data, column=column, by=by, ax=ax, - fontsize=fontsize, rot=rot, grid=grid, - figsize=figsize, layout=layout, - return_type=return_type, **kwds) - - -@Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) -def boxplot_frame(self, column=None, by=None, ax=None, fontsize=None, rot=0, - grid=True, figsize=None, layout=None, - return_type=None, **kwds): - plot_backend = _get_plot_backend() - return plot_backend.boxplot_frame(self, column=column, by=by, ax=ax, - fontsize=fontsize, rot=rot, grid=grid, - figsize=figsize, layout=layout, - return_type=return_type, **kwds) - - -def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, - rot=0, grid=True, ax=None, figsize=None, - layout=None, sharex=False, sharey=True, **kwds): - """ - Make box plots from DataFrameGroupBy data. 
- - Parameters - ---------- - grouped : Grouped DataFrame - subplots : bool - * ``False`` - no subplots will be used - * ``True`` - create a subplot for each group - column : column name or list of names, or vector - Can be any valid input to groupby - fontsize : int or string - rot : label rotation angle - grid : Setting this to True will show the grid - ax : Matplotlib axis object, default None - figsize : A tuple (width, height) in inches - layout : tuple (optional) - (rows, columns) for the layout of the plot - sharex : bool, default False - Whether x-axes will be shared among subplots - - .. versionadded:: 0.23.1 - sharey : bool, default True - Whether y-axes will be shared among subplots - - .. versionadded:: 0.23.1 - `**kwds` : Keyword Arguments - All other plotting keyword arguments to be passed to - matplotlib's boxplot function - - Returns - ------- - dict of key/value = group key/DataFrame.boxplot return value - or DataFrame.boxplot return value in case subplots=figures=False - - Examples - -------- - >>> import itertools - >>> tuples = [t for t in itertools.product(range(1000), range(4))] - >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1']) - >>> data = np.random.randn(len(index),4) - >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index) - >>> - >>> grouped = df.groupby(level='lvl1') - >>> boxplot_frame_groupby(grouped) - >>> - >>> grouped = df.unstack(level='lvl1').groupby(level=0, axis=1) - >>> boxplot_frame_groupby(grouped, subplots=False) - """ - plot_backend = _get_plot_backend() - return plot_backend.boxplot_frame_groupby( - grouped, subplots=subplots, column=column, fontsize=fontsize, rot=rot, - grid=grid, ax=ax, figsize=figsize, layout=layout, sharex=sharex, - sharey=sharey, **kwds) - - -# kinds supported by both dataframe and series -_common_kinds = ['line', 'bar', 'barh', - 'kde', 'density', 'area', 'hist', 'box'] -# kinds supported by dataframe -_dataframe_kinds = ['scatter', 'hexbin'] -# kinds supported only by 
series or dataframe single column -_series_kinds = ['pie'] -_all_kinds = _common_kinds + _dataframe_kinds + _series_kinds - - -def _get_standard_kind(kind): - return {'density': 'kde'}.get(kind, kind) - def _get_plot_backend(): """ @@ -627,392 +28,136 @@ def _get_plot_backend(): backend_str = pandas.get_option('plotting.backend') if backend_str == 'matplotlib': backend_str = 'pandas.plotting._matplotlib' - return getattr(importlib.import_module(backend_str), 'PlotBackend') - - -def _plot_classes(): - plot_backend = _get_plot_backend() - # TODO restore type annotations if we create a base class for plot classes - # (a parent of MPLPlot, and classes of other backends) - classes = [plot_backend.LinePlot, plot_backend.BarPlot, - plot_backend.BarhPlot, plot_backend.AreaPlot, - plot_backend.HistPlot, plot_backend.BoxPlot, - plot_backend.ScatterPlot, plot_backend.HexBinPlot, - plot_backend.KdePlot, plot_backend.PiePlot] - return {class_._kind: class_ for class_ in classes} - + return importlib.import_module(backend_str) -def _plot(data, x=None, y=None, subplots=False, - ax=None, kind='line', **kwds): - kind = _get_standard_kind(kind.lower().strip()) - if kind in _all_kinds: - klass = _plot_classes()[kind] - else: - raise ValueError("%r is not a valid plot kind" % kind) - - if kind in _dataframe_kinds: - if isinstance(data, ABCDataFrame): - plot_obj = klass(data, x=x, y=y, subplots=subplots, ax=ax, - kind=kind, **kwds) - else: - raise ValueError("plot kind %r can only be used for data frames" - % kind) - elif kind in _series_kinds: - if isinstance(data, ABCDataFrame): - if y is None and subplots is False: - msg = "{0} requires either y column or 'subplots=True'" - raise ValueError(msg.format(kind)) - elif y is not None: - if is_integer(y) and not data.columns.holds_integer(): - y = data.columns[y] - # converted to series actually. 
copy to not modify - data = data[y].copy() - data.index.name = y - plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) - else: - if isinstance(data, ABCDataFrame): - data_cols = data.columns - if x is not None: - if is_integer(x) and not data.columns.holds_integer(): - x = data_cols[x] - elif not isinstance(data[x], ABCSeries): - raise ValueError("x must be a label or position") - data = data.set_index(x) - - if y is not None: - # check if we have y as int or list of ints - int_ylist = is_list_like(y) and all(is_integer(c) for c in y) - int_y_arg = is_integer(y) or int_ylist - if int_y_arg and not data.columns.holds_integer(): - y = data_cols[y] - - label_kw = kwds['label'] if 'label' in kwds else False - for kw in ['xerr', 'yerr']: - if (kw in kwds) and \ - (isinstance(kwds[kw], str) or - is_integer(kwds[kw])): - try: - kwds[kw] = data[kwds[kw]] - except (IndexError, KeyError, TypeError): - pass - - # don't overwrite - data = data[y].copy() - - if isinstance(data, ABCSeries): - label_name = label_kw or y - data.name = label_name - else: - match = is_list_like(label_kw) and len(label_kw) == len(y) - if label_kw and not match: - raise ValueError( - "label should be list-like and same length as y" - ) - label_name = label_kw or data.columns - data.columns = label_name - plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) - - plot_obj.generate() - plot_obj.draw() - return plot_obj.result - - -@Appender(_shared_docs['plot'] % _shared_doc_df_kwargs) -def plot_frame(data, x=None, y=None, kind='line', ax=None, - subplots=False, sharex=None, sharey=False, layout=None, - figsize=None, use_index=True, title=None, grid=None, - legend=True, style=None, logx=False, logy=False, loglog=False, - xticks=None, yticks=None, xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - secondary_y=False, sort_columns=False, - **kwds): - return _plot(data, kind=kind, x=x, y=y, ax=ax, - subplots=subplots, 
sharex=sharex, sharey=sharey, - layout=layout, figsize=figsize, use_index=use_index, - title=title, grid=grid, legend=legend, - style=style, logx=logx, logy=logy, loglog=loglog, - xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim, - rot=rot, fontsize=fontsize, colormap=colormap, table=table, - yerr=yerr, xerr=xerr, - secondary_y=secondary_y, sort_columns=sort_columns, - **kwds) - - -@Appender(_shared_docs['plot'] % _shared_doc_series_kwargs) -def plot_series(data, kind='line', ax=None, # Series unique - figsize=None, use_index=True, title=None, grid=None, - legend=False, style=None, logx=False, logy=False, loglog=False, - xticks=None, yticks=None, xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - label=None, secondary_y=False, # Series unique - **kwds): - - # FIXME move this into _matplotlib - import matplotlib.pyplot as plt - if ax is None and len(plt.get_fignums()) > 0: - with plt.rc_context(): - ax = plt.gca() - ax = getattr(ax, 'left_ax', ax) - - return _plot(data, kind=kind, ax=ax, - figsize=figsize, use_index=use_index, title=title, - grid=grid, legend=legend, - style=style, logx=logx, logy=logy, loglog=loglog, - xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim, - rot=rot, fontsize=fontsize, colormap=colormap, table=table, - yerr=yerr, xerr=xerr, - label=label, secondary_y=secondary_y, - **kwds) - - -class BasePlotMethods(PandasObject): - - def __init__(self, data): - self._parent = data # can be Series or DataFrame - - def __call__(self, *args, **kwargs): - raise NotImplementedError - - -class SeriesPlotMethods(BasePlotMethods): +class PlotAccessor(pandas.core.base.PandasObject): """ - Series plotting accessor and method. + Series and DataFrame plotting accessor and method. 
Examples -------- >>> s.plot.line() >>> s.plot.bar() >>> s.plot.hist() + >>> df.plot.line() + >>> df.plot.scatter('x', 'y') + >>> df.plot.hexbin() Plotting methods can also be accessed by calling the accessor as a method with the ``kind`` argument: ``s.plot(kind='line')`` is equivalent to ``s.plot.line()`` """ + def __init__(self, data): + assert isinstance(data, (pandas.Series, pandas.DataFrame)) + self._parent = data - def __call__(self, kind='line', ax=None, - figsize=None, use_index=True, title=None, grid=None, - legend=False, style=None, logx=False, logy=False, - loglog=False, xticks=None, yticks=None, - xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - label=None, secondary_y=False, **kwds): - return plot_series(self._parent, kind=kind, ax=ax, figsize=figsize, - use_index=use_index, title=title, grid=grid, - legend=legend, style=style, logx=logx, logy=logy, - loglog=loglog, xticks=xticks, yticks=yticks, - xlim=xlim, ylim=ylim, rot=rot, fontsize=fontsize, - colormap=colormap, table=table, yerr=yerr, - xerr=xerr, label=label, secondary_y=secondary_y, - **kwds) - __call__.__doc__ = plot_series.__doc__ - - def line(self, **kwds): - """ - Line plot. - - Parameters - ---------- - `**kwds` : optional - Additional keyword arguments are documented in - :meth:`pandas.Series.plot`. - - Returns - ------- - :class:`matplotlib.axes.Axes` or numpy.ndarray of them - - Examples - -------- - - .. plot:: - :context: close-figs - - >>> s = pd.Series([1, 3, 2]) - >>> s.plot.line() - """ - return self(kind='line', **kwds) - - def bar(self, **kwds): - """ - Vertical bar plot. - - Parameters - ---------- - `**kwds` : optional - Additional keyword arguments are documented in - :meth:`pandas.Series.plot`. - - Returns - ------- - :class:`matplotlib.axes.Axes` or numpy.ndarray of them - """ - return self(kind='bar', **kwds) - - def barh(self, **kwds): - """ - Horizontal bar plot. 
- - Parameters - ---------- - `**kwds` : optional - Additional keyword arguments are documented in - :meth:`pandas.Series.plot`. - - Returns - ------- - :class:`matplotlib.axes.Axes` or numpy.ndarray of them - """ - return self(kind='barh', **kwds) - - def box(self, **kwds): - """ - Boxplot. - - Parameters - ---------- - `**kwds` : optional - Additional keyword arguments are documented in - :meth:`pandas.Series.plot`. - - Returns - ------- - :class:`matplotlib.axes.Axes` or numpy.ndarray of them - """ - return self(kind='box', **kwds) - - def hist(self, bins=10, **kwds): - """ - Histogram. - - Parameters - ---------- - bins : integer, default 10 - Number of histogram bins to be used - `**kwds` : optional - Additional keyword arguments are documented in - :meth:`pandas.Series.plot`. - - Returns - ------- - :class:`matplotlib.axes.Axes` or numpy.ndarray of them - """ - return self(kind='hist', bins=bins, **kwds) - - @Appender(_shared_docs['kde'] % { - 'this-datatype': 'Series', - 'sibling-datatype': 'DataFrame', - 'examples': """ - Given a Series of points randomly sampled from an unknown - distribution, estimate its PDF using KDE with automatic - bandwidth determination and plot the results, evaluating them at - 1000 equally spaced points (default): - - .. plot:: - :context: close-figs - - >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5]) - >>> ax = s.plot.kde() - - A scalar bandwidth can be specified. Using a small bandwidth value can - lead to over-fitting, while using a large bandwidth value may result - in under-fitting: - - .. plot:: - :context: close-figs - - >>> ax = s.plot.kde(bw_method=0.3) - - .. plot:: - :context: close-figs - - >>> ax = s.plot.kde(bw_method=3) - - Finally, the `ind` parameter determines the evaluation points for the - plot of the estimated PDF: - - .. 
plot:: - :context: close-figs - - >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5]) - """.strip() - }) - def kde(self, bw_method=None, ind=None, **kwds): - return self(kind='kde', bw_method=bw_method, ind=ind, **kwds) - - density = kde - - def area(self, **kwds): - """ - Area plot. - - Parameters - ---------- - `**kwds` : optional - Additional keyword arguments are documented in - :meth:`pandas.Series.plot`. - - Returns - ------- - :class:`matplotlib.axes.Axes` or numpy.ndarray of them + def __call__(self, kind='line', **kwargs): """ - return self(kind='area', **kwds) + Make plots of Series or DataFrame using the backend specified by the + option ``plotting.backend``. - def pie(self, **kwds): - """ - Pie chart. + *New in version 0.17.0:* Each plot kind has a corresponding method on + the Series or DataFrame accessor, for example: + ``Series.plot(kind='line')`` is equivalent to + ``Series.plot.line()``. Parameters ---------- - `**kwds` : optional - Additional keyword arguments are documented in - :meth:`pandas.Series.plot`. + data : Series or DataFrame + The object for which the method is called + %(klass_coord)s + kind : str + - 'line' : line plot (default) + - 'bar' : vertical bar plot + - 'barh' : horizontal bar plot + - 'hist' : histogram + - 'box' : boxplot + - 'kde' : Kernel Density Estimation plot + - 'density' : same as 'kde' + - 'area' : area plot + - 'pie' : pie plot + - 'scatter' : scatter plot + - 'hexbin' : hexbin plot + figsize : a tuple (width, height) in inches + use_index : bool, default True + Use index as ticks for x axis + title : string or list + Title to use for the plot. If a string is passed, print the string + at the top of the figure. If a list is passed and `subplots` is + True, print each item in the list above the corresponding subplot. 
+ grid : bool, default None (matlab style default) + Axis grid lines + legend : False/True/'reverse' + Place legend on axis subplots + style : list or dict + matplotlib line style per column + logx : bool or 'sym', default False + Use log scaling or symlog scaling on x axis + .. versionchanged:: 0.25.0 + + logy : bool or 'sym' default False + Use log scaling or symlog scaling on y axis + .. versionchanged:: 0.25.0 + + loglog : bool or 'sym', default False + Use log scaling or symlog scaling on both x and y axes + .. versionchanged:: 0.25.0 + + xticks : sequence + Values to use for the xticks + yticks : sequence + Values to use for the yticks + xlim : 2-tuple/list + ylim : 2-tuple/list + rot : int, default None + Rotation for ticks (xticks for vertical, yticks for horizontal + plots) + fontsize : int, default None + Font size for xticks and yticks + colormap : str or matplotlib colormap object, default None + Colormap to select colors from. If string, load colormap with that + name from matplotlib. + colorbar : bool, optional + If True, plot colorbar (only relevant for 'scatter' and 'hexbin' + plots) + position : float + Specify relative alignments for bar plot layout. + From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 + (center) + table : bool, Series or DataFrame, default False + If True, draw a table using the data in the DataFrame and the data + will be transposed to meet matplotlib's default layout. + If a Series or DataFrame is passed, use passed data to draw a + table. + yerr : DataFrame, Series, array-like, dict and str + See :ref:`Plotting with Error Bars ` for + detail. + xerr : same types as yerr. 
+ mark_right : bool, default True + When using a secondary_y axis, automatically mark the column + labels with "(right)" in the legend + `**kwds` : keywords + Options to pass to matplotlib plotting method Returns ------- :class:`matplotlib.axes.Axes` or numpy.ndarray of them - """ - return self(kind='pie', **kwds) - - -class FramePlotMethods(BasePlotMethods): - """DataFrame plotting accessor and method - - Examples - -------- - >>> df.plot.line() - >>> df.plot.scatter('x', 'y') - >>> df.plot.hexbin() - These plotting methods can also be accessed by calling the accessor as a - method with the ``kind`` argument: - ``df.plot(kind='line')`` is equivalent to ``df.plot.line()`` - """ + Notes + ----- + - See matplotlib documentation online for more on this subject + - If `kind` = 'bar' or 'barh', you can specify relative alignments + for bar plot layout by `position` keyword. + From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 + (center) + """ + plot_backend = _get_plot_backend() + return plot_backend.plot(self._parent, kind=kind, **kwargs) - def __call__(self, x=None, y=None, kind='line', ax=None, - subplots=False, sharex=None, sharey=False, layout=None, - figsize=None, use_index=True, title=None, grid=None, - legend=True, style=None, logx=False, logy=False, loglog=False, - xticks=None, yticks=None, xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - secondary_y=False, sort_columns=False, **kwds): - return plot_frame(self._parent, kind=kind, x=x, y=y, ax=ax, - subplots=subplots, sharex=sharex, sharey=sharey, - layout=layout, figsize=figsize, use_index=use_index, - title=title, grid=grid, legend=legend, style=style, - logx=logx, logy=logy, loglog=loglog, xticks=xticks, - yticks=yticks, xlim=xlim, ylim=ylim, rot=rot, - fontsize=fontsize, colormap=colormap, table=table, - yerr=yerr, xerr=xerr, secondary_y=secondary_y, - sort_columns=sort_columns, **kwds) - __call__.__doc__ = plot_frame.__doc__ - - def line(self, 
x=None, y=None, **kwds): + def line(self, x=None, y=None, **kwargs): """ - Plot DataFrame columns as lines. + Plot Series or DataFrame as lines. This function is useful to plot lines using DataFrame's values as coordinates. @@ -1042,6 +187,12 @@ def line(self, x=None, y=None, **kwds): Examples -------- + .. plot:: + :context: close-figs + + >>> s = pd.Series([1, 3, 2]) + >>> s.plot.line() + .. plot:: :context: close-figs @@ -1071,9 +222,9 @@ def line(self, x=None, y=None, **kwds): >>> lines = df.plot.line(x='pig', y='horse') """ - return self(kind='line', x=x, y=y, **kwds) + return self(kind='line', x=x, y=y, **kwargs) - def bar(self, x=None, y=None, **kwds): + def bar(self, x=None, y=None, **kwargs): """ Vertical bar plot. @@ -1156,9 +307,9 @@ def bar(self, x=None, y=None, **kwds): >>> ax = df.plot.bar(x='lifespan', rot=0) """ - return self(kind='bar', x=x, y=y, **kwds) + return self(kind='bar', x=x, y=y, **kwargs) - def barh(self, x=None, y=None, **kwds): + def barh(self, x=None, y=None, **kwargs): """ Make a horizontal bar plot. @@ -1236,9 +387,9 @@ def barh(self, x=None, y=None, **kwds): ... 'lifespan': lifespan}, index=index) >>> ax = df.plot.barh(x='lifespan') """ - return self(kind='barh', x=x, y=y, **kwds) + return self(kind='barh', x=x, y=y, **kwargs) - def box(self, by=None, **kwds): + def box(self, by=None, **kwargs): r""" Make a box plot of the DataFrame columns. @@ -1286,9 +437,9 @@ def box(self, by=None, **kwds): >>> df = pd.DataFrame(data, columns=list('ABCD')) >>> ax = df.plot.box() """ - return self(kind='box', by=by, **kwds) + return self(kind='box', by=by, **kwargs) - def hist(self, by=None, bins=10, **kwds): + def hist(self, by=None, bins=10, **kwargs): """ Draw one histogram of the DataFrame's columns. 
@@ -1333,17 +484,83 @@ def hist(self, by=None, bins=10, **kwds): >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000) >>> ax = df.plot.hist(bins=12, alpha=0.5) """ - return self(kind='hist', by=by, bins=bins, **kwds) - - @Appender(_shared_docs['kde'] % { - 'this-datatype': 'DataFrame', - 'sibling-datatype': 'Series', - 'examples': """ - Given several Series of points randomly sampled from unknown - distributions, estimate their PDFs using KDE with automatic + return self(kind='hist', by=by, bins=bins, **kwargs) + + def kde(self, bw_method=None, ind=None, **kwargs): + """ + Generate Kernel Density Estimate plot using Gaussian kernels. + + In statistics, `kernel density estimation`_ (KDE) is a non-parametric + way to estimate the probability density function (PDF) of a random + variable. This function uses Gaussian kernels and includes automatic + bandwidth determination. + + .. _kernel density estimation: + https://en.wikipedia.org/wiki/Kernel_density_estimation + + Parameters + ---------- + bw_method : str, scalar or callable, optional + The method used to calculate the estimator bandwidth. This can be + 'scott', 'silverman', a scalar constant or a callable. + If None (default), 'scott' is used. + See :class:`scipy.stats.gaussian_kde` for more information. + ind : NumPy array or integer, optional + Evaluation points for the estimated PDF. If None (default), + 1000 equally spaced points are used. If `ind` is a NumPy array, the + KDE is evaluated at the points passed. If `ind` is an integer, + `ind` number of equally spaced points are used. + **kwds : optional + Additional keyword arguments are documented in + :meth:`pandas.%(this-datatype)s.plot`. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + See Also + -------- + scipy.stats.gaussian_kde : Representation of a kernel-density + estimate using Gaussian kernels. This is the function used + internally to estimate the PDF. 
+ + Examples + -------- + Given a Series of points randomly sampled from an unknown + distribution, estimate its PDF using KDE with automatic bandwidth determination and plot the results, evaluating them at 1000 equally spaced points (default): + .. plot:: + :context: close-figs + + >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5]) + >>> ax = s.plot.kde() + + A scalar bandwidth can be specified. Using a small bandwidth value can + lead to over-fitting, while using a large bandwidth value may result + in under-fitting: + + .. plot:: + :context: close-figs + + >>> ax = s.plot.kde(bw_method=0.3) + + .. plot:: + :context: close-figs + + >>> ax = s.plot.kde(bw_method=3) + + Finally, the `ind` parameter determines the evaluation points for the + plot of the estimated PDF: + + .. plot:: + :context: close-figs + + >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5]) + + For DataFrame, it works in the same way: + .. plot:: :context: close-figs @@ -1374,14 +591,12 @@ def hist(self, by=None, bins=10, **kwds): :context: close-figs >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6]) - """.strip() - }) - def kde(self, bw_method=None, ind=None, **kwds): - return self(kind='kde', bw_method=bw_method, ind=ind, **kwds) + """ + return self(kind='kde', bw_method=bw_method, ind=ind, **kwargs) density = kde - def area(self, x=None, y=None, **kwds): + def area(self, x=None, y=None, **kwargs): """ Draw a stacked area plot. @@ -1452,9 +667,9 @@ def area(self, x=None, y=None, **kwds): ... }) >>> ax = df.plot.area(x='day') """ - return self(kind='area', x=x, y=y, **kwds) + return self(kind='area', x=x, y=y, **kwargs) - def pie(self, y=None, **kwds): + def pie(self, **kwargs): """ Generate a pie plot. 
@@ -1501,9 +716,9 @@ def pie(self, y=None, **kwds): >>> plot = df.plot.pie(subplots=True, figsize=(6, 3)) """ - return self(kind='pie', y=y, **kwds) + return self(kind='pie', **kwargs) - def scatter(self, x, y, s=None, c=None, **kwds): + def scatter(self, x, y, s=None, c=None, **kwargs): """ Create a scatter plot with varying marker point size and color. @@ -1582,10 +797,14 @@ def scatter(self, x, y, s=None, c=None, **kwds): ... c='species', ... colormap='viridis') """ - return self(kind='scatter', x=x, y=y, c=c, s=s, **kwds) + if not isinstance(self._parent, + pandas.core.dtypes.generic.ABCDataFrame): + raise ValueError( + "plot kind scatter can only be used for data frames") + return self(kind='scatter', x=x, y=y, s=s, c=c, **kwargs) def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, - **kwds): + **kwargs): """ Generate a hexagonal binning plot. @@ -1667,8 +886,184 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, ... gridsize=10, ... cmap="viridis") """ - if reduce_C_function is not None: - kwds['reduce_C_function'] = reduce_C_function - if gridsize is not None: - kwds['gridsize'] = gridsize - return self(kind='hexbin', x=x, y=y, C=C, **kwds) + if not isinstance(self._parent, + pandas.core.dtypes.generic.ABCDataFrame): + raise ValueError( + "plot kind hexbin can only be used for data frames") + return self(kind='hexbin', x=x, y=y, C=C, + reduce_C_function=reduce_C_function, gridsize=gridsize, + **kwargs) + + +def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, + xrot=None, ylabelsize=None, yrot=None, figsize=None, + bins=10, **kwds): + plot_backend = _get_plot_backend() + return plot_backend.hist_series(self, by=by, ax=ax, grid=grid, + xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot, + figsize=figsize, bins=bins, **kwds) + + +hist_series.__doc__ = PlotAccessor.hist.__doc__ + + +def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, + xrot=None, ylabelsize=None, 
yrot=None, ax=None, sharex=False, + sharey=False, figsize=None, layout=None, bins=10, **kwds): + plot_backend = _get_plot_backend() + return plot_backend.hist_frame(data, column=column, by=by, grid=grid, + xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot, + ax=ax, sharex=sharex, sharey=sharey, + figsize=figsize, layout=layout, bins=bins, + **kwds) + + +hist_series.__doc__ = PlotAccessor.hist.__doc__ + + +def boxplot(data, column=None, by=None, ax=None, fontsize=None, + rot=0, grid=True, figsize=None, layout=None, return_type=None, + **kwds): + plot_backend = _get_plot_backend() + return plot_backend.boxplot(data, column=column, by=by, ax=ax, + fontsize=fontsize, rot=rot, grid=grid, + figsize=figsize, layout=layout, + return_type=return_type, **kwds) + + +boxplot.__doc__ = PlotAccessor.box.__doc__ + + +def boxplot_frame(self, column=None, by=None, ax=None, fontsize=None, rot=0, + grid=True, figsize=None, layout=None, + return_type=None, **kwds): + plot_backend = _get_plot_backend() + return plot_backend.boxplot_frame(self, column=column, by=by, ax=ax, + fontsize=fontsize, rot=rot, grid=grid, + figsize=figsize, layout=layout, + return_type=return_type, **kwds) + + +boxplot.__doc__ = PlotAccessor.box.__doc__ + + +def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, + rot=0, grid=True, ax=None, figsize=None, + layout=None, sharex=False, sharey=True, **kwds): + """ + Make box plots from DataFrameGroupBy data. 
+ + Parameters + ---------- + grouped : Grouped DataFrame + subplots : bool + * ``False`` - no subplots will be used + * ``True`` - create a subplot for each group + column : column name or list of names, or vector + Can be any valid input to groupby + fontsize : int or string + rot : label rotation angle + grid : Setting this to True will show the grid + ax : Matplotlib axis object, default None + figsize : A tuple (width, height) in inches + layout : tuple (optional) + (rows, columns) for the layout of the plot + sharex : bool, default False + Whether x-axes will be shared among subplots + + .. versionadded:: 0.23.1 + sharey : bool, default True + Whether y-axes will be shared among subplots + + .. versionadded:: 0.23.1 + `**kwds` : Keyword Arguments + All other plotting keyword arguments to be passed to + matplotlib's boxplot function + + Returns + ------- + dict of key/value = group key/DataFrame.boxplot return value + or DataFrame.boxplot return value in case subplots=figures=False + + Examples + -------- + >>> import itertools + >>> tuples = [t for t in itertools.product(range(1000), range(4))] + >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1']) + >>> data = np.random.randn(len(index),4) + >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index) + >>> + >>> grouped = df.groupby(level='lvl1') + >>> boxplot_frame_groupby(grouped) + >>> + >>> grouped = df.unstack(level='lvl1').groupby(level=0, axis=1) + >>> boxplot_frame_groupby(grouped, subplots=False) + """ + plot_backend = _get_plot_backend() + return plot_backend.boxplot_frame_groupby( + grouped, subplots=subplots, column=column, fontsize=fontsize, rot=rot, + grid=grid, ax=ax, figsize=figsize, layout=layout, sharex=sharex, + sharey=sharey, **kwds) + + +""" +# TODO move this somewhere else, it's not being called +def _plot(data, x=None, y=None, subplots=False, + ax=None, kind='line', **kwds): + if kind in _series_kinds: + if isinstance(data, ABCDataFrame): + if y is None and subplots 
is False: + msg = "{0} requires either y column or 'subplots=True'" + raise ValueError(msg.format(kind)) + elif y is not None: + if is_integer(y) and not data.columns.holds_integer(): + y = data.columns[y] + # converted to series actually. copy to not modify + data = data[y].copy() + data.index.name = y + plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) + else: + if isinstance(data, ABCDataFrame): + data_cols = data.columns + if x is not None: + if is_integer(x) and not data.columns.holds_integer(): + x = data_cols[x] + elif not isinstance(data[x], ABCSeries): + raise ValueError("x must be a label or position") + data = data.set_index(x) + + if y is not None: + # check if we have y as int or list of ints + int_ylist = is_list_like(y) and all(is_integer(c) for c in y) + int_y_arg = is_integer(y) or int_ylist + if int_y_arg and not data.columns.holds_integer(): + y = data_cols[y] + + label_kw = kwds['label'] if 'label' in kwds else False + for kw in ['xerr', 'yerr']: + if (kw in kwds) and \ + (isinstance(kwds[kw], str) or + is_integer(kwds[kw])): + try: + kwds[kw] = data[kwds[kw]] + except (IndexError, KeyError, TypeError): + pass + + # don't overwrite + data = data[y].copy() + + if isinstance(data, ABCSeries): + label_name = label_kw or y + data.name = label_name + else: + match = is_list_like(label_kw) and len(label_kw) == len(y) + if label_kw and not match: + raise ValueError( + "label should be list-like and same length as y" + ) + label_name = label_kw or data.columns + data.columns = label_name + plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) +""" diff --git a/pandas/plotting/_matplotlib/__init__.py b/pandas/plotting/_matplotlib/__init__.py index 184ad070a31bb..a3c4a5afd2b15 100644 --- a/pandas/plotting/_matplotlib/__init__.py +++ b/pandas/plotting/_matplotlib/__init__.py @@ -14,27 +14,37 @@ from pandas.plotting._matplotlib.timeseries import tsplot from pandas.plotting._matplotlib.tools import table +PLOT_CLASSES = 
{'line': LinePlot, + 'bar': BarPlot, + 'barh': BarhPlot, + 'box': BoxPlot, + 'hist': HistPlot, + 'kde': KdePlot, + 'area': AreaPlot, + 'pie': PiePlot, + 'scatter': ScatterPlot, + 'hexbin': HexBinPlot} + if get_option("plotting.matplotlib.register_converters"): register(explicit=False) -class PlotBackend(pandas.plotting.BasePlotBackend): - def line(self, data, x=None, y=None, **kwargs): - return LinePlot(data, x=x, y=y) - - def bar(self, data, x=None, y=None, **kwargs): - return BarPlot(data, x=x, y=y) - - def barh(self, data, x=None, y=None, **kwargs): - return BarhPlot(data, x=x, y=y) - - def box(self, data, by=None, **kwargs): - return BoxPlot(data, by=by) - - -__all__ = ['LinePlot', 'BarPlot', 'BarhPlot', 'HistPlot', 'BoxPlot', 'KdePlot', - 'AreaPlot', 'PiePlot', 'ScatterPlot', 'HexBinPlot', 'hist_series', - 'hist_frame', 'boxplot', 'boxplot_frame', 'boxplot_frame_groupby', - 'tsplot', 'table', 'andrews_curves', 'autocorrelation_plot', - 'bootstrap_plot', 'lag_plot', 'parallel_coordinates', 'radviz', - 'scatter_matrix', 'register', 'deregister'] +def plot(data, kind, **kwargs): + if isinstance(data, pandas.Series): + import matplotlib.pyplot as plt + ax = kwargs.get('ax') + if ax is None and len(plt.get_fignums()) > 0: + with plt.rc_context(): + ax = plt.gca() + kwargs['ax'] = getattr(ax, 'left_ax', ax) + plot_obj = PLOT_CLASSES[kind](data, **kwargs) + plot_obj.generate() + plot_obj.draw() + return plot_obj.result + + +__all__ = ['plot', 'hist_series', 'hist_frame', 'boxplot', 'boxplot_frame', + 'boxplot_frame_groupby', 'tsplot', 'table', 'andrews_curves', + 'autocorrelation_plot', 'bootstrap_plot', 'lag_plot', + 'parallel_coordinates', 'radviz', 'scatter_matrix', 'register', + 'deregister'] From 196388be2544850e586bc41140e0b5e38fdfe68a Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 25 Jun 2019 15:53:55 +0100 Subject: [PATCH 03/26] Addressing review comments, and fixing many tests (still some tests failing) --- pandas/plotting/_core.py | 135 
++++++++++++---------- pandas/plotting/_matplotlib/__init__.py | 5 +- pandas/plotting/_matplotlib/timeseries.py | 8 +- pandas/tests/plotting/test_frame.py | 10 +- pandas/tests/plotting/test_series.py | 14 +-- 5 files changed, 89 insertions(+), 83 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 7cabd79b8605b..cb0f045464048 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2,6 +2,8 @@ from typing import List, Type # noqa import pandas +from pandas.core.dtypes.common import is_integer, is_list_like +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries # Trigger matplotlib import, which implicitly registers our # converts. Implicit registration is deprecated, and when enforced @@ -48,11 +50,17 @@ class PlotAccessor(pandas.core.base.PandasObject): with the ``kind`` argument: ``s.plot(kind='line')`` is equivalent to ``s.plot.line()`` """ + _common_kinds = ('line', 'bar', 'barh', 'kde', 'density', 'area', 'hist', + 'box') + _series_kinds = ('pie',) + _dataframe_kinds = ('scatter', 'hexbin') + _kind_aliases = {'density': 'kde'} + _all_kinds = _common_kinds + _series_kinds + _dataframe_kinds + def __init__(self, data): - assert isinstance(data, (pandas.Series, pandas.DataFrame)) self._parent = data - def __call__(self, kind='line', **kwargs): + def __call__(self, kind='line', x=None, y=None, **kwargs): """ Make plots of Series or DataFrame using the backend specified by the option ``plotting.backend``. @@ -152,7 +160,69 @@ def __call__(self, kind='line', **kwargs): From 0 (left/bottom-end) to 1 (right/top-end). 
Default is 0.5 (center) """ + kind = self._kind_aliases.get(kind, kind) + if kind not in self._all_kinds: + raise ValueError('{} is not a valid plot kind'.format(kind)) + plot_backend = _get_plot_backend() + if kind in self._dataframe_kinds: + if isinstance(self._parent, ABCDataFrame): + return plot_backend.plot(self._parent, x=x, y=y, kind=kind, + **kwargs) + else: + raise ValueError(("plot kind {} can only be used for " + "data frames").format(kind)) + if kind in self._series_kinds: + if isinstance(self._parent, ABCDataFrame): + if y is None and kwargs.get('subplots') is False: + msg = "{} requires either y column or 'subplots=True'" + raise ValueError(msg.format(kind)) + elif y is not None: + if (is_integer(y) + and not self._parent.columns.holds_integer()): + y = self._parent.columns[y] + # converted to series actually. copy to not modify + self._parent = self._parent[y].copy() + self._parent.index.name = y + elif isinstance(self._parent, ABCDataFrame): + data_cols = self._parent.columns + if x is not None: + if is_integer(x) and not self._parent.columns.holds_integer(): + x = data_cols[x] + elif not isinstance(self._parent[x], ABCSeries): + raise ValueError("x must be a label or position") + self._parent = self._parent.set_index(x) + if y is not None: + # check if we have y as int or list of ints + int_ylist = is_list_like(y) and all(is_integer(c) for c in y) + int_y_arg = is_integer(y) or int_ylist + if int_y_arg and not self._parent.columns.holds_integer(): + y = data_cols[y] + + label_kw = kwargs['label'] if 'label' in kwargs else False + for kw in ['xerr', 'yerr']: + if (kw in kwargs and + (isinstance(kwargs[kw], str) + or is_integer(kwargs[kw]))): + try: + kwargs[kw] = self._parent[kwargs[kw]] + except (IndexError, KeyError, TypeError): + pass + + # don't overwrite + self._parent = self._parent[y].copy() + + if isinstance(self._parent, ABCSeries): + label_name = label_kw or y + self._parent.name = label_name + else: + match = is_list_like(label_kw) and 
len(label_kw) == len(y) + if label_kw and not match: + raise ValueError( + "label should be list-like and same length as y") + label_name = label_kw or self._parent.columns + self._parent.columns = label_name + return plot_backend.plot(self._parent, kind=kind, **kwargs) def line(self, x=None, y=None, **kwargs): @@ -1006,64 +1076,3 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, grouped, subplots=subplots, column=column, fontsize=fontsize, rot=rot, grid=grid, ax=ax, figsize=figsize, layout=layout, sharex=sharex, sharey=sharey, **kwds) - - -""" -# TODO move this somewhere else, it's not being called -def _plot(data, x=None, y=None, subplots=False, - ax=None, kind='line', **kwds): - if kind in _series_kinds: - if isinstance(data, ABCDataFrame): - if y is None and subplots is False: - msg = "{0} requires either y column or 'subplots=True'" - raise ValueError(msg.format(kind)) - elif y is not None: - if is_integer(y) and not data.columns.holds_integer(): - y = data.columns[y] - # converted to series actually. 
copy to not modify - data = data[y].copy() - data.index.name = y - plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) - else: - if isinstance(data, ABCDataFrame): - data_cols = data.columns - if x is not None: - if is_integer(x) and not data.columns.holds_integer(): - x = data_cols[x] - elif not isinstance(data[x], ABCSeries): - raise ValueError("x must be a label or position") - data = data.set_index(x) - - if y is not None: - # check if we have y as int or list of ints - int_ylist = is_list_like(y) and all(is_integer(c) for c in y) - int_y_arg = is_integer(y) or int_ylist - if int_y_arg and not data.columns.holds_integer(): - y = data_cols[y] - - label_kw = kwds['label'] if 'label' in kwds else False - for kw in ['xerr', 'yerr']: - if (kw in kwds) and \ - (isinstance(kwds[kw], str) or - is_integer(kwds[kw])): - try: - kwds[kw] = data[kwds[kw]] - except (IndexError, KeyError, TypeError): - pass - - # don't overwrite - data = data[y].copy() - - if isinstance(data, ABCSeries): - label_name = label_kw or y - data.name = label_name - else: - match = is_list_like(label_kw) and len(label_kw) == len(y) - if label_kw and not match: - raise ValueError( - "label should be list-like and same length as y" - ) - label_name = label_kw or data.columns - data.columns = label_name - plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) -""" diff --git a/pandas/plotting/_matplotlib/__init__.py b/pandas/plotting/_matplotlib/__init__.py index a3c4a5afd2b15..97e544e5baba4 100644 --- a/pandas/plotting/_matplotlib/__init__.py +++ b/pandas/plotting/_matplotlib/__init__.py @@ -1,3 +1,5 @@ +import matplotlib.pyplot as plt + from pandas._config import get_option import pandas @@ -30,8 +32,7 @@ def plot(data, kind, **kwargs): - if isinstance(data, pandas.Series): - import matplotlib.pyplot as plt + if isinstance(data, pandas.core.dtypes.generic.ABCSeries): ax = kwargs.get('ax') if ax is None and len(plt.get_fignums()) > 0: with plt.rc_context(): diff --git 
a/pandas/plotting/_matplotlib/timeseries.py b/pandas/plotting/_matplotlib/timeseries.py index e36ffed10d94f..c3b548a6dfa85 100644 --- a/pandas/plotting/_matplotlib/timeseries.py +++ b/pandas/plotting/_matplotlib/timeseries.py @@ -143,12 +143,8 @@ def _replot_ax(ax, freq, kwargs): # for tsplot if isinstance(plotf, str): - # XXX _plot_classes is private and shouldn't be imported - # here. But as tsplot is deprecated, and we'll remove this - # code soon, it's probably better to not overcomplicate - # things, and just leave this the way it was implemented - from pandas.plotting._core import _plot_classes - plotf = _plot_classes()[plotf]._plot + from pandas.plotting._matplotlib import PLOT_CLASSES + plotf = PLOT_CLASSES[plotf]._plot lines.append(plotf(ax, series.index._mpl_repr(), series.values, **kwds)[0]) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 06c753d1b8e21..272f01a12156b 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -2225,7 +2225,7 @@ def test_unordered_ts(self): @td.skip_if_no_scipy def test_kind_both_ways(self): df = DataFrame({'x': [1, 2, 3]}) - for kind in plotting._core._common_kinds: + for kind in plotting.PlotAccessor._common_kinds: df.plot(kind=kind) getattr(df.plot, kind)() @@ -2235,7 +2235,7 @@ def test_kind_both_ways(self): def test_all_invalid_plot_data(self): df = DataFrame(list('abcd')) - for kind in plotting._core._common_kinds: + for kind in plotting.PlotAccessor._common_kinds: msg = "no numeric data to plot" with pytest.raises(TypeError, match=msg): @@ -2246,7 +2246,7 @@ def test_partially_invalid_plot_data(self): with tm.RNGContext(42): df = DataFrame(randn(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = 'a' - for kind in plotting._core._common_kinds: + for kind in plotting.PlotAccessor._common_kinds: msg = "no numeric data to plot" with pytest.raises(TypeError, match=msg): @@ -2738,7 +2738,7 @@ def test_memory_leak(self): import gc 
results = {} - for kind in plotting._core._plot_classes().keys(): + for kind in plotting.PlotAccessor._all_kinds: args = {} if kind in ['hexbin', 'scatter', 'pie']: @@ -2936,7 +2936,7 @@ def test_df_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 self._check_grid_settings( DataFrame({'a': [1, 2, 3], 'b': [2, 3, 4]}), - plotting._core._dataframe_kinds, kws={'x': 'a', 'y': 'b'}) + plotting.PlotAccessor._dataframe_kinds, kws={'x': 'a', 'y': 'b'}) def test_invalid_colormap(self): df = DataFrame(randn(3, 2), columns=['A', 'B']) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 9a954b522333d..d10620b4e7547 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -684,8 +684,8 @@ def test_boxplot_series(self): @pytest.mark.slow def test_kind_both_ways(self): s = Series(range(3)) - kinds = (plotting._core._common_kinds + - plotting._core._series_kinds) + kinds = (plotting.PlotAccessor._common_kinds + + plotting.PlotAccessor._series_kinds) _, ax = self.plt.subplots() for kind in kinds: @@ -696,7 +696,7 @@ def test_kind_both_ways(self): def test_invalid_plot_data(self): s = Series(list('abcd')) _, ax = self.plt.subplots() - for kind in plotting._core._common_kinds: + for kind in plotting.PlotAccessor._common_kinds: msg = "no numeric data to plot" with pytest.raises(TypeError, match=msg): @@ -705,13 +705,13 @@ def test_invalid_plot_data(self): @pytest.mark.slow def test_valid_object_plot(self): s = Series(range(10), dtype=object) - for kind in plotting._core._common_kinds: + for kind in plotting.PlotAccessor._common_kinds: _check_plot_works(s.plot, kind=kind) def test_partially_invalid_plot_data(self): s = Series(['a', 'b', 1.0, 2]) _, ax = self.plt.subplots() - for kind in plotting._core._common_kinds: + for kind in plotting.PlotAccessor._common_kinds: msg = "no numeric data to plot" with pytest.raises(TypeError, match=msg): @@ -781,8 +781,8 @@ def 
test_table(self): def test_series_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 self._check_grid_settings(Series([1, 2, 3]), - plotting._core._series_kinds + - plotting._core._common_kinds) + plotting.PlotAccessor._series_kinds + + plotting.PlotAccessor._common_kinds) @pytest.mark.slow def test_standard_colors(self): From 995d72e8f0589e7e9560bc3fb962a194fde882f7 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 26 Jun 2019 10:56:07 +0100 Subject: [PATCH 04/26] Restoring docstrings of hist_series, hist_frame, boxplot and boxplot_frame --- pandas/plotting/_core.py | 292 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 280 insertions(+), 12 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index cb0f045464048..4509724b9ddba 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1,6 +1,8 @@ import importlib from typing import List, Type # noqa +from pandas.util._decorators import Appender + import pandas from pandas.core.dtypes.common import is_integer, is_list_like from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries @@ -968,6 +970,44 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, figsize=None, bins=10, **kwds): + """ + Draw histogram of the input series using matplotlib. 
+ + Parameters + ---------- + by : object, optional + If passed, then used to form histograms for separate groups + ax : matplotlib axis object + If not passed, uses gca() + grid : bool, default True + Whether to show axis grid lines + xlabelsize : int, default None + If specified changes the x-axis label size + xrot : float, default None + rotation of x axis labels + ylabelsize : int, default None + If specified changes the y-axis label size + yrot : float, default None + rotation of y axis labels + figsize : tuple, default None + figure size in inches by default + bins : integer or sequence, default 10 + Number of histogram bins to be used. If an integer is given, bins + 1 + bin edges are calculated and returned. If bins is a sequence, gives + bin edges, including left edge of first bin and right edge of last + bin. In this case, bins is returned unmodified. + `**kwds` : keywords + To be passed to the actual plotting function + + Returns + ------- + matplotlib.AxesSubplot + A histogram plot. + + See Also + -------- + matplotlib.axes.Axes.hist : Plot a histogram using matplotlib. + """ plot_backend = _get_plot_backend() return plot_backend.hist_series(self, by=by, ax=ax, grid=grid, xlabelsize=xlabelsize, xrot=xrot, @@ -975,12 +1015,86 @@ def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, figsize=figsize, bins=bins, **kwds) -hist_series.__doc__ = PlotAccessor.hist.__doc__ - - def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, sharey=False, figsize=None, layout=None, bins=10, **kwds): + """ + Make a histogram of the DataFrame's columns. + + A `histogram`_ is a representation of the distribution of data. + This function calls :meth:`matplotlib.pyplot.hist`, on each series in + the DataFrame, resulting in one histogram per column. + + .. _histogram: https://en.wikipedia.org/wiki/Histogram + + Parameters + ---------- + data : DataFrame + The pandas object holding the data. 
+ column : string or sequence + If passed, will be used to limit data to a subset of columns. + by : object, optional + If passed, then used to form histograms for separate groups. + grid : bool, default True + Whether to show axis grid lines. + xlabelsize : int, default None + If specified changes the x-axis label size. + xrot : float, default None + Rotation of x axis labels. For example, a value of 90 displays the + x labels rotated 90 degrees clockwise. + ylabelsize : int, default None + If specified changes the y-axis label size. + yrot : float, default None + Rotation of y axis labels. For example, a value of 90 displays the + y labels rotated 90 degrees clockwise. + ax : Matplotlib axes object, default None + The axes to plot the histogram on. + sharex : bool, default True if ax is None else False + In case subplots=True, share x axis and set some x axis labels to + invisible; defaults to True if ax is None otherwise False if an ax + is passed in. + Note that passing in both an ax and sharex=True will alter all x axis + labels for all subplots in a figure. + sharey : bool, default False + In case subplots=True, share y axis and set some y axis labels to + invisible. + figsize : tuple + The size in inches of the figure to create. Uses the value in + `matplotlib.rcParams` by default. + layout : tuple, optional + Tuple of (rows, columns) for the layout of the histograms. + bins : integer or sequence, default 10 + Number of histogram bins to be used. If an integer is given, bins + 1 + bin edges are calculated and returned. If bins is a sequence, gives + bin edges, including left edge of first bin and right edge of last + bin. In this case, bins is returned unmodified. + **kwds + All other plotting keyword arguments to be passed to + :meth:`matplotlib.pyplot.hist`. + + Returns + ------- + matplotlib.AxesSubplot or numpy.ndarray of them + + See Also + -------- + matplotlib.pyplot.hist : Plot a histogram using matplotlib. + + Examples + -------- + + .. 
plot:: + :context: close-figs + + This example draws a histogram based on the length and width of + some animals, displayed in three bins + + >>> df = pd.DataFrame({ + ... 'length': [1.5, 0.5, 1.2, 0.9, 3], + ... 'width': [0.7, 0.2, 0.15, 0.2, 1.1] + ... }, index= ['pig', 'rabbit', 'duck', 'chicken', 'horse']) + >>> hist = df.hist(bins=3) + """ plot_backend = _get_plot_backend() return plot_backend.hist_frame(data, column=column, by=by, grid=grid, xlabelsize=xlabelsize, xrot=xrot, @@ -990,12 +1104,171 @@ def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, **kwds) -hist_series.__doc__ = PlotAccessor.hist.__doc__ - - def boxplot(data, column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, **kwds): + """ + Make a box plot from DataFrame columns. + + Make a box-and-whisker plot from DataFrame columns, optionally grouped + by some other columns. A box plot is a method for graphically depicting + groups of numerical data through their quartiles. + The box extends from the Q1 to Q3 quartile values of the data, + with a line at the median (Q2). The whiskers extend from the edges + of box to show the range of the data. The position of the whiskers + is set by default to `1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box. + Outlier points are those past the end of the whiskers. + + For further details see + Wikipedia's entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`_. + + Parameters + ---------- + column : str or list of str, optional + Column name or list of names, or vector. + Can be any valid input to :meth:`pandas.DataFrame.groupby`. + by : str or array-like, optional + Column in the DataFrame to :meth:`pandas.DataFrame.groupby`. + One box-plot will be done per value of columns in `by`. + ax : object of class matplotlib.axes.Axes, optional + The matplotlib axes to be used by boxplot. + fontsize : float or str + Tick label font size in points or as a string (e.g., `large`). 
+ rot : int or float, default 0 + The rotation angle of labels (in degrees) + with respect to the screen coordinate system. + grid : bool, default True + Setting this to True will show the grid. + figsize : A tuple (width, height) in inches + The size of the figure to create in matplotlib. + layout : tuple (rows, columns), optional + For example, (3, 5) will display the subplots + using 3 rows and 5 columns, starting from the top-left. + return_type : {'axes', 'dict', 'both'} or None, default 'axes' + The kind of object to return. The default is ``axes``. + + * 'axes' returns the matplotlib axes the boxplot is drawn on. + * 'dict' returns a dictionary whose values are the matplotlib + Lines of the boxplot. + * 'both' returns a namedtuple with the axes and dict. + * when grouping with ``by``, a Series mapping columns to + ``return_type`` is returned. + + If ``return_type`` is `None`, a NumPy array + of axes with the same shape as ``layout`` is returned. + **kwds + All other plotting keyword arguments to be passed to + :func:`matplotlib.pyplot.boxplot`. + + Returns + ------- + result + See Notes. + + See Also + -------- + Series.plot.hist: Make a histogram. + matplotlib.pyplot.boxplot : Matplotlib equivalent plot. + + Notes + ----- + The return type depends on the `return_type` parameter: + + * 'axes' : object of class matplotlib.axes.Axes + * 'dict' : dict of matplotlib.lines.Line2D objects + * 'both' : a namedtuple with structure (ax, lines) + + For data grouped with ``by``, return a Series of the above or a numpy + array: + + * :class:`~pandas.Series` + * :class:`~numpy.array` (for ``return_type = None``) + + Use ``return_type='dict'`` when you want to tweak the appearance + of the lines after plotting. In this case a dict containing the Lines + making up the boxes, caps, fliers, medians, and whiskers is returned. + + Examples + -------- + + Boxplots can be created for every column in the dataframe + by ``df.boxplot()`` or indicating the columns to be used: + + .. 
plot:: + :context: close-figs + + >>> np.random.seed(1234) + >>> df = pd.DataFrame(np.random.randn(10,4), + ... columns=['Col1', 'Col2', 'Col3', 'Col4']) + >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3']) + + Boxplots of variables distributions grouped by the values of a third + variable can be created using the option ``by``. For instance: + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame(np.random.randn(10, 2), + ... columns=['Col1', 'Col2']) + >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A', + ... 'B', 'B', 'B', 'B', 'B']) + >>> boxplot = df.boxplot(by='X') + + A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot + in order to group the data by combination of the variables in the x-axis: + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame(np.random.randn(10,3), + ... columns=['Col1', 'Col2', 'Col3']) + >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A', + ... 'B', 'B', 'B', 'B', 'B']) + >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A', + ... 'B', 'A', 'B', 'A', 'B']) + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y']) + + The layout of boxplot can be adjusted giving a tuple to ``layout``: + + .. plot:: + :context: close-figs + + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', + ... layout=(2, 1)) + + Additional formatting can be done to the boxplot, like suppressing the grid + (``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``) + or changing the fontsize (i.e. ``fontsize=15``): + + .. plot:: + :context: close-figs + + >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15) + + The parameter ``return_type`` can be used to select the type of element + returned by `boxplot`. 
When ``return_type='axes'`` is selected, + the matplotlib axes on which the boxplot is drawn are returned: + + >>> boxplot = df.boxplot(column=['Col1','Col2'], return_type='axes') + >>> type(boxplot) + <class 'matplotlib.axes._subplots.AxesSubplot'> + + When grouping with ``by``, a Series mapping columns to ``return_type`` + is returned: + + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', + ... return_type='axes') + >>> type(boxplot) + <class 'pandas.core.series.Series'> + + If ``return_type`` is `None`, a NumPy array of axes with the same shape + as ``layout`` is returned: + + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', + ... return_type=None) + >>> type(boxplot) + <class 'numpy.ndarray'> + """ plot_backend = _get_plot_backend() return plot_backend.boxplot(data, column=column, by=by, ax=ax, fontsize=fontsize, rot=rot, grid=grid, @@ -1003,9 +1276,7 @@ def boxplot(data, column=None, by=None, ax=None, fontsize=None, return_type=return_type, **kwds) -boxplot.__doc__ = PlotAccessor.box.__doc__ - - +@Appender(boxplot.__doc__) def boxplot_frame(self, column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, **kwds): @@ -1016,9 +1287,6 @@ def boxplot_frame(self, column=None, by=None, ax=None, fontsize=None, rot=0, return_type=return_type, **kwds) -boxplot.__doc__ = PlotAccessor.box.__doc__ - - def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, rot=0, grid=True, ax=None, figsize=None, layout=None, sharex=False, sharey=True, **kwds): From 7e459962f54a90f96cbe84a59482edefb3075be7 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 26 Jun 2019 11:04:31 +0100 Subject: [PATCH 05/26] Fixing plot accessor docstring (was in the wrong place, and couple of things were wrong or could be improved) --- pandas/plotting/_core.py | 214 +++++++++++++++++++-------------------- 1 file changed, 102 insertions(+), 112 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 4509724b9ddba..ea6f930874cd0 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py 
@@ -37,20 +37,109 @@ def _get_plot_backend(): class PlotAccessor(pandas.core.base.PandasObject): """ - Series and DataFrame plotting accessor and method. + Make plots of Series or DataFrame using the backend specified by the + option ``plotting.backend``. By default, matplotlib is used. - Examples - -------- - >>> s.plot.line() - >>> s.plot.bar() - >>> s.plot.hist() - >>> df.plot.line() - >>> df.plot.scatter('x', 'y') - >>> df.plot.hexbin() - - Plotting methods can also be accessed by calling the accessor as a method - with the ``kind`` argument: - ``s.plot(kind='line')`` is equivalent to ``s.plot.line()`` + *New in version 0.17.0:* Each plot kind has a corresponding method on + the Series or DataFrame accessor, for example: + ``Series.plot(kind='line')`` is equivalent to + ``Series.plot.line()``. + + Parameters + ---------- + data : Series or DataFrame + The object for which the method is called + x : label or position, default None + Only used if data is a DataFrame. + y : label, position or list of label, positions, default None + Allows plotting of one column versus another. Only used if data is a + DataFrame. + kind : str + - 'line' : line plot (default) + - 'bar' : vertical bar plot + - 'barh' : horizontal bar plot + - 'hist' : histogram + - 'box' : boxplot + - 'kde' : Kernel Density Estimation plot + - 'density' : same as 'kde' + - 'area' : area plot + - 'pie' : pie plot + - 'scatter' : scatter plot + - 'hexbin' : hexbin plot + figsize : a tuple (width, height) in inches + use_index : bool, default True + Use index as ticks for x axis + title : string or list + Title to use for the plot. If a string is passed, print the string + at the top of the figure. If a list is passed and `subplots` is + True, print each item in the list above the corresponding subplot. 
+ grid : bool, default None (matlab style default) + Axis grid lines + legend : False/True/'reverse' + Place legend on axis subplots + style : list or dict + matplotlib line style per column + logx : bool or 'sym', default False + Use log scaling or symlog scaling on x axis + .. versionchanged:: 0.25.0 + + logy : bool or 'sym' default False + Use log scaling or symlog scaling on y axis + .. versionchanged:: 0.25.0 + + loglog : bool or 'sym', default False + Use log scaling or symlog scaling on both x and y axes + .. versionchanged:: 0.25.0 + + xticks : sequence + Values to use for the xticks + yticks : sequence + Values to use for the yticks + xlim : 2-tuple/list + ylim : 2-tuple/list + rot : int, default None + Rotation for ticks (xticks for vertical, yticks for horizontal + plots) + fontsize : int, default None + Font size for xticks and yticks + colormap : str or matplotlib colormap object, default None + Colormap to select colors from. If string, load colormap with that + name from matplotlib. + colorbar : bool, optional + If True, plot colorbar (only relevant for 'scatter' and 'hexbin' + plots) + position : float + Specify relative alignments for bar plot layout. + From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 + (center) + table : bool, Series or DataFrame, default False + If True, draw a table using the data in the DataFrame and the data + will be transposed to meet matplotlib's default layout. + If a Series or DataFrame is passed, use passed data to draw a + table. + yerr : DataFrame, Series, array-like, dict and str + See :ref:`Plotting with Error Bars ` for + detail. + xerr : same types as yerr. 
+ mark_right : bool, default True + When using a secondary_y axis, automatically mark the column + labels with "(right)" in the legend + `**kwds` : keywords + Options to pass to matplotlib plotting method + + Returns + ------- + :class:`matplotlib.axes.Axes` or numpy.ndarray of them + If the backend is not the default matplotlib one, the return value + will be the object returned by the backend. + + Notes + ----- + - See matplotlib documentation online for more on this subject + - If `kind` = 'bar' or 'barh', you can specify relative alignments + for bar plot layout by `position` keyword. + From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 + (center) """ _common_kinds = ('line', 'bar', 'barh', 'kde', 'density', 'area', 'hist', 'box') @@ -63,105 +152,6 @@ def __init__(self, data): self._parent = data def __call__(self, kind='line', x=None, y=None, **kwargs): - """ - Make plots of Series or DataFrame using the backend specified by the - option ``plotting.backend``. - - *New in version 0.17.0:* Each plot kind has a corresponding method on - the Series or DataFrame accessor, for example: - ``Series.plot(kind='line')`` is equivalent to - ``Series.plot.line()``. - - Parameters - ---------- - data : Series or DataFrame - The object for which the method is called - %(klass_coord)s - kind : str - - 'line' : line plot (default) - - 'bar' : vertical bar plot - - 'barh' : horizontal bar plot - - 'hist' : histogram - - 'box' : boxplot - - 'kde' : Kernel Density Estimation plot - - 'density' : same as 'kde' - - 'area' : area plot - - 'pie' : pie plot - - 'scatter' : scatter plot - - 'hexbin' : hexbin plot - figsize : a tuple (width, height) in inches - use_index : bool, default True - Use index as ticks for x axis - title : string or list - Title to use for the plot. If a string is passed, print the string - at the top of the figure. If a list is passed and `subplots` is - True, print each item in the list above the corresponding subplot. 
- grid : bool, default None (matlab style default) - Axis grid lines - legend : False/True/'reverse' - Place legend on axis subplots - style : list or dict - matplotlib line style per column - logx : bool or 'sym', default False - Use log scaling or symlog scaling on x axis - .. versionchanged:: 0.25.0 - - logy : bool or 'sym' default False - Use log scaling or symlog scaling on y axis - .. versionchanged:: 0.25.0 - - loglog : bool or 'sym', default False - Use log scaling or symlog scaling on both x and y axes - .. versionchanged:: 0.25.0 - - xticks : sequence - Values to use for the xticks - yticks : sequence - Values to use for the yticks - xlim : 2-tuple/list - ylim : 2-tuple/list - rot : int, default None - Rotation for ticks (xticks for vertical, yticks for horizontal - plots) - fontsize : int, default None - Font size for xticks and yticks - colormap : str or matplotlib colormap object, default None - Colormap to select colors from. If string, load colormap with that - name from matplotlib. - colorbar : bool, optional - If True, plot colorbar (only relevant for 'scatter' and 'hexbin' - plots) - position : float - Specify relative alignments for bar plot layout. - From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 - (center) - table : bool, Series or DataFrame, default False - If True, draw a table using the data in the DataFrame and the data - will be transposed to meet matplotlib's default layout. - If a Series or DataFrame is passed, use passed data to draw a - table. - yerr : DataFrame, Series, array-like, dict and str - See :ref:`Plotting with Error Bars ` for - detail. - xerr : same types as yerr. 
- mark_right : bool, default True - When using a secondary_y axis, automatically mark the column - labels with "(right)" in the legend - `**kwds` : keywords - Options to pass to matplotlib plotting method - - Returns - ------- - :class:`matplotlib.axes.Axes` or numpy.ndarray of them - - Notes - ----- - - See matplotlib documentation online for more on this subject - - If `kind` = 'bar' or 'barh', you can specify relative alignments - for bar plot layout by `position` keyword. - From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 - (center) - """ kind = self._kind_aliases.get(kind, kind) if kind not in self._all_kinds: raise ValueError('{} is not a valid plot kind'.format(kind)) From 4fbfed0cb6852e35dbbaee268e4422efdad48c81 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 26 Jun 2019 11:35:46 +0100 Subject: [PATCH 06/26] Fixing hexbin plot tests --- pandas/plotting/_core.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index ea6f930874cd0..2f39d869dd5a3 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -952,9 +952,13 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, pandas.core.dtypes.generic.ABCDataFrame): raise ValueError( "plot kind hexbin can only be used for data frames") - return self(kind='hexbin', x=x, y=y, C=C, - reduce_C_function=reduce_C_function, gridsize=gridsize, - **kwargs) + + if reduce_C_function is not None: + kwargs['reduce_C_function'] = reduce_C_function + if gridsize is not None: + kwargs['gridsize'] = gridsize + + return self(kind='hexbin', x=x, y=y, C=C, **kwargs) def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, From 7d7263a20532f3b555bc849c7901da748cde59a4 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 26 Jun 2019 13:41:55 +0100 Subject: [PATCH 07/26] Fixing bug when calling plot twice on the same data, since the data (Series or DataFrame) was being modified in the first call 
--- pandas/plotting/_core.py | 45 ++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 2f39d869dd5a3..d8db8f6bf3b3b 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -157,38 +157,39 @@ def __call__(self, kind='line', x=None, y=None, **kwargs): raise ValueError('{} is not a valid plot kind'.format(kind)) plot_backend = _get_plot_backend() + data = self._parent.copy() + if kind in self._dataframe_kinds: - if isinstance(self._parent, ABCDataFrame): - return plot_backend.plot(self._parent, x=x, y=y, kind=kind, - **kwargs) + if isinstance(data, ABCDataFrame): + return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs) else: raise ValueError(("plot kind {} can only be used for " "data frames").format(kind)) - if kind in self._series_kinds: - if isinstance(self._parent, ABCDataFrame): + elif kind in self._series_kinds: + if isinstance(data, ABCDataFrame): if y is None and kwargs.get('subplots') is False: msg = "{} requires either y column or 'subplots=True'" raise ValueError(msg.format(kind)) elif y is not None: if (is_integer(y) - and not self._parent.columns.holds_integer()): - y = self._parent.columns[y] + and not data.columns.holds_integer()): + y = data.columns[y] # converted to series actually. 
copy to not modify - self._parent = self._parent[y].copy() - self._parent.index.name = y - elif isinstance(self._parent, ABCDataFrame): - data_cols = self._parent.columns + data = data[y].copy() + data.index.name = y + elif isinstance(data, ABCDataFrame): + data_cols = data.columns if x is not None: - if is_integer(x) and not self._parent.columns.holds_integer(): + if is_integer(x) and not data.columns.holds_integer(): x = data_cols[x] - elif not isinstance(self._parent[x], ABCSeries): + elif not isinstance(data[x], ABCSeries): raise ValueError("x must be a label or position") - self._parent = self._parent.set_index(x) + data = data.set_index(x) if y is not None: # check if we have y as int or list of ints int_ylist = is_list_like(y) and all(is_integer(c) for c in y) int_y_arg = is_integer(y) or int_ylist - if int_y_arg and not self._parent.columns.holds_integer(): + if int_y_arg and not data.columns.holds_integer(): y = data_cols[y] label_kw = kwargs['label'] if 'label' in kwargs else False @@ -197,25 +198,25 @@ def __call__(self, kind='line', x=None, y=None, **kwargs): (isinstance(kwargs[kw], str) or is_integer(kwargs[kw]))): try: - kwargs[kw] = self._parent[kwargs[kw]] + kwargs[kw] = data[kwargs[kw]] except (IndexError, KeyError, TypeError): pass # don't overwrite - self._parent = self._parent[y].copy() + data = data[y].copy() - if isinstance(self._parent, ABCSeries): + if isinstance(data, ABCSeries): label_name = label_kw or y - self._parent.name = label_name + data.name = label_name else: match = is_list_like(label_kw) and len(label_kw) == len(y) if label_kw and not match: raise ValueError( "label should be list-like and same length as y") - label_name = label_kw or self._parent.columns - self._parent.columns = label_name + label_name = label_kw or data.columns + data.columns = label_name - return plot_backend.plot(self._parent, kind=kind, **kwargs) + return plot_backend.plot(data, kind=kind, **kwargs) def line(self, x=None, y=None, **kwargs): """ From 
1a03cbf41a6bbf78beada99076944a558f962775 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 26 Jun 2019 14:48:30 +0100 Subject: [PATCH 08/26] Raising missing exception for pie in DataFrame, and fixing accessor signature mismatches --- pandas/plotting/_core.py | 57 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index d8db8f6bf3b3b..16a87e27da90d 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -151,7 +151,58 @@ class PlotAccessor(pandas.core.base.PandasObject): def __init__(self, data): self._parent = data - def __call__(self, kind='line', x=None, y=None, **kwargs): + @staticmethod + def _get_call_args(data, args, kwargs): + """ + We used to have different accessors for Series and DataFrame. Their + signatures were different: + + - SeriesPlotMethods.__call__(kind, ..., **kwargs) + - DataFramePlotMethods.__call__(x, y, kind, ..., **kwargs) + + This function makes this unified `__call__` method compatible with both + """ + if isinstance(data, ABCSeries): + arg_def = [ + ('kind', 'line'), ('ax', None), ('figsize', None), + ('use_index', True), ('title', None), ('grid', None), + ('legend', False), ('style', None), ('logx', False), + ('logy', False), ('loglog', False), ('xticks', None), + ('yticks', None), ('xlim', None), ('ylim', None), + ('rot', None), ('fontsize', None), ('colormap', None), + ('table', False), ('yerr', None), ('xerr', None), + ('label', None), ('secondary_y', False)] + elif isinstance(data, ABCDataFrame): + arg_def = [ + ('x', None), ('y', None), ('kind', 'line'), ('ax', None), + ('subplots', False), ('sharex', None), ('sharey', False), + ('layout', None), ('figsize', None), ('use_index', True), + ('title', None), ('grid', None), ('legend', True), + ('style', None), ('logx', False), ('logy', False), + ('loglog', False), ('xticks', None), ('yticks', None), + ('xlim', None), ('ylim', None), ('rot', None), + ('fontsize', 
None), ('colormap', None), ('table', False), + ('yerr', None), ('xerr', None), ('secondary_y', False), + ('sort_columns', False)] + else: + return TypeError(('Called plot accessor for type {}, expected ' + 'Series or DataFrame').format( + type(data).__name__)) + + if args: + # TODO raise warning here, positional arguments shouldn't be used + # anymore + pos_args = {name: value for value, (name, _) in zip(args, arg_def)} + kwargs = dict(arg_def, **pos_args, **kwargs) + + x = kwargs.pop('x', None) + y = kwargs.pop('y', None) + kind = kwargs.pop('kind', 'line') + return x, y, kind, kwargs + + def __call__(self, *args, **kwargs): + x, y, kind, kwargs = self._get_call_args(self._parent, args, kwargs) + kind = self._kind_aliases.get(kind, kind) if kind not in self._all_kinds: raise ValueError('{} is not a valid plot kind'.format(kind)) @@ -779,6 +830,10 @@ def pie(self, **kwargs): >>> plot = df.plot.pie(subplots=True, figsize=(6, 3)) """ + if (isinstance(self._parent, ABCDataFrame) + and kwargs.get('y', None) is None + and not kwargs.get('subplots', False)): + raise ValueError("pie requires either y column or 'subplots=True'") return self(kind='pie', **kwargs) def scatter(self, x, y, s=None, c=None, **kwargs): From cf7cbc03e2f49e8d592b628430925b33fc736e73 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 26 Jun 2019 15:11:33 +0100 Subject: [PATCH 09/26] Fixing bug that shown the legend for Series plot --- pandas/plotting/_core.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 16a87e27da90d..9034809df29ae 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -162,6 +162,11 @@ def _get_call_args(data, args, kwargs): This function makes this unified `__call__` method compatible with both """ + if args and isinstance(data, ABCSeries): + # TODO raise warning here, positional arguments shouldn't be + # used anymore, so we can add x, y and kind to the signature + pass 
+ if isinstance(data, ABCSeries): arg_def = [ ('kind', 'line'), ('ax', None), ('figsize', None), @@ -189,11 +194,8 @@ def _get_call_args(data, args, kwargs): 'Series or DataFrame').format( type(data).__name__)) - if args: - # TODO raise warning here, positional arguments shouldn't be used - # anymore - pos_args = {name: value for value, (name, _) in zip(args, arg_def)} - kwargs = dict(arg_def, **pos_args, **kwargs) + pos_args = {name: value for value, (name, _) in zip(args, arg_def)} + kwargs = dict(arg_def, **pos_args, **kwargs) x = kwargs.pop('x', None) y = kwargs.pop('y', None) From d063e05622acce0925dd255d8c9177616d007cde Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 26 Jun 2019 17:12:54 +0100 Subject: [PATCH 10/26] Fix linting --- pandas/plotting/__init__.py | 4 ++-- pandas/plotting/_core.py | 9 ++++++--- pandas/plotting/_matplotlib/__init__.py | 1 + 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/plotting/__init__.py b/pandas/plotting/__init__.py index 47e71541cdaef..403ca7c547948 100644 --- a/pandas/plotting/__init__.py +++ b/pandas/plotting/__init__.py @@ -2,8 +2,8 @@ Plotting public API """ from pandas.plotting._core import ( - PlotAccessor, boxplot, boxplot_frame, - boxplot_frame_groupby, hist_frame, hist_series) + PlotAccessor, boxplot, boxplot_frame, boxplot_frame_groupby, hist_frame, + hist_series) from pandas.plotting._misc import ( andrews_curves, autocorrelation_plot, bootstrap_plot, deregister as deregister_matplotlib_converters, lag_plot, diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 9034809df29ae..09688aa672d54 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -3,10 +3,12 @@ from pandas.util._decorators import Appender -import pandas from pandas.core.dtypes.common import is_integer, is_list_like from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries +import pandas +from pandas.core.base import PandasObject + # Trigger matplotlib import, which implicitly registers 
our # converts. Implicit registration is deprecated, and when enforced # we can lazily import matplotlib. @@ -35,7 +37,7 @@ def _get_plot_backend(): return importlib.import_module(backend_str) -class PlotAccessor(pandas.core.base.PandasObject): +class PlotAccessor(PandasObject): """ Make plots of Series or DataFrame using the backend specified by the option ``plotting.backend``. By default, matplotlib is used. @@ -120,7 +122,8 @@ class PlotAccessor(pandas.core.base.PandasObject): yerr : DataFrame, Series, array-like, dict and str See :ref:`Plotting with Error Bars ` for detail. - xerr : same types as yerr. + xerr : DataFrame, Series, array-like, dict and str + Equivalent to yerr. mark_right : bool, default True When using a secondary_y axis, automatically mark the column labels with "(right)" in the legend diff --git a/pandas/plotting/_matplotlib/__init__.py b/pandas/plotting/_matplotlib/__init__.py index 97e544e5baba4..5ea53e4e60562 100644 --- a/pandas/plotting/_matplotlib/__init__.py +++ b/pandas/plotting/_matplotlib/__init__.py @@ -3,6 +3,7 @@ from pandas._config import get_option import pandas + from pandas.plotting._matplotlib.boxplot import ( BoxPlot, boxplot, boxplot_frame, boxplot_frame_groupby) from pandas.plotting._matplotlib.converter import deregister, register From 0d146f99eec965f310c0dd0e7129e2d69f5768f5 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 1 Jul 2019 13:52:08 +0100 Subject: [PATCH 11/26] Fixing bug that made reusing the previous plot for dataframes --- pandas/plotting/_core.py | 3 +++ pandas/plotting/_matplotlib/__init__.py | 4 +--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 09688aa672d54..bdb3800721250 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -215,6 +215,9 @@ def __call__(self, *args, **kwargs): plot_backend = _get_plot_backend() data = self._parent.copy() + if isinstance(data, pandas.core.dtypes.generic.ABCSeries): + 
kwargs['new_plot'] = True + if kind in self._dataframe_kinds: if isinstance(data, ABCDataFrame): return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs) diff --git a/pandas/plotting/_matplotlib/__init__.py b/pandas/plotting/_matplotlib/__init__.py index 5ea53e4e60562..30e29d813d652 100644 --- a/pandas/plotting/_matplotlib/__init__.py +++ b/pandas/plotting/_matplotlib/__init__.py @@ -2,8 +2,6 @@ from pandas._config import get_option -import pandas - from pandas.plotting._matplotlib.boxplot import ( BoxPlot, boxplot, boxplot_frame, boxplot_frame_groupby) from pandas.plotting._matplotlib.converter import deregister, register @@ -33,7 +31,7 @@ def plot(data, kind, **kwargs): - if isinstance(data, pandas.core.dtypes.generic.ABCSeries): + if kwargs.pop('new_plot', False): ax = kwargs.get('ax') if ax is None and len(plt.get_fignums()) > 0: with plt.rc_context(): From 34ea1f23cc4f2811fe8f76a5e8deb37889dcf06f Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 1 Jul 2019 13:56:55 +0100 Subject: [PATCH 12/26] Removing duplicated data type checks --- pandas/plotting/_core.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index bdb3800721250..2ccc1266a44da 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -923,10 +923,6 @@ def scatter(self, x, y, s=None, c=None, **kwargs): ... c='species', ... colormap='viridis') """ - if not isinstance(self._parent, - pandas.core.dtypes.generic.ABCDataFrame): - raise ValueError( - "plot kind scatter can only be used for data frames") return self(kind='scatter', x=x, y=y, s=s, c=c, **kwargs) def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, @@ -1012,11 +1008,6 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, ... gridsize=10, ... 
cmap="viridis") """ - if not isinstance(self._parent, - pandas.core.dtypes.generic.ABCDataFrame): - raise ValueError( - "plot kind hexbin can only be used for data frames") - if reduce_C_function is not None: kwargs['reduce_C_function'] = reduce_C_function if gridsize is not None: From 19489ba0957088db24e24f400fae4abeb87c1940 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 1 Jul 2019 14:01:59 +0100 Subject: [PATCH 13/26] Restoring original position of methods, so the diff is smaller --- pandas/plotting/_core.py | 1332 +++++++++++++++++++------------------- 1 file changed, 666 insertions(+), 666 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 2ccc1266a44da..5bb3751ad9066 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -18,23 +18,383 @@ pass -def _get_plot_backend(): +def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, + xrot=None, ylabelsize=None, yrot=None, figsize=None, + bins=10, **kwds): + """ + Draw histogram of the input series using matplotlib. + + Parameters + ---------- + by : object, optional + If passed, then used to form histograms for separate groups + ax : matplotlib axis object + If not passed, uses gca() + grid : bool, default True + Whether to show axis grid lines + xlabelsize : int, default None + If specified changes the x-axis label size + xrot : float, default None + rotation of x axis labels + ylabelsize : int, default None + If specified changes the y-axis label size + yrot : float, default None + rotation of y axis labels + figsize : tuple, default None + figure size in inches by default + bins : integer or sequence, default 10 + Number of histogram bins to be used. If an integer is given, bins + 1 + bin edges are calculated and returned. If bins is a sequence, gives + bin edges, including left edge of first bin and right edge of last + bin. In this case, bins is returned unmodified. 
+ `**kwds` : keywords + To be passed to the actual plotting function + + Returns + ------- + matplotlib.AxesSubplot + A histogram plot. + + See Also + -------- + matplotlib.axes.Axes.hist : Plot a histogram using matplotlib. + """ + plot_backend = _get_plot_backend() + return plot_backend.hist_series(self, by=by, ax=ax, grid=grid, + xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot, + figsize=figsize, bins=bins, **kwds) + + +def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, + xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, + sharey=False, figsize=None, layout=None, bins=10, **kwds): + """ + Make a histogram of the DataFrame's. + + A `histogram`_ is a representation of the distribution of data. + This function calls :meth:`matplotlib.pyplot.hist`, on each series in + the DataFrame, resulting in one histogram per column. + + .. _histogram: https://en.wikipedia.org/wiki/Histogram + + Parameters + ---------- + data : DataFrame + The pandas object holding the data. + column : string or sequence + If passed, will be used to limit data to a subset of columns. + by : object, optional + If passed, then used to form histograms for separate groups. + grid : bool, default True + Whether to show axis grid lines. + xlabelsize : int, default None + If specified changes the x-axis label size. + xrot : float, default None + Rotation of x axis labels. For example, a value of 90 displays the + x labels rotated 90 degrees clockwise. + ylabelsize : int, default None + If specified changes the y-axis label size. + yrot : float, default None + Rotation of y axis labels. For example, a value of 90 displays the + y labels rotated 90 degrees clockwise. + ax : Matplotlib axes object, default None + The axes to plot the histogram on. + sharex : bool, default True if ax is None else False + In case subplots=True, share x axis and set some x axis labels to + invisible; defaults to True if ax is None otherwise False if an ax + is passed in. 
+ Note that passing in both an ax and sharex=True will alter all x axis + labels for all subplots in a figure. + sharey : bool, default False + In case subplots=True, share y axis and set some y axis labels to + invisible. + figsize : tuple + The size in inches of the figure to create. Uses the value in + `matplotlib.rcParams` by default. + layout : tuple, optional + Tuple of (rows, columns) for the layout of the histograms. + bins : integer or sequence, default 10 + Number of histogram bins to be used. If an integer is given, bins + 1 + bin edges are calculated and returned. If bins is a sequence, gives + bin edges, including left edge of first bin and right edge of last + bin. In this case, bins is returned unmodified. + **kwds + All other plotting keyword arguments to be passed to + :meth:`matplotlib.pyplot.hist`. + + Returns + ------- + matplotlib.AxesSubplot or numpy.ndarray of them + + See Also + -------- + matplotlib.pyplot.hist : Plot a histogram using matplotlib. + + Examples + -------- + + .. plot:: + :context: close-figs + + This example draws a histogram based on the length and width of + some animals, displayed in three bins + + >>> df = pd.DataFrame({ + ... 'length': [1.5, 0.5, 1.2, 0.9, 3], + ... 'width': [0.7, 0.2, 0.15, 0.2, 1.1] + ... }, index= ['pig', 'rabbit', 'duck', 'chicken', 'horse']) + >>> hist = df.hist(bins=3) + """ + plot_backend = _get_plot_backend() + return plot_backend.hist_frame(data, column=column, by=by, grid=grid, + xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot, + ax=ax, sharex=sharex, sharey=sharey, + figsize=figsize, layout=layout, bins=bins, + **kwds) + + +def boxplot(data, column=None, by=None, ax=None, fontsize=None, + rot=0, grid=True, figsize=None, layout=None, return_type=None, + **kwds): + """ + Make a box plot from DataFrame columns. + + Make a box-and-whisker plot from DataFrame columns, optionally grouped + by some other columns. 
A box plot is a method for graphically depicting + groups of numerical data through their quartiles. + The box extends from the Q1 to Q3 quartile values of the data, + with a line at the median (Q2). The whiskers extend from the edges + of box to show the range of the data. The position of the whiskers + is set by default to `1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box. + Outlier points are those past the end of the whiskers. + + For further details see + Wikipedia's entry for `boxplot `_. + + Parameters + ---------- + column : str or list of str, optional + Column name or list of names, or vector. + Can be any valid input to :meth:`pandas.DataFrame.groupby`. + by : str or array-like, optional + Column in the DataFrame to :meth:`pandas.DataFrame.groupby`. + One box-plot will be done per value of columns in `by`. + ax : object of class matplotlib.axes.Axes, optional + The matplotlib axes to be used by boxplot. + fontsize : float or str + Tick label font size in points or as a string (e.g., `large`). + rot : int or float, default 0 + The rotation angle of labels (in degrees) + with respect to the screen coordinate system. + grid : bool, default True + Setting this to True will show the grid. + figsize : A tuple (width, height) in inches + The size of the figure to create in matplotlib. + layout : tuple (rows, columns), optional + For example, (3, 5) will display the subplots + using 3 columns and 5 rows, starting from the top-left. + return_type : {'axes', 'dict', 'both'} or None, default 'axes' + The kind of object to return. The default is ``axes``. + + * 'axes' returns the matplotlib axes the boxplot is drawn on. + * 'dict' returns a dictionary whose values are the matplotlib + Lines of the boxplot. + * 'both' returns a namedtuple with the axes and dict. + * when grouping with ``by``, a Series mapping columns to + ``return_type`` is returned. + + If ``return_type`` is `None`, a NumPy array + of axes with the same shape as ``layout`` is returned. 
+ **kwds + All other plotting keyword arguments to be passed to + :func:`matplotlib.pyplot.boxplot`. + + Returns + ------- + result + See Notes. + + See Also + -------- + Series.plot.hist: Make a histogram. + matplotlib.pyplot.boxplot : Matplotlib equivalent plot. + + Notes + ----- + The return type depends on the `return_type` parameter: + + * 'axes' : object of class matplotlib.axes.Axes + * 'dict' : dict of matplotlib.lines.Line2D objects + * 'both' : a namedtuple with structure (ax, lines) + + For data grouped with ``by``, return a Series of the above or a numpy + array: + + * :class:`~pandas.Series` + * :class:`~numpy.array` (for ``return_type = None``) + + Use ``return_type='dict'`` when you want to tweak the appearance + of the lines after plotting. In this case a dict containing the Lines + making up the boxes, caps, fliers, medians, and whiskers is returned. + + Examples + -------- + + Boxplots can be created for every column in the dataframe + by ``df.boxplot()`` or indicating the columns to be used: + + .. plot:: + :context: close-figs + + >>> np.random.seed(1234) + >>> df = pd.DataFrame(np.random.randn(10,4), + ... columns=['Col1', 'Col2', 'Col3', 'Col4']) + >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3']) + + Boxplots of variables distributions grouped by the values of a third + variable can be created using the option ``by``. For instance: + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame(np.random.randn(10, 2), + ... columns=['Col1', 'Col2']) + >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A', + ... 'B', 'B', 'B', 'B', 'B']) + >>> boxplot = df.boxplot(by='X') + + A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot + in order to group the data by combination of the variables in the x-axis: + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame(np.random.randn(10,3), + ... columns=['Col1', 'Col2', 'Col3']) + >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A', + ... 
'B', 'B', 'B', 'B', 'B']) + >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A', + ... 'B', 'A', 'B', 'A', 'B']) + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y']) + + The layout of boxplot can be adjusted giving a tuple to ``layout``: + + .. plot:: + :context: close-figs + + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', + ... layout=(2, 1)) + + Additional formatting can be done to the boxplot, like suppressing the grid + (``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``) + or changing the fontsize (i.e. ``fontsize=15``): + + .. plot:: + :context: close-figs + + >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15) + + The parameter ``return_type`` can be used to select the type of element + returned by `boxplot`. When ``return_type='axes'`` is selected, + the matplotlib axes on which the boxplot is drawn are returned: + + >>> boxplot = df.boxplot(column=['Col1','Col2'], return_type='axes') + >>> type(boxplot) + + + When grouping with ``by``, a Series mapping columns to ``return_type`` + is returned: + + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', + ... return_type='axes') + >>> type(boxplot) + + + If ``return_type`` is `None`, a NumPy array of axes with the same shape + as ``layout`` is returned: + + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', + ... 
return_type=None) + >>> type(boxplot) + + """ + plot_backend = _get_plot_backend() + return plot_backend.boxplot(data, column=column, by=by, ax=ax, + fontsize=fontsize, rot=rot, grid=grid, + figsize=figsize, layout=layout, + return_type=return_type, **kwds) + + +@Appender(boxplot.__doc__) +def boxplot_frame(self, column=None, by=None, ax=None, fontsize=None, rot=0, + grid=True, figsize=None, layout=None, + return_type=None, **kwds): + plot_backend = _get_plot_backend() + return plot_backend.boxplot_frame(self, column=column, by=by, ax=ax, + fontsize=fontsize, rot=rot, grid=grid, + figsize=figsize, layout=layout, + return_type=return_type, **kwds) + + +def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, + rot=0, grid=True, ax=None, figsize=None, + layout=None, sharex=False, sharey=True, **kwds): """ - Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`). + Make box plots from DataFrameGroupBy data. - The plotting system of pandas has been using matplotlib, but the idea here - is that it can also work with other third-party backends. In the future, - this function will return the backend from a pandas option, and all the - rest of the code in this file will use the backend specified there for the - plotting. + Parameters + ---------- + grouped : Grouped DataFrame + subplots : bool + * ``False`` - no subplots will be used + * ``True`` - create a subplot for each group + column : column name or list of names, or vector + Can be any valid input to groupby + fontsize : int or string + rot : label rotation angle + grid : Setting this to True will show the grid + ax : Matplotlib axis object, default None + figsize : A tuple (width, height) in inches + layout : tuple (optional) + (rows, columns) for the layout of the plot + sharex : bool, default False + Whether x-axes will be shared among subplots - The backend is imported lazily, as matplotlib is a soft dependency, and - pandas can be used without it being installed. + .. 
versionadded:: 0.23.1 + sharey : bool, default True + Whether y-axes will be shared among subplots + + .. versionadded:: 0.23.1 + `**kwds` : Keyword Arguments + All other plotting keyword arguments to be passed to + matplotlib's boxplot function + + Returns + ------- + dict of key/value = group key/DataFrame.boxplot return value + or DataFrame.boxplot return value in case subplots=figures=False + + Examples + -------- + >>> import itertools + >>> tuples = [t for t in itertools.product(range(1000), range(4))] + >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1']) + >>> data = np.random.randn(len(index),4) + >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index) + >>> + >>> grouped = df.groupby(level='lvl1') + >>> boxplot_frame_groupby(grouped) + >>> + >>> grouped = df.unstack(level='lvl1').groupby(level=0, axis=1) + >>> boxplot_frame_groupby(grouped, subplots=False) """ - backend_str = pandas.get_option('plotting.backend') - if backend_str == 'matplotlib': - backend_str = 'pandas.plotting._matplotlib' - return importlib.import_module(backend_str) + plot_backend = _get_plot_backend() + return plot_backend.boxplot_frame_groupby( + grouped, subplots=subplots, column=column, fontsize=fontsize, rot=rot, + grid=grid, ax=ax, figsize=figsize, layout=layout, sharex=sharex, + sharey=sharey, **kwds) class PlotAccessor(PandasObject): @@ -630,766 +990,406 @@ def kde(self, bw_method=None, ind=None, **kwargs): ind : NumPy array or integer, optional Evaluation points for the estimated PDF. If None (default), 1000 equally spaced points are used. If `ind` is a NumPy array, the - KDE is evaluated at the points passed. If `ind` is an integer, - `ind` number of equally spaced points are used. - **kwds : optional - Additional keyword arguments are documented in - :meth:`pandas.%(this-datatype)s.plot`. 
- - Returns - ------- - matplotlib.axes.Axes or numpy.ndarray of them - - See Also - -------- - scipy.stats.gaussian_kde : Representation of a kernel-density - estimate using Gaussian kernels. This is the function used - internally to estimate the PDF. - - Examples - -------- - Given a Series of points randomly sampled from an unknown - distribution, estimate its PDF using KDE with automatic - bandwidth determination and plot the results, evaluating them at - 1000 equally spaced points (default): - - .. plot:: - :context: close-figs - - >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5]) - >>> ax = s.plot.kde() - - A scalar bandwidth can be specified. Using a small bandwidth value can - lead to over-fitting, while using a large bandwidth value may result - in under-fitting: - - .. plot:: - :context: close-figs - - >>> ax = s.plot.kde(bw_method=0.3) - - .. plot:: - :context: close-figs - - >>> ax = s.plot.kde(bw_method=3) - - Finally, the `ind` parameter determines the evaluation points for the - plot of the estimated PDF: - - .. plot:: - :context: close-figs - - >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5]) - - For DataFrame, it works in the same way: - - .. plot:: - :context: close-figs - - >>> df = pd.DataFrame({ - ... 'x': [1, 2, 2.5, 3, 3.5, 4, 5], - ... 'y': [4, 4, 4.5, 5, 5.5, 6, 6], - ... }) - >>> ax = df.plot.kde() - - A scalar bandwidth can be specified. Using a small bandwidth value can - lead to over-fitting, while using a large bandwidth value may result - in under-fitting: - - .. plot:: - :context: close-figs - - >>> ax = df.plot.kde(bw_method=0.3) - - .. plot:: - :context: close-figs - - >>> ax = df.plot.kde(bw_method=3) - - Finally, the `ind` parameter determines the evaluation points for the - plot of the estimated PDF: - - .. 
plot:: - :context: close-figs - - >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6]) - """ - return self(kind='kde', bw_method=bw_method, ind=ind, **kwargs) - - density = kde - - def area(self, x=None, y=None, **kwargs): - """ - Draw a stacked area plot. - - An area plot displays quantitative data visually. - This function wraps the matplotlib area function. - - Parameters - ---------- - x : label or position, optional - Coordinates for the X axis. By default uses the index. - y : label or position, optional - Column to plot. By default uses all columns. - stacked : bool, default True - Area plots are stacked by default. Set to False to create a - unstacked plot. - **kwds : optional - Additional keyword arguments are documented in - :meth:`DataFrame.plot`. - - Returns - ------- - matplotlib.axes.Axes or numpy.ndarray - Area plot, or array of area plots if subplots is True. - - See Also - -------- - DataFrame.plot : Make plots of DataFrame using matplotlib / pylab. - - Examples - -------- - Draw an area plot based on basic business metrics: - - .. plot:: - :context: close-figs - - >>> df = pd.DataFrame({ - ... 'sales': [3, 2, 3, 9, 10, 6], - ... 'signups': [5, 5, 6, 12, 14, 13], - ... 'visits': [20, 42, 28, 62, 81, 50], - ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01', - ... freq='M')) - >>> ax = df.plot.area() - - Area plots are stacked by default. To produce an unstacked plot, - pass ``stacked=False``: - - .. plot:: - :context: close-figs - - >>> ax = df.plot.area(stacked=False) - - Draw an area plot for a single column: - - .. plot:: - :context: close-figs - - >>> ax = df.plot.area(y='sales') - - Draw with a different `x`: - - .. plot:: - :context: close-figs - - >>> df = pd.DataFrame({ - ... 'sales': [3, 2, 3], - ... 'visits': [20, 42, 28], - ... 'day': [1, 2, 3], - ... }) - >>> ax = df.plot.area(x='day') - """ - return self(kind='area', x=x, y=y, **kwargs) - - def pie(self, **kwargs): - """ - Generate a pie plot. 
- - A pie plot is a proportional representation of the numerical data in a - column. This function wraps :meth:`matplotlib.pyplot.pie` for the - specified column. If no column reference is passed and - ``subplots=True`` a pie plot is drawn for each numerical column - independently. - - Parameters - ---------- - y : int or label, optional - Label or position of the column to plot. - If not provided, ``subplots=True`` argument must be passed. - **kwds - Keyword arguments to pass on to :meth:`DataFrame.plot`. + KDE is evaluated at the points passed. If `ind` is an integer, + `ind` number of equally spaced points are used. + **kwds : optional + Additional keyword arguments are documented in + :meth:`pandas.%(this-datatype)s.plot`. Returns ------- - matplotlib.axes.Axes or np.ndarray of them - A NumPy array is returned when `subplots` is True. + matplotlib.axes.Axes or numpy.ndarray of them See Also -------- - Series.plot.pie : Generate a pie plot for a Series. - DataFrame.plot : Make plots of a DataFrame. + scipy.stats.gaussian_kde : Representation of a kernel-density + estimate using Gaussian kernels. This is the function used + internally to estimate the PDF. Examples -------- - In the example below we have a DataFrame with the information about - planet's mass and radius. We pass the the 'mass' column to the - pie function to get a pie plot. + Given a Series of points randomly sampled from an unknown + distribution, estimate its PDF using KDE with automatic + bandwidth determination and plot the results, evaluating them at + 1000 equally spaced points (default): .. plot:: :context: close-figs - >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97], - ... 'radius': [2439.7, 6051.8, 6378.1]}, - ... index=['Mercury', 'Venus', 'Earth']) - >>> plot = df.plot.pie(y='mass', figsize=(5, 5)) + >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5]) + >>> ax = s.plot.kde() + + A scalar bandwidth can be specified. 
Using a small bandwidth value can + lead to over-fitting, while using a large bandwidth value may result + in under-fitting: .. plot:: :context: close-figs - >>> plot = df.plot.pie(subplots=True, figsize=(6, 3)) - """ - if (isinstance(self._parent, ABCDataFrame) - and kwargs.get('y', None) is None - and not kwargs.get('subplots', False)): - raise ValueError("pie requires either y column or 'subplots=True'") - return self(kind='pie', **kwargs) - - def scatter(self, x, y, s=None, c=None, **kwargs): - """ - Create a scatter plot with varying marker point size and color. - - The coordinates of each point are defined by two dataframe columns and - filled circles are used to represent each point. This kind of plot is - useful to see complex correlations between two variables. Points could - be for instance natural 2D coordinates like longitude and latitude in - a map or, in general, any pair of metrics that can be plotted against - each other. + >>> ax = s.plot.kde(bw_method=0.3) - Parameters - ---------- - x : int or str - The column name or column position to be used as horizontal - coordinates for each point. - y : int or str - The column name or column position to be used as vertical - coordinates for each point. - s : scalar or array_like, optional - The size of each point. Possible values are: + .. plot:: + :context: close-figs - - A single scalar so all points have the same size. + >>> ax = s.plot.kde(bw_method=3) - - A sequence of scalars, which will be used for each point's size - recursively. For instance, when passing [2,14] all points size - will be either 2 or 14, alternatively. + Finally, the `ind` parameter determines the evaluation points for the + plot of the estimated PDF: - c : str, int or array_like, optional - The color of each point. Possible values are: + .. plot:: + :context: close-figs - - A single color string referred to by name, RGB or RGBA code, - for instance 'red' or '#a98d19'. 
+ >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5]) - - A sequence of color strings referred to by name, RGB or RGBA - code, which will be used for each point's color recursively. For - instance ['green','yellow'] all points will be filled in green or - yellow, alternatively. + For DataFrame, it works in the same way: - - A column name or position whose values will be used to color the - marker points according to a colormap. + .. plot:: + :context: close-figs - **kwds - Keyword arguments to pass on to :meth:`DataFrame.plot`. + >>> df = pd.DataFrame({ + ... 'x': [1, 2, 2.5, 3, 3.5, 4, 5], + ... 'y': [4, 4, 4.5, 5, 5.5, 6, 6], + ... }) + >>> ax = df.plot.kde() - Returns - ------- - :class:`matplotlib.axes.Axes` or numpy.ndarray of them + A scalar bandwidth can be specified. Using a small bandwidth value can + lead to over-fitting, while using a large bandwidth value may result + in under-fitting: - See Also - -------- - matplotlib.pyplot.scatter : Scatter plot using multiple input data - formats. + .. plot:: + :context: close-figs - Examples - -------- - Let's see how to draw a scatter plot using coordinates from the values - in a DataFrame's columns. + >>> ax = df.plot.kde(bw_method=0.3) .. plot:: :context: close-figs - >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1], - ... [6.4, 3.2, 1], [5.9, 3.0, 2]], - ... columns=['length', 'width', 'species']) - >>> ax1 = df.plot.scatter(x='length', - ... y='width', - ... c='DarkBlue') + >>> ax = df.plot.kde(bw_method=3) - And now with the color determined by a column as well. + Finally, the `ind` parameter determines the evaluation points for the + plot of the estimated PDF: .. plot:: :context: close-figs - >>> ax2 = df.plot.scatter(x='length', - ... y='width', - ... c='species', - ... 
colormap='viridis') + >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6]) """ - return self(kind='scatter', x=x, y=y, s=s, c=c, **kwargs) + return self(kind='kde', bw_method=bw_method, ind=ind, **kwargs) - def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, - **kwargs): - """ - Generate a hexagonal binning plot. + density = kde - Generate a hexagonal binning plot of `x` versus `y`. If `C` is `None` - (the default), this is a histogram of the number of occurrences - of the observations at ``(x[i], y[i])``. + def area(self, x=None, y=None, **kwargs): + """ + Draw a stacked area plot. - If `C` is specified, specifies values at given coordinates - ``(x[i], y[i])``. These values are accumulated for each hexagonal - bin and then reduced according to `reduce_C_function`, - having as default the NumPy's mean function (:meth:`numpy.mean`). - (If `C` is specified, it must also be a 1-D sequence - of the same length as `x` and `y`, or a column label.) + An area plot displays quantitative data visually. + This function wraps the matplotlib area function. Parameters ---------- - x : int or str - The column label or position for x points. - y : int or str - The column label or position for y points. - C : int or str, optional - The column label or position for the value of `(x, y)` point. - reduce_C_function : callable, default `np.mean` - Function of one argument that reduces all the values in a bin to - a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`). - gridsize : int or tuple of (int, int), default 100 - The number of hexagons in the x-direction. - The corresponding number of hexagons in the y-direction is - chosen in a way that the hexagons are approximately regular. - Alternatively, gridsize can be a tuple with two elements - specifying the number of hexagons in the x-direction and the - y-direction. - **kwds + x : label or position, optional + Coordinates for the X axis. By default uses the index. 
+ y : label or position, optional + Column to plot. By default uses all columns. + stacked : bool, default True + Area plots are stacked by default. Set to False to create a + unstacked plot. + **kwds : optional Additional keyword arguments are documented in :meth:`DataFrame.plot`. Returns ------- - matplotlib.AxesSubplot - The matplotlib ``Axes`` on which the hexbin is plotted. + matplotlib.axes.Axes or numpy.ndarray + Area plot, or array of area plots if subplots is True. See Also -------- - DataFrame.plot : Make plots of a DataFrame. - matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib, - the matplotlib function that is used under the hood. - - Examples - -------- - The following examples are generated with random data from - a normal distribution. - - .. plot:: - :context: close-figs - - >>> n = 10000 - >>> df = pd.DataFrame({'x': np.random.randn(n), - ... 'y': np.random.randn(n)}) - >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20) - - The next example uses `C` and `np.sum` as `reduce_C_function`. - Note that `'observations'` values ranges from 1 to 5 but the result - plot shows values up to more than 25. This is because of the - `reduce_C_function`. - - .. plot:: - :context: close-figs - - >>> n = 500 - >>> df = pd.DataFrame({ - ... 'coord_x': np.random.uniform(-3, 3, size=n), - ... 'coord_y': np.random.uniform(30, 50, size=n), - ... 'observations': np.random.randint(1,5, size=n) - ... }) - >>> ax = df.plot.hexbin(x='coord_x', - ... y='coord_y', - ... C='observations', - ... reduce_C_function=np.sum, - ... gridsize=10, - ... 
cmap="viridis") - """ - if reduce_C_function is not None: - kwargs['reduce_C_function'] = reduce_C_function - if gridsize is not None: - kwargs['gridsize'] = gridsize - - return self(kind='hexbin', x=x, y=y, C=C, **kwargs) - - -def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, - xrot=None, ylabelsize=None, yrot=None, figsize=None, - bins=10, **kwds): - """ - Draw histogram of the input series using matplotlib. - - Parameters - ---------- - by : object, optional - If passed, then used to form histograms for separate groups - ax : matplotlib axis object - If not passed, uses gca() - grid : bool, default True - Whether to show axis grid lines - xlabelsize : int, default None - If specified changes the x-axis label size - xrot : float, default None - rotation of x axis labels - ylabelsize : int, default None - If specified changes the y-axis label size - yrot : float, default None - rotation of y axis labels - figsize : tuple, default None - figure size in inches by default - bins : integer or sequence, default 10 - Number of histogram bins to be used. If an integer is given, bins + 1 - bin edges are calculated and returned. If bins is a sequence, gives - bin edges, including left edge of first bin and right edge of last - bin. In this case, bins is returned unmodified. - `**kwds` : keywords - To be passed to the actual plotting function - - Returns - ------- - matplotlib.AxesSubplot - A histogram plot. + DataFrame.plot : Make plots of DataFrame using matplotlib / pylab. - See Also - -------- - matplotlib.axes.Axes.hist : Plot a histogram using matplotlib. - """ - plot_backend = _get_plot_backend() - return plot_backend.hist_series(self, by=by, ax=ax, grid=grid, - xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot, - figsize=figsize, bins=bins, **kwds) + Examples + -------- + Draw an area plot based on basic business metrics: + .. 
plot:: + :context: close-figs -def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, - xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, - sharey=False, figsize=None, layout=None, bins=10, **kwds): - """ - Make a histogram of the DataFrame's. + >>> df = pd.DataFrame({ + ... 'sales': [3, 2, 3, 9, 10, 6], + ... 'signups': [5, 5, 6, 12, 14, 13], + ... 'visits': [20, 42, 28, 62, 81, 50], + ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01', + ... freq='M')) + >>> ax = df.plot.area() - A `histogram`_ is a representation of the distribution of data. - This function calls :meth:`matplotlib.pyplot.hist`, on each series in - the DataFrame, resulting in one histogram per column. + Area plots are stacked by default. To produce an unstacked plot, + pass ``stacked=False``: - .. _histogram: https://en.wikipedia.org/wiki/Histogram + .. plot:: + :context: close-figs - Parameters - ---------- - data : DataFrame - The pandas object holding the data. - column : string or sequence - If passed, will be used to limit data to a subset of columns. - by : object, optional - If passed, then used to form histograms for separate groups. - grid : bool, default True - Whether to show axis grid lines. - xlabelsize : int, default None - If specified changes the x-axis label size. - xrot : float, default None - Rotation of x axis labels. For example, a value of 90 displays the - x labels rotated 90 degrees clockwise. - ylabelsize : int, default None - If specified changes the y-axis label size. - yrot : float, default None - Rotation of y axis labels. For example, a value of 90 displays the - y labels rotated 90 degrees clockwise. - ax : Matplotlib axes object, default None - The axes to plot the histogram on. - sharex : bool, default True if ax is None else False - In case subplots=True, share x axis and set some x axis labels to - invisible; defaults to True if ax is None otherwise False if an ax - is passed in. 
- Note that passing in both an ax and sharex=True will alter all x axis - labels for all subplots in a figure. - sharey : bool, default False - In case subplots=True, share y axis and set some y axis labels to - invisible. - figsize : tuple - The size in inches of the figure to create. Uses the value in - `matplotlib.rcParams` by default. - layout : tuple, optional - Tuple of (rows, columns) for the layout of the histograms. - bins : integer or sequence, default 10 - Number of histogram bins to be used. If an integer is given, bins + 1 - bin edges are calculated and returned. If bins is a sequence, gives - bin edges, including left edge of first bin and right edge of last - bin. In this case, bins is returned unmodified. - **kwds - All other plotting keyword arguments to be passed to - :meth:`matplotlib.pyplot.hist`. + >>> ax = df.plot.area(stacked=False) - Returns - ------- - matplotlib.AxesSubplot or numpy.ndarray of them + Draw an area plot for a single column: - See Also - -------- - matplotlib.pyplot.hist : Plot a histogram using matplotlib. + .. plot:: + :context: close-figs - Examples - -------- + >>> ax = df.plot.area(y='sales') - .. plot:: - :context: close-figs + Draw with a different `x`: - This example draws a histogram based on the length and width of - some animals, displayed in three bins + .. plot:: + :context: close-figs - >>> df = pd.DataFrame({ - ... 'length': [1.5, 0.5, 1.2, 0.9, 3], - ... 'width': [0.7, 0.2, 0.15, 0.2, 1.1] - ... }, index= ['pig', 'rabbit', 'duck', 'chicken', 'horse']) - >>> hist = df.hist(bins=3) - """ - plot_backend = _get_plot_backend() - return plot_backend.hist_frame(data, column=column, by=by, grid=grid, - xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot, - ax=ax, sharex=sharex, sharey=sharey, - figsize=figsize, layout=layout, bins=bins, - **kwds) + >>> df = pd.DataFrame({ + ... 'sales': [3, 2, 3], + ... 'visits': [20, 42, 28], + ... 'day': [1, 2, 3], + ... 
}) + >>> ax = df.plot.area(x='day') + """ + return self(kind='area', x=x, y=y, **kwargs) + def pie(self, **kwargs): + """ + Generate a pie plot. -def boxplot(data, column=None, by=None, ax=None, fontsize=None, - rot=0, grid=True, figsize=None, layout=None, return_type=None, - **kwds): - """ - Make a box plot from DataFrame columns. + A pie plot is a proportional representation of the numerical data in a + column. This function wraps :meth:`matplotlib.pyplot.pie` for the + specified column. If no column reference is passed and + ``subplots=True`` a pie plot is drawn for each numerical column + independently. - Make a box-and-whisker plot from DataFrame columns, optionally grouped - by some other columns. A box plot is a method for graphically depicting - groups of numerical data through their quartiles. - The box extends from the Q1 to Q3 quartile values of the data, - with a line at the median (Q2). The whiskers extend from the edges - of box to show the range of the data. The position of the whiskers - is set by default to `1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box. - Outlier points are those past the end of the whiskers. + Parameters + ---------- + y : int or label, optional + Label or position of the column to plot. + If not provided, ``subplots=True`` argument must be passed. + **kwds + Keyword arguments to pass on to :meth:`DataFrame.plot`. - For further details see - Wikipedia's entry for `boxplot `_. + Returns + ------- + matplotlib.axes.Axes or np.ndarray of them + A NumPy array is returned when `subplots` is True. - Parameters - ---------- - column : str or list of str, optional - Column name or list of names, or vector. - Can be any valid input to :meth:`pandas.DataFrame.groupby`. - by : str or array-like, optional - Column in the DataFrame to :meth:`pandas.DataFrame.groupby`. - One box-plot will be done per value of columns in `by`. - ax : object of class matplotlib.axes.Axes, optional - The matplotlib axes to be used by boxplot. 
- fontsize : float or str - Tick label font size in points or as a string (e.g., `large`). - rot : int or float, default 0 - The rotation angle of labels (in degrees) - with respect to the screen coordinate system. - grid : bool, default True - Setting this to True will show the grid. - figsize : A tuple (width, height) in inches - The size of the figure to create in matplotlib. - layout : tuple (rows, columns), optional - For example, (3, 5) will display the subplots - using 3 columns and 5 rows, starting from the top-left. - return_type : {'axes', 'dict', 'both'} or None, default 'axes' - The kind of object to return. The default is ``axes``. + See Also + -------- + Series.plot.pie : Generate a pie plot for a Series. + DataFrame.plot : Make plots of a DataFrame. - * 'axes' returns the matplotlib axes the boxplot is drawn on. - * 'dict' returns a dictionary whose values are the matplotlib - Lines of the boxplot. - * 'both' returns a namedtuple with the axes and dict. - * when grouping with ``by``, a Series mapping columns to - ``return_type`` is returned. + Examples + -------- + In the example below we have a DataFrame with the information about + planet's mass and radius. We pass the the 'mass' column to the + pie function to get a pie plot. - If ``return_type`` is `None`, a NumPy array - of axes with the same shape as ``layout`` is returned. - **kwds - All other plotting keyword arguments to be passed to - :func:`matplotlib.pyplot.boxplot`. + .. plot:: + :context: close-figs - Returns - ------- - result - See Notes. + >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97], + ... 'radius': [2439.7, 6051.8, 6378.1]}, + ... index=['Mercury', 'Venus', 'Earth']) + >>> plot = df.plot.pie(y='mass', figsize=(5, 5)) - See Also - -------- - Series.plot.hist: Make a histogram. - matplotlib.pyplot.boxplot : Matplotlib equivalent plot. + .. 
plot:: + :context: close-figs - Notes - ----- - The return type depends on the `return_type` parameter: + >>> plot = df.plot.pie(subplots=True, figsize=(6, 3)) + """ + if (isinstance(self._parent, ABCDataFrame) + and kwargs.get('y', None) is None + and not kwargs.get('subplots', False)): + raise ValueError("pie requires either y column or 'subplots=True'") + return self(kind='pie', **kwargs) - * 'axes' : object of class matplotlib.axes.Axes - * 'dict' : dict of matplotlib.lines.Line2D objects - * 'both' : a namedtuple with structure (ax, lines) + def scatter(self, x, y, s=None, c=None, **kwargs): + """ + Create a scatter plot with varying marker point size and color. - For data grouped with ``by``, return a Series of the above or a numpy - array: + The coordinates of each point are defined by two dataframe columns and + filled circles are used to represent each point. This kind of plot is + useful to see complex correlations between two variables. Points could + be for instance natural 2D coordinates like longitude and latitude in + a map or, in general, any pair of metrics that can be plotted against + each other. - * :class:`~pandas.Series` - * :class:`~numpy.array` (for ``return_type = None``) + Parameters + ---------- + x : int or str + The column name or column position to be used as horizontal + coordinates for each point. + y : int or str + The column name or column position to be used as vertical + coordinates for each point. + s : scalar or array_like, optional + The size of each point. Possible values are: - Use ``return_type='dict'`` when you want to tweak the appearance - of the lines after plotting. In this case a dict containing the Lines - making up the boxes, caps, fliers, medians, and whiskers is returned. + - A single scalar so all points have the same size. - Examples - -------- + - A sequence of scalars, which will be used for each point's size + recursively. 
For instance, when passing [2,14] all points size + will be either 2 or 14, alternatively. - Boxplots can be created for every column in the dataframe - by ``df.boxplot()`` or indicating the columns to be used: + c : str, int or array_like, optional + The color of each point. Possible values are: - .. plot:: - :context: close-figs + - A single color string referred to by name, RGB or RGBA code, + for instance 'red' or '#a98d19'. - >>> np.random.seed(1234) - >>> df = pd.DataFrame(np.random.randn(10,4), - ... columns=['Col1', 'Col2', 'Col3', 'Col4']) - >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3']) + - A sequence of color strings referred to by name, RGB or RGBA + code, which will be used for each point's color recursively. For + instance ['green','yellow'] all points will be filled in green or + yellow, alternatively. - Boxplots of variables distributions grouped by the values of a third - variable can be created using the option ``by``. For instance: + - A column name or position whose values will be used to color the + marker points according to a colormap. - .. plot:: - :context: close-figs + **kwds + Keyword arguments to pass on to :meth:`DataFrame.plot`. - >>> df = pd.DataFrame(np.random.randn(10, 2), - ... columns=['Col1', 'Col2']) - >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A', - ... 'B', 'B', 'B', 'B', 'B']) - >>> boxplot = df.boxplot(by='X') + Returns + ------- + :class:`matplotlib.axes.Axes` or numpy.ndarray of them - A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot - in order to group the data by combination of the variables in the x-axis: + See Also + -------- + matplotlib.pyplot.scatter : Scatter plot using multiple input data + formats. - .. plot:: - :context: close-figs + Examples + -------- + Let's see how to draw a scatter plot using coordinates from the values + in a DataFrame's columns. - >>> df = pd.DataFrame(np.random.randn(10,3), - ... 
columns=['Col1', 'Col2', 'Col3']) - >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A', - ... 'B', 'B', 'B', 'B', 'B']) - >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A', - ... 'B', 'A', 'B', 'A', 'B']) - >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y']) + .. plot:: + :context: close-figs - The layout of boxplot can be adjusted giving a tuple to ``layout``: + >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1], + ... [6.4, 3.2, 1], [5.9, 3.0, 2]], + ... columns=['length', 'width', 'species']) + >>> ax1 = df.plot.scatter(x='length', + ... y='width', + ... c='DarkBlue') - .. plot:: - :context: close-figs + And now with the color determined by a column as well. - >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', - ... layout=(2, 1)) + .. plot:: + :context: close-figs - Additional formatting can be done to the boxplot, like suppressing the grid - (``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``) - or changing the fontsize (i.e. ``fontsize=15``): + >>> ax2 = df.plot.scatter(x='length', + ... y='width', + ... c='species', + ... colormap='viridis') + """ + return self(kind='scatter', x=x, y=y, s=s, c=c, **kwargs) - .. plot:: - :context: close-figs + def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, + **kwargs): + """ + Generate a hexagonal binning plot. - >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15) + Generate a hexagonal binning plot of `x` versus `y`. If `C` is `None` + (the default), this is a histogram of the number of occurrences + of the observations at ``(x[i], y[i])``. - The parameter ``return_type`` can be used to select the type of element - returned by `boxplot`. When ``return_type='axes'`` is selected, - the matplotlib axes on which the boxplot is drawn are returned: + If `C` is specified, specifies values at given coordinates + ``(x[i], y[i])``. 
These values are accumulated for each hexagonal + bin and then reduced according to `reduce_C_function`, + having as default the NumPy's mean function (:meth:`numpy.mean`). + (If `C` is specified, it must also be a 1-D sequence + of the same length as `x` and `y`, or a column label.) - >>> boxplot = df.boxplot(column=['Col1','Col2'], return_type='axes') - >>> type(boxplot) - + Parameters + ---------- + x : int or str + The column label or position for x points. + y : int or str + The column label or position for y points. + C : int or str, optional + The column label or position for the value of `(x, y)` point. + reduce_C_function : callable, default `np.mean` + Function of one argument that reduces all the values in a bin to + a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`). + gridsize : int or tuple of (int, int), default 100 + The number of hexagons in the x-direction. + The corresponding number of hexagons in the y-direction is + chosen in a way that the hexagons are approximately regular. + Alternatively, gridsize can be a tuple with two elements + specifying the number of hexagons in the x-direction and the + y-direction. + **kwds + Additional keyword arguments are documented in + :meth:`DataFrame.plot`. - When grouping with ``by``, a Series mapping columns to ``return_type`` - is returned: + Returns + ------- + matplotlib.AxesSubplot + The matplotlib ``Axes`` on which the hexbin is plotted. - >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', - ... return_type='axes') - >>> type(boxplot) - + See Also + -------- + DataFrame.plot : Make plots of a DataFrame. + matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib, + the matplotlib function that is used under the hood. - If ``return_type`` is `None`, a NumPy array of axes with the same shape - as ``layout`` is returned: + Examples + -------- + The following examples are generated with random data from + a normal distribution. 
- >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', - ... return_type=None) - >>> type(boxplot) - - """ - plot_backend = _get_plot_backend() - return plot_backend.boxplot(data, column=column, by=by, ax=ax, - fontsize=fontsize, rot=rot, grid=grid, - figsize=figsize, layout=layout, - return_type=return_type, **kwds) + .. plot:: + :context: close-figs + >>> n = 10000 + >>> df = pd.DataFrame({'x': np.random.randn(n), + ... 'y': np.random.randn(n)}) + >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20) -@Appender(boxplot.__doc__) -def boxplot_frame(self, column=None, by=None, ax=None, fontsize=None, rot=0, - grid=True, figsize=None, layout=None, - return_type=None, **kwds): - plot_backend = _get_plot_backend() - return plot_backend.boxplot_frame(self, column=column, by=by, ax=ax, - fontsize=fontsize, rot=rot, grid=grid, - figsize=figsize, layout=layout, - return_type=return_type, **kwds) + The next example uses `C` and `np.sum` as `reduce_C_function`. + Note that `'observations'` values ranges from 1 to 5 but the result + plot shows values up to more than 25. This is because of the + `reduce_C_function`. + .. plot:: + :context: close-figs -def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, - rot=0, grid=True, ax=None, figsize=None, - layout=None, sharex=False, sharey=True, **kwds): - """ - Make box plots from DataFrameGroupBy data. + >>> n = 500 + >>> df = pd.DataFrame({ + ... 'coord_x': np.random.uniform(-3, 3, size=n), + ... 'coord_y': np.random.uniform(30, 50, size=n), + ... 'observations': np.random.randint(1,5, size=n) + ... }) + >>> ax = df.plot.hexbin(x='coord_x', + ... y='coord_y', + ... C='observations', + ... reduce_C_function=np.sum, + ... gridsize=10, + ... 
cmap="viridis") + """ + if reduce_C_function is not None: + kwargs['reduce_C_function'] = reduce_C_function + if gridsize is not None: + kwargs['gridsize'] = gridsize - Parameters - ---------- - grouped : Grouped DataFrame - subplots : bool - * ``False`` - no subplots will be used - * ``True`` - create a subplot for each group - column : column name or list of names, or vector - Can be any valid input to groupby - fontsize : int or string - rot : label rotation angle - grid : Setting this to True will show the grid - ax : Matplotlib axis object, default None - figsize : A tuple (width, height) in inches - layout : tuple (optional) - (rows, columns) for the layout of the plot - sharex : bool, default False - Whether x-axes will be shared among subplots + return self(kind='hexbin', x=x, y=y, C=C, **kwargs) - .. versionadded:: 0.23.1 - sharey : bool, default True - Whether y-axes will be shared among subplots - .. versionadded:: 0.23.1 - `**kwds` : Keyword Arguments - All other plotting keyword arguments to be passed to - matplotlib's boxplot function +def _get_plot_backend(): + """ + Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`). - Returns - ------- - dict of key/value = group key/DataFrame.boxplot return value - or DataFrame.boxplot return value in case subplots=figures=False + The plotting system of pandas has been using matplotlib, but the idea here + is that it can also work with other third-party backends. In the future, + this function will return the backend from a pandas option, and all the + rest of the code in this file will use the backend specified there for the + plotting. 
- Examples - -------- - >>> import itertools - >>> tuples = [t for t in itertools.product(range(1000), range(4))] - >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1']) - >>> data = np.random.randn(len(index),4) - >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index) - >>> - >>> grouped = df.groupby(level='lvl1') - >>> boxplot_frame_groupby(grouped) - >>> - >>> grouped = df.unstack(level='lvl1').groupby(level=0, axis=1) - >>> boxplot_frame_groupby(grouped, subplots=False) + The backend is imported lazily, as matplotlib is a soft dependency, and + pandas can be used without it being installed. """ - plot_backend = _get_plot_backend() - return plot_backend.boxplot_frame_groupby( - grouped, subplots=subplots, column=column, fontsize=fontsize, rot=rot, - grid=grid, ax=ax, figsize=figsize, layout=layout, sharex=sharex, - sharey=sharey, **kwds) + backend_str = pandas.get_option('plotting.backend') + if backend_str == 'matplotlib': + backend_str = 'pandas.plotting._matplotlib' + return importlib.import_module(backend_str) From 9b4fc6d9d223d4415f991eea2b84931ac55c7974 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 1 Jul 2019 16:41:41 +0100 Subject: [PATCH 14/26] Fixing name of reuse_plot parameter --- pandas/plotting/_core.py | 2 +- pandas/plotting/_matplotlib/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 5bb3751ad9066..13afd05c6bbd2 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -576,7 +576,7 @@ def __call__(self, *args, **kwargs): data = self._parent.copy() if isinstance(data, pandas.core.dtypes.generic.ABCSeries): - kwargs['new_plot'] = True + kwargs['reuse_plot'] = True if kind in self._dataframe_kinds: if isinstance(data, ABCDataFrame): diff --git a/pandas/plotting/_matplotlib/__init__.py b/pandas/plotting/_matplotlib/__init__.py index 30e29d813d652..2a6cfffe798af 100644 --- a/pandas/plotting/_matplotlib/__init__.py +++ 
b/pandas/plotting/_matplotlib/__init__.py @@ -31,7 +31,7 @@ def plot(data, kind, **kwargs): - if kwargs.pop('new_plot', False): + if kwargs.pop('reuse_plot', False): ax = kwargs.get('ax') if ax is None and len(plt.get_fignums()) > 0: with plt.rc_context(): From 2597bc964ba269a69d479013a3aa1b9da9c4244f Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 1 Jul 2019 16:47:45 +0100 Subject: [PATCH 15/26] Fixing bug with matplotlib 2 --- pandas/plotting/_matplotlib/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_matplotlib/__init__.py b/pandas/plotting/_matplotlib/__init__.py index 2a6cfffe798af..8eac6897add0e 100644 --- a/pandas/plotting/_matplotlib/__init__.py +++ b/pandas/plotting/_matplotlib/__init__.py @@ -1,5 +1,3 @@ -import matplotlib.pyplot as plt - from pandas._config import get_option from pandas.plotting._matplotlib.boxplot import ( @@ -31,6 +29,10 @@ def plot(data, kind, **kwargs): + # Importing pyplot at the top of the file (before the converters are + # registered) causes problems in matplotlib 2 (converters seem to not + # work) + import matplotlib.pyplot as plt if kwargs.pop('reuse_plot', False): ax = kwargs.get('ax') if ax is None and len(plt.get_fignums()) > 0: From 57c4937139bfb3ad80f9feb39a0bf29749ce41aa Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 2 Jul 2019 10:35:10 +0100 Subject: [PATCH 16/26] Adding documentation and improving comments, based on Jeff review --- doc/source/development/extending.rst | 27 +++++++++++ pandas/plotting/__init__.py | 67 +++++++++++++++++++++++++--- pandas/plotting/_core.py | 20 ++++----- 3 files changed, 96 insertions(+), 18 deletions(-) diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index 363ec10d58bb6..b7e3c4f2e43d7 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -397,3 +397,30 @@ Below is an example to define two original properties, "internal_cache" as a tem # 
properties defined in _metadata are retained >>> df[['A', 'B']].added_property property + +.. _extending.plotting-backends: + +Plotting backends +----------------- + +Starting in 0.25 pandas can be extended with third-party plotting backends. The +main idea is letting users select a plotting backend different than the provided +one based on Matplotlib. For example: + +.. code-block:: python + + >>> pd.set_option('plotting.backend', 'backend.module') + >>> pd.Series([1, 2, 3]).plot() + +This would be more or less equivalent to: + +.. code-block:: python + + >>> import backend.module + >>> backend.module.plot(pd.Series([1, 2, 3])) + +The backend module can then use other visualization tools (Bokeh, Altair,...) +to generate the plots. + +More information on how to implement a third-party plotting backend can be found at +https://github.com/pandas-dev/pandas/blob/master/pandas/plotting/__init__.py#L1. diff --git a/pandas/plotting/__init__.py b/pandas/plotting/__init__.py index 403ca7c547948..57a45f0f18d90 100644 --- a/pandas/plotting/__init__.py +++ b/pandas/plotting/__init__.py @@ -1,5 +1,60 @@ """ -Plotting public API +Plotting public API. + +Authors of third-party plotting backends should implement a module with a +public ``plot(data, kind, **kwargs)``. The parameter `data` will contain +the data structure and can be a `Series` or a `DataFrame`. For example, +for ``df.plot()`` the parameter `data` will contain the DataFrame `df`. +In some cases, the data structure is transformed before being sent to +the backend (see PlotAccessor.__call__ in pandas/plotting/_core.py for +the exact transformations). + +The parameter `kind` will be one of: + +- line +- bar +- barh +- box +- hist +- kde +- area +- pie +- scatter +- hexbin + +See the pandas API reference for documentation on each kind of plot. + +Any other keyword argument is currently assumed to be backend specific, +but some parameters may be unified and added to the signature in the +future (e.g. 
`title` which should be useful for any backend). + +Currently, all the Matplotlib functions in pandas are accessed through +the selected backend. For example, `pandas.plotting.boxplot` (equivalent +to `DataFrame.boxplot`) is also accessed in the selected backend. This +is expected to change, and the exact API is under discussion. But with +the current version, backends are expected to implement the next functions: + +- plot (describe above, used for `Series.plot` and `DataFrame.plot`) +- hist_series and hist_frame (for `Series.hist` and `DataFrame.hist`) +- boxplot (`pandas.plotting.boxplot(df)` equivalent to `DataFrame.boxplot`) +- boxplot_frame and boxplot_frame_groupby +- tsplot (deprecated) +- register and deregister (register converters for the tick formats) +- Plots not called as `Series` and `DataFrame` methods: + - table + - andrews_curves + - autocorrelation_plot + - bootstrap_plot + - lag_plot + - parallel_coordinates + - radviz + - scatter_matrix + +Use the code in pandas/plotting/_matplotib.py and +https://github.com/pyviz/hvplot as a reference on how to write a backend. + +For the discussion about the API see +https://github.com/pandas-dev/pandas/issues/26747. 
""" from pandas.plotting._core import ( PlotAccessor, boxplot, boxplot_frame, boxplot_frame_groupby, hist_frame, @@ -10,9 +65,9 @@ parallel_coordinates, plot_params, radviz, register as register_matplotlib_converters, scatter_matrix, table) -__all__ = ['boxplot', 'boxplot_frame', 'boxplot_frame_groupby', 'hist_frame', - 'hist_series', 'PlotAccessor', - 'scatter_matrix', 'radviz', 'andrews_curves', 'bootstrap_plot', - 'parallel_coordinates', 'lag_plot', 'autocorrelation_plot', - 'table', 'plot_params', 'register_matplotlib_converters', +__all__ = ['PlotAccessor', 'boxplot', 'boxplot_frame', 'boxplot_frame_groupby', + 'hist_frame', 'hist_series', 'scatter_matrix', 'radviz', + 'andrews_curves', 'bootstrap_plot', 'parallel_coordinates', + 'lag_plot', 'autocorrelation_plot', 'table', 'plot_params', + 'register_matplotlib_converters', 'deregister_matplotlib_converters'] diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 13afd05c6bbd2..cc620c79f854f 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -402,11 +402,6 @@ class PlotAccessor(PandasObject): Make plots of Series or DataFrame using the backend specified by the option ``plotting.backend``. By default, matplotlib is used. - *New in version 0.17.0:* Each plot kind has a corresponding method on - the Series or DataFrame accessor, for example: - ``Series.plot(kind='line')`` is equivalent to - ``Series.plot.line()``. - Parameters ---------- data : Series or DataFrame @@ -517,13 +512,11 @@ def __init__(self, data): @staticmethod def _get_call_args(data, args, kwargs): """ - We used to have different accessors for Series and DataFrame. 
Their - signatures were different: - - - SeriesPlotMethods.__call__(kind, ..., **kwargs) - - DataFramePlotMethods.__call__(x, y, kind, ..., **kwargs) - - This function makes this unified `__call__` method compatible with both + This function makes calls to this accessor `__call__` method compatible + with the previous `SeriesPlotMethods.__call__` and + `DataFramePlotMethods.__call__`. Those had slightly different + signatures, since `DataFramePlotMethods` accepted `x` and `y` + parameters. """ if args and isinstance(data, ABCSeries): # TODO raise warning here, positional arguments shouldn't be @@ -573,6 +566,9 @@ def __call__(self, *args, **kwargs): raise ValueError('{} is not a valid plot kind'.format(kind)) plot_backend = _get_plot_backend() + # The original data structured can be transformed before passed to the + # backend. For example, for DataFrame is common to set the index as the + # `x` parameter, and return a Series with the parameter `y` as values. data = self._parent.copy() if isinstance(data, pandas.core.dtypes.generic.ABCSeries): From 263ee7a43dc1e3bce3daed14df81283d22d7a8aa Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 2 Jul 2019 11:12:41 +0100 Subject: [PATCH 17/26] Adding FutureWarning if Series.plot is called with positional arguments --- pandas/plotting/_core.py | 18 +++++++++++++----- pandas/tests/plotting/test_misc.py | 8 +++++++- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index cc620c79f854f..fa81beed4a56c 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1,5 +1,6 @@ import importlib from typing import List, Type # noqa +import warnings from pandas.util._decorators import Appender @@ -518,11 +519,6 @@ def _get_call_args(data, args, kwargs): signatures, since `DataFramePlotMethods` accepted `x` and `y` parameters. 
""" - if args and isinstance(data, ABCSeries): - # TODO raise warning here, positional arguments shouldn't be - # used anymore, so we can add x, y and kind to the signature - pass - if isinstance(data, ABCSeries): arg_def = [ ('kind', 'line'), ('ax', None), ('figsize', None), @@ -550,6 +546,18 @@ def _get_call_args(data, args, kwargs): 'Series or DataFrame').format( type(data).__name__)) + if args and isinstance(data, ABCSeries): + msg = ('`Series.plot()` should not be called with positional ' + 'arguments, only keyword arguments. The order of ' + 'positional arguments will change in the future. ' + 'Use `Series.plot({})` instead of `Series.plot({})`.') + positional_args = str(args)[1:-1] + keyword_args = ', '.join('{}={!r}'.format(name, value) + for (name, default), value + in zip(arg_def, args)) + warnings.warn(msg.format(keyword_args, positional_args), + FutureWarning, stacklevel=3) + pos_args = {name: value for value, (name, _) in zip(args, arg_def)} kwargs = dict(arg_def, **pos_args, **kwargs) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index b58854743a42d..4e2e413ac6db3 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -9,7 +9,7 @@ import pandas.util._test_decorators as td -from pandas import DataFrame +from pandas import DataFrame, Series from pandas.tests.plotting.common import TestPlotBase, _check_plot_works import pandas.util.testing as tm @@ -25,6 +25,12 @@ def test_import_error_message(): df.plot() +@td.skip_if_no_mpl +def test_series_plot_with_positional_arguments_warns(): + with tm.assert_produces_warning(FutureWarning): + Series([1, 2, 3]).plot('line', None) + + @td.skip_if_no_mpl class TestSeriesPlots(TestPlotBase): From 37fe165e3b6a85c2b67671da78c71afdaa4f2b2e Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 2 Jul 2019 12:38:58 +0100 Subject: [PATCH 18/26] Not passing default matplotlib parameters to backends (all known kwargs were being passed, now only the 
ones provided by the user) --- pandas/plotting/_core.py | 19 ++++++++++------ pandas/tests/plotting/test_misc.py | 35 ++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index fa81beed4a56c..2f46df2985703 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -511,7 +511,7 @@ def __init__(self, data): self._parent = data @staticmethod - def _get_call_args(data, args, kwargs): + def _get_call_args(backend_name, data, args, kwargs): """ This function makes calls to this accessor `__call__` method compatible with the previous `SeriesPlotMethods.__call__` and @@ -542,9 +542,9 @@ def _get_call_args(data, args, kwargs): ('yerr', None), ('xerr', None), ('secondary_y', False), ('sort_columns', False)] else: - return TypeError(('Called plot accessor for type {}, expected ' - 'Series or DataFrame').format( - type(data).__name__)) + raise TypeError(('Called plot accessor for type {}, expected ' + 'Series or DataFrame').format( + type(data).__name__)) if args and isinstance(data, ABCSeries): msg = ('`Series.plot()` should not be called with positional ' @@ -559,7 +559,10 @@ def _get_call_args(data, args, kwargs): FutureWarning, stacklevel=3) pos_args = {name: value for value, (name, _) in zip(args, arg_def)} - kwargs = dict(arg_def, **pos_args, **kwargs) + if backend_name == 'pandas.plotting._matplotlib': + kwargs = dict(arg_def, **pos_args, **kwargs) + else: + kwargs = dict(pos_args, **kwargs) x = kwargs.pop('x', None) y = kwargs.pop('y', None) @@ -567,13 +570,15 @@ def _get_call_args(data, args, kwargs): return x, y, kind, kwargs def __call__(self, *args, **kwargs): - x, y, kind, kwargs = self._get_call_args(self._parent, args, kwargs) + plot_backend = _get_plot_backend() + + x, y, kind, kwargs = self._get_call_args(plot_backend.__name__, + self._parent, args, kwargs) kind = self._kind_aliases.get(kind, kind) if kind not in self._all_kinds: raise ValueError('{} is 
not a valid plot kind'.format(kind)) - plot_backend = _get_plot_backend() # The original data structured can be transformed before passed to the # backend. For example, for DataFrame is common to set the index as the # `x` parameter, and return a Series with the parameter `y` as values. diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 4e2e413ac6db3..52508d9c453ad 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -31,6 +31,41 @@ def test_series_plot_with_positional_arguments_warns(): Series([1, 2, 3]).plot('line', None) +def test_get_accessor_args(): + func = plotting._core.PlotAccessor._get_call_args + + msg = 'Called plot accessor for type list, expected Series or DataFrame' + with pytest.raises(TypeError, match=msg): + func(backend_name='', data=[], args=[], kwargs={}) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + func(backend_name='', data=Series(), args=[''], kwargs={}) + + x, y, kind, kwargs = func(backend_name='', data=Series(), + args=['line', None], kwargs={}) + assert x is None + assert y is None + assert kind == 'line' + assert kwargs == {'ax': None} + + x, y, kind, kwargs = func(backend_name='', data=DataFrame(), + args=['x'], kwargs={'y': 'y', + 'kind': 'bar', + 'grid': False}) + assert x == 'x' + assert y == 'y' + assert kind == 'bar' + assert kwargs == {'grid': False} + + x, y, kind, kwargs = func(backend_name='pandas.plotting._matplotlib', + data=Series(), args=[], kwargs={}) + assert x is None + assert y is None + assert kind == 'line' + assert len(kwargs) == 22 + + @td.skip_if_no_mpl class TestSeriesPlots(TestPlotBase): From 0cf45147d7e0498f2ca6fc9a4a5c1d74360c5077 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 2 Jul 2019 14:34:51 +0100 Subject: [PATCH 19/26] Fixing test of plotting accessor parameters --- pandas/tests/plotting/test_misc.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git 
a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 52508d9c453ad..cba3f5571717b 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -40,10 +40,8 @@ def test_get_accessor_args(): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - func(backend_name='', data=Series(), args=[''], kwargs={}) - - x, y, kind, kwargs = func(backend_name='', data=Series(), - args=['line', None], kwargs={}) + x, y, kind, kwargs = func(backend_name='', data=Series(), + args=['line', None], kwargs={}) assert x is None assert y is None assert kind == 'line' From 4d70d5d98bf71649c438ed08ce339169d3f3aeb7 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 2 Jul 2019 14:36:58 +0100 Subject: [PATCH 20/26] Temporary not warning for Series.plot positional arguments (looks like this caused the parallel coordinates test failure) --- pandas/plotting/_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 2f46df2985703..b2c8fddc1b6c1 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -546,7 +546,7 @@ def _get_call_args(backend_name, data, args, kwargs): 'Series or DataFrame').format( type(data).__name__)) - if args and isinstance(data, ABCSeries): + if args and isinstance(data, ABCSeries) and False: # FIXME not generating the warning to see if parallel coordinates test is fixed in the CI msg = ('`Series.plot()` should not be called with positional ' 'arguments, only keyword arguments. The order of ' 'positional arguments will change in the future. ' From a2330b29360f9c69bef3b9188477eaf92cd04636 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 2 Jul 2019 15:37:20 +0100 Subject: [PATCH 21/26] Revert "Temporary not warning for Series.plot positional arguments (looks like this caused the parallel coordinates test failure)" This reverts commit 4d70d5d98bf71649c438ed08ce339169d3f3aeb7. 
--- pandas/plotting/_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index b2c8fddc1b6c1..2f46df2985703 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -546,7 +546,7 @@ def _get_call_args(backend_name, data, args, kwargs): 'Series or DataFrame').format( type(data).__name__)) - if args and isinstance(data, ABCSeries) and False: # FIXME not generating the warning to see if parallel coordinates test is fixed in the CI + if args and isinstance(data, ABCSeries): msg = ('`Series.plot()` should not be called with positional ' 'arguments, only keyword arguments. The order of ' 'positional arguments will change in the future. ' From 34d189f6b7c9ff03d72e9c382115db9f2e6074aa Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 2 Jul 2019 16:23:17 +0100 Subject: [PATCH 22/26] Adding debug info in the CI for failing test --- pandas/tests/plotting/test_misc.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index cba3f5571717b..603ee6c8dc313 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -262,6 +262,13 @@ def test_parallel_coordinates_with_sorted_labels(self): prev_next_tupels = zip([i for i in ordered_color_label_tuples[0:-1]], [i for i in ordered_color_label_tuples[1:]]) for prev, nxt in prev_next_tupels: + # FIXME: Showing in the CI what's in ordered_color_label_tuples + if ((isinstance(prev[1], list) and isinstance(nxt[1], str)) + or (isinstance(prev[0], list) + and isinstance(nxt[0], str))): + raise ValueError('ordered_color_label_tuples: {}'.format( + ordered_color_label_tuples)) + # labels and colors are ordered strictly increasing assert prev[1] < nxt[1] and prev[0] < nxt[0] From 58195856cefb5645104792fbaaa6e77a7ab11f3b Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 2 Jul 2019 17:10:28 +0100 Subject: [PATCH 23/26] Revert "Adding debug info 
in the CI for failing test" This reverts commit 34d189f6b7c9ff03d72e9c382115db9f2e6074aa. --- pandas/tests/plotting/test_misc.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 603ee6c8dc313..cba3f5571717b 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -262,13 +262,6 @@ def test_parallel_coordinates_with_sorted_labels(self): prev_next_tupels = zip([i for i in ordered_color_label_tuples[0:-1]], [i for i in ordered_color_label_tuples[1:]]) for prev, nxt in prev_next_tupels: - # FIXME: Showing in the CI what's in ordered_color_label_tuples - if ((isinstance(prev[1], list) and isinstance(nxt[1], str)) - or (isinstance(prev[0], list) - and isinstance(nxt[0], str))): - raise ValueError('ordered_color_label_tuples: {}'.format( - ordered_color_label_tuples)) - # labels and colors are ordered strictly increasing assert prev[1] < nxt[1] and prev[0] < nxt[0] From 29d7547ce49c0f4afb0296237e43ad2455f9b6c7 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 2 Jul 2019 17:17:11 +0100 Subject: [PATCH 24/26] Temporary removing the warning, to see if it's causing the andrews_curves test failure --- pandas/plotting/_core.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 2f46df2985703..02c55f36ef582 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1,6 +1,5 @@ import importlib from typing import List, Type # noqa -import warnings from pandas.util._decorators import Appender @@ -546,18 +545,6 @@ def _get_call_args(backend_name, data, args, kwargs): 'Series or DataFrame').format( type(data).__name__)) - if args and isinstance(data, ABCSeries): - msg = ('`Series.plot()` should not be called with positional ' - 'arguments, only keyword arguments. The order of ' - 'positional arguments will change in the future. 
' - 'Use `Series.plot({})` instead of `Series.plot({})`.') - positional_args = str(args)[1:-1] - keyword_args = ', '.join('{}={!r}'.format(name, value) - for (name, default), value - in zip(arg_def, args)) - warnings.warn(msg.format(keyword_args, positional_args), - FutureWarning, stacklevel=3) - pos_args = {name: value for value, (name, _) in zip(args, arg_def)} if backend_name == 'pandas.plotting._matplotlib': kwargs = dict(arg_def, **pos_args, **kwargs) From a5d0fd929f1b5a650b521670834e72e8d3612511 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 3 Jul 2019 09:23:38 +0100 Subject: [PATCH 25/26] Revert "Temporary removing the warning, to see if it's causing the andrews_curves test failure" This reverts commit 29d7547ce49c0f4afb0296237e43ad2455f9b6c7. --- pandas/plotting/_core.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 02c55f36ef582..2f46df2985703 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1,5 +1,6 @@ import importlib from typing import List, Type # noqa +import warnings from pandas.util._decorators import Appender @@ -545,6 +546,18 @@ def _get_call_args(backend_name, data, args, kwargs): 'Series or DataFrame').format( type(data).__name__)) + if args and isinstance(data, ABCSeries): + msg = ('`Series.plot()` should not be called with positional ' + 'arguments, only keyword arguments. The order of ' + 'positional arguments will change in the future. 
' + 'Use `Series.plot({})` instead of `Series.plot({})`.') + positional_args = str(args)[1:-1] + keyword_args = ', '.join('{}={!r}'.format(name, value) + for (name, default), value + in zip(arg_def, args)) + warnings.warn(msg.format(keyword_args, positional_args), + FutureWarning, stacklevel=3) + pos_args = {name: value for value, (name, _) in zip(args, arg_def)} if backend_name == 'pandas.plotting._matplotlib': kwargs = dict(arg_def, **pos_args, **kwargs) From ce544e116efa709477c773bb5a72e0e1591a7ef1 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 3 Jul 2019 15:07:22 +0100 Subject: [PATCH 26/26] Removing test that causes parallel_coordinates test to fail --- pandas/tests/plotting/test_misc.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index cba3f5571717b..b27df946aeacf 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -25,12 +25,6 @@ def test_import_error_message(): df.plot() -@td.skip_if_no_mpl -def test_series_plot_with_positional_arguments_warns(): - with tm.assert_produces_warning(FutureWarning): - Series([1, 2, 3]).plot('line', None) - - def test_get_accessor_args(): func = plotting._core.PlotAccessor._get_call_args