diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index eea1df35c7711..7de1c42246ad5 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -5,7 +5,7 @@ from pandas import to_datetime, date_range, Series, DataFrame, period_range from pandas.tseries.frequencies import infer_freq try: - from pandas.plotting._converter import DatetimeConverter + from pandas.plotting._matplotlib.converter import DatetimeConverter except ImportError: from pandas.tseries.converter import DatetimeConverter diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 48dfa57c47bf6..bb3275c27a4ac 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -101,7 +101,7 @@ from pandas.io.formats import format as fmt from pandas.io.formats.printing import pprint_thing -import pandas.plotting._core as gfx +import pandas.plotting # --------------------------------------------------------------------- # Docstring templates @@ -8041,9 +8041,9 @@ def isin(self, values): # ---------------------------------------------------------------------- # Add plotting methods to DataFrame - plot = CachedAccessor("plot", gfx.FramePlotMethods) - hist = gfx.hist_frame - boxplot = gfx.boxplot_frame + plot = CachedAccessor("plot", pandas.plotting.FramePlotMethods) + hist = pandas.plotting.hist_frame + boxplot = pandas.plotting.boxplot_frame sparse = CachedAccessor("sparse", SparseFrameAccessor) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 121244cde368a..9ded2450693dd 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -41,7 +41,7 @@ from pandas.core.series import Series from pandas.core.sparse.frame import SparseDataFrame -from pandas.plotting._core import boxplot_frame_groupby +from pandas.plotting import boxplot_frame_groupby NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"]) # TODO(typing) the return value on this callable should be any *scalar*. diff --git a/pandas/core/series.py b/pandas/core/series.py index 472d984234275..e276ca7dcd9ab 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -49,7 +49,7 @@ from pandas.core.tools.datetimes import to_datetime import pandas.io.formats.format as fmt -import pandas.plotting._core as gfx +import pandas.plotting __all__ = ['Series'] @@ -4502,12 +4502,12 @@ def to_period(self, freq=None, copy=True): str = CachedAccessor("str", StringMethods) dt = CachedAccessor("dt", CombinedDatetimelikeProperties) cat = CachedAccessor("cat", CategoricalAccessor) - plot = CachedAccessor("plot", gfx.SeriesPlotMethods) + plot = CachedAccessor("plot", pandas.plotting.SeriesPlotMethods) sparse = CachedAccessor("sparse", SparseAccessor) # ---------------------------------------------------------------------- # Add plotting methods to Series - hist = gfx.hist_series + hist = pandas.plotting.hist_series Series._setup_axes(['index'], info_axis=0, stat_axis=0, aliases={'rows': 0}, diff --git a/pandas/plotting/__init__.py b/pandas/plotting/__init__.py index ff5351bb6c6ea..ac983e7efd618 100644 --- a/pandas/plotting/__init__.py +++ b/pandas/plotting/__init__.py @@ -1,20 +1,18 @@ """ -Plotting api +Plotting public API """ +from pandas.plotting._core import ( + FramePlotMethods, SeriesPlotMethods, boxplot, boxplot_frame, + boxplot_frame_groupby, hist_frame, hist_series) +from pandas.plotting._misc import ( + andrews_curves, autocorrelation_plot, bootstrap_plot, + deregister as deregister_matplotlib_converters, lag_plot, + parallel_coordinates, plot_params, radviz, + register as register_matplotlib_converters, scatter_matrix, table) -# flake8: noqa - -from pandas.plotting._misc import (scatter_matrix, radviz, - andrews_curves, bootstrap_plot, - parallel_coordinates, lag_plot, - autocorrelation_plot) -from pandas.plotting._core import boxplot -from pandas.plotting._style import plot_params -from pandas.plotting._tools import table -try: - from pandas.plotting._converter import ( - register as register_matplotlib_converters) - from pandas.plotting._converter import ( - deregister as deregister_matplotlib_converters) -except ImportError: - pass +__all__ = ['boxplot', 'boxplot_frame', 'boxplot_frame_groupby', 'hist_frame', + 'hist_series', 'FramePlotMethods', 'SeriesPlotMethods', + 'scatter_matrix', 'radviz', 'andrews_curves', 'bootstrap_plot', + 'parallel_coordinates', 'lag_plot', 'autocorrelation_plot', + 'table', 'plot_params', 'register_matplotlib_converters', + 'deregister_matplotlib_converters'] diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 3f6a30c4639bc..81f5b5cb0f74c 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1,1826 +1,24 @@ -# being a bit too dynamic -from collections import namedtuple -import re -from typing import List, Optional, Type -import warnings +from typing import List, Type # noqa -import numpy as np +from pandas.util._decorators import Appender -from pandas._config import get_option - -from pandas.errors import AbstractMethodError -from pandas.util._decorators import Appender, cache_readonly - -from pandas.core.dtypes.common import ( - is_hashable, is_integer, is_iterator, is_list_like, is_number) -from pandas.core.dtypes.generic import ( - ABCDataFrame, ABCIndexClass, ABCMultiIndex, ABCPeriodIndex, ABCSeries) -from pandas.core.dtypes.missing import isna, notna, remove_na_arraylike +from pandas.core.dtypes.common import is_integer, is_list_like +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries +import pandas from pandas.core.base import PandasObject -import pandas.core.common as com from pandas.core.generic import _shared_doc_kwargs, _shared_docs -from pandas.io.formats.printing import pprint_thing -from pandas.plotting._compat import _mpl_ge_3_0_0 -from pandas.plotting._style import _get_standard_colors, plot_params -from pandas.plotting._tools import ( - _flatten, _get_all_lines, _get_xlim, _handle_shared_axes, _set_ticks_props, - _subplots, format_date_labels, table) - -try: - from pandas.plotting import _converter -except ImportError: - _HAS_MPL = False -else: - _HAS_MPL = True - if get_option('plotting.matplotlib.register_converters'): - _converter.register(explicit=False) - - -def _raise_if_no_mpl(): - # TODO(mpl_converter): remove once converter is explicit - if not _HAS_MPL: - raise ImportError("matplotlib is required for plotting.") - - -def _get_standard_kind(kind): - return {'density': 'kde'}.get(kind, kind) - - -def _gca(rc=None): - import matplotlib.pyplot as plt - with plt.rc_context(rc): - return plt.gca() - - -def _gcf(): - import matplotlib.pyplot as plt - return plt.gcf() - - -class MPLPlot: - """ - Base class for assembling a pandas plot using matplotlib - - Parameters - ---------- - data : - - """ - - @property - def _kind(self): - """Specify kind str. Must be overridden in child class""" - raise NotImplementedError - - _layout_type = 'vertical' - _default_rot = 0 - orientation = None # type: Optional[str] - _pop_attributes = ['label', 'style', 'logy', 'logx', 'loglog', - 'mark_right', 'stacked'] - _attr_defaults = {'logy': False, 'logx': False, 'loglog': False, - 'mark_right': True, 'stacked': False} - - def __init__(self, data, kind=None, by=None, subplots=False, sharex=None, - sharey=False, use_index=True, - figsize=None, grid=None, legend=True, rot=None, - ax=None, fig=None, title=None, xlim=None, ylim=None, - xticks=None, yticks=None, - sort_columns=False, fontsize=None, - secondary_y=False, colormap=None, - table=False, layout=None, **kwds): - - _raise_if_no_mpl() - _converter._WARN = False - self.data = data - self.by = by - - self.kind = kind - - self.sort_columns = sort_columns - - self.subplots = subplots - - if sharex is None: - if ax is None: - self.sharex = True - else: - # if we get an axis, the users should do the visibility - # setting... - self.sharex = False - else: - self.sharex = sharex - - self.sharey = sharey - self.figsize = figsize - self.layout = layout - - self.xticks = xticks - self.yticks = yticks - self.xlim = xlim - self.ylim = ylim - self.title = title - self.use_index = use_index - - self.fontsize = fontsize - - if rot is not None: - self.rot = rot - # need to know for format_date_labels since it's rotated to 30 by - # default - self._rot_set = True - else: - self._rot_set = False - self.rot = self._default_rot - - if grid is None: - grid = False if secondary_y else self.plt.rcParams['axes.grid'] - - self.grid = grid - self.legend = legend - self.legend_handles = [] - self.legend_labels = [] - - for attr in self._pop_attributes: - value = kwds.pop(attr, self._attr_defaults.get(attr, None)) - setattr(self, attr, value) - - self.ax = ax - self.fig = fig - self.axes = None - - # parse errorbar input if given - xerr = kwds.pop('xerr', None) - yerr = kwds.pop('yerr', None) - self.errors = {kw: self._parse_errorbars(kw, err) - for kw, err in zip(['xerr', 'yerr'], [xerr, yerr])} - - if not isinstance(secondary_y, (bool, tuple, list, - np.ndarray, ABCIndexClass)): - secondary_y = [secondary_y] - self.secondary_y = secondary_y - - # ugly TypeError if user passes matplotlib's `cmap` name. - # Probably better to accept either. - if 'cmap' in kwds and colormap: - raise TypeError("Only specify one of `cmap` and `colormap`.") - elif 'cmap' in kwds: - self.colormap = kwds.pop('cmap') - else: - self.colormap = colormap - - self.table = table - - self.kwds = kwds - - self._validate_color_args() - - def _validate_color_args(self): - if 'color' not in self.kwds and 'colors' in self.kwds: - warnings.warn(("'colors' is being deprecated. Please use 'color'" - "instead of 'colors'")) - colors = self.kwds.pop('colors') - self.kwds['color'] = colors - - if ('color' in self.kwds and self.nseries == 1 and - not is_list_like(self.kwds['color'])): - # support series.plot(color='green') - self.kwds['color'] = [self.kwds['color']] - - if ('color' in self.kwds and isinstance(self.kwds['color'], tuple) and - self.nseries == 1 and len(self.kwds['color']) in (3, 4)): - # support RGB and RGBA tuples in series plot - self.kwds['color'] = [self.kwds['color']] - - if ('color' in self.kwds or 'colors' in self.kwds) and \ - self.colormap is not None: - warnings.warn("'color' and 'colormap' cannot be used " - "simultaneously. Using 'color'") - - if 'color' in self.kwds and self.style is not None: - if is_list_like(self.style): - styles = self.style - else: - styles = [self.style] - # need only a single match - for s in styles: - if re.match('^[a-z]+?', s) is not None: - raise ValueError( - "Cannot pass 'style' string with a color " - "symbol and 'color' keyword argument. Please" - " use one or the other or pass 'style' " - "without a color symbol") - - def _iter_data(self, data=None, keep_index=False, fillna=None): - if data is None: - data = self.data - if fillna is not None: - data = data.fillna(fillna) - - # TODO: unused? - # if self.sort_columns: - # columns = com.try_sort(data.columns) - # else: - # columns = data.columns - - for col, values in data.iteritems(): - if keep_index is True: - yield col, values - else: - yield col, values.values - - @property - def nseries(self): - if self.data.ndim == 1: - return 1 - else: - return self.data.shape[1] - - def draw(self): - self.plt.draw_if_interactive() - - def generate(self): - self._args_adjust() - self._compute_plot_data() - self._setup_subplots() - self._make_plot() - self._add_table() - self._make_legend() - self._adorn_subplots() - - for ax in self.axes: - self._post_plot_logic_common(ax, self.data) - self._post_plot_logic(ax, self.data) - - def _args_adjust(self): - pass - - def _has_plotted_object(self, ax): - """check whether ax has data""" - return (len(ax.lines) != 0 or - len(ax.artists) != 0 or - len(ax.containers) != 0) - - def _maybe_right_yaxis(self, ax, axes_num): - if not self.on_right(axes_num): - # secondary axes may be passed via ax kw - return self._get_ax_layer(ax) - - if hasattr(ax, 'right_ax'): - # if it has right_ax proparty, ``ax`` must be left axes - return ax.right_ax - elif hasattr(ax, 'left_ax'): - # if it has left_ax proparty, ``ax`` must be right axes - return ax - else: - # otherwise, create twin axes - orig_ax, new_ax = ax, ax.twinx() - # TODO: use Matplotlib public API when available - new_ax._get_lines = orig_ax._get_lines - new_ax._get_patches_for_fill = orig_ax._get_patches_for_fill - orig_ax.right_ax, new_ax.left_ax = new_ax, orig_ax - - if not self._has_plotted_object(orig_ax): # no data on left y - orig_ax.get_yaxis().set_visible(False) - - if self.logy is True or self.loglog is True: - new_ax.set_yscale('log') - elif self.logy == 'sym' or self.loglog == 'sym': - new_ax.set_yscale('symlog') - return new_ax - - def _setup_subplots(self): - if self.subplots: - fig, axes = _subplots(naxes=self.nseries, - sharex=self.sharex, sharey=self.sharey, - figsize=self.figsize, ax=self.ax, - layout=self.layout, - layout_type=self._layout_type) - else: - if self.ax is None: - fig = self.plt.figure(figsize=self.figsize) - axes = fig.add_subplot(111) - else: - fig = self.ax.get_figure() - if self.figsize is not None: - fig.set_size_inches(self.figsize) - axes = self.ax - - axes = _flatten(axes) - - valid_log = {False, True, 'sym', None} - input_log = {self.logx, self.logy, self.loglog} - if input_log - valid_log: - invalid_log = next(iter((input_log - valid_log))) - raise ValueError( - "Boolean, None and 'sym' are valid options," - " '{}' is given.".format(invalid_log) - ) - - if self.logx is True or self.loglog is True: - [a.set_xscale('log') for a in axes] - elif self.logx == 'sym' or self.loglog == 'sym': - [a.set_xscale('symlog') for a in axes] - - if self.logy is True or self.loglog is True: - [a.set_yscale('log') for a in axes] - elif self.logy == 'sym' or self.loglog == 'sym': - [a.set_yscale('symlog') for a in axes] - - self.fig = fig - self.axes = axes - - @property - def result(self): - """ - Return result axes - """ - if self.subplots: - if self.layout is not None and not is_list_like(self.ax): - return self.axes.reshape(*self.layout) - else: - return self.axes - else: - sec_true = isinstance(self.secondary_y, bool) and self.secondary_y - all_sec = (is_list_like(self.secondary_y) and - len(self.secondary_y) == self.nseries) - if (sec_true or all_sec): - # if all data is plotted on secondary, return right axes - return self._get_ax_layer(self.axes[0], primary=False) - else: - return self.axes[0] - - def _compute_plot_data(self): - data = self.data - - if isinstance(data, ABCSeries): - label = self.label - if label is None and data.name is None: - label = 'None' - data = data.to_frame(name=label) - - # GH16953, _convert is needed as fallback, for ``Series`` - # with ``dtype == object`` - data = data._convert(datetime=True, timedelta=True) - numeric_data = data.select_dtypes(include=[np.number, - "datetime", - "datetimetz", - "timedelta"]) - - try: - is_empty = numeric_data.empty - except AttributeError: - is_empty = not len(numeric_data) - - # no non-numeric frames or series allowed - if is_empty: - raise TypeError('no numeric data to plot') - - # GH25587: cast ExtensionArray of pandas (IntegerArray, etc.) to - # np.ndarray before plot. - numeric_data = numeric_data.copy() - for col in numeric_data: - numeric_data[col] = np.asarray(numeric_data[col]) - - self.data = numeric_data - - def _make_plot(self): - raise AbstractMethodError(self) - - def _add_table(self): - if self.table is False: - return - elif self.table is True: - data = self.data.transpose() - else: - data = self.table - ax = self._get_ax(0) - table(ax, data) - - def _post_plot_logic_common(self, ax, data): - """Common post process for each axes""" - - if self.orientation == 'vertical' or self.orientation is None: - self._apply_axis_properties(ax.xaxis, rot=self.rot, - fontsize=self.fontsize) - self._apply_axis_properties(ax.yaxis, fontsize=self.fontsize) - - if hasattr(ax, 'right_ax'): - self._apply_axis_properties(ax.right_ax.yaxis, - fontsize=self.fontsize) - - elif self.orientation == 'horizontal': - self._apply_axis_properties(ax.yaxis, rot=self.rot, - fontsize=self.fontsize) - self._apply_axis_properties(ax.xaxis, fontsize=self.fontsize) - - if hasattr(ax, 'right_ax'): - self._apply_axis_properties(ax.right_ax.yaxis, - fontsize=self.fontsize) - else: # pragma no cover - raise ValueError - - def _post_plot_logic(self, ax, data): - """Post process for each axes. Overridden in child classes""" - pass - - def _adorn_subplots(self): - """Common post process unrelated to data""" - if len(self.axes) > 0: - all_axes = self._get_subplots() - nrows, ncols = self._get_axes_layout() - _handle_shared_axes(axarr=all_axes, nplots=len(all_axes), - naxes=nrows * ncols, nrows=nrows, - ncols=ncols, sharex=self.sharex, - sharey=self.sharey) - - for ax in self.axes: - if self.yticks is not None: - ax.set_yticks(self.yticks) - - if self.xticks is not None: - ax.set_xticks(self.xticks) - - if self.ylim is not None: - ax.set_ylim(self.ylim) - - if self.xlim is not None: - ax.set_xlim(self.xlim) - - ax.grid(self.grid) - - if self.title: - if self.subplots: - if is_list_like(self.title): - if len(self.title) != self.nseries: - msg = ('The length of `title` must equal the number ' - 'of columns if using `title` of type `list` ' - 'and `subplots=True`.\n' - 'length of title = {}\n' - 'number of columns = {}').format( - len(self.title), self.nseries) - raise ValueError(msg) - - for (ax, title) in zip(self.axes, self.title): - ax.set_title(title) - else: - self.fig.suptitle(self.title) - else: - if is_list_like(self.title): - msg = ('Using `title` of type `list` is not supported ' - 'unless `subplots=True` is passed') - raise ValueError(msg) - self.axes[0].set_title(self.title) - - def _apply_axis_properties(self, axis, rot=None, fontsize=None): - """ Tick creation within matplotlib is reasonably expensive and is - internally deferred until accessed as Ticks are created/destroyed - multiple times per draw. It's therefore beneficial for us to avoid - accessing unless we will act on the Tick. - """ - - if rot is not None or fontsize is not None: - # rot=0 is a valid setting, hence the explicit None check - labels = axis.get_majorticklabels() + axis.get_minorticklabels() - for label in labels: - if rot is not None: - label.set_rotation(rot) - if fontsize is not None: - label.set_fontsize(fontsize) - - @property - def legend_title(self): - if not isinstance(self.data.columns, ABCMultiIndex): - name = self.data.columns.name - if name is not None: - name = pprint_thing(name) - return name - else: - stringified = map(pprint_thing, - self.data.columns.names) - return ','.join(stringified) - - def _add_legend_handle(self, handle, label, index=None): - if label is not None: - if self.mark_right and index is not None: - if self.on_right(index): - label = label + ' (right)' - self.legend_handles.append(handle) - self.legend_labels.append(label) - - def _make_legend(self): - ax, leg = self._get_ax_legend(self.axes[0]) - - handles = [] - labels = [] - title = '' - - if not self.subplots: - if leg is not None: - title = leg.get_title().get_text() - handles = leg.legendHandles - labels = [x.get_text() for x in leg.get_texts()] - - if self.legend: - if self.legend == 'reverse': - self.legend_handles = reversed(self.legend_handles) - self.legend_labels = reversed(self.legend_labels) - - handles += self.legend_handles - labels += self.legend_labels - if self.legend_title is not None: - title = self.legend_title - - if len(handles) > 0: - ax.legend(handles, labels, loc='best', title=title) - - elif self.subplots and self.legend: - for ax in self.axes: - if ax.get_visible(): - ax.legend(loc='best') - - def _get_ax_legend(self, ax): - leg = ax.get_legend() - other_ax = (getattr(ax, 'left_ax', None) or - getattr(ax, 'right_ax', None)) - other_leg = None - if other_ax is not None: - other_leg = other_ax.get_legend() - if leg is None and other_leg is not None: - leg = other_leg - ax = other_ax - return ax, leg - - @cache_readonly - def plt(self): - import matplotlib.pyplot as plt - return plt - - _need_to_set_index = False - - def _get_xticks(self, convert_period=False): - index = self.data.index - is_datetype = index.inferred_type in ('datetime', 'date', - 'datetime64', 'time') - - if self.use_index: - if convert_period and isinstance(index, ABCPeriodIndex): - self.data = self.data.reindex(index=index.sort_values()) - x = self.data.index.to_timestamp()._mpl_repr() - elif index.is_numeric(): - """ - Matplotlib supports numeric values or datetime objects as - xaxis values. Taking LBYL approach here, by the time - matplotlib raises exception when using non numeric/datetime - values for xaxis, several actions are already taken by plt. - """ - x = index._mpl_repr() - elif is_datetype: - self.data = self.data[notna(self.data.index)] - self.data = self.data.sort_index() - x = self.data.index._mpl_repr() - else: - self._need_to_set_index = True - x = list(range(len(index))) - else: - x = list(range(len(index))) - - return x - - @classmethod - def _plot(cls, ax, x, y, style=None, is_errorbar=False, **kwds): - mask = isna(y) - if mask.any(): - y = np.ma.array(y) - y = np.ma.masked_where(mask, y) - - if isinstance(x, ABCIndexClass): - x = x._mpl_repr() - - if is_errorbar: - if 'xerr' in kwds: - kwds['xerr'] = np.array(kwds.get('xerr')) - if 'yerr' in kwds: - kwds['yerr'] = np.array(kwds.get('yerr')) - return ax.errorbar(x, y, **kwds) - else: - # prevent style kwarg from going to errorbar, where it is - # unsupported - if style is not None: - args = (x, y, style) - else: - args = (x, y) - return ax.plot(*args, **kwds) - - def _get_index_name(self): - if isinstance(self.data.index, ABCMultiIndex): - name = self.data.index.names - if com._any_not_none(*name): - name = ','.join(pprint_thing(x) for x in name) - else: - name = None - else: - name = self.data.index.name - if name is not None: - name = pprint_thing(name) - - return name - - @classmethod - def _get_ax_layer(cls, ax, primary=True): - """get left (primary) or right (secondary) axes""" - if primary: - return getattr(ax, 'left_ax', ax) - else: - return getattr(ax, 'right_ax', ax) - - def _get_ax(self, i): - # get the twinx ax if appropriate - if self.subplots: - ax = self.axes[i] - ax = self._maybe_right_yaxis(ax, i) - self.axes[i] = ax - else: - ax = self.axes[0] - ax = self._maybe_right_yaxis(ax, i) - - ax.get_yaxis().set_visible(True) - return ax - - def on_right(self, i): - if isinstance(self.secondary_y, bool): - return self.secondary_y - - if isinstance(self.secondary_y, (tuple, list, - np.ndarray, ABCIndexClass)): - return self.data.columns[i] in self.secondary_y - - def _apply_style_colors(self, colors, kwds, col_num, label): - """ - Manage style and color based on column number and its label. - Returns tuple of appropriate style and kwds which "color" may be added. - """ - style = None - if self.style is not None: - if isinstance(self.style, list): - try: - style = self.style[col_num] - except IndexError: - pass - elif isinstance(self.style, dict): - style = self.style.get(label, style) - else: - style = self.style - - has_color = 'color' in kwds or self.colormap is not None - nocolor_style = style is None or re.match('[a-z]+', style) is None - if (has_color or self.subplots) and nocolor_style: - kwds['color'] = colors[col_num % len(colors)] - return style, kwds - - def _get_colors(self, num_colors=None, color_kwds='color'): - if num_colors is None: - num_colors = self.nseries - - return _get_standard_colors(num_colors=num_colors, - colormap=self.colormap, - color=self.kwds.get(color_kwds)) - - def _parse_errorbars(self, label, err): - """ - Look for error keyword arguments and return the actual errorbar data - or return the error DataFrame/dict - - Error bars can be specified in several ways: - Series: the user provides a pandas.Series object of the same - length as the data - ndarray: provides a np.ndarray of the same length as the data - DataFrame/dict: error values are paired with keys matching the - key in the plotted DataFrame - str: the name of the column within the plotted DataFrame - """ - - if err is None: - return None - - def match_labels(data, e): - e = e.reindex(data.index) - return e - - # key-matched DataFrame - if isinstance(err, ABCDataFrame): - - err = match_labels(self.data, err) - # key-matched dict - elif isinstance(err, dict): - pass - - # Series of error values - elif isinstance(err, ABCSeries): - # broadcast error series across data - err = match_labels(self.data, err) - err = np.atleast_2d(err) - err = np.tile(err, (self.nseries, 1)) - - # errors are a column in the dataframe - elif isinstance(err, str): - evalues = self.data[err].values - self.data = self.data[self.data.columns.drop(err)] - err = np.atleast_2d(evalues) - err = np.tile(err, (self.nseries, 1)) - - elif is_list_like(err): - if is_iterator(err): - err = np.atleast_2d(list(err)) - else: - # raw error values - err = np.atleast_2d(err) - - err_shape = err.shape - - # asymmetrical error bars - if err.ndim == 3: - if (err_shape[0] != self.nseries) or \ - (err_shape[1] != 2) or \ - (err_shape[2] != len(self.data)): - msg = "Asymmetrical error bars should be provided " + \ - "with the shape (%u, 2, %u)" % \ - (self.nseries, len(self.data)) - raise ValueError(msg) - - # broadcast errors to each data series - if len(err) == 1: - err = np.tile(err, (self.nseries, 1)) - - elif is_number(err): - err = np.tile([err], (self.nseries, len(self.data))) - - else: - msg = "No valid {label} detected".format(label=label) - raise ValueError(msg) - - return err - - def _get_errorbars(self, label=None, index=None, xerr=True, yerr=True): - errors = {} - - for kw, flag in zip(['xerr', 'yerr'], [xerr, yerr]): - if flag: - err = self.errors[kw] - # user provided label-matched dataframe of errors - if isinstance(err, (ABCDataFrame, dict)): - if label is not None and label in err.keys(): - err = err[label] - else: - err = None - elif index is not None and err is not None: - err = err[index] - - if err is not None: - errors[kw] = err - return errors - - def _get_subplots(self): - from matplotlib.axes import Subplot - return [ax for ax in self.axes[0].get_figure().get_axes() - if isinstance(ax, Subplot)] - - def _get_axes_layout(self): - axes = self._get_subplots() - x_set = set() - y_set = set() - for ax in axes: - # check axes coordinates to estimate layout - points = ax.get_position().get_points() - x_set.add(points[0][0]) - y_set.add(points[0][1]) - return (len(y_set), len(x_set)) - - -class PlanePlot(MPLPlot): - """ - Abstract class for plotting on plane, currently scatter and hexbin. - """ - - _layout_type = 'single' - - def __init__(self, data, x, y, **kwargs): - MPLPlot.__init__(self, data, **kwargs) - if x is None or y is None: - raise ValueError(self._kind + ' requires an x and y column') - if is_integer(x) and not self.data.columns.holds_integer(): - x = self.data.columns[x] - if is_integer(y) and not self.data.columns.holds_integer(): - y = self.data.columns[y] - if len(self.data[x]._get_numeric_data()) == 0: - raise ValueError(self._kind + ' requires x column to be numeric') - if len(self.data[y]._get_numeric_data()) == 0: - raise ValueError(self._kind + ' requires y column to be numeric') - - self.x = x - self.y = y - - @property - def nseries(self): - return 1 - - def _post_plot_logic(self, ax, data): - x, y = self.x, self.y - ax.set_ylabel(pprint_thing(y)) - ax.set_xlabel(pprint_thing(x)) - - def _plot_colorbar(self, ax, **kwds): - # Addresses issues #10611 and #10678: - # When plotting scatterplots and hexbinplots in IPython - # inline backend the colorbar axis height tends not to - # exactly match the parent axis height. - # The difference is due to small fractional differences - # in floating points with similar representation. - # To deal with this, this method forces the colorbar - # height to take the height of the parent axes. - # For a more detailed description of the issue - # see the following link: - # https://github.com/ipython/ipython/issues/11215 - img = ax.collections[0] - cbar = self.fig.colorbar(img, ax=ax, **kwds) - - if _mpl_ge_3_0_0(): - # The workaround below is no longer necessary. - return - - points = ax.get_position().get_points() - cbar_points = cbar.ax.get_position().get_points() - - cbar.ax.set_position([cbar_points[0, 0], - points[0, 1], - cbar_points[1, 0] - cbar_points[0, 0], - points[1, 1] - points[0, 1]]) - # To see the discrepancy in axis heights uncomment - # the following two lines: - # print(points[1, 1] - points[0, 1]) - # print(cbar_points[1, 1] - cbar_points[0, 1]) - - -class ScatterPlot(PlanePlot): - _kind = 'scatter' - - def __init__(self, data, x, y, s=None, c=None, **kwargs): - if s is None: - # hide the matplotlib default for size, in case we want to change - # the handling of this argument later - s = 20 - super().__init__(data, x, y, s=s, **kwargs) - if is_integer(c) and not self.data.columns.holds_integer(): - c = self.data.columns[c] - self.c = c - - def _make_plot(self): - x, y, c, data = self.x, self.y, self.c, self.data - ax = self.axes[0] - - c_is_column = is_hashable(c) and c in self.data.columns - - # plot a colorbar only if a colormap is provided or necessary - cb = self.kwds.pop('colorbar', self.colormap or c_is_column) - - # pandas uses colormap, matplotlib uses cmap. - cmap = self.colormap or 'Greys' - cmap = self.plt.cm.get_cmap(cmap) - color = self.kwds.pop("color", None) - if c is not None and color is not None: - raise TypeError('Specify exactly one of `c` and `color`') - elif c is None and color is None: - c_values = self.plt.rcParams['patch.facecolor'] - elif color is not None: - c_values = color - elif c_is_column: - c_values = self.data[c].values - else: - c_values = c - - if self.legend and hasattr(self, 'label'): - label = self.label - else: - label = None - scatter = ax.scatter(data[x].values, data[y].values, c=c_values, - label=label, cmap=cmap, **self.kwds) - if cb: - cbar_label = c if c_is_column else '' - self._plot_colorbar(ax, label=cbar_label) - - if label is not None: - self._add_legend_handle(scatter, label) - else: - self.legend = False - - errors_x = self._get_errorbars(label=x, index=0, yerr=False) - errors_y = self._get_errorbars(label=y, index=0, xerr=False) - if len(errors_x) > 0 or len(errors_y) > 0: - err_kwds = dict(errors_x, **errors_y) - err_kwds['ecolor'] = scatter.get_facecolor()[0] - ax.errorbar(data[x].values, data[y].values, - linestyle='none', **err_kwds) - - -class HexBinPlot(PlanePlot): - _kind = 'hexbin' - - def __init__(self, data, x, y, C=None, **kwargs): - super().__init__(data, x, y, **kwargs) - if is_integer(C) and not self.data.columns.holds_integer(): - C = self.data.columns[C] - self.C = C - - def _make_plot(self): - x, y, data, C = self.x, self.y, self.data, self.C - ax = self.axes[0] - # pandas uses colormap, matplotlib uses cmap. - cmap = self.colormap or 'BuGn' - cmap = self.plt.cm.get_cmap(cmap) - cb = self.kwds.pop('colorbar', True) - - if C is None: - c_values = None - else: - c_values = data[C].values - - ax.hexbin(data[x].values, data[y].values, C=c_values, cmap=cmap, - **self.kwds) - if cb: - self._plot_colorbar(ax) - - def _make_legend(self): +# Automatically registering converters was deprecated in 0.21, but +# the deprecation warning wasn't showing until 0.24 +# This block will be eventually removed, but it's not clear when +if pandas.get_option('plotting.matplotlib.register_converters'): + try: + from .misc import register + register(explicit=False) + except ImportError: pass - -class LinePlot(MPLPlot): - _kind = 'line' - _default_rot = 0 - orientation = 'vertical' - - def __init__(self, data, **kwargs): - MPLPlot.__init__(self, data, **kwargs) - if self.stacked: - self.data = self.data.fillna(value=0) - self.x_compat = plot_params['x_compat'] - if 'x_compat' in self.kwds: - self.x_compat = bool(self.kwds.pop('x_compat')) - - def _is_ts_plot(self): - # this is slightly deceptive - return not self.x_compat and self.use_index and self._use_dynamic_x() - - def _use_dynamic_x(self): - from pandas.plotting._timeseries import _use_dynamic_x - return _use_dynamic_x(self._get_ax(0), self.data) - - def _make_plot(self): - if self._is_ts_plot(): - from pandas.plotting._timeseries import _maybe_convert_index - data = _maybe_convert_index(self._get_ax(0), self.data) - - x = data.index # dummy, not used - plotf = self._ts_plot - it = self._iter_data(data=data, keep_index=True) - else: - x = self._get_xticks(convert_period=True) - plotf = self._plot - it = self._iter_data() - - stacking_id = self._get_stacking_id() - is_errorbar = com._any_not_none(*self.errors.values()) - - colors = self._get_colors() - for i, (label, y) in enumerate(it): - ax = self._get_ax(i) - kwds = self.kwds.copy() - style, kwds = self._apply_style_colors(colors, kwds, i, label) - - errors = self._get_errorbars(label=label, index=i) - kwds = dict(kwds, **errors) - - label = pprint_thing(label) # .encode('utf-8') - kwds['label'] = label - - newlines = plotf(ax, x, y, style=style, column_num=i, - stacking_id=stacking_id, - is_errorbar=is_errorbar, - **kwds) - self._add_legend_handle(newlines[0], label, index=i) - - lines = _get_all_lines(ax) - left, right = _get_xlim(lines) - ax.set_xlim(left, right) - - @classmethod - def _plot(cls, ax, x, y, style=None, column_num=None, - stacking_id=None, **kwds): - # column_num is used to get the target column from protf in line and - # area plots - if column_num == 0: - cls._initialize_stacker(ax, stacking_id, len(y)) - y_values = cls._get_stacked_values(ax, stacking_id, y, kwds['label']) - lines = MPLPlot._plot(ax, x, y_values, style=style, **kwds) - cls._update_stacker(ax, stacking_id, y) - return lines - - @classmethod - def _ts_plot(cls, ax, x, data, style=None, **kwds): - from pandas.plotting._timeseries import (_maybe_resample, - _decorate_axes, - format_dateaxis) - # accept x to be consistent with normal plot func, - # x is not passed to tsplot as it uses data.index as x coordinate - # column_num must be in kwds for stacking purpose - freq, data = _maybe_resample(data, ax, kwds) - - # Set ax with freq info - _decorate_axes(ax, freq, kwds) - # digging deeper - if hasattr(ax, 'left_ax'): - _decorate_axes(ax.left_ax, freq, kwds) - if hasattr(ax, 'right_ax'): - _decorate_axes(ax.right_ax, freq, kwds) - ax._plot_data.append((data, cls._kind, kwds)) - - lines = cls._plot(ax, data.index, data.values, style=style, **kwds) - # set date formatter, locators and rescale limits - format_dateaxis(ax, ax.freq, data.index) - return lines - - def _get_stacking_id(self): - if self.stacked: - return id(self.data) - else: - return None - - @classmethod - def _initialize_stacker(cls, ax, stacking_id, n): - if stacking_id is None: - return - if not hasattr(ax, '_stacker_pos_prior'): - ax._stacker_pos_prior = {} - if not hasattr(ax, '_stacker_neg_prior'): - ax._stacker_neg_prior = {} - ax._stacker_pos_prior[stacking_id] = np.zeros(n) - ax._stacker_neg_prior[stacking_id] = np.zeros(n) - - @classmethod - def _get_stacked_values(cls, ax, stacking_id, values, label): - if stacking_id is None: - return values - if not hasattr(ax, '_stacker_pos_prior'): - # stacker may not be initialized for subplots - cls._initialize_stacker(ax, stacking_id, len(values)) - - if (values >= 0).all(): - return ax._stacker_pos_prior[stacking_id] + values - elif (values <= 0).all(): - return ax._stacker_neg_prior[stacking_id] + values - - raise ValueError('When stacked is True, each column must be either ' - 'all positive or negative.' - '{0} contains both positive and negative values' - .format(label)) - - @classmethod - def _update_stacker(cls, ax, stacking_id, values): - if stacking_id is None: - return - if (values >= 0).all(): - ax._stacker_pos_prior[stacking_id] += values - elif (values <= 0).all(): - ax._stacker_neg_prior[stacking_id] += values - - def _post_plot_logic(self, ax, data): - from matplotlib.ticker import FixedLocator - - def get_label(i): - try: - return pprint_thing(data.index[i]) - except Exception: - return '' - - if self._need_to_set_index: - xticks = ax.get_xticks() - xticklabels = [get_label(x) for x in xticks] - ax.set_xticklabels(xticklabels) - ax.xaxis.set_major_locator(FixedLocator(xticks)) - - condition = (not self._use_dynamic_x() and - data.index.is_all_dates and - not self.subplots or - (self.subplots and self.sharex)) - - index_name = self._get_index_name() - - if condition: - # irregular TS rotated 30 deg. by default - # probably a better place to check / set this. - if not self._rot_set: - self.rot = 30 - format_date_labels(ax, rot=self.rot) - - if index_name is not None and self.use_index: - ax.set_xlabel(index_name) - - -class AreaPlot(LinePlot): - _kind = 'area' - - def __init__(self, data, **kwargs): - kwargs.setdefault('stacked', True) - data = data.fillna(value=0) - LinePlot.__init__(self, data, **kwargs) - - if not self.stacked: - # use smaller alpha to distinguish overlap - self.kwds.setdefault('alpha', 0.5) - - if self.logy or self.loglog: - raise ValueError("Log-y scales are not supported in area plot") - - @classmethod - def _plot(cls, ax, x, y, style=None, column_num=None, - stacking_id=None, is_errorbar=False, **kwds): - - if column_num == 0: - cls._initialize_stacker(ax, stacking_id, len(y)) - y_values = cls._get_stacked_values(ax, stacking_id, y, kwds['label']) - - # need to remove label, because subplots uses mpl legend as it is - line_kwds = kwds.copy() - line_kwds.pop('label') - lines = MPLPlot._plot(ax, x, y_values, style=style, **line_kwds) - - # get data from the line to get coordinates for fill_between - xdata, y_values = lines[0].get_data(orig=False) - - # unable to use ``_get_stacked_values`` here to get starting point - if stacking_id is None: - start = np.zeros(len(y)) - elif (y >= 0).all(): - start = ax._stacker_pos_prior[stacking_id] - elif (y <= 0).all(): - start = ax._stacker_neg_prior[stacking_id] - else: - start = np.zeros(len(y)) - - if 'color' not in kwds: - kwds['color'] = lines[0].get_color() - - rect = ax.fill_between(xdata, start, y_values, **kwds) - cls._update_stacker(ax, stacking_id, y) - - # LinePlot expects list of artists - res = [rect] - return res - - def _post_plot_logic(self, ax, data): - LinePlot._post_plot_logic(self, ax, data) - - if self.ylim is None: - if (data >= 0).all().all(): - ax.set_ylim(0, None) - elif (data <= 0).all().all(): - ax.set_ylim(None, 0) - - -class BarPlot(MPLPlot): - _kind = 'bar' - _default_rot = 90 - orientation = 'vertical' - - def __init__(self, data, **kwargs): - # we have to treat a series differently than a - # 1-column DataFrame w.r.t. color handling - self._is_series = isinstance(data, ABCSeries) - self.bar_width = kwargs.pop('width', 0.5) - pos = kwargs.pop('position', 0.5) - kwargs.setdefault('align', 'center') - self.tick_pos = np.arange(len(data)) - - self.bottom = kwargs.pop('bottom', 0) - self.left = kwargs.pop('left', 0) - - self.log = kwargs.pop('log', False) - MPLPlot.__init__(self, data, **kwargs) - - if self.stacked or self.subplots: - self.tickoffset = self.bar_width * pos - if kwargs['align'] == 'edge': - self.lim_offset = self.bar_width / 2 - else: - self.lim_offset = 0 - else: - if kwargs['align'] == 'edge': - w = self.bar_width / self.nseries - self.tickoffset = self.bar_width * (pos - 0.5) + w * 0.5 - self.lim_offset = w * 0.5 - else: - self.tickoffset = self.bar_width * pos - self.lim_offset = 0 - - self.ax_pos = self.tick_pos - self.tickoffset - - def _args_adjust(self): - if is_list_like(self.bottom): - self.bottom = np.array(self.bottom) - if is_list_like(self.left): - self.left = np.array(self.left) - - @classmethod - def _plot(cls, ax, x, y, w, start=0, log=False, **kwds): - return ax.bar(x, y, w, bottom=start, log=log, **kwds) - - @property - def _start_base(self): - return self.bottom - - def _make_plot(self): - import matplotlib as mpl - - colors = self._get_colors() - ncolors = len(colors) - - pos_prior = neg_prior = np.zeros(len(self.data)) - K = self.nseries - - for i, (label, y) in enumerate(self._iter_data(fillna=0)): - ax = self._get_ax(i) - kwds = self.kwds.copy() - if self._is_series: - kwds['color'] = colors - else: - kwds['color'] = colors[i % ncolors] - - errors = self._get_errorbars(label=label, index=i) - kwds = dict(kwds, **errors) - - label = pprint_thing(label) - - if (('yerr' in kwds) or ('xerr' in kwds)) \ - and (kwds.get('ecolor') is None): - kwds['ecolor'] = mpl.rcParams['xtick.color'] - - start = 0 - if self.log and (y >= 1).all(): - start = 1 - start = start + self._start_base - - if self.subplots: - w = self.bar_width / 2 - rect = self._plot(ax, self.ax_pos + w, y, self.bar_width, - start=start, label=label, - log=self.log, **kwds) - ax.set_title(label) - elif self.stacked: - mask = y > 0 - start = np.where(mask, pos_prior, neg_prior) + self._start_base - w = self.bar_width / 2 - rect = self._plot(ax, self.ax_pos + w, y, self.bar_width, - start=start, label=label, - log=self.log, **kwds) - pos_prior = pos_prior + np.where(mask, y, 0) - neg_prior = neg_prior + np.where(mask, 0, y) - else: - w = self.bar_width / K - rect = self._plot(ax, self.ax_pos + (i + 0.5) * w, y, w, - start=start, label=label, - log=self.log, **kwds) - self._add_legend_handle(rect, label, index=i) - - def _post_plot_logic(self, ax, data): - if self.use_index: - str_index = [pprint_thing(key) for key in data.index] - else: - str_index = [pprint_thing(key) for key in range(data.shape[0])] - name = self._get_index_name() - - s_edge = self.ax_pos[0] - 0.25 + self.lim_offset - e_edge = self.ax_pos[-1] + 0.25 + self.bar_width + self.lim_offset - - self._decorate_ticks(ax, name, str_index, s_edge, e_edge) - - def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): - ax.set_xlim((start_edge, end_edge)) - ax.set_xticks(self.tick_pos) - ax.set_xticklabels(ticklabels) - if name is not None and self.use_index: - ax.set_xlabel(name) - - -class BarhPlot(BarPlot): - _kind = 'barh' - _default_rot = 0 - orientation = 'horizontal' - - @property - def _start_base(self): - return self.left - - @classmethod - def _plot(cls, ax, x, y, w, start=0, log=False, **kwds): - return ax.barh(x, y, w, left=start, log=log, **kwds) - - def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): - # horizontal bars - ax.set_ylim((start_edge, end_edge)) - ax.set_yticks(self.tick_pos) - ax.set_yticklabels(ticklabels) - if name is not None and self.use_index: - ax.set_ylabel(name) - - -class HistPlot(LinePlot): - _kind = 'hist' - - def __init__(self, data, bins=10, bottom=0, **kwargs): - self.bins = bins # use mpl default - self.bottom = bottom - # Do not call LinePlot.__init__ which may fill nan - MPLPlot.__init__(self, data, **kwargs) - - def _args_adjust(self): - if is_integer(self.bins): - # create common bin edge - values = (self.data._convert(datetime=True)._get_numeric_data()) - values = np.ravel(values) - values = values[~isna(values)] - - hist, self.bins = np.histogram( - values, bins=self.bins, - range=self.kwds.get('range', None), - weights=self.kwds.get('weights', None)) - - if is_list_like(self.bottom): - self.bottom = np.array(self.bottom) - - @classmethod - def _plot(cls, ax, y, style=None, bins=None, bottom=0, column_num=0, - stacking_id=None, **kwds): - if column_num == 0: - cls._initialize_stacker(ax, stacking_id, len(bins) - 1) - y = y[~isna(y)] - - base = np.zeros(len(bins) - 1) - bottom = bottom + \ - cls._get_stacked_values(ax, stacking_id, base, kwds['label']) - # ignore style - n, bins, patches = ax.hist(y, bins=bins, bottom=bottom, **kwds) - cls._update_stacker(ax, stacking_id, n) - return patches - - def _make_plot(self): - colors = self._get_colors() - stacking_id = self._get_stacking_id() - - for i, (label, y) in enumerate(self._iter_data()): - ax = self._get_ax(i) - - kwds = self.kwds.copy() - - label = pprint_thing(label) - kwds['label'] = label - - style, kwds = self._apply_style_colors(colors, kwds, i, label) - if style is not None: - kwds['style'] = style - - kwds = self._make_plot_keywords(kwds, y) - artists = self._plot(ax, y, column_num=i, - stacking_id=stacking_id, **kwds) - self._add_legend_handle(artists[0], label, index=i) - - def _make_plot_keywords(self, kwds, y): - """merge BoxPlot/KdePlot properties to passed kwds""" - # y is required for KdePlot - kwds['bottom'] = self.bottom - kwds['bins'] = self.bins - return kwds - - def _post_plot_logic(self, ax, data): - if self.orientation == 'horizontal': - ax.set_xlabel('Frequency') - else: - ax.set_ylabel('Frequency') - - @property - def orientation(self): - if self.kwds.get('orientation', None) == 'horizontal': - return 'horizontal' - else: - return 'vertical' - - -_kde_docstring = """ - Generate Kernel Density Estimate plot using Gaussian kernels. - - In statistics, `kernel density estimation`_ (KDE) is a non-parametric - way to estimate the probability density function (PDF) of a random - variable. This function uses Gaussian kernels and includes automatic - bandwidth determination. - - .. _kernel density estimation: - https://en.wikipedia.org/wiki/Kernel_density_estimation - - Parameters - ---------- - bw_method : str, scalar or callable, optional - The method used to calculate the estimator bandwidth. This can be - 'scott', 'silverman', a scalar constant or a callable. - If None (default), 'scott' is used. - See :class:`scipy.stats.gaussian_kde` for more information. - ind : NumPy array or integer, optional - Evaluation points for the estimated PDF. If None (default), - 1000 equally spaced points are used. If `ind` is a NumPy array, the - KDE is evaluated at the points passed. If `ind` is an integer, - `ind` number of equally spaced points are used. - **kwds : optional - Additional keyword arguments are documented in - :meth:`pandas.%(this-datatype)s.plot`. - - Returns - ------- - matplotlib.axes.Axes or numpy.ndarray of them - - See Also - -------- - scipy.stats.gaussian_kde : Representation of a kernel-density - estimate using Gaussian kernels. This is the function used - internally to estimate the PDF. - %(sibling-datatype)s.plot.kde : Generate a KDE plot for a - %(sibling-datatype)s. - - Examples - -------- - %(examples)s - """ - - -class KdePlot(HistPlot): - _kind = 'kde' - orientation = 'vertical' - - def __init__(self, data, bw_method=None, ind=None, **kwargs): - MPLPlot.__init__(self, data, **kwargs) - self.bw_method = bw_method - self.ind = ind - - def _args_adjust(self): - pass - - def _get_ind(self, y): - if self.ind is None: - # np.nanmax() and np.nanmin() ignores the missing values - sample_range = np.nanmax(y) - np.nanmin(y) - ind = np.linspace(np.nanmin(y) - 0.5 * sample_range, - np.nanmax(y) + 0.5 * sample_range, 1000) - elif is_integer(self.ind): - sample_range = np.nanmax(y) - np.nanmin(y) - ind = np.linspace(np.nanmin(y) - 0.5 * sample_range, - np.nanmax(y) + 0.5 * sample_range, self.ind) - else: - ind = self.ind - return ind - - @classmethod - def _plot(cls, ax, y, style=None, bw_method=None, ind=None, - column_num=None, stacking_id=None, **kwds): - from scipy.stats import gaussian_kde - - y = remove_na_arraylike(y) - gkde = gaussian_kde(y, bw_method=bw_method) - - y = gkde.evaluate(ind) - lines = MPLPlot._plot(ax, ind, y, style=style, **kwds) - return lines - - def _make_plot_keywords(self, kwds, y): - kwds['bw_method'] = self.bw_method - kwds['ind'] = self._get_ind(y) - return kwds - - def _post_plot_logic(self, ax, data): - ax.set_ylabel('Density') - - -class PiePlot(MPLPlot): - _kind = 'pie' - _layout_type = 'horizontal' - - def __init__(self, data, kind=None, **kwargs): - data = data.fillna(value=0) - if (data < 0).any().any(): - raise ValueError("{0} doesn't allow negative values".format(kind)) - MPLPlot.__init__(self, data, kind=kind, **kwargs) - - def _args_adjust(self): - self.grid = False - self.logy = False - self.logx = False - self.loglog = False - - def _validate_color_args(self): - pass - - def _make_plot(self): - colors = self._get_colors( - num_colors=len(self.data), color_kwds='colors') - self.kwds.setdefault('colors', colors) - - for i, (label, y) in enumerate(self._iter_data()): - ax = self._get_ax(i) - if label is not None: - label = pprint_thing(label) - ax.set_ylabel(label) - - kwds = self.kwds.copy() - - def blank_labeler(label, value): - if value == 0: - return '' - else: - return label - - idx = [pprint_thing(v) for v in self.data.index] - labels = kwds.pop('labels', idx) - # labels is used for each wedge's labels - # Blank out labels for values of 0 so they don't overlap - # with nonzero wedges - if labels is not None: - blabels = [blank_labeler(l, value) for - l, value in zip(labels, y)] - else: - blabels = None - results = ax.pie(y, labels=blabels, **kwds) - - if kwds.get('autopct', None) is not None: - patches, texts, autotexts = results - else: - patches, texts = results - autotexts = [] - - if self.fontsize is not None: - for t in texts + autotexts: - t.set_fontsize(self.fontsize) - - # leglabels is used for legend labels - leglabels = labels if labels is not None else idx - for p, l in zip(patches, leglabels): - self._add_legend_handle(p, l) - - -class BoxPlot(LinePlot): - _kind = 'box' - _layout_type = 'horizontal' - - _valid_return_types = (None, 'axes', 'dict', 'both') - # namedtuple to hold results - BP = namedtuple("Boxplot", ['ax', 'lines']) - - def __init__(self, data, return_type='axes', **kwargs): - # Do not call LinePlot.__init__ which may fill nan - if return_type not in self._valid_return_types: - raise ValueError( - "return_type must be {None, 'axes', 'dict', 'both'}") - - self.return_type = return_type - MPLPlot.__init__(self, data, **kwargs) - - def _args_adjust(self): - if self.subplots: - # Disable label ax sharing. Otherwise, all subplots shows last - # column label - if self.orientation == 'vertical': - self.sharex = False - else: - self.sharey = False - - @classmethod - def _plot(cls, ax, y, column_num=None, return_type='axes', **kwds): - if y.ndim == 2: - y = [remove_na_arraylike(v) for v in y] - # Boxplot fails with empty arrays, so need to add a NaN - # if any cols are empty - # GH 8181 - y = [v if v.size > 0 else np.array([np.nan]) for v in y] - else: - y = remove_na_arraylike(y) - bp = ax.boxplot(y, **kwds) - - if return_type == 'dict': - return bp, bp - elif return_type == 'both': - return cls.BP(ax=ax, lines=bp), bp - else: - return ax, bp - - def _validate_color_args(self): - if 'color' in self.kwds: - if self.colormap is not None: - warnings.warn("'color' and 'colormap' cannot be used " - "simultaneously. Using 'color'") - self.color = self.kwds.pop('color') - - if isinstance(self.color, dict): - valid_keys = ['boxes', 'whiskers', 'medians', 'caps'] - for key, values in self.color.items(): - if key not in valid_keys: - raise ValueError("color dict contains invalid " - "key '{0}' " - "The key must be either {1}" - .format(key, valid_keys)) - else: - self.color = None - - # get standard colors for default - colors = _get_standard_colors(num_colors=3, - colormap=self.colormap, - color=None) - # use 2 colors by default, for box/whisker and median - # flier colors isn't needed here - # because it can be specified by ``sym`` kw - self._boxes_c = colors[0] - self._whiskers_c = colors[0] - self._medians_c = colors[2] - self._caps_c = 'k' # mpl default - - def _get_colors(self, num_colors=None, color_kwds='color'): - pass - - def maybe_color_bp(self, bp): - if isinstance(self.color, dict): - boxes = self.color.get('boxes', self._boxes_c) - whiskers = self.color.get('whiskers', self._whiskers_c) - medians = self.color.get('medians', self._medians_c) - caps = self.color.get('caps', self._caps_c) - else: - # Other types are forwarded to matplotlib - # If None, use default colors - boxes = self.color or self._boxes_c - whiskers = self.color or self._whiskers_c - medians = self.color or self._medians_c - caps = self.color or self._caps_c - - from matplotlib.artist import setp - setp(bp['boxes'], color=boxes, alpha=1) - setp(bp['whiskers'], color=whiskers, alpha=1) - setp(bp['medians'], color=medians, alpha=1) - setp(bp['caps'], color=caps, alpha=1) - - def _make_plot(self): - if self.subplots: - from pandas.core.series import Series - self._return_obj = Series() - - for i, (label, y) in enumerate(self._iter_data()): - ax = self._get_ax(i) - kwds = self.kwds.copy() - - ret, bp = self._plot(ax, y, column_num=i, - return_type=self.return_type, **kwds) - self.maybe_color_bp(bp) - self._return_obj[label] = ret - - label = [pprint_thing(label)] - self._set_ticklabels(ax, label) - else: - y = self.data.values.T - ax = self._get_ax(0) - kwds = self.kwds.copy() - - ret, bp = self._plot(ax, y, column_num=0, - return_type=self.return_type, **kwds) - self.maybe_color_bp(bp) - self._return_obj = ret - - labels = [l for l, _ in self._iter_data()] - labels = [pprint_thing(l) for l in labels] - if not self.use_index: - labels = [pprint_thing(key) for key in range(len(labels))] - self._set_ticklabels(ax, labels) - - def _set_ticklabels(self, ax, labels): - if self.orientation == 'vertical': - ax.set_xticklabels(labels) - else: - ax.set_yticklabels(labels) - - def _make_legend(self): - pass - - def _post_plot_logic(self, ax, data): - pass - - @property - def orientation(self): - if self.kwds.get('vert', True): - return 'vertical' - else: - return 'horizontal' - - @property - def result(self): - if self.return_type is None: - return super().result - else: - return self._return_obj - - -# kinds supported by both dataframe and series -_common_kinds = ['line', 'bar', 'barh', - 'kde', 'density', 'area', 'hist', 'box'] -# kinds supported by dataframe -_dataframe_kinds = ['scatter', 'hexbin'] -# kinds supported only by series or dataframe single column -_series_kinds = ['pie'] -_all_kinds = _common_kinds + _dataframe_kinds + _series_kinds - -_klasses = [LinePlot, BarPlot, BarhPlot, KdePlot, HistPlot, BoxPlot, - ScatterPlot, HexBinPlot, AreaPlot, PiePlot] \ - # type: List[Type[MPLPlot]] - -_plot_klass = {klass._kind: klass for klass in _klasses} - - -def _plot(data, x=None, y=None, subplots=False, - ax=None, kind='line', **kwds): - kind = _get_standard_kind(kind.lower().strip()) - if kind in _all_kinds: - klass = _plot_klass[kind] - else: - raise ValueError("%r is not a valid plot kind" % kind) - - if kind in _dataframe_kinds: - if isinstance(data, ABCDataFrame): - plot_obj = klass(data, x=x, y=y, subplots=subplots, ax=ax, - kind=kind, **kwds) - else: - raise ValueError("plot kind %r can only be used for data frames" - % kind) - - elif kind in _series_kinds: - if isinstance(data, ABCDataFrame): - if y is None and subplots is False: - msg = "{0} requires either y column or 'subplots=True'" - raise ValueError(msg.format(kind)) - elif y is not None: - if is_integer(y) and not data.columns.holds_integer(): - y = data.columns[y] - # converted to series actually. copy to not modify - data = data[y].copy() - data.index.name = y - plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) - else: - if isinstance(data, ABCDataFrame): - data_cols = data.columns - if x is not None: - if is_integer(x) and not data.columns.holds_integer(): - x = data_cols[x] - elif not isinstance(data[x], ABCSeries): - raise ValueError("x must be a label or position") - data = data.set_index(x) - - if y is not None: - # check if we have y as int or list of ints - int_ylist = is_list_like(y) and all(is_integer(c) for c in y) - int_y_arg = is_integer(y) or int_ylist - if int_y_arg and not data.columns.holds_integer(): - y = data_cols[y] - - label_kw = kwds['label'] if 'label' in kwds else False - for kw in ['xerr', 'yerr']: - if (kw in kwds) and \ - (isinstance(kwds[kw], str) or - is_integer(kwds[kw])): - try: - kwds[kw] = data[kwds[kw]] - except (IndexError, KeyError, TypeError): - pass - - # don't overwrite - data = data[y].copy() - - if isinstance(data, ABCSeries): - label_name = label_kw or y - data.name = label_name - else: - match = is_list_like(label_kw) and len(label_kw) == len(y) - if label_kw and not match: - raise ValueError( - "label should be list-like and same length as y" - ) - label_name = label_kw or data.columns - data.columns = label_name - plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) - - plot_obj.generate() - plot_obj.draw() - return plot_obj.result - - df_kind = """- 'scatter' : scatter plot - 'hexbin' : hexbin plot""" series_kind = "" @@ -1974,54 +172,6 @@ def _plot(data, x=None, y=None, subplots=False, %(klass_note)s """ - -@Appender(_shared_docs['plot'] % _shared_doc_df_kwargs) -def plot_frame(data, x=None, y=None, kind='line', ax=None, - subplots=False, sharex=None, sharey=False, layout=None, - figsize=None, use_index=True, title=None, grid=None, - legend=True, style=None, logx=False, logy=False, loglog=False, - xticks=None, yticks=None, xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - secondary_y=False, sort_columns=False, - **kwds): - return _plot(data, kind=kind, x=x, y=y, ax=ax, - subplots=subplots, sharex=sharex, sharey=sharey, - layout=layout, figsize=figsize, use_index=use_index, - title=title, grid=grid, legend=legend, - style=style, logx=logx, logy=logy, loglog=loglog, - xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim, - rot=rot, fontsize=fontsize, colormap=colormap, table=table, - yerr=yerr, xerr=xerr, - secondary_y=secondary_y, sort_columns=sort_columns, - **kwds) - - -@Appender(_shared_docs['plot'] % _shared_doc_series_kwargs) -def plot_series(data, kind='line', ax=None, # Series unique - figsize=None, use_index=True, title=None, grid=None, - legend=False, style=None, logx=False, logy=False, loglog=False, - xticks=None, yticks=None, xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - label=None, secondary_y=False, # Series unique - **kwds): - - import matplotlib.pyplot as plt - if ax is None and len(plt.get_fignums()) > 0: - ax = _gca() - ax = MPLPlot._get_ax_layer(ax) - return _plot(data, kind=kind, ax=ax, - figsize=figsize, use_index=use_index, title=title, - grid=grid, legend=legend, - style=style, logx=logx, logy=logy, loglog=loglog, - xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim, - rot=rot, fontsize=fontsize, colormap=colormap, table=table, - yerr=yerr, xerr=xerr, - label=label, secondary_y=secondary_y, - **kwds) - - _shared_docs['boxplot'] = """ Make a box plot from DataFrame columns. @@ -2185,148 +335,97 @@ def plot_series(data, kind='line', ax=None, # Series unique """ +_shared_docs['kde'] = """ + Generate Kernel Density Estimate plot using Gaussian kernels. -@Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) -def boxplot(data, column=None, by=None, ax=None, fontsize=None, - rot=0, grid=True, figsize=None, layout=None, return_type=None, - **kwds): - - # validate return_type: - if return_type not in BoxPlot._valid_return_types: - raise ValueError("return_type must be {'axes', 'dict', 'both'}") - - if isinstance(data, ABCSeries): - data = data.to_frame('x') - column = 'x' - - def _get_colors(): - # num_colors=3 is required as method maybe_color_bp takes the colors - # in positions 0 and 2. - return _get_standard_colors(color=kwds.get('color'), num_colors=3) - - def maybe_color_bp(bp): - if 'color' not in kwds: - from matplotlib.artist import setp - setp(bp['boxes'], color=colors[0], alpha=1) - setp(bp['whiskers'], color=colors[0], alpha=1) - setp(bp['medians'], color=colors[2], alpha=1) - - def plot_group(keys, values, ax): - keys = [pprint_thing(x) for x in keys] - values = [np.asarray(remove_na_arraylike(v)) for v in values] - bp = ax.boxplot(values, **kwds) - if fontsize is not None: - ax.tick_params(axis='both', labelsize=fontsize) - if kwds.get('vert', 1): - ax.set_xticklabels(keys, rotation=rot) - else: - ax.set_yticklabels(keys, rotation=rot) - maybe_color_bp(bp) - - # Return axes in multiplot case, maybe revisit later # 985 - if return_type == 'dict': - return bp - elif return_type == 'both': - return BoxPlot.BP(ax=ax, lines=bp) - else: - return ax + In statistics, `kernel density estimation`_ (KDE) is a non-parametric + way to estimate the probability density function (PDF) of a random + variable. This function uses Gaussian kernels and includes automatic + bandwidth determination. - colors = _get_colors() - if column is None: - columns = None - else: - if isinstance(column, (list, tuple)): - columns = column - else: - columns = [column] - - if by is not None: - # Prefer array return type for 2-D plots to match the subplot layout - # https://github.com/pandas-dev/pandas/pull/12216#issuecomment-241175580 - result = _grouped_plot_by_column(plot_group, data, columns=columns, - by=by, grid=grid, figsize=figsize, - ax=ax, layout=layout, - return_type=return_type) - else: - if return_type is None: - return_type = 'axes' - if layout is not None: - raise ValueError("The 'layout' keyword is not supported when " - "'by' is None") - - if ax is None: - rc = {'figure.figsize': figsize} if figsize is not None else {} - ax = _gca(rc) - data = data._get_numeric_data() - if columns is None: - columns = data.columns - else: - data = data[columns] + .. _kernel density estimation: + https://en.wikipedia.org/wiki/Kernel_density_estimation - result = plot_group(columns, data.values.T, ax) - ax.grid(grid) + Parameters + ---------- + bw_method : str, scalar or callable, optional + The method used to calculate the estimator bandwidth. This can be + 'scott', 'silverman', a scalar constant or a callable. + If None (default), 'scott' is used. + See :class:`scipy.stats.gaussian_kde` for more information. + ind : NumPy array or integer, optional + Evaluation points for the estimated PDF. If None (default), + 1000 equally spaced points are used. If `ind` is a NumPy array, the + KDE is evaluated at the points passed. If `ind` is an integer, + `ind` number of equally spaced points are used. + **kwds : optional + Additional keyword arguments are documented in + :meth:`pandas.%(this-datatype)s.plot`. - return result + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + See Also + -------- + scipy.stats.gaussian_kde : Representation of a kernel-density + estimate using Gaussian kernels. This is the function used + internally to estimate the PDF. + %(sibling-datatype)s.plot.kde : Generate a KDE plot for a + %(sibling-datatype)s. -@Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) -def boxplot_frame(self, column=None, by=None, ax=None, fontsize=None, rot=0, - grid=True, figsize=None, layout=None, - return_type=None, **kwds): - import matplotlib.pyplot as plt - _converter._WARN = False - ax = boxplot(self, column=column, by=by, ax=ax, fontsize=fontsize, - grid=grid, rot=rot, figsize=figsize, layout=layout, - return_type=return_type, **kwds) - plt.draw_if_interactive() - return ax + Examples + -------- + %(examples)s + """ -def scatter_plot(data, x, y, by=None, ax=None, figsize=None, grid=False, - **kwargs): +def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, + xrot=None, ylabelsize=None, yrot=None, figsize=None, + bins=10, **kwds): """ - Make a scatter plot from two DataFrame columns + Draw histogram of the input series using matplotlib. Parameters ---------- - data : DataFrame - x : Column name for the x-axis values - y : Column name for the y-axis values - ax : Matplotlib axis object - figsize : A tuple (width, height) in inches - grid : Setting this to True will show the grid - kwargs : other plotting keyword arguments - To be passed to scatter function + by : object, optional + If passed, then used to form histograms for separate groups + ax : matplotlib axis object + If not passed, uses gca() + grid : bool, default True + Whether to show axis grid lines + xlabelsize : int, default None + If specified changes the x-axis label size + xrot : float, default None + rotation of x axis labels + ylabelsize : int, default None + If specified changes the y-axis label size + yrot : float, default None + rotation of y axis labels + figsize : tuple, default None + figure size in inches by default + bins : integer or sequence, default 10 + Number of histogram bins to be used. If an integer is given, bins + 1 + bin edges are calculated and returned. If bins is a sequence, gives + bin edges, including left edge of first bin and right edge of last + bin. In this case, bins is returned unmodified. + `**kwds` : keywords + To be passed to the actual plotting function Returns ------- - matplotlib.Figure - """ - import matplotlib.pyplot as plt - - kwargs.setdefault('edgecolors', 'none') - - def plot_group(group, ax): - xvals = group[x].values - yvals = group[y].values - ax.scatter(xvals, yvals, **kwargs) - ax.grid(grid) - - if by is not None: - fig = _grouped_plot(plot_group, data, by=by, figsize=figsize, ax=ax) - else: - if ax is None: - fig = plt.figure() - ax = fig.add_subplot(111) - else: - fig = ax.get_figure() - plot_group(data, ax) - ax.set_ylabel(pprint_thing(y)) - ax.set_xlabel(pprint_thing(x)) - - ax.grid(grid) + matplotlib.AxesSubplot + A histogram plot. - return fig + See Also + -------- + matplotlib.axes.Axes.hist : Plot a histogram using matplotlib. + """ + plot_backend = _get_plot_backend() + return plot_backend.hist_series(self, by=by, ax=ax, grid=grid, + xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot, + figsize=figsize, bins=bins, **kwds) def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, @@ -2409,170 +508,35 @@ def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, ... }, index= ['pig', 'rabbit', 'duck', 'chicken', 'horse']) >>> hist = df.hist(bins=3) """ - _raise_if_no_mpl() - _converter._WARN = False - if by is not None: - axes = grouped_hist(data, column=column, by=by, ax=ax, grid=grid, - figsize=figsize, sharex=sharex, sharey=sharey, - layout=layout, bins=bins, xlabelsize=xlabelsize, - xrot=xrot, ylabelsize=ylabelsize, - yrot=yrot, **kwds) - return axes - - if column is not None: - if not isinstance(column, (list, np.ndarray, ABCIndexClass)): - column = [column] - data = data[column] - data = data._get_numeric_data() - naxes = len(data.columns) - - if naxes == 0: - raise ValueError("hist method requires numerical columns, " - "nothing to plot.") - - fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False, - sharex=sharex, sharey=sharey, figsize=figsize, - layout=layout) - _axes = _flatten(axes) - - for i, col in enumerate(com.try_sort(data.columns)): - ax = _axes[i] - ax.hist(data[col].dropna().values, bins=bins, **kwds) - ax.set_title(col) - ax.grid(grid) - - _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot) - fig.subplots_adjust(wspace=0.3, hspace=0.3) - - return axes - - -def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, - xrot=None, ylabelsize=None, yrot=None, figsize=None, - bins=10, **kwds): - """ - Draw histogram of the input series using matplotlib. - - Parameters - ---------- - by : object, optional - If passed, then used to form histograms for separate groups - ax : matplotlib axis object - If not passed, uses gca() - grid : bool, default True - Whether to show axis grid lines - xlabelsize : int, default None - If specified changes the x-axis label size - xrot : float, default None - rotation of x axis labels - ylabelsize : int, default None - If specified changes the y-axis label size - yrot : float, default None - rotation of y axis labels - figsize : tuple, default None - figure size in inches by default - bins : integer or sequence, default 10 - Number of histogram bins to be used. If an integer is given, bins + 1 - bin edges are calculated and returned. If bins is a sequence, gives - bin edges, including left edge of first bin and right edge of last - bin. In this case, bins is returned unmodified. - `**kwds` : keywords - To be passed to the actual plotting function - - Returns - ------- - matplotlib.AxesSubplot - A histogram plot. - - See Also - -------- - matplotlib.axes.Axes.hist : Plot a histogram using matplotlib. - """ - import matplotlib.pyplot as plt - - if by is None: - if kwds.get('layout', None) is not None: - raise ValueError("The 'layout' keyword is not supported when " - "'by' is None") - # hack until the plotting interface is a bit more unified - fig = kwds.pop('figure', plt.gcf() if plt.get_fignums() else - plt.figure(figsize=figsize)) - if (figsize is not None and tuple(figsize) != - tuple(fig.get_size_inches())): - fig.set_size_inches(*figsize, forward=True) - if ax is None: - ax = fig.gca() - elif ax.get_figure() != fig: - raise AssertionError('passed axis not bound to passed figure') - values = self.dropna().values - - ax.hist(values, bins=bins, **kwds) - ax.grid(grid) - axes = np.array([ax]) - - _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot) - - else: - if 'figure' in kwds: - raise ValueError("Cannot pass 'figure' when using the " - "'by' argument, since a new 'Figure' instance " - "will be created") - axes = grouped_hist(self, by=by, ax=ax, grid=grid, figsize=figsize, - bins=bins, xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot, **kwds) - - if hasattr(axes, 'ndim'): - if axes.ndim == 1 and len(axes) == 1: - return axes[0] - return axes - - -def grouped_hist(data, column=None, by=None, ax=None, bins=50, figsize=None, - layout=None, sharex=False, sharey=False, rot=90, grid=True, - xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, - **kwargs): - """ - Grouped histogram - - Parameters - ---------- - data : Series/DataFrame - column : object, optional - by : object, optional - ax : axes, optional - bins : int, default 50 - figsize : tuple, optional - layout : optional - sharex : bool, default False - sharey : bool, default False - rot : int, default 90 - grid : bool, default True - kwargs : dict, keyword arguments passed to matplotlib.Axes.hist - - Returns - ------- - collection of Matplotlib Axes - """ - _raise_if_no_mpl() - _converter._WARN = False - - def plot_group(group, ax): - ax.hist(group.dropna().values, bins=bins, **kwargs) + plot_backend = _get_plot_backend() + return plot_backend.hist_frame(data, column=column, by=by, grid=grid, + xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot, + ax=ax, sharex=sharex, sharey=sharey, + figsize=figsize, layout=layout, bins=bins, + **kwds) - xrot = xrot or rot - fig, axes = _grouped_plot(plot_group, data, column=column, - by=by, sharex=sharex, sharey=sharey, ax=ax, - figsize=figsize, layout=layout, rot=rot) +@Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) +def boxplot(data, column=None, by=None, ax=None, fontsize=None, + rot=0, grid=True, figsize=None, layout=None, return_type=None, + **kwds): + plot_backend = _get_plot_backend() + return plot_backend.boxplot(data, column=column, by=by, ax=ax, + fontsize=fontsize, rot=rot, grid=grid, + figsize=figsize, layout=layout, + return_type=return_type, **kwds) - _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot) - fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, - hspace=0.5, wspace=0.3) - return axes +@Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) +def boxplot_frame(self, column=None, by=None, ax=None, fontsize=None, rot=0, + grid=True, figsize=None, layout=None, + return_type=None, **kwds): + plot_backend = _get_plot_backend() + return plot_backend.boxplot_frame(self, column=column, by=by, ax=ax, + fontsize=fontsize, rot=rot, grid=grid, + figsize=figsize, layout=layout, + return_type=return_type, **kwds) def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, @@ -2627,110 +591,183 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, >>> grouped = df.unstack(level='lvl1').groupby(level=0, axis=1) >>> boxplot_frame_groupby(grouped, subplots=False) """ - _raise_if_no_mpl() - _converter._WARN = False - if subplots is True: - naxes = len(grouped) - fig, axes = _subplots(naxes=naxes, squeeze=False, - ax=ax, sharex=sharex, sharey=sharey, - figsize=figsize, layout=layout) - axes = _flatten(axes) - - from pandas.core.series import Series - ret = Series() - for (key, group), ax in zip(grouped, axes): - d = group.boxplot(ax=ax, column=column, fontsize=fontsize, - rot=rot, grid=grid, **kwds) - ax.set_title(pprint_thing(key)) - ret.loc[key] = d - fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, - right=0.9, wspace=0.2) + plot_backend = _get_plot_backend() + return plot_backend.boxplot_frame_groupby( + grouped, subplots=subplots, column=column, fontsize=fontsize, rot=rot, + grid=grid, ax=ax, figsize=figsize, layout=layout, sharex=sharex, + sharey=sharey, **kwds) + + +# kinds supported by both dataframe and series +_common_kinds = ['line', 'bar', 'barh', + 'kde', 'density', 'area', 'hist', 'box'] +# kinds supported by dataframe +_dataframe_kinds = ['scatter', 'hexbin'] +# kinds supported only by series or dataframe single column +_series_kinds = ['pie'] +_all_kinds = _common_kinds + _dataframe_kinds + _series_kinds + + +def _get_standard_kind(kind): + return {'density': 'kde'}.get(kind, kind) + + +def _get_plot_backend(): + """ + Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`). + + The plotting system of pandas has been using matplotlib, but the idea here + is that it can also work with other third-party backends. In the future, + this function will return the backend from a pandas option, and all the + rest of the code in this file will use the backend specified there for the + plotting. + + The backend is imported lazily, as matplotlib is a soft dependency, and + pandas can be used without it being installed. + """ + try: + import pandas.plotting._matplotlib as plot_backend + except ImportError: + raise ImportError("matplotlib is required for plotting.") + return plot_backend + + +def _plot_classes(): + plot_backend = _get_plot_backend() + # TODO restore type annotations if we create a base class for plot classes + # (a parent of MPLPlot, and classes of other backends) + classes = [plot_backend.LinePlot, plot_backend.BarPlot, + plot_backend.BarhPlot, plot_backend.AreaPlot, + plot_backend.HistPlot, plot_backend.BoxPlot, + plot_backend.ScatterPlot, plot_backend.HexBinPlot, + plot_backend.KdePlot, plot_backend.PiePlot] + return {class_._kind: class_ for class_ in classes} + + +def _plot(data, x=None, y=None, subplots=False, + ax=None, kind='line', **kwds): + kind = _get_standard_kind(kind.lower().strip()) + if kind in _all_kinds: + klass = _plot_classes()[kind] else: - from pandas.core.reshape.concat import concat - keys, frames = zip(*grouped) - if grouped.axis == 0: - df = concat(frames, keys=keys, axis=1) + raise ValueError("%r is not a valid plot kind" % kind) + + if kind in _dataframe_kinds: + if isinstance(data, ABCDataFrame): + plot_obj = klass(data, x=x, y=y, subplots=subplots, ax=ax, + kind=kind, **kwds) else: - if len(frames) > 1: - df = frames[0].join(frames[1::]) - else: - df = frames[0] - ret = df.boxplot(column=column, fontsize=fontsize, rot=rot, - grid=grid, ax=ax, figsize=figsize, - layout=layout, **kwds) - return ret - - -def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True, - figsize=None, sharex=True, sharey=True, layout=None, - rot=0, ax=None, **kwargs): - - if figsize == 'default': - # allowed to specify mpl default with 'default' - warnings.warn("figsize='default' is deprecated. Specify figure" - "size by tuple instead", FutureWarning, stacklevel=4) - figsize = None - - grouped = data.groupby(by) - if column is not None: - grouped = grouped[column] - - naxes = len(grouped) - fig, axes = _subplots(naxes=naxes, figsize=figsize, - sharex=sharex, sharey=sharey, ax=ax, - layout=layout) - - _axes = _flatten(axes) - - for i, (key, group) in enumerate(grouped): - ax = _axes[i] - if numeric_only and isinstance(group, ABCDataFrame): - group = group._get_numeric_data() - plotf(group, ax, **kwargs) - ax.set_title(pprint_thing(key)) - - return fig, axes - - -def _grouped_plot_by_column(plotf, data, columns=None, by=None, - numeric_only=True, grid=False, - figsize=None, ax=None, layout=None, - return_type=None, **kwargs): - grouped = data.groupby(by) - if columns is None: - if not isinstance(by, (list, tuple)): - by = [by] - columns = data._get_numeric_data().columns.difference(by) - naxes = len(columns) - fig, axes = _subplots(naxes=naxes, sharex=True, sharey=True, - figsize=figsize, ax=ax, layout=layout) - - _axes = _flatten(axes) - - ax_values = [] - - for i, col in enumerate(columns): - ax = _axes[i] - gp_col = grouped[col] - keys, values = zip(*gp_col) - re_plotf = plotf(keys, values, ax, **kwargs) - ax.set_title(col) - ax.set_xlabel(pprint_thing(by)) - ax_values.append(re_plotf) - ax.grid(grid) - - from pandas.core.series import Series - result = Series(ax_values, index=columns) - - # Return axes in multiplot case, maybe revisit later # 985 - if return_type is None: - result = axes - - byline = by[0] if len(by) == 1 else by - fig.suptitle('Boxplot grouped by {byline}'.format(byline=byline)) - fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) - - return result + raise ValueError("plot kind %r can only be used for data frames" + % kind) + + elif kind in _series_kinds: + if isinstance(data, ABCDataFrame): + if y is None and subplots is False: + msg = "{0} requires either y column or 'subplots=True'" + raise ValueError(msg.format(kind)) + elif y is not None: + if is_integer(y) and not data.columns.holds_integer(): + y = data.columns[y] + # converted to series actually. copy to not modify + data = data[y].copy() + data.index.name = y + plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) + else: + if isinstance(data, ABCDataFrame): + data_cols = data.columns + if x is not None: + if is_integer(x) and not data.columns.holds_integer(): + x = data_cols[x] + elif not isinstance(data[x], ABCSeries): + raise ValueError("x must be a label or position") + data = data.set_index(x) + + if y is not None: + # check if we have y as int or list of ints + int_ylist = is_list_like(y) and all(is_integer(c) for c in y) + int_y_arg = is_integer(y) or int_ylist + if int_y_arg and not data.columns.holds_integer(): + y = data_cols[y] + + label_kw = kwds['label'] if 'label' in kwds else False + for kw in ['xerr', 'yerr']: + if (kw in kwds) and \ + (isinstance(kwds[kw], str) or + is_integer(kwds[kw])): + try: + kwds[kw] = data[kwds[kw]] + except (IndexError, KeyError, TypeError): + pass + + # don't overwrite + data = data[y].copy() + + if isinstance(data, ABCSeries): + label_name = label_kw or y + data.name = label_name + else: + match = is_list_like(label_kw) and len(label_kw) == len(y) + if label_kw and not match: + raise ValueError( + "label should be list-like and same length as y" + ) + label_name = label_kw or data.columns + data.columns = label_name + plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) + + plot_obj.generate() + plot_obj.draw() + return plot_obj.result + + +@Appender(_shared_docs['plot'] % _shared_doc_df_kwargs) +def plot_frame(data, x=None, y=None, kind='line', ax=None, + subplots=False, sharex=None, sharey=False, layout=None, + figsize=None, use_index=True, title=None, grid=None, + legend=True, style=None, logx=False, logy=False, loglog=False, + xticks=None, yticks=None, xlim=None, ylim=None, + rot=None, fontsize=None, colormap=None, table=False, + yerr=None, xerr=None, + secondary_y=False, sort_columns=False, + **kwds): + return _plot(data, kind=kind, x=x, y=y, ax=ax, + subplots=subplots, sharex=sharex, sharey=sharey, + layout=layout, figsize=figsize, use_index=use_index, + title=title, grid=grid, legend=legend, + style=style, logx=logx, logy=logy, loglog=loglog, + xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim, + rot=rot, fontsize=fontsize, colormap=colormap, table=table, + yerr=yerr, xerr=xerr, + secondary_y=secondary_y, sort_columns=sort_columns, + **kwds) + + +@Appender(_shared_docs['plot'] % _shared_doc_series_kwargs) +def plot_series(data, kind='line', ax=None, # Series unique + figsize=None, use_index=True, title=None, grid=None, + legend=False, style=None, logx=False, logy=False, loglog=False, + xticks=None, yticks=None, xlim=None, ylim=None, + rot=None, fontsize=None, colormap=None, table=False, + yerr=None, xerr=None, + label=None, secondary_y=False, # Series unique + **kwds): + + # FIXME move this into _matplotlib + import matplotlib.pyplot as plt + if ax is None and len(plt.get_fignums()) > 0: + with plt.rc_context(): + ax = plt.gca() + ax = getattr(ax, 'left_ax', ax) + + return _plot(data, kind=kind, ax=ax, + figsize=figsize, use_index=use_index, title=title, + grid=grid, legend=legend, + style=style, logx=logx, logy=logy, loglog=loglog, + xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim, + rot=rot, fontsize=fontsize, colormap=colormap, table=table, + yerr=yerr, xerr=xerr, + label=label, secondary_y=secondary_y, + **kwds) class BasePlotMethods(PandasObject): @@ -2866,7 +903,7 @@ def hist(self, bins=10, **kwds): """ return self(kind='hist', bins=bins, **kwds) - @Appender(_kde_docstring % { + @Appender(_shared_docs['kde'] % { 'this-datatype': 'Series', 'sibling-datatype': 'DataFrame', 'examples': """ @@ -3300,7 +1337,7 @@ def hist(self, by=None, bins=10, **kwds): """ return self(kind='hist', by=by, bins=bins, **kwds) - @Appender(_kde_docstring % { + @Appender(_shared_docs['kde'] % { 'this-datatype': 'DataFrame', 'sibling-datatype': 'Series', 'examples': """ diff --git a/pandas/plotting/_matplotlib/__init__.py b/pandas/plotting/_matplotlib/__init__.py new file mode 100644 index 0000000000000..5cfb6843db9ed --- /dev/null +++ b/pandas/plotting/_matplotlib/__init__.py @@ -0,0 +1,19 @@ +from pandas.plotting._matplotlib.boxplot import ( + BoxPlot, boxplot, boxplot_frame, boxplot_frame_groupby) +from pandas.plotting._matplotlib.converter import deregister, register +from pandas.plotting._matplotlib.core import ( + AreaPlot, BarhPlot, BarPlot, HexBinPlot, LinePlot, PiePlot, ScatterPlot) +from pandas.plotting._matplotlib.hist import ( + HistPlot, KdePlot, hist_frame, hist_series) +from pandas.plotting._matplotlib.misc import ( + andrews_curves, autocorrelation_plot, bootstrap_plot, lag_plot, + parallel_coordinates, radviz, scatter_matrix) +from pandas.plotting._matplotlib.timeseries import tsplot +from pandas.plotting._matplotlib.tools import table + +__all__ = ['LinePlot', 'BarPlot', 'BarhPlot', 'HistPlot', 'BoxPlot', 'KdePlot', + 'AreaPlot', 'PiePlot', 'ScatterPlot', 'HexBinPlot', 'hist_series', + 'hist_frame', 'boxplot', 'boxplot_frame', 'boxplot_frame_groupby', + 'tsplot', 'table', 'andrews_curves', 'autocorrelation_plot', + 'bootstrap_plot', 'lag_plot', 'parallel_coordinates', 'radviz', + 'scatter_matrix', 'register', 'deregister'] diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py new file mode 100644 index 0000000000000..c1a48ad5ca08b --- /dev/null +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -0,0 +1,339 @@ +from collections import namedtuple +import warnings + +from matplotlib import pyplot as plt +import numpy as np + +from pandas.core.dtypes.generic import ABCSeries +from pandas.core.dtypes.missing import remove_na_arraylike + +from pandas.io.formats.printing import pprint_thing +from pandas.plotting._matplotlib.core import LinePlot, MPLPlot +from pandas.plotting._matplotlib.style import _get_standard_colors +from pandas.plotting._matplotlib.tools import _flatten, _subplots + + +class BoxPlot(LinePlot): + _kind = 'box' + _layout_type = 'horizontal' + + _valid_return_types = (None, 'axes', 'dict', 'both') + # namedtuple to hold results + BP = namedtuple("Boxplot", ['ax', 'lines']) + + def __init__(self, data, return_type='axes', **kwargs): + # Do not call LinePlot.__init__ which may fill nan + if return_type not in self._valid_return_types: + raise ValueError( + "return_type must be {None, 'axes', 'dict', 'both'}") + + self.return_type = return_type + MPLPlot.__init__(self, data, **kwargs) + + def _args_adjust(self): + if self.subplots: + # Disable label ax sharing. Otherwise, all subplots shows last + # column label + if self.orientation == 'vertical': + self.sharex = False + else: + self.sharey = False + + @classmethod + def _plot(cls, ax, y, column_num=None, return_type='axes', **kwds): + if y.ndim == 2: + y = [remove_na_arraylike(v) for v in y] + # Boxplot fails with empty arrays, so need to add a NaN + # if any cols are empty + # GH 8181 + y = [v if v.size > 0 else np.array([np.nan]) for v in y] + else: + y = remove_na_arraylike(y) + bp = ax.boxplot(y, **kwds) + + if return_type == 'dict': + return bp, bp + elif return_type == 'both': + return cls.BP(ax=ax, lines=bp), bp + else: + return ax, bp + + def _validate_color_args(self): + if 'color' in self.kwds: + if self.colormap is not None: + warnings.warn("'color' and 'colormap' cannot be used " + "simultaneously. Using 'color'") + self.color = self.kwds.pop('color') + + if isinstance(self.color, dict): + valid_keys = ['boxes', 'whiskers', 'medians', 'caps'] + for key, values in self.color.items(): + if key not in valid_keys: + raise ValueError("color dict contains invalid " + "key '{0}' " + "The key must be either {1}" + .format(key, valid_keys)) + else: + self.color = None + + # get standard colors for default + colors = _get_standard_colors(num_colors=3, + colormap=self.colormap, + color=None) + # use 2 colors by default, for box/whisker and median + # flier colors isn't needed here + # because it can be specified by ``sym`` kw + self._boxes_c = colors[0] + self._whiskers_c = colors[0] + self._medians_c = colors[2] + self._caps_c = 'k' # mpl default + + def _get_colors(self, num_colors=None, color_kwds='color'): + pass + + def maybe_color_bp(self, bp): + if isinstance(self.color, dict): + boxes = self.color.get('boxes', self._boxes_c) + whiskers = self.color.get('whiskers', self._whiskers_c) + medians = self.color.get('medians', self._medians_c) + caps = self.color.get('caps', self._caps_c) + else: + # Other types are forwarded to matplotlib + # If None, use default colors + boxes = self.color or self._boxes_c + whiskers = self.color or self._whiskers_c + medians = self.color or self._medians_c + caps = self.color or self._caps_c + + from matplotlib.artist import setp + setp(bp['boxes'], color=boxes, alpha=1) + setp(bp['whiskers'], color=whiskers, alpha=1) + setp(bp['medians'], color=medians, alpha=1) + setp(bp['caps'], color=caps, alpha=1) + + def _make_plot(self): + if self.subplots: + from pandas.core.series import Series + self._return_obj = Series() + + for i, (label, y) in enumerate(self._iter_data()): + ax = self._get_ax(i) + kwds = self.kwds.copy() + + ret, bp = self._plot(ax, y, column_num=i, + return_type=self.return_type, **kwds) + self.maybe_color_bp(bp) + self._return_obj[label] = ret + + label = [pprint_thing(label)] + self._set_ticklabels(ax, label) + else: + y = self.data.values.T + ax = self._get_ax(0) + kwds = self.kwds.copy() + + ret, bp = self._plot(ax, y, column_num=0, + return_type=self.return_type, **kwds) + self.maybe_color_bp(bp) + self._return_obj = ret + + labels = [l for l, _ in self._iter_data()] + labels = [pprint_thing(l) for l in labels] + if not self.use_index: + labels = [pprint_thing(key) for key in range(len(labels))] + self._set_ticklabels(ax, labels) + + def _set_ticklabels(self, ax, labels): + if self.orientation == 'vertical': + ax.set_xticklabels(labels) + else: + ax.set_yticklabels(labels) + + def _make_legend(self): + pass + + def _post_plot_logic(self, ax, data): + pass + + @property + def orientation(self): + if self.kwds.get('vert', True): + return 'vertical' + else: + return 'horizontal' + + @property + def result(self): + if self.return_type is None: + return super().result + else: + return self._return_obj + + +def _grouped_plot_by_column(plotf, data, columns=None, by=None, + numeric_only=True, grid=False, + figsize=None, ax=None, layout=None, + return_type=None, **kwargs): + grouped = data.groupby(by) + if columns is None: + if not isinstance(by, (list, tuple)): + by = [by] + columns = data._get_numeric_data().columns.difference(by) + naxes = len(columns) + fig, axes = _subplots(naxes=naxes, sharex=True, sharey=True, + figsize=figsize, ax=ax, layout=layout) + + _axes = _flatten(axes) + + ax_values = [] + + for i, col in enumerate(columns): + ax = _axes[i] + gp_col = grouped[col] + keys, values = zip(*gp_col) + re_plotf = plotf(keys, values, ax, **kwargs) + ax.set_title(col) + ax.set_xlabel(pprint_thing(by)) + ax_values.append(re_plotf) + ax.grid(grid) + + from pandas.core.series import Series + result = Series(ax_values, index=columns) + + # Return axes in multiplot case, maybe revisit later # 985 + if return_type is None: + result = axes + + byline = by[0] if len(by) == 1 else by + fig.suptitle('Boxplot grouped by {byline}'.format(byline=byline)) + fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) + + return result + + +def boxplot(data, column=None, by=None, ax=None, fontsize=None, + rot=0, grid=True, figsize=None, layout=None, return_type=None, + **kwds): + + # validate return_type: + if return_type not in BoxPlot._valid_return_types: + raise ValueError("return_type must be {'axes', 'dict', 'both'}") + + if isinstance(data, ABCSeries): + data = data.to_frame('x') + column = 'x' + + def _get_colors(): + # num_colors=3 is required as method maybe_color_bp takes the colors + # in positions 0 and 2. + return _get_standard_colors(color=kwds.get('color'), num_colors=3) + + def maybe_color_bp(bp): + if 'color' not in kwds: + from matplotlib.artist import setp + setp(bp['boxes'], color=colors[0], alpha=1) + setp(bp['whiskers'], color=colors[0], alpha=1) + setp(bp['medians'], color=colors[2], alpha=1) + + def plot_group(keys, values, ax): + keys = [pprint_thing(x) for x in keys] + values = [np.asarray(remove_na_arraylike(v)) for v in values] + bp = ax.boxplot(values, **kwds) + if fontsize is not None: + ax.tick_params(axis='both', labelsize=fontsize) + if kwds.get('vert', 1): + ax.set_xticklabels(keys, rotation=rot) + else: + ax.set_yticklabels(keys, rotation=rot) + maybe_color_bp(bp) + + # Return axes in multiplot case, maybe revisit later # 985 + if return_type == 'dict': + return bp + elif return_type == 'both': + return BoxPlot.BP(ax=ax, lines=bp) + else: + return ax + + colors = _get_colors() + if column is None: + columns = None + else: + if isinstance(column, (list, tuple)): + columns = column + else: + columns = [column] + + if by is not None: + # Prefer array return type for 2-D plots to match the subplot layout + # https://github.com/pandas-dev/pandas/pull/12216#issuecomment-241175580 + result = _grouped_plot_by_column(plot_group, data, columns=columns, + by=by, grid=grid, figsize=figsize, + ax=ax, layout=layout, + return_type=return_type) + else: + if return_type is None: + return_type = 'axes' + if layout is not None: + raise ValueError("The 'layout' keyword is not supported when " + "'by' is None") + + if ax is None: + rc = {'figure.figsize': figsize} if figsize is not None else {} + with plt.rc_context(rc): + ax = plt.gca() + data = data._get_numeric_data() + if columns is None: + columns = data.columns + else: + data = data[columns] + + result = plot_group(columns, data.values.T, ax) + ax.grid(grid) + + return result + + +def boxplot_frame(self, column=None, by=None, ax=None, fontsize=None, rot=0, + grid=True, figsize=None, layout=None, + return_type=None, **kwds): + ax = boxplot(self, column=column, by=by, ax=ax, fontsize=fontsize, + grid=grid, rot=rot, figsize=figsize, layout=layout, + return_type=return_type, **kwds) + plt.draw_if_interactive() + return ax + + +def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, + rot=0, grid=True, ax=None, figsize=None, + layout=None, sharex=False, sharey=True, **kwds): + if subplots is True: + naxes = len(grouped) + fig, axes = _subplots(naxes=naxes, squeeze=False, + ax=ax, sharex=sharex, sharey=sharey, + figsize=figsize, layout=layout) + axes = _flatten(axes) + + from pandas.core.series import Series + ret = Series() + for (key, group), ax in zip(grouped, axes): + d = group.boxplot(ax=ax, column=column, fontsize=fontsize, + rot=rot, grid=grid, **kwds) + ax.set_title(pprint_thing(key)) + ret.loc[key] = d + fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, + right=0.9, wspace=0.2) + else: + from pandas.core.reshape.concat import concat + keys, frames = zip(*grouped) + if grouped.axis == 0: + df = concat(frames, keys=keys, axis=1) + else: + if len(frames) > 1: + df = frames[0].join(frames[1::]) + else: + df = frames[0] + ret = df.boxplot(column=column, fontsize=fontsize, rot=rot, + grid=grid, ax=ax, figsize=figsize, + layout=layout, **kwds) + return ret diff --git a/pandas/plotting/_compat.py b/pandas/plotting/_matplotlib/compat.py similarity index 100% rename from pandas/plotting/_compat.py rename to pandas/plotting/_matplotlib/compat.py diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_matplotlib/converter.py similarity index 97% rename from pandas/plotting/_converter.py rename to pandas/plotting/_matplotlib/converter.py index 68ffcbea64f91..30ef7a64dec4a 100644 --- a/pandas/plotting/_converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -51,23 +51,6 @@ def get_pairs(): def register(explicit=True): - """ - Register Pandas Formatters and Converters with matplotlib - - This function modifies the global ``matplotlib.units.registry`` - dictionary. Pandas adds custom converters for - - * pd.Timestamp - * pd.Period - * np.datetime64 - * datetime.datetime - * datetime.date - * datetime.time - - See Also - -------- - deregister_matplotlib_converter - """ # Renamed in pandas.plotting.__init__ global _WARN @@ -84,20 +67,6 @@ def register(explicit=True): def deregister(): - """ - Remove pandas' formatters and converters - - Removes the custom converters added by :func:`register`. This - attempts to set the state of the registry back to the state before - pandas registered its own units. Converters for pandas' own types like - Timestamp and Period are removed completely. Converters for types - pandas overwrites, like ``datetime.datetime``, are restored to their - original value. - - See Also - -------- - deregister_matplotlib_converters - """ # Renamed in pandas.plotting.__init__ for type_, cls in get_pairs(): # We use type to catch our classes directly, no inheritance diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py new file mode 100644 index 0000000000000..a7049afee80b0 --- /dev/null +++ b/pandas/plotting/_matplotlib/core.py @@ -0,0 +1,1376 @@ +import re +from typing import Optional # noqa +import warnings + +import matplotlib.pyplot as plt +import numpy as np + +from pandas._config import get_option + +from pandas.errors import AbstractMethodError +from pandas.util._decorators import cache_readonly + +from pandas.core.dtypes.common import ( + is_hashable, is_integer, is_iterator, is_list_like, is_number) +from pandas.core.dtypes.generic import ( + ABCDataFrame, ABCIndexClass, ABCMultiIndex, ABCPeriodIndex, ABCSeries) +from pandas.core.dtypes.missing import isna, notna + +import pandas.core.common as com + +from pandas.io.formats.printing import pprint_thing +from pandas.plotting._matplotlib import converter +from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0 +from pandas.plotting._matplotlib.style import _get_standard_colors +from pandas.plotting._matplotlib.tools import ( + _flatten, _get_all_lines, _get_xlim, _handle_shared_axes, _subplots, + format_date_labels, table) + +if get_option('plotting.matplotlib.register_converters'): + converter.register(explicit=False) + + +class MPLPlot: + """ + Base class for assembling a pandas plot using matplotlib + + Parameters + ---------- + data : + + """ + @property + def _kind(self): + """Specify kind str. Must be overridden in child class""" + raise NotImplementedError + + _layout_type = 'vertical' + _default_rot = 0 + orientation = None # type: Optional[str] + _pop_attributes = ['label', 'style', 'logy', 'logx', 'loglog', + 'mark_right', 'stacked'] + _attr_defaults = {'logy': False, 'logx': False, 'loglog': False, + 'mark_right': True, 'stacked': False} + + def __init__(self, data, kind=None, by=None, subplots=False, sharex=None, + sharey=False, use_index=True, + figsize=None, grid=None, legend=True, rot=None, + ax=None, fig=None, title=None, xlim=None, ylim=None, + xticks=None, yticks=None, + sort_columns=False, fontsize=None, + secondary_y=False, colormap=None, + table=False, layout=None, **kwds): + + self.data = data + self.by = by + + self.kind = kind + + self.sort_columns = sort_columns + + self.subplots = subplots + + if sharex is None: + if ax is None: + self.sharex = True + else: + # if we get an axis, the users should do the visibility + # setting... + self.sharex = False + else: + self.sharex = sharex + + self.sharey = sharey + self.figsize = figsize + self.layout = layout + + self.xticks = xticks + self.yticks = yticks + self.xlim = xlim + self.ylim = ylim + self.title = title + self.use_index = use_index + + self.fontsize = fontsize + + if rot is not None: + self.rot = rot + # need to know for format_date_labels since it's rotated to 30 by + # default + self._rot_set = True + else: + self._rot_set = False + self.rot = self._default_rot + + if grid is None: + grid = False if secondary_y else self.plt.rcParams['axes.grid'] + + self.grid = grid + self.legend = legend + self.legend_handles = [] + self.legend_labels = [] + + for attr in self._pop_attributes: + value = kwds.pop(attr, self._attr_defaults.get(attr, None)) + setattr(self, attr, value) + + self.ax = ax + self.fig = fig + self.axes = None + + # parse errorbar input if given + xerr = kwds.pop('xerr', None) + yerr = kwds.pop('yerr', None) + self.errors = {kw: self._parse_errorbars(kw, err) + for kw, err in zip(['xerr', 'yerr'], [xerr, yerr])} + + if not isinstance(secondary_y, (bool, tuple, list, + np.ndarray, ABCIndexClass)): + secondary_y = [secondary_y] + self.secondary_y = secondary_y + + # ugly TypeError if user passes matplotlib's `cmap` name. + # Probably better to accept either. + if 'cmap' in kwds and colormap: + raise TypeError("Only specify one of `cmap` and `colormap`.") + elif 'cmap' in kwds: + self.colormap = kwds.pop('cmap') + else: + self.colormap = colormap + + self.table = table + + self.kwds = kwds + + self._validate_color_args() + + def _validate_color_args(self): + if 'color' not in self.kwds and 'colors' in self.kwds: + warnings.warn(("'colors' is being deprecated. Please use 'color'" + "instead of 'colors'")) + colors = self.kwds.pop('colors') + self.kwds['color'] = colors + + if ('color' in self.kwds and self.nseries == 1 and + not is_list_like(self.kwds['color'])): + # support series.plot(color='green') + self.kwds['color'] = [self.kwds['color']] + + if ('color' in self.kwds and isinstance(self.kwds['color'], tuple) and + self.nseries == 1 and len(self.kwds['color']) in (3, 4)): + # support RGB and RGBA tuples in series plot + self.kwds['color'] = [self.kwds['color']] + + if ('color' in self.kwds or 'colors' in self.kwds) and \ + self.colormap is not None: + warnings.warn("'color' and 'colormap' cannot be used " + "simultaneously. Using 'color'") + + if 'color' in self.kwds and self.style is not None: + if is_list_like(self.style): + styles = self.style + else: + styles = [self.style] + # need only a single match + for s in styles: + if re.match('^[a-z]+?', s) is not None: + raise ValueError( + "Cannot pass 'style' string with a color " + "symbol and 'color' keyword argument. Please" + " use one or the other or pass 'style' " + "without a color symbol") + + def _iter_data(self, data=None, keep_index=False, fillna=None): + if data is None: + data = self.data + if fillna is not None: + data = data.fillna(fillna) + + # TODO: unused? + # if self.sort_columns: + # columns = com.try_sort(data.columns) + # else: + # columns = data.columns + + for col, values in data.iteritems(): + if keep_index is True: + yield col, values + else: + yield col, values.values + + @property + def nseries(self): + if self.data.ndim == 1: + return 1 + else: + return self.data.shape[1] + + def draw(self): + self.plt.draw_if_interactive() + + def generate(self): + self._args_adjust() + self._compute_plot_data() + self._setup_subplots() + self._make_plot() + self._add_table() + self._make_legend() + self._adorn_subplots() + + for ax in self.axes: + self._post_plot_logic_common(ax, self.data) + self._post_plot_logic(ax, self.data) + + def _args_adjust(self): + pass + + def _has_plotted_object(self, ax): + """check whether ax has data""" + return (len(ax.lines) != 0 or + len(ax.artists) != 0 or + len(ax.containers) != 0) + + def _maybe_right_yaxis(self, ax, axes_num): + if not self.on_right(axes_num): + # secondary axes may be passed via ax kw + return self._get_ax_layer(ax) + + if hasattr(ax, 'right_ax'): + # if it has right_ax proparty, ``ax`` must be left axes + return ax.right_ax + elif hasattr(ax, 'left_ax'): + # if it has left_ax proparty, ``ax`` must be right axes + return ax + else: + # otherwise, create twin axes + orig_ax, new_ax = ax, ax.twinx() + # TODO: use Matplotlib public API when available + new_ax._get_lines = orig_ax._get_lines + new_ax._get_patches_for_fill = orig_ax._get_patches_for_fill + orig_ax.right_ax, new_ax.left_ax = new_ax, orig_ax + + if not self._has_plotted_object(orig_ax): # no data on left y + orig_ax.get_yaxis().set_visible(False) + + if self.logy is True or self.loglog is True: + new_ax.set_yscale('log') + elif self.logy == 'sym' or self.loglog == 'sym': + new_ax.set_yscale('symlog') + return new_ax + + def _setup_subplots(self): + if self.subplots: + fig, axes = _subplots(naxes=self.nseries, + sharex=self.sharex, sharey=self.sharey, + figsize=self.figsize, ax=self.ax, + layout=self.layout, + layout_type=self._layout_type) + else: + if self.ax is None: + fig = self.plt.figure(figsize=self.figsize) + axes = fig.add_subplot(111) + else: + fig = self.ax.get_figure() + if self.figsize is not None: + fig.set_size_inches(self.figsize) + axes = self.ax + + axes = _flatten(axes) + + valid_log = {False, True, 'sym', None} + input_log = {self.logx, self.logy, self.loglog} + if input_log - valid_log: + invalid_log = next(iter((input_log - valid_log))) + raise ValueError( + "Boolean, None and 'sym' are valid options," + " '{}' is given.".format(invalid_log) + ) + + if self.logx is True or self.loglog is True: + [a.set_xscale('log') for a in axes] + elif self.logx == 'sym' or self.loglog == 'sym': + [a.set_xscale('symlog') for a in axes] + + if self.logy is True or self.loglog is True: + [a.set_yscale('log') for a in axes] + elif self.logy == 'sym' or self.loglog == 'sym': + [a.set_yscale('symlog') for a in axes] + + self.fig = fig + self.axes = axes + + @property + def result(self): + """ + Return result axes + """ + if self.subplots: + if self.layout is not None and not is_list_like(self.ax): + return self.axes.reshape(*self.layout) + else: + return self.axes + else: + sec_true = isinstance(self.secondary_y, bool) and self.secondary_y + all_sec = (is_list_like(self.secondary_y) and + len(self.secondary_y) == self.nseries) + if (sec_true or all_sec): + # if all data is plotted on secondary, return right axes + return self._get_ax_layer(self.axes[0], primary=False) + else: + return self.axes[0] + + def _compute_plot_data(self): + data = self.data + + if isinstance(data, ABCSeries): + label = self.label + if label is None and data.name is None: + label = 'None' + data = data.to_frame(name=label) + + # GH16953, _convert is needed as fallback, for ``Series`` + # with ``dtype == object`` + data = data._convert(datetime=True, timedelta=True) + numeric_data = data.select_dtypes(include=[np.number, + "datetime", + "datetimetz", + "timedelta"]) + + try: + is_empty = numeric_data.empty + except AttributeError: + is_empty = not len(numeric_data) + + # no non-numeric frames or series allowed + if is_empty: + raise TypeError('no numeric data to plot') + + # GH25587: cast ExtensionArray of pandas (IntegerArray, etc.) to + # np.ndarray before plot. + numeric_data = numeric_data.copy() + for col in numeric_data: + numeric_data[col] = np.asarray(numeric_data[col]) + + self.data = numeric_data + + def _make_plot(self): + raise AbstractMethodError(self) + + def _add_table(self): + if self.table is False: + return + elif self.table is True: + data = self.data.transpose() + else: + data = self.table + ax = self._get_ax(0) + table(ax, data) + + def _post_plot_logic_common(self, ax, data): + """Common post process for each axes""" + + if self.orientation == 'vertical' or self.orientation is None: + self._apply_axis_properties(ax.xaxis, rot=self.rot, + fontsize=self.fontsize) + self._apply_axis_properties(ax.yaxis, fontsize=self.fontsize) + + if hasattr(ax, 'right_ax'): + self._apply_axis_properties(ax.right_ax.yaxis, + fontsize=self.fontsize) + + elif self.orientation == 'horizontal': + self._apply_axis_properties(ax.yaxis, rot=self.rot, + fontsize=self.fontsize) + self._apply_axis_properties(ax.xaxis, fontsize=self.fontsize) + + if hasattr(ax, 'right_ax'): + self._apply_axis_properties(ax.right_ax.yaxis, + fontsize=self.fontsize) + else: # pragma no cover + raise ValueError + + def _post_plot_logic(self, ax, data): + """Post process for each axes. Overridden in child classes""" + pass + + def _adorn_subplots(self): + """Common post process unrelated to data""" + if len(self.axes) > 0: + all_axes = self._get_subplots() + nrows, ncols = self._get_axes_layout() + _handle_shared_axes(axarr=all_axes, nplots=len(all_axes), + naxes=nrows * ncols, nrows=nrows, + ncols=ncols, sharex=self.sharex, + sharey=self.sharey) + + for ax in self.axes: + if self.yticks is not None: + ax.set_yticks(self.yticks) + + if self.xticks is not None: + ax.set_xticks(self.xticks) + + if self.ylim is not None: + ax.set_ylim(self.ylim) + + if self.xlim is not None: + ax.set_xlim(self.xlim) + + ax.grid(self.grid) + + if self.title: + if self.subplots: + if is_list_like(self.title): + if len(self.title) != self.nseries: + msg = ('The length of `title` must equal the number ' + 'of columns if using `title` of type `list` ' + 'and `subplots=True`.\n' + 'length of title = {}\n' + 'number of columns = {}').format( + len(self.title), self.nseries) + raise ValueError(msg) + + for (ax, title) in zip(self.axes, self.title): + ax.set_title(title) + else: + self.fig.suptitle(self.title) + else: + if is_list_like(self.title): + msg = ('Using `title` of type `list` is not supported ' + 'unless `subplots=True` is passed') + raise ValueError(msg) + self.axes[0].set_title(self.title) + + def _apply_axis_properties(self, axis, rot=None, fontsize=None): + """ Tick creation within matplotlib is reasonably expensive and is + internally deferred until accessed as Ticks are created/destroyed + multiple times per draw. It's therefore beneficial for us to avoid + accessing unless we will act on the Tick. + """ + + if rot is not None or fontsize is not None: + # rot=0 is a valid setting, hence the explicit None check + labels = axis.get_majorticklabels() + axis.get_minorticklabels() + for label in labels: + if rot is not None: + label.set_rotation(rot) + if fontsize is not None: + label.set_fontsize(fontsize) + + @property + def legend_title(self): + if not isinstance(self.data.columns, ABCMultiIndex): + name = self.data.columns.name + if name is not None: + name = pprint_thing(name) + return name + else: + stringified = map(pprint_thing, + self.data.columns.names) + return ','.join(stringified) + + def _add_legend_handle(self, handle, label, index=None): + if label is not None: + if self.mark_right and index is not None: + if self.on_right(index): + label = label + ' (right)' + self.legend_handles.append(handle) + self.legend_labels.append(label) + + def _make_legend(self): + ax, leg = self._get_ax_legend(self.axes[0]) + + handles = [] + labels = [] + title = '' + + if not self.subplots: + if leg is not None: + title = leg.get_title().get_text() + handles = leg.legendHandles + labels = [x.get_text() for x in leg.get_texts()] + + if self.legend: + if self.legend == 'reverse': + self.legend_handles = reversed(self.legend_handles) + self.legend_labels = reversed(self.legend_labels) + + handles += self.legend_handles + labels += self.legend_labels + if self.legend_title is not None: + title = self.legend_title + + if len(handles) > 0: + ax.legend(handles, labels, loc='best', title=title) + + elif self.subplots and self.legend: + for ax in self.axes: + if ax.get_visible(): + ax.legend(loc='best') + + def _get_ax_legend(self, ax): + leg = ax.get_legend() + other_ax = (getattr(ax, 'left_ax', None) or + getattr(ax, 'right_ax', None)) + other_leg = None + if other_ax is not None: + other_leg = other_ax.get_legend() + if leg is None and other_leg is not None: + leg = other_leg + ax = other_ax + return ax, leg + + @cache_readonly + def plt(self): + import matplotlib.pyplot as plt + return plt + + _need_to_set_index = False + + def _get_xticks(self, convert_period=False): + index = self.data.index + is_datetype = index.inferred_type in ('datetime', 'date', + 'datetime64', 'time') + + if self.use_index: + if convert_period and isinstance(index, ABCPeriodIndex): + self.data = self.data.reindex(index=index.sort_values()) + x = self.data.index.to_timestamp()._mpl_repr() + elif index.is_numeric(): + """ + Matplotlib supports numeric values or datetime objects as + xaxis values. Taking LBYL approach here, by the time + matplotlib raises exception when using non numeric/datetime + values for xaxis, several actions are already taken by plt. + """ + x = index._mpl_repr() + elif is_datetype: + self.data = self.data[notna(self.data.index)] + self.data = self.data.sort_index() + x = self.data.index._mpl_repr() + else: + self._need_to_set_index = True + x = list(range(len(index))) + else: + x = list(range(len(index))) + + return x + + @classmethod + def _plot(cls, ax, x, y, style=None, is_errorbar=False, **kwds): + mask = isna(y) + if mask.any(): + y = np.ma.array(y) + y = np.ma.masked_where(mask, y) + + if isinstance(x, ABCIndexClass): + x = x._mpl_repr() + + if is_errorbar: + if 'xerr' in kwds: + kwds['xerr'] = np.array(kwds.get('xerr')) + if 'yerr' in kwds: + kwds['yerr'] = np.array(kwds.get('yerr')) + return ax.errorbar(x, y, **kwds) + else: + # prevent style kwarg from going to errorbar, where it is + # unsupported + if style is not None: + args = (x, y, style) + else: + args = (x, y) + return ax.plot(*args, **kwds) + + def _get_index_name(self): + if isinstance(self.data.index, ABCMultiIndex): + name = self.data.index.names + if com._any_not_none(*name): + name = ','.join(pprint_thing(x) for x in name) + else: + name = None + else: + name = self.data.index.name + if name is not None: + name = pprint_thing(name) + + return name + + @classmethod + def _get_ax_layer(cls, ax, primary=True): + """get left (primary) or right (secondary) axes""" + if primary: + return getattr(ax, 'left_ax', ax) + else: + return getattr(ax, 'right_ax', ax) + + def _get_ax(self, i): + # get the twinx ax if appropriate + if self.subplots: + ax = self.axes[i] + ax = self._maybe_right_yaxis(ax, i) + self.axes[i] = ax + else: + ax = self.axes[0] + ax = self._maybe_right_yaxis(ax, i) + + ax.get_yaxis().set_visible(True) + return ax + + @classmethod + def get_default_ax(cls, ax): + if ax is None and len(plt.get_fignums()) > 0: + with plt.rc_context(): + ax = plt.gca() + ax = cls._get_ax_layer(ax) + + def on_right(self, i): + if isinstance(self.secondary_y, bool): + return self.secondary_y + + if isinstance(self.secondary_y, (tuple, list, + np.ndarray, ABCIndexClass)): + return self.data.columns[i] in self.secondary_y + + def _apply_style_colors(self, colors, kwds, col_num, label): + """ + Manage style and color based on column number and its label. + Returns tuple of appropriate style and kwds which "color" may be added. + """ + style = None + if self.style is not None: + if isinstance(self.style, list): + try: + style = self.style[col_num] + except IndexError: + pass + elif isinstance(self.style, dict): + style = self.style.get(label, style) + else: + style = self.style + + has_color = 'color' in kwds or self.colormap is not None + nocolor_style = style is None or re.match('[a-z]+', style) is None + if (has_color or self.subplots) and nocolor_style: + kwds['color'] = colors[col_num % len(colors)] + return style, kwds + + def _get_colors(self, num_colors=None, color_kwds='color'): + if num_colors is None: + num_colors = self.nseries + + return _get_standard_colors(num_colors=num_colors, + colormap=self.colormap, + color=self.kwds.get(color_kwds)) + + def _parse_errorbars(self, label, err): + """ + Look for error keyword arguments and return the actual errorbar data + or return the error DataFrame/dict + + Error bars can be specified in several ways: + Series: the user provides a pandas.Series object of the same + length as the data + ndarray: provides a np.ndarray of the same length as the data + DataFrame/dict: error values are paired with keys matching the + key in the plotted DataFrame + str: the name of the column within the plotted DataFrame + """ + + if err is None: + return None + + def match_labels(data, e): + e = e.reindex(data.index) + return e + + # key-matched DataFrame + if isinstance(err, ABCDataFrame): + + err = match_labels(self.data, err) + # key-matched dict + elif isinstance(err, dict): + pass + + # Series of error values + elif isinstance(err, ABCSeries): + # broadcast error series across data + err = match_labels(self.data, err) + err = np.atleast_2d(err) + err = np.tile(err, (self.nseries, 1)) + + # errors are a column in the dataframe + elif isinstance(err, str): + evalues = self.data[err].values + self.data = self.data[self.data.columns.drop(err)] + err = np.atleast_2d(evalues) + err = np.tile(err, (self.nseries, 1)) + + elif is_list_like(err): + if is_iterator(err): + err = np.atleast_2d(list(err)) + else: + # raw error values + err = np.atleast_2d(err) + + err_shape = err.shape + + # asymmetrical error bars + if err.ndim == 3: + if (err_shape[0] != self.nseries) or \ + (err_shape[1] != 2) or \ + (err_shape[2] != len(self.data)): + msg = "Asymmetrical error bars should be provided " + \ + "with the shape (%u, 2, %u)" % \ + (self.nseries, len(self.data)) + raise ValueError(msg) + + # broadcast errors to each data series + if len(err) == 1: + err = np.tile(err, (self.nseries, 1)) + + elif is_number(err): + err = np.tile([err], (self.nseries, len(self.data))) + + else: + msg = "No valid {label} detected".format(label=label) + raise ValueError(msg) + + return err + + def _get_errorbars(self, label=None, index=None, xerr=True, yerr=True): + errors = {} + + for kw, flag in zip(['xerr', 'yerr'], [xerr, yerr]): + if flag: + err = self.errors[kw] + # user provided label-matched dataframe of errors + if isinstance(err, (ABCDataFrame, dict)): + if label is not None and label in err.keys(): + err = err[label] + else: + err = None + elif index is not None and err is not None: + err = err[index] + + if err is not None: + errors[kw] = err + return errors + + def _get_subplots(self): + from matplotlib.axes import Subplot + return [ax for ax in self.axes[0].get_figure().get_axes() + if isinstance(ax, Subplot)] + + def _get_axes_layout(self): + axes = self._get_subplots() + x_set = set() + y_set = set() + for ax in axes: + # check axes coordinates to estimate layout + points = ax.get_position().get_points() + x_set.add(points[0][0]) + y_set.add(points[0][1]) + return (len(y_set), len(x_set)) + + +class PlanePlot(MPLPlot): + """ + Abstract class for plotting on plane, currently scatter and hexbin. + """ + + _layout_type = 'single' + + def __init__(self, data, x, y, **kwargs): + MPLPlot.__init__(self, data, **kwargs) + if x is None or y is None: + raise ValueError(self._kind + ' requires an x and y column') + if is_integer(x) and not self.data.columns.holds_integer(): + x = self.data.columns[x] + if is_integer(y) and not self.data.columns.holds_integer(): + y = self.data.columns[y] + if len(self.data[x]._get_numeric_data()) == 0: + raise ValueError(self._kind + ' requires x column to be numeric') + if len(self.data[y]._get_numeric_data()) == 0: + raise ValueError(self._kind + ' requires y column to be numeric') + + self.x = x + self.y = y + + @property + def nseries(self): + return 1 + + def _post_plot_logic(self, ax, data): + x, y = self.x, self.y + ax.set_ylabel(pprint_thing(y)) + ax.set_xlabel(pprint_thing(x)) + + def _plot_colorbar(self, ax, **kwds): + # Addresses issues #10611 and #10678: + # When plotting scatterplots and hexbinplots in IPython + # inline backend the colorbar axis height tends not to + # exactly match the parent axis height. + # The difference is due to small fractional differences + # in floating points with similar representation. + # To deal with this, this method forces the colorbar + # height to take the height of the parent axes. + # For a more detailed description of the issue + # see the following link: + # https://github.com/ipython/ipython/issues/11215 + img = ax.collections[0] + cbar = self.fig.colorbar(img, ax=ax, **kwds) + + if _mpl_ge_3_0_0(): + # The workaround below is no longer necessary. + return + + points = ax.get_position().get_points() + cbar_points = cbar.ax.get_position().get_points() + + cbar.ax.set_position([cbar_points[0, 0], + points[0, 1], + cbar_points[1, 0] - cbar_points[0, 0], + points[1, 1] - points[0, 1]]) + # To see the discrepancy in axis heights uncomment + # the following two lines: + # print(points[1, 1] - points[0, 1]) + # print(cbar_points[1, 1] - cbar_points[0, 1]) + + +class ScatterPlot(PlanePlot): + _kind = 'scatter' + + def __init__(self, data, x, y, s=None, c=None, **kwargs): + if s is None: + # hide the matplotlib default for size, in case we want to change + # the handling of this argument later + s = 20 + super().__init__(data, x, y, s=s, **kwargs) + if is_integer(c) and not self.data.columns.holds_integer(): + c = self.data.columns[c] + self.c = c + + def _make_plot(self): + x, y, c, data = self.x, self.y, self.c, self.data + ax = self.axes[0] + + c_is_column = is_hashable(c) and c in self.data.columns + + # plot a colorbar only if a colormap is provided or necessary + cb = self.kwds.pop('colorbar', self.colormap or c_is_column) + + # pandas uses colormap, matplotlib uses cmap. + cmap = self.colormap or 'Greys' + cmap = self.plt.cm.get_cmap(cmap) + color = self.kwds.pop("color", None) + if c is not None and color is not None: + raise TypeError('Specify exactly one of `c` and `color`') + elif c is None and color is None: + c_values = self.plt.rcParams['patch.facecolor'] + elif color is not None: + c_values = color + elif c_is_column: + c_values = self.data[c].values + else: + c_values = c + + if self.legend and hasattr(self, 'label'): + label = self.label + else: + label = None + scatter = ax.scatter(data[x].values, data[y].values, c=c_values, + label=label, cmap=cmap, **self.kwds) + if cb: + cbar_label = c if c_is_column else '' + self._plot_colorbar(ax, label=cbar_label) + + if label is not None: + self._add_legend_handle(scatter, label) + else: + self.legend = False + + errors_x = self._get_errorbars(label=x, index=0, yerr=False) + errors_y = self._get_errorbars(label=y, index=0, xerr=False) + if len(errors_x) > 0 or len(errors_y) > 0: + err_kwds = dict(errors_x, **errors_y) + err_kwds['ecolor'] = scatter.get_facecolor()[0] + ax.errorbar(data[x].values, data[y].values, + linestyle='none', **err_kwds) + + +class HexBinPlot(PlanePlot): + _kind = 'hexbin' + + def __init__(self, data, x, y, C=None, **kwargs): + super().__init__(data, x, y, **kwargs) + if is_integer(C) and not self.data.columns.holds_integer(): + C = self.data.columns[C] + self.C = C + + def _make_plot(self): + x, y, data, C = self.x, self.y, self.data, self.C + ax = self.axes[0] + # pandas uses colormap, matplotlib uses cmap. + cmap = self.colormap or 'BuGn' + cmap = self.plt.cm.get_cmap(cmap) + cb = self.kwds.pop('colorbar', True) + + if C is None: + c_values = None + else: + c_values = data[C].values + + ax.hexbin(data[x].values, data[y].values, C=c_values, cmap=cmap, + **self.kwds) + if cb: + self._plot_colorbar(ax) + + def _make_legend(self): + pass + + +class LinePlot(MPLPlot): + _kind = 'line' + _default_rot = 0 + orientation = 'vertical' + + def __init__(self, data, **kwargs): + from pandas.plotting import plot_params + MPLPlot.__init__(self, data, **kwargs) + if self.stacked: + self.data = self.data.fillna(value=0) + self.x_compat = plot_params['x_compat'] + if 'x_compat' in self.kwds: + self.x_compat = bool(self.kwds.pop('x_compat')) + + def _is_ts_plot(self): + # this is slightly deceptive + return not self.x_compat and self.use_index and self._use_dynamic_x() + + def _use_dynamic_x(self): + from pandas.plotting._matplotlib.timeseries import _use_dynamic_x + return _use_dynamic_x(self._get_ax(0), self.data) + + def _make_plot(self): + if self._is_ts_plot(): + from pandas.plotting._matplotlib.timeseries import ( + _maybe_convert_index) + data = _maybe_convert_index(self._get_ax(0), self.data) + + x = data.index # dummy, not used + plotf = self._ts_plot + it = self._iter_data(data=data, keep_index=True) + else: + x = self._get_xticks(convert_period=True) + plotf = self._plot + it = self._iter_data() + + stacking_id = self._get_stacking_id() + is_errorbar = com._any_not_none(*self.errors.values()) + + colors = self._get_colors() + for i, (label, y) in enumerate(it): + ax = self._get_ax(i) + kwds = self.kwds.copy() + style, kwds = self._apply_style_colors(colors, kwds, i, label) + + errors = self._get_errorbars(label=label, index=i) + kwds = dict(kwds, **errors) + + label = pprint_thing(label) # .encode('utf-8') + kwds['label'] = label + + newlines = plotf(ax, x, y, style=style, column_num=i, + stacking_id=stacking_id, + is_errorbar=is_errorbar, + **kwds) + self._add_legend_handle(newlines[0], label, index=i) + + lines = _get_all_lines(ax) + left, right = _get_xlim(lines) + ax.set_xlim(left, right) + + @classmethod + def _plot(cls, ax, x, y, style=None, column_num=None, + stacking_id=None, **kwds): + # column_num is used to get the target column from protf in line and + # area plots + if column_num == 0: + cls._initialize_stacker(ax, stacking_id, len(y)) + y_values = cls._get_stacked_values(ax, stacking_id, y, kwds['label']) + lines = MPLPlot._plot(ax, x, y_values, style=style, **kwds) + cls._update_stacker(ax, stacking_id, y) + return lines + + @classmethod + def _ts_plot(cls, ax, x, data, style=None, **kwds): + from pandas.plotting._matplotlib.timeseries import (_maybe_resample, + _decorate_axes, + format_dateaxis) + # accept x to be consistent with normal plot func, + # x is not passed to tsplot as it uses data.index as x coordinate + # column_num must be in kwds for stacking purpose + freq, data = _maybe_resample(data, ax, kwds) + + # Set ax with freq info + _decorate_axes(ax, freq, kwds) + # digging deeper + if hasattr(ax, 'left_ax'): + _decorate_axes(ax.left_ax, freq, kwds) + if hasattr(ax, 'right_ax'): + _decorate_axes(ax.right_ax, freq, kwds) + ax._plot_data.append((data, cls._kind, kwds)) + + lines = cls._plot(ax, data.index, data.values, style=style, **kwds) + # set date formatter, locators and rescale limits + format_dateaxis(ax, ax.freq, data.index) + return lines + + def _get_stacking_id(self): + if self.stacked: + return id(self.data) + else: + return None + + @classmethod + def _initialize_stacker(cls, ax, stacking_id, n): + if stacking_id is None: + return + if not hasattr(ax, '_stacker_pos_prior'): + ax._stacker_pos_prior = {} + if not hasattr(ax, '_stacker_neg_prior'): + ax._stacker_neg_prior = {} + ax._stacker_pos_prior[stacking_id] = np.zeros(n) + ax._stacker_neg_prior[stacking_id] = np.zeros(n) + + @classmethod + def _get_stacked_values(cls, ax, stacking_id, values, label): + if stacking_id is None: + return values + if not hasattr(ax, '_stacker_pos_prior'): + # stacker may not be initialized for subplots + cls._initialize_stacker(ax, stacking_id, len(values)) + + if (values >= 0).all(): + return ax._stacker_pos_prior[stacking_id] + values + elif (values <= 0).all(): + return ax._stacker_neg_prior[stacking_id] + values + + raise ValueError('When stacked is True, each column must be either ' + 'all positive or negative.' + '{0} contains both positive and negative values' + .format(label)) + + @classmethod + def _update_stacker(cls, ax, stacking_id, values): + if stacking_id is None: + return + if (values >= 0).all(): + ax._stacker_pos_prior[stacking_id] += values + elif (values <= 0).all(): + ax._stacker_neg_prior[stacking_id] += values + + def _post_plot_logic(self, ax, data): + from matplotlib.ticker import FixedLocator + + def get_label(i): + try: + return pprint_thing(data.index[i]) + except Exception: + return '' + + if self._need_to_set_index: + xticks = ax.get_xticks() + xticklabels = [get_label(x) for x in xticks] + ax.set_xticklabels(xticklabels) + ax.xaxis.set_major_locator(FixedLocator(xticks)) + + condition = (not self._use_dynamic_x() and + data.index.is_all_dates and + not self.subplots or + (self.subplots and self.sharex)) + + index_name = self._get_index_name() + + if condition: + # irregular TS rotated 30 deg. by default + # probably a better place to check / set this. + if not self._rot_set: + self.rot = 30 + format_date_labels(ax, rot=self.rot) + + if index_name is not None and self.use_index: + ax.set_xlabel(index_name) + + +class AreaPlot(LinePlot): + _kind = 'area' + + def __init__(self, data, **kwargs): + kwargs.setdefault('stacked', True) + data = data.fillna(value=0) + LinePlot.__init__(self, data, **kwargs) + + if not self.stacked: + # use smaller alpha to distinguish overlap + self.kwds.setdefault('alpha', 0.5) + + if self.logy or self.loglog: + raise ValueError("Log-y scales are not supported in area plot") + + @classmethod + def _plot(cls, ax, x, y, style=None, column_num=None, + stacking_id=None, is_errorbar=False, **kwds): + + if column_num == 0: + cls._initialize_stacker(ax, stacking_id, len(y)) + y_values = cls._get_stacked_values(ax, stacking_id, y, kwds['label']) + + # need to remove label, because subplots uses mpl legend as it is + line_kwds = kwds.copy() + line_kwds.pop('label') + lines = MPLPlot._plot(ax, x, y_values, style=style, **line_kwds) + + # get data from the line to get coordinates for fill_between + xdata, y_values = lines[0].get_data(orig=False) + + # unable to use ``_get_stacked_values`` here to get starting point + if stacking_id is None: + start = np.zeros(len(y)) + elif (y >= 0).all(): + start = ax._stacker_pos_prior[stacking_id] + elif (y <= 0).all(): + start = ax._stacker_neg_prior[stacking_id] + else: + start = np.zeros(len(y)) + + if 'color' not in kwds: + kwds['color'] = lines[0].get_color() + + rect = ax.fill_between(xdata, start, y_values, **kwds) + cls._update_stacker(ax, stacking_id, y) + + # LinePlot expects list of artists + res = [rect] + return res + + def _post_plot_logic(self, ax, data): + LinePlot._post_plot_logic(self, ax, data) + + if self.ylim is None: + if (data >= 0).all().all(): + ax.set_ylim(0, None) + elif (data <= 0).all().all(): + ax.set_ylim(None, 0) + + +class BarPlot(MPLPlot): + _kind = 'bar' + _default_rot = 90 + orientation = 'vertical' + + def __init__(self, data, **kwargs): + # we have to treat a series differently than a + # 1-column DataFrame w.r.t. color handling + self._is_series = isinstance(data, ABCSeries) + self.bar_width = kwargs.pop('width', 0.5) + pos = kwargs.pop('position', 0.5) + kwargs.setdefault('align', 'center') + self.tick_pos = np.arange(len(data)) + + self.bottom = kwargs.pop('bottom', 0) + self.left = kwargs.pop('left', 0) + + self.log = kwargs.pop('log', False) + MPLPlot.__init__(self, data, **kwargs) + + if self.stacked or self.subplots: + self.tickoffset = self.bar_width * pos + if kwargs['align'] == 'edge': + self.lim_offset = self.bar_width / 2 + else: + self.lim_offset = 0 + else: + if kwargs['align'] == 'edge': + w = self.bar_width / self.nseries + self.tickoffset = self.bar_width * (pos - 0.5) + w * 0.5 + self.lim_offset = w * 0.5 + else: + self.tickoffset = self.bar_width * pos + self.lim_offset = 0 + + self.ax_pos = self.tick_pos - self.tickoffset + + def _args_adjust(self): + if is_list_like(self.bottom): + self.bottom = np.array(self.bottom) + if is_list_like(self.left): + self.left = np.array(self.left) + + @classmethod + def _plot(cls, ax, x, y, w, start=0, log=False, **kwds): + return ax.bar(x, y, w, bottom=start, log=log, **kwds) + + @property + def _start_base(self): + return self.bottom + + def _make_plot(self): + import matplotlib as mpl + + colors = self._get_colors() + ncolors = len(colors) + + pos_prior = neg_prior = np.zeros(len(self.data)) + K = self.nseries + + for i, (label, y) in enumerate(self._iter_data(fillna=0)): + ax = self._get_ax(i) + kwds = self.kwds.copy() + if self._is_series: + kwds['color'] = colors + else: + kwds['color'] = colors[i % ncolors] + + errors = self._get_errorbars(label=label, index=i) + kwds = dict(kwds, **errors) + + label = pprint_thing(label) + + if (('yerr' in kwds) or ('xerr' in kwds)) \ + and (kwds.get('ecolor') is None): + kwds['ecolor'] = mpl.rcParams['xtick.color'] + + start = 0 + if self.log and (y >= 1).all(): + start = 1 + start = start + self._start_base + + if self.subplots: + w = self.bar_width / 2 + rect = self._plot(ax, self.ax_pos + w, y, self.bar_width, + start=start, label=label, + log=self.log, **kwds) + ax.set_title(label) + elif self.stacked: + mask = y > 0 + start = np.where(mask, pos_prior, neg_prior) + self._start_base + w = self.bar_width / 2 + rect = self._plot(ax, self.ax_pos + w, y, self.bar_width, + start=start, label=label, + log=self.log, **kwds) + pos_prior = pos_prior + np.where(mask, y, 0) + neg_prior = neg_prior + np.where(mask, 0, y) + else: + w = self.bar_width / K + rect = self._plot(ax, self.ax_pos + (i + 0.5) * w, y, w, + start=start, label=label, + log=self.log, **kwds) + self._add_legend_handle(rect, label, index=i) + + def _post_plot_logic(self, ax, data): + if self.use_index: + str_index = [pprint_thing(key) for key in data.index] + else: + str_index = [pprint_thing(key) for key in range(data.shape[0])] + name = self._get_index_name() + + s_edge = self.ax_pos[0] - 0.25 + self.lim_offset + e_edge = self.ax_pos[-1] + 0.25 + self.bar_width + self.lim_offset + + self._decorate_ticks(ax, name, str_index, s_edge, e_edge) + + def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): + ax.set_xlim((start_edge, end_edge)) + ax.set_xticks(self.tick_pos) + ax.set_xticklabels(ticklabels) + if name is not None and self.use_index: + ax.set_xlabel(name) + + +class BarhPlot(BarPlot): + _kind = 'barh' + _default_rot = 0 + orientation = 'horizontal' + + @property + def _start_base(self): + return self.left + + @classmethod + def _plot(cls, ax, x, y, w, start=0, log=False, **kwds): + return ax.barh(x, y, w, left=start, log=log, **kwds) + + def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): + # horizontal bars + ax.set_ylim((start_edge, end_edge)) + ax.set_yticks(self.tick_pos) + ax.set_yticklabels(ticklabels) + if name is not None and self.use_index: + ax.set_ylabel(name) + + +class PiePlot(MPLPlot): + _kind = 'pie' + _layout_type = 'horizontal' + + def __init__(self, data, kind=None, **kwargs): + data = data.fillna(value=0) + if (data < 0).any().any(): + raise ValueError("{0} doesn't allow negative values".format(kind)) + MPLPlot.__init__(self, data, kind=kind, **kwargs) + + def _args_adjust(self): + self.grid = False + self.logy = False + self.logx = False + self.loglog = False + + def _validate_color_args(self): + pass + + def _make_plot(self): + colors = self._get_colors( + num_colors=len(self.data), color_kwds='colors') + self.kwds.setdefault('colors', colors) + + for i, (label, y) in enumerate(self._iter_data()): + ax = self._get_ax(i) + if label is not None: + label = pprint_thing(label) + ax.set_ylabel(label) + + kwds = self.kwds.copy() + + def blank_labeler(label, value): + if value == 0: + return '' + else: + return label + + idx = [pprint_thing(v) for v in self.data.index] + labels = kwds.pop('labels', idx) + # labels is used for each wedge's labels + # Blank out labels for values of 0 so they don't overlap + # with nonzero wedges + if labels is not None: + blabels = [blank_labeler(l, value) for + l, value in zip(labels, y)] + else: + blabels = None + results = ax.pie(y, labels=blabels, **kwds) + + if kwds.get('autopct', None) is not None: + patches, texts, autotexts = results + else: + patches, texts = results + autotexts = [] + + if self.fontsize is not None: + for t in texts + autotexts: + t.set_fontsize(self.fontsize) + + # leglabels is used for legend labels + leglabels = labels if labels is not None else idx + for p, l in zip(patches, leglabels): + self._add_legend_handle(p, l) diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py new file mode 100644 index 0000000000000..585c407e33311 --- /dev/null +++ b/pandas/plotting/_matplotlib/hist.py @@ -0,0 +1,298 @@ +import warnings + +import matplotlib.pyplot as plt +import numpy as np + +from pandas.core.dtypes.common import is_integer, is_list_like +from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass +from pandas.core.dtypes.missing import isna, remove_na_arraylike + +import pandas.core.common as com + +from pandas.io.formats.printing import pprint_thing +from pandas.plotting._matplotlib.core import LinePlot, MPLPlot +from pandas.plotting._matplotlib.tools import ( + _flatten, _set_ticks_props, _subplots) + + +class HistPlot(LinePlot): + _kind = 'hist' + + def __init__(self, data, bins=10, bottom=0, **kwargs): + self.bins = bins # use mpl default + self.bottom = bottom + # Do not call LinePlot.__init__ which may fill nan + MPLPlot.__init__(self, data, **kwargs) + + def _args_adjust(self): + if is_integer(self.bins): + # create common bin edge + values = (self.data._convert(datetime=True)._get_numeric_data()) + values = np.ravel(values) + values = values[~isna(values)] + + hist, self.bins = np.histogram( + values, bins=self.bins, + range=self.kwds.get('range', None), + weights=self.kwds.get('weights', None)) + + if is_list_like(self.bottom): + self.bottom = np.array(self.bottom) + + @classmethod + def _plot(cls, ax, y, style=None, bins=None, bottom=0, column_num=0, + stacking_id=None, **kwds): + if column_num == 0: + cls._initialize_stacker(ax, stacking_id, len(bins) - 1) + y = y[~isna(y)] + + base = np.zeros(len(bins) - 1) + bottom = bottom + \ + cls._get_stacked_values(ax, stacking_id, base, kwds['label']) + # ignore style + n, bins, patches = ax.hist(y, bins=bins, bottom=bottom, **kwds) + cls._update_stacker(ax, stacking_id, n) + return patches + + def _make_plot(self): + colors = self._get_colors() + stacking_id = self._get_stacking_id() + + for i, (label, y) in enumerate(self._iter_data()): + ax = self._get_ax(i) + + kwds = self.kwds.copy() + + label = pprint_thing(label) + kwds['label'] = label + + style, kwds = self._apply_style_colors(colors, kwds, i, label) + if style is not None: + kwds['style'] = style + + kwds = self._make_plot_keywords(kwds, y) + artists = self._plot(ax, y, column_num=i, + stacking_id=stacking_id, **kwds) + self._add_legend_handle(artists[0], label, index=i) + + def _make_plot_keywords(self, kwds, y): + """merge BoxPlot/KdePlot properties to passed kwds""" + # y is required for KdePlot + kwds['bottom'] = self.bottom + kwds['bins'] = self.bins + return kwds + + def _post_plot_logic(self, ax, data): + if self.orientation == 'horizontal': + ax.set_xlabel('Frequency') + else: + ax.set_ylabel('Frequency') + + @property + def orientation(self): + if self.kwds.get('orientation', None) == 'horizontal': + return 'horizontal' + else: + return 'vertical' + + +class KdePlot(HistPlot): + _kind = 'kde' + orientation = 'vertical' + + def __init__(self, data, bw_method=None, ind=None, **kwargs): + MPLPlot.__init__(self, data, **kwargs) + self.bw_method = bw_method + self.ind = ind + + def _args_adjust(self): + pass + + def _get_ind(self, y): + if self.ind is None: + # np.nanmax() and np.nanmin() ignores the missing values + sample_range = np.nanmax(y) - np.nanmin(y) + ind = np.linspace(np.nanmin(y) - 0.5 * sample_range, + np.nanmax(y) + 0.5 * sample_range, 1000) + elif is_integer(self.ind): + sample_range = np.nanmax(y) - np.nanmin(y) + ind = np.linspace(np.nanmin(y) - 0.5 * sample_range, + np.nanmax(y) + 0.5 * sample_range, self.ind) + else: + ind = self.ind + return ind + + @classmethod + def _plot(cls, ax, y, style=None, bw_method=None, ind=None, + column_num=None, stacking_id=None, **kwds): + from scipy.stats import gaussian_kde + + y = remove_na_arraylike(y) + gkde = gaussian_kde(y, bw_method=bw_method) + + y = gkde.evaluate(ind) + lines = MPLPlot._plot(ax, ind, y, style=style, **kwds) + return lines + + def _make_plot_keywords(self, kwds, y): + kwds['bw_method'] = self.bw_method + kwds['ind'] = self._get_ind(y) + return kwds + + def _post_plot_logic(self, ax, data): + ax.set_ylabel('Density') + + +def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True, + figsize=None, sharex=True, sharey=True, layout=None, + rot=0, ax=None, **kwargs): + + if figsize == 'default': + # allowed to specify mpl default with 'default' + warnings.warn("figsize='default' is deprecated. Specify figure " + "size by tuple instead", FutureWarning, stacklevel=5) + figsize = None + + grouped = data.groupby(by) + if column is not None: + grouped = grouped[column] + + naxes = len(grouped) + fig, axes = _subplots(naxes=naxes, figsize=figsize, + sharex=sharex, sharey=sharey, ax=ax, + layout=layout) + + _axes = _flatten(axes) + + for i, (key, group) in enumerate(grouped): + ax = _axes[i] + if numeric_only and isinstance(group, ABCDataFrame): + group = group._get_numeric_data() + plotf(group, ax, **kwargs) + ax.set_title(pprint_thing(key)) + + return fig, axes + + +def _grouped_hist(data, column=None, by=None, ax=None, bins=50, figsize=None, + layout=None, sharex=False, sharey=False, rot=90, grid=True, + xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, + **kwargs): + """ + Grouped histogram + + Parameters + ---------- + data : Series/DataFrame + column : object, optional + by : object, optional + ax : axes, optional + bins : int, default 50 + figsize : tuple, optional + layout : optional + sharex : bool, default False + sharey : bool, default False + rot : int, default 90 + grid : bool, default True + kwargs : dict, keyword arguments passed to matplotlib.Axes.hist + + Returns + ------- + collection of Matplotlib Axes + """ + def plot_group(group, ax): + ax.hist(group.dropna().values, bins=bins, **kwargs) + + xrot = xrot or rot + + fig, axes = _grouped_plot(plot_group, data, column=column, + by=by, sharex=sharex, sharey=sharey, ax=ax, + figsize=figsize, layout=layout, rot=rot) + + _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot) + + fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, + hspace=0.5, wspace=0.3) + return axes + + +def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, + xrot=None, ylabelsize=None, yrot=None, figsize=None, + bins=10, **kwds): + if by is None: + if kwds.get('layout', None) is not None: + raise ValueError("The 'layout' keyword is not supported when " + "'by' is None") + # hack until the plotting interface is a bit more unified + fig = kwds.pop('figure', plt.gcf() if plt.get_fignums() else + plt.figure(figsize=figsize)) + if (figsize is not None and tuple(figsize) != + tuple(fig.get_size_inches())): + fig.set_size_inches(*figsize, forward=True) + if ax is None: + ax = fig.gca() + elif ax.get_figure() != fig: + raise AssertionError('passed axis not bound to passed figure') + values = self.dropna().values + + ax.hist(values, bins=bins, **kwds) + ax.grid(grid) + axes = np.array([ax]) + + _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot) + + else: + if 'figure' in kwds: + raise ValueError("Cannot pass 'figure' when using the " + "'by' argument, since a new 'Figure' instance " + "will be created") + axes = _grouped_hist(self, by=by, ax=ax, grid=grid, figsize=figsize, + bins=bins, xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot, **kwds) + + if hasattr(axes, 'ndim'): + if axes.ndim == 1 and len(axes) == 1: + return axes[0] + return axes + + +def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, + xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, + sharey=False, figsize=None, layout=None, bins=10, **kwds): + if by is not None: + axes = _grouped_hist(data, column=column, by=by, ax=ax, grid=grid, + figsize=figsize, sharex=sharex, sharey=sharey, + layout=layout, bins=bins, xlabelsize=xlabelsize, + xrot=xrot, ylabelsize=ylabelsize, + yrot=yrot, **kwds) + return axes + + if column is not None: + if not isinstance(column, (list, np.ndarray, ABCIndexClass)): + column = [column] + data = data[column] + data = data._get_numeric_data() + naxes = len(data.columns) + + if naxes == 0: + raise ValueError("hist method requires numerical columns, " + "nothing to plot.") + + fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False, + sharex=sharex, sharey=sharey, figsize=figsize, + layout=layout) + _axes = _flatten(axes) + + for i, col in enumerate(com.try_sort(data.columns)): + ax = _axes[i] + ax.hist(data[col].dropna().values, bins=bins, **kwds) + ax.set_title(col) + ax.grid(grid) + + _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot) + fig.subplots_adjust(wspace=0.3, hspace=0.3) + + return axes diff --git a/pandas/plotting/_matplotlib/misc.py b/pandas/plotting/_matplotlib/misc.py new file mode 100644 index 0000000000000..7a04d48da434d --- /dev/null +++ b/pandas/plotting/_matplotlib/misc.py @@ -0,0 +1,379 @@ +# being a bit too dynamic +import matplotlib.pyplot as plt +import numpy as np + +from pandas.core.dtypes.missing import notna + +from pandas.io.formats.printing import pprint_thing +from pandas.plotting._matplotlib.style import _get_standard_colors +from pandas.plotting._matplotlib.tools import _set_ticks_props, _subplots + + +def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, + diagonal='hist', marker='.', density_kwds=None, + hist_kwds=None, range_padding=0.05, **kwds): + df = frame._get_numeric_data() + n = df.columns.size + naxes = n * n + fig, axes = _subplots(naxes=naxes, figsize=figsize, ax=ax, + squeeze=False) + + # no gaps between subplots + fig.subplots_adjust(wspace=0, hspace=0) + + mask = notna(df) + + marker = _get_marker_compat(marker) + + hist_kwds = hist_kwds or {} + density_kwds = density_kwds or {} + + # GH 14855 + kwds.setdefault('edgecolors', 'none') + + boundaries_list = [] + for a in df.columns: + values = df[a].values[mask[a].values] + rmin_, rmax_ = np.min(values), np.max(values) + rdelta_ext = (rmax_ - rmin_) * range_padding / 2. + boundaries_list.append((rmin_ - rdelta_ext, rmax_ + rdelta_ext)) + + for i, a in enumerate(df.columns): + for j, b in enumerate(df.columns): + ax = axes[i, j] + + if i == j: + values = df[a].values[mask[a].values] + + # Deal with the diagonal by drawing a histogram there. + if diagonal == 'hist': + ax.hist(values, **hist_kwds) + + elif diagonal in ('kde', 'density'): + from scipy.stats import gaussian_kde + y = values + gkde = gaussian_kde(y) + ind = np.linspace(y.min(), y.max(), 1000) + ax.plot(ind, gkde.evaluate(ind), **density_kwds) + + ax.set_xlim(boundaries_list[i]) + + else: + common = (mask[a] & mask[b]).values + + ax.scatter(df[b][common], df[a][common], + marker=marker, alpha=alpha, **kwds) + + ax.set_xlim(boundaries_list[j]) + ax.set_ylim(boundaries_list[i]) + + ax.set_xlabel(b) + ax.set_ylabel(a) + + if j != 0: + ax.yaxis.set_visible(False) + if i != n - 1: + ax.xaxis.set_visible(False) + + if len(df.columns) > 1: + lim1 = boundaries_list[0] + locs = axes[0][1].yaxis.get_majorticklocs() + locs = locs[(lim1[0] <= locs) & (locs <= lim1[1])] + adj = (locs - lim1[0]) / (lim1[1] - lim1[0]) + + lim0 = axes[0][0].get_ylim() + adj = adj * (lim0[1] - lim0[0]) + lim0[0] + axes[0][0].yaxis.set_ticks(adj) + + if np.all(locs == locs.astype(int)): + # if all ticks are int + locs = locs.astype(int) + axes[0][0].yaxis.set_ticklabels(locs) + + _set_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) + + return axes + + +def _get_marker_compat(marker): + import matplotlib.lines as mlines + if marker not in mlines.lineMarkers: + return 'o' + return marker + + +def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): + import matplotlib.patches as patches + + def normalize(series): + a = min(series) + b = max(series) + return (series - a) / (b - a) + + n = len(frame) + classes = frame[class_column].drop_duplicates() + class_col = frame[class_column] + df = frame.drop(class_column, axis=1).apply(normalize) + + if ax is None: + ax = plt.gca(xlim=[-1, 1], ylim=[-1, 1]) + + to_plot = {} + colors = _get_standard_colors(num_colors=len(classes), colormap=colormap, + color_type='random', color=color) + + for kls in classes: + to_plot[kls] = [[], []] + + m = len(frame.columns) - 1 + s = np.array([(np.cos(t), np.sin(t)) + for t in [2.0 * np.pi * (i / float(m)) + for i in range(m)]]) + + for i in range(n): + row = df.iloc[i].values + row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1) + y = (s * row_).sum(axis=0) / row.sum() + kls = class_col.iat[i] + to_plot[kls][0].append(y[0]) + to_plot[kls][1].append(y[1]) + + for i, kls in enumerate(classes): + ax.scatter(to_plot[kls][0], to_plot[kls][1], color=colors[i], + label=pprint_thing(kls), **kwds) + ax.legend() + + ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor='none')) + + for xy, name in zip(s, df.columns): + + ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='gray')) + + if xy[0] < 0.0 and xy[1] < 0.0: + ax.text(xy[0] - 0.025, xy[1] - 0.025, name, + ha='right', va='top', size='small') + elif xy[0] < 0.0 and xy[1] >= 0.0: + ax.text(xy[0] - 0.025, xy[1] + 0.025, name, + ha='right', va='bottom', size='small') + elif xy[0] >= 0.0 and xy[1] < 0.0: + ax.text(xy[0] + 0.025, xy[1] - 0.025, name, + ha='left', va='top', size='small') + elif xy[0] >= 0.0 and xy[1] >= 0.0: + ax.text(xy[0] + 0.025, xy[1] + 0.025, name, + ha='left', va='bottom', size='small') + + ax.axis('equal') + return ax + + +def andrews_curves(frame, class_column, ax=None, samples=200, color=None, + colormap=None, **kwds): + from math import sqrt, pi + + def function(amplitudes): + def f(t): + x1 = amplitudes[0] + result = x1 / sqrt(2.0) + + # Take the rest of the coefficients and resize them + # appropriately. Take a copy of amplitudes as otherwise numpy + # deletes the element from amplitudes itself. + coeffs = np.delete(np.copy(amplitudes), 0) + coeffs.resize(int((coeffs.size + 1) / 2), 2) + + # Generate the harmonics and arguments for the sin and cos + # functions. + harmonics = np.arange(0, coeffs.shape[0]) + 1 + trig_args = np.outer(harmonics, t) + + result += np.sum(coeffs[:, 0, np.newaxis] * np.sin(trig_args) + + coeffs[:, 1, np.newaxis] * np.cos(trig_args), + axis=0) + return result + return f + + n = len(frame) + class_col = frame[class_column] + classes = frame[class_column].drop_duplicates() + df = frame.drop(class_column, axis=1) + t = np.linspace(-pi, pi, samples) + used_legends = set() + + color_values = _get_standard_colors(num_colors=len(classes), + colormap=colormap, color_type='random', + color=color) + colors = dict(zip(classes, color_values)) + if ax is None: + ax = plt.gca(xlim=(-pi, pi)) + for i in range(n): + row = df.iloc[i].values + f = function(row) + y = f(t) + kls = class_col.iat[i] + label = pprint_thing(kls) + if label not in used_legends: + used_legends.add(label) + ax.plot(t, y, color=colors[kls], label=label, **kwds) + else: + ax.plot(t, y, color=colors[kls], **kwds) + + ax.legend(loc='upper right') + ax.grid() + return ax + + +def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): + import random + + # random.sample(ndarray, int) fails on python 3.3, sigh + data = list(series.values) + samplings = [random.sample(data, size) for _ in range(samples)] + + means = np.array([np.mean(sampling) for sampling in samplings]) + medians = np.array([np.median(sampling) for sampling in samplings]) + midranges = np.array([(min(sampling) + max(sampling)) * 0.5 + for sampling in samplings]) + if fig is None: + fig = plt.figure() + x = list(range(samples)) + axes = [] + ax1 = fig.add_subplot(2, 3, 1) + ax1.set_xlabel("Sample") + axes.append(ax1) + ax1.plot(x, means, **kwds) + ax2 = fig.add_subplot(2, 3, 2) + ax2.set_xlabel("Sample") + axes.append(ax2) + ax2.plot(x, medians, **kwds) + ax3 = fig.add_subplot(2, 3, 3) + ax3.set_xlabel("Sample") + axes.append(ax3) + ax3.plot(x, midranges, **kwds) + ax4 = fig.add_subplot(2, 3, 4) + ax4.set_xlabel("Mean") + axes.append(ax4) + ax4.hist(means, **kwds) + ax5 = fig.add_subplot(2, 3, 5) + ax5.set_xlabel("Median") + axes.append(ax5) + ax5.hist(medians, **kwds) + ax6 = fig.add_subplot(2, 3, 6) + ax6.set_xlabel("Midrange") + axes.append(ax6) + ax6.hist(midranges, **kwds) + for axis in axes: + plt.setp(axis.get_xticklabels(), fontsize=8) + plt.setp(axis.get_yticklabels(), fontsize=8) + return fig + + +def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None, + use_columns=False, xticks=None, colormap=None, + axvlines=True, axvlines_kwds=None, sort_labels=False, + **kwds): + if axvlines_kwds is None: + axvlines_kwds = {'linewidth': 1, 'color': 'black'} + + n = len(frame) + classes = frame[class_column].drop_duplicates() + class_col = frame[class_column] + + if cols is None: + df = frame.drop(class_column, axis=1) + else: + df = frame[cols] + + used_legends = set() + + ncols = len(df.columns) + + # determine values to use for xticks + if use_columns is True: + if not np.all(np.isreal(list(df.columns))): + raise ValueError('Columns must be numeric to be used as xticks') + x = df.columns + elif xticks is not None: + if not np.all(np.isreal(xticks)): + raise ValueError('xticks specified must be numeric') + elif len(xticks) != ncols: + raise ValueError('Length of xticks must match number of columns') + x = xticks + else: + x = list(range(ncols)) + + if ax is None: + ax = plt.gca() + + color_values = _get_standard_colors(num_colors=len(classes), + colormap=colormap, color_type='random', + color=color) + + if sort_labels: + classes = sorted(classes) + color_values = sorted(color_values) + colors = dict(zip(classes, color_values)) + + for i in range(n): + y = df.iloc[i].values + kls = class_col.iat[i] + label = pprint_thing(kls) + if label not in used_legends: + used_legends.add(label) + ax.plot(x, y, color=colors[kls], label=label, **kwds) + else: + ax.plot(x, y, color=colors[kls], **kwds) + + if axvlines: + for i in x: + ax.axvline(i, **axvlines_kwds) + + ax.set_xticks(x) + ax.set_xticklabels(df.columns) + ax.set_xlim(x[0], x[-1]) + ax.legend(loc='upper right') + ax.grid() + return ax + + +def lag_plot(series, lag=1, ax=None, **kwds): + # workaround because `c='b'` is hardcoded in matplotlibs scatter method + kwds.setdefault('c', plt.rcParams['patch.facecolor']) + + data = series.values + y1 = data[:-lag] + y2 = data[lag:] + if ax is None: + ax = plt.gca() + ax.set_xlabel("y(t)") + ax.set_ylabel("y(t + {lag})".format(lag=lag)) + ax.scatter(y1, y2, **kwds) + return ax + + +def autocorrelation_plot(series, ax=None, **kwds): + n = len(series) + data = np.asarray(series) + if ax is None: + ax = plt.gca(xlim=(1, n), ylim=(-1.0, 1.0)) + mean = np.mean(data) + c0 = np.sum((data - mean) ** 2) / float(n) + + def r(h): + return ((data[:n - h] - mean) * + (data[h:] - mean)).sum() / float(n) / c0 + x = np.arange(n) + 1 + y = [r(loc) for loc in x] + z95 = 1.959963984540054 + z99 = 2.5758293035489004 + ax.axhline(y=z99 / np.sqrt(n), linestyle='--', color='grey') + ax.axhline(y=z95 / np.sqrt(n), color='grey') + ax.axhline(y=0.0, color='black') + ax.axhline(y=-z95 / np.sqrt(n), color='grey') + ax.axhline(y=-z99 / np.sqrt(n), linestyle='--', color='grey') + ax.set_xlabel("Lag") + ax.set_ylabel("Autocorrelation") + ax.plot(x, y, **kwds) + if 'label' in kwds: + ax.legend() + ax.grid() + return ax diff --git a/pandas/plotting/_style.py b/pandas/plotting/_matplotlib/style.py similarity index 63% rename from pandas/plotting/_style.py rename to pandas/plotting/_matplotlib/style.py index a22881e5ddf26..ef7bbd2f05703 100644 --- a/pandas/plotting/_style.py +++ b/pandas/plotting/_matplotlib/style.py @@ -1,7 +1,7 @@ # being a bit too dynamic -from contextlib import contextmanager import warnings +import matplotlib.pyplot as plt import numpy as np from pandas.core.dtypes.common import is_list_like @@ -9,8 +9,6 @@ def _get_standard_colors(num_colors=None, colormap=None, color_type='default', color=None): - import matplotlib.pyplot as plt - if color is None and colormap is not None: if isinstance(colormap, str): import matplotlib.cm as cm @@ -91,72 +89,3 @@ def _maybe_valid_colors(colors): colors += colors[:mod] return colors - - -class _Options(dict): - """ - Stores pandas plotting options. - Allows for parameter aliasing so you can just use parameter names that are - the same as the plot function parameters, but is stored in a canonical - format that makes it easy to breakdown into groups later - """ - - # alias so the names are same as plotting method parameter names - _ALIASES = {'x_compat': 'xaxis.compat'} - _DEFAULT_KEYS = ['xaxis.compat'] - - def __init__(self, deprecated=False): - self._deprecated = deprecated - # self['xaxis.compat'] = False - super().__setitem__('xaxis.compat', False) - - def __getitem__(self, key): - key = self._get_canonical_key(key) - if key not in self: - raise ValueError( - '{key} is not a valid pandas plotting option'.format(key=key)) - return super().__getitem__(key) - - def __setitem__(self, key, value): - key = self._get_canonical_key(key) - return super().__setitem__(key, value) - - def __delitem__(self, key): - key = self._get_canonical_key(key) - if key in self._DEFAULT_KEYS: - raise ValueError( - 'Cannot remove default parameter {key}'.format(key=key)) - return super().__delitem__(key) - - def __contains__(self, key): - key = self._get_canonical_key(key) - return super().__contains__(key) - - def reset(self): - """ - Reset the option store to its initial state - - Returns - ------- - None - """ - self.__init__() - - def _get_canonical_key(self, key): - return self._ALIASES.get(key, key) - - @contextmanager - def use(self, key, value): - """ - Temporarily set a parameter value using the with statement. - Aliasing allowed. - """ - old_value = self[key] - try: - self[key] = value - yield self - finally: - self[key] = old_value - - -plot_params = _Options() diff --git a/pandas/plotting/_timeseries.py b/pandas/plotting/_matplotlib/timeseries.py similarity index 95% rename from pandas/plotting/_timeseries.py rename to pandas/plotting/_matplotlib/timeseries.py index 4460537f79b80..37e025b594ef2 100644 --- a/pandas/plotting/_timeseries.py +++ b/pandas/plotting/_matplotlib/timeseries.py @@ -3,6 +3,7 @@ import functools from matplotlib import pylab +import matplotlib.pyplot as plt import numpy as np from pandas._libs.tslibs.frequencies import ( @@ -13,7 +14,7 @@ ABCDatetimeIndex, ABCPeriodIndex, ABCTimedeltaIndex) from pandas.io.formats.printing import pprint_thing -from pandas.plotting._converter import ( +from pandas.plotting._matplotlib.converter import ( TimeSeries_DateFormatter, TimeSeries_DateLocator, TimeSeries_TimedeltaFormatter) import pandas.tseries.frequencies as frequencies @@ -47,7 +48,6 @@ def tsplot(series, plotf, ax=None, **kwargs): # Used inferred freq is possible, need a test case for inferred if ax is None: - import matplotlib.pyplot as plt ax = plt.gca() freq, series = _maybe_resample(series, ax, kwargs) @@ -144,8 +144,12 @@ def _replot_ax(ax, freq, kwargs): # for tsplot if isinstance(plotf, str): - from pandas.plotting._core import _plot_klass - plotf = _plot_klass[plotf]._plot + # XXX _plot_classes is private and shouldn't be imported + # here. But as tsplot is deprecated, and we'll remove this + # code soon, it's probably better to not overcomplicate + # things, and just leave this the way it was implemented + from pandas.plotting._core import _plot_classes + plotf = _plot_classes()[plotf]._plot lines.append(plotf(ax, series.index._mpl_repr(), series.values, **kwds)[0]) diff --git a/pandas/plotting/_tools.py b/pandas/plotting/_matplotlib/tools.py similarity index 95% rename from pandas/plotting/_tools.py rename to pandas/plotting/_matplotlib/tools.py index 39f3554f509a7..e0caab6211ce3 100644 --- a/pandas/plotting/_tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -2,6 +2,7 @@ from math import ceil import warnings +import matplotlib.pyplot as plt import numpy as np from pandas.core.dtypes.common import is_list_like @@ -21,23 +22,6 @@ def format_date_labels(ax, rot): def table(ax, data, rowLabels=None, colLabels=None, **kwargs): - """ - Helper function to convert DataFrame and Series to matplotlib.table - - Parameters - ---------- - ax : Matplotlib axes object - data : DataFrame or Series - data for table contents - kwargs : keywords, optional - keyword arguments which passed to matplotlib.table.table. - If `rowLabels` or `colLabels` is not specified, data index or column - name will be used. - - Returns - ------- - matplotlib table object - """ if isinstance(data, ABCSeries): data = data.to_frame() elif isinstance(data, ABCDataFrame): @@ -183,8 +167,6 @@ def _subplots(naxes=None, sharex=False, sharey=False, squeeze=True, # Four polar axes plt.subplots(2, 2, subplot_kw=dict(polar=True)) """ - import matplotlib.pyplot as plt - if subplot_kw is None: subplot_kw = {} @@ -363,8 +345,6 @@ def _get_xlim(lines): def _set_ticks_props(axes, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None): - import matplotlib.pyplot as plt - for ax in _flatten(axes): if xlabelsize is not None: plt.setp(ax.get_xticklabels(), fontsize=xlabelsize) diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index f153cdeee268f..a3f3f354690df 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -1,13 +1,80 @@ -# being a bit too dynamic -import numpy as np +from contextlib import contextmanager +import warnings from pandas.util._decorators import deprecate_kwarg -from pandas.core.dtypes.missing import notna -from pandas.io.formats.printing import pprint_thing -from pandas.plotting._style import _get_standard_colors -from pandas.plotting._tools import _set_ticks_props, _subplots +def _get_plot_backend(): + # TODO unify with the same function in `_core.py` + try: + import pandas.plotting._matplotlib as plot_backend + except ImportError: + raise ImportError("matplotlib is required for plotting.") + return plot_backend + + +def table(ax, data, rowLabels=None, colLabels=None, **kwargs): + """ + Helper function to convert DataFrame and Series to matplotlib.table + + Parameters + ---------- + ax : Matplotlib axes object + data : DataFrame or Series + data for table contents + kwargs : keywords, optional + keyword arguments which passed to matplotlib.table.table. + If `rowLabels` or `colLabels` is not specified, data index or column + name will be used. + + Returns + ------- + matplotlib table object + """ + plot_backend = _get_plot_backend() + return plot_backend.table(ax=ax, data=data, rowLabels=None, colLabels=None, + **kwargs) + + +def register(explicit=True): + """ + Register Pandas Formatters and Converters with matplotlib + + This function modifies the global ``matplotlib.units.registry`` + dictionary. Pandas adds custom converters for + + * pd.Timestamp + * pd.Period + * np.datetime64 + * datetime.datetime + * datetime.date + * datetime.time + + See Also + -------- + deregister_matplotlib_converter + """ + plot_backend = _get_plot_backend() + plot_backend.register(explicit=explicit) + + +def deregister(): + """ + Remove pandas' formatters and converters + + Removes the custom converters added by :func:`register`. This + attempts to set the state of the registry back to the state before + pandas registered its own units. Converters for pandas' own types like + Timestamp and Period are removed completely. Converters for types + pandas overwrites, like ``datetime.datetime``, are restored to their + original value. + + See Also + -------- + deregister_matplotlib_converters + """ + plot_backend = _get_plot_backend() + plot_backend.deregister() def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, @@ -53,95 +120,11 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D']) >>> scatter_matrix(df, alpha=0.2) """ - - df = frame._get_numeric_data() - n = df.columns.size - naxes = n * n - fig, axes = _subplots(naxes=naxes, figsize=figsize, ax=ax, - squeeze=False) - - # no gaps between subplots - fig.subplots_adjust(wspace=0, hspace=0) - - mask = notna(df) - - marker = _get_marker_compat(marker) - - hist_kwds = hist_kwds or {} - density_kwds = density_kwds or {} - - # GH 14855 - kwds.setdefault('edgecolors', 'none') - - boundaries_list = [] - for a in df.columns: - values = df[a].values[mask[a].values] - rmin_, rmax_ = np.min(values), np.max(values) - rdelta_ext = (rmax_ - rmin_) * range_padding / 2. - boundaries_list.append((rmin_ - rdelta_ext, rmax_ + rdelta_ext)) - - for i, a in enumerate(df.columns): - for j, b in enumerate(df.columns): - ax = axes[i, j] - - if i == j: - values = df[a].values[mask[a].values] - - # Deal with the diagonal by drawing a histogram there. - if diagonal == 'hist': - ax.hist(values, **hist_kwds) - - elif diagonal in ('kde', 'density'): - from scipy.stats import gaussian_kde - y = values - gkde = gaussian_kde(y) - ind = np.linspace(y.min(), y.max(), 1000) - ax.plot(ind, gkde.evaluate(ind), **density_kwds) - - ax.set_xlim(boundaries_list[i]) - - else: - common = (mask[a] & mask[b]).values - - ax.scatter(df[b][common], df[a][common], - marker=marker, alpha=alpha, **kwds) - - ax.set_xlim(boundaries_list[j]) - ax.set_ylim(boundaries_list[i]) - - ax.set_xlabel(b) - ax.set_ylabel(a) - - if j != 0: - ax.yaxis.set_visible(False) - if i != n - 1: - ax.xaxis.set_visible(False) - - if len(df.columns) > 1: - lim1 = boundaries_list[0] - locs = axes[0][1].yaxis.get_majorticklocs() - locs = locs[(lim1[0] <= locs) & (locs <= lim1[1])] - adj = (locs - lim1[0]) / (lim1[1] - lim1[0]) - - lim0 = axes[0][0].get_ylim() - adj = adj * (lim0[1] - lim0[0]) + lim0[0] - axes[0][0].yaxis.set_ticks(adj) - - if np.all(locs == locs.astype(int)): - # if all ticks are int - locs = locs.astype(int) - axes[0][0].yaxis.set_ticklabels(locs) - - _set_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) - - return axes - - -def _get_marker_compat(marker): - import matplotlib.lines as mlines - if marker not in mlines.lineMarkers: - return 'o' - return marker + plot_backend = _get_plot_backend() + return plot_backend.scatter_matrix( + frame=frame, alpha=alpha, figsize=figsize, ax=ax, grid=grid, + diagonal=diagonal, marker=marker, density_kwds=density_kwds, + hist_kwds=hist_kwds, range_padding=range_padding, **kwds) def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): @@ -206,68 +189,9 @@ def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): ... }) >>> rad_viz = pd.plotting.radviz(df, 'Category') # doctest: +SKIP """ - import matplotlib.pyplot as plt - import matplotlib.patches as patches - - def normalize(series): - a = min(series) - b = max(series) - return (series - a) / (b - a) - - n = len(frame) - classes = frame[class_column].drop_duplicates() - class_col = frame[class_column] - df = frame.drop(class_column, axis=1).apply(normalize) - - if ax is None: - ax = plt.gca(xlim=[-1, 1], ylim=[-1, 1]) - - to_plot = {} - colors = _get_standard_colors(num_colors=len(classes), colormap=colormap, - color_type='random', color=color) - - for kls in classes: - to_plot[kls] = [[], []] - - m = len(frame.columns) - 1 - s = np.array([(np.cos(t), np.sin(t)) - for t in [2.0 * np.pi * (i / float(m)) - for i in range(m)]]) - - for i in range(n): - row = df.iloc[i].values - row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1) - y = (s * row_).sum(axis=0) / row.sum() - kls = class_col.iat[i] - to_plot[kls][0].append(y[0]) - to_plot[kls][1].append(y[1]) - - for i, kls in enumerate(classes): - ax.scatter(to_plot[kls][0], to_plot[kls][1], color=colors[i], - label=pprint_thing(kls), **kwds) - ax.legend() - - ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor='none')) - - for xy, name in zip(s, df.columns): - - ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='gray')) - - if xy[0] < 0.0 and xy[1] < 0.0: - ax.text(xy[0] - 0.025, xy[1] - 0.025, name, - ha='right', va='top', size='small') - elif xy[0] < 0.0 and xy[1] >= 0.0: - ax.text(xy[0] - 0.025, xy[1] + 0.025, name, - ha='right', va='bottom', size='small') - elif xy[0] >= 0.0 and xy[1] < 0.0: - ax.text(xy[0] + 0.025, xy[1] - 0.025, name, - ha='left', va='top', size='small') - elif xy[0] >= 0.0 and xy[1] >= 0.0: - ax.text(xy[0] + 0.025, xy[1] + 0.025, name, - ha='left', va='bottom', size='small') - - ax.axis('equal') - return ax + plot_backend = _get_plot_backend() + return plot_backend.radviz(frame=frame, class_column=class_column, ax=ax, + color=color, colormap=colormap, **kwds) @deprecate_kwarg(old_arg_name='data', new_arg_name='frame') @@ -305,59 +229,10 @@ def andrews_curves(frame, class_column, ax=None, samples=200, color=None, ------- class:`matplotlip.axis.Axes` """ - from math import sqrt, pi - import matplotlib.pyplot as plt - - def function(amplitudes): - def f(t): - x1 = amplitudes[0] - result = x1 / sqrt(2.0) - - # Take the rest of the coefficients and resize them - # appropriately. Take a copy of amplitudes as otherwise numpy - # deletes the element from amplitudes itself. - coeffs = np.delete(np.copy(amplitudes), 0) - coeffs.resize(int((coeffs.size + 1) / 2), 2) - - # Generate the harmonics and arguments for the sin and cos - # functions. - harmonics = np.arange(0, coeffs.shape[0]) + 1 - trig_args = np.outer(harmonics, t) - - result += np.sum(coeffs[:, 0, np.newaxis] * np.sin(trig_args) + - coeffs[:, 1, np.newaxis] * np.cos(trig_args), - axis=0) - return result - return f - - n = len(frame) - class_col = frame[class_column] - classes = frame[class_column].drop_duplicates() - df = frame.drop(class_column, axis=1) - t = np.linspace(-pi, pi, samples) - used_legends = set() - - color_values = _get_standard_colors(num_colors=len(classes), - colormap=colormap, color_type='random', - color=color) - colors = dict(zip(classes, color_values)) - if ax is None: - ax = plt.gca(xlim=(-pi, pi)) - for i in range(n): - row = df.iloc[i].values - f = function(row) - y = f(t) - kls = class_col.iat[i] - label = pprint_thing(kls) - if label not in used_legends: - used_legends.add(label) - ax.plot(t, y, color=colors[kls], label=label, **kwds) - else: - ax.plot(t, y, color=colors[kls], **kwds) - - ax.legend(loc='upper right') - ax.grid() - return ax + plot_backend = _get_plot_backend() + return plot_backend.andrews_curves(frame=frame, class_column=class_column, + ax=ax, samples=samples, color=color, + colormap=colormap, **kwds) def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): @@ -406,49 +281,9 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): >>> s = pd.Series(np.random.uniform(size=100)) >>> fig = pd.plotting.bootstrap_plot(s) # doctest: +SKIP """ - import random - import matplotlib.pyplot as plt - - # random.sample(ndarray, int) fails on python 3.3, sigh - data = list(series.values) - samplings = [random.sample(data, size) for _ in range(samples)] - - means = np.array([np.mean(sampling) for sampling in samplings]) - medians = np.array([np.median(sampling) for sampling in samplings]) - midranges = np.array([(min(sampling) + max(sampling)) * 0.5 - for sampling in samplings]) - if fig is None: - fig = plt.figure() - x = list(range(samples)) - axes = [] - ax1 = fig.add_subplot(2, 3, 1) - ax1.set_xlabel("Sample") - axes.append(ax1) - ax1.plot(x, means, **kwds) - ax2 = fig.add_subplot(2, 3, 2) - ax2.set_xlabel("Sample") - axes.append(ax2) - ax2.plot(x, medians, **kwds) - ax3 = fig.add_subplot(2, 3, 3) - ax3.set_xlabel("Sample") - axes.append(ax3) - ax3.plot(x, midranges, **kwds) - ax4 = fig.add_subplot(2, 3, 4) - ax4.set_xlabel("Mean") - axes.append(ax4) - ax4.hist(means, **kwds) - ax5 = fig.add_subplot(2, 3, 5) - ax5.set_xlabel("Median") - axes.append(ax5) - ax5.hist(medians, **kwds) - ax6 = fig.add_subplot(2, 3, 6) - ax6.set_xlabel("Midrange") - axes.append(ax6) - ax6.hist(midranges, **kwds) - for axis in axes: - plt.setp(axis.get_xticklabels(), fontsize=8) - plt.setp(axis.get_yticklabels(), fontsize=8) - return fig + plot_backend = _get_plot_backend() + return plot_backend.bootstrap_plot(series=series, fig=fig, size=size, + samples=samples, **kwds) @deprecate_kwarg(old_arg_name='colors', new_arg_name='color') @@ -502,69 +337,12 @@ def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None, color=('#556270', '#4ECDC4', '#C7F464')) >>> plt.show() """ - if axvlines_kwds is None: - axvlines_kwds = {'linewidth': 1, 'color': 'black'} - import matplotlib.pyplot as plt - - n = len(frame) - classes = frame[class_column].drop_duplicates() - class_col = frame[class_column] - - if cols is None: - df = frame.drop(class_column, axis=1) - else: - df = frame[cols] - - used_legends = set() - - ncols = len(df.columns) - - # determine values to use for xticks - if use_columns is True: - if not np.all(np.isreal(list(df.columns))): - raise ValueError('Columns must be numeric to be used as xticks') - x = df.columns - elif xticks is not None: - if not np.all(np.isreal(xticks)): - raise ValueError('xticks specified must be numeric') - elif len(xticks) != ncols: - raise ValueError('Length of xticks must match number of columns') - x = xticks - else: - x = list(range(ncols)) - - if ax is None: - ax = plt.gca() - - color_values = _get_standard_colors(num_colors=len(classes), - colormap=colormap, color_type='random', - color=color) - - if sort_labels: - classes = sorted(classes) - color_values = sorted(color_values) - colors = dict(zip(classes, color_values)) - - for i in range(n): - y = df.iloc[i].values - kls = class_col.iat[i] - label = pprint_thing(kls) - if label not in used_legends: - used_legends.add(label) - ax.plot(x, y, color=colors[kls], label=label, **kwds) - else: - ax.plot(x, y, color=colors[kls], **kwds) - - if axvlines: - for i in x: - ax.axvline(i, **axvlines_kwds) - - ax.set_xticks(x) - ax.set_xticklabels(df.columns) - ax.set_xlim(x[0], x[-1]) - ax.legend(loc='upper right') - ax.grid() - return ax + plot_backend = _get_plot_backend() + return plot_backend.parallel_coordinates( + frame=frame, class_column=class_column, cols=cols, ax=ax, color=color, + use_columns=use_columns, xticks=xticks, colormap=colormap, + axvlines=axvlines, axvlines_kwds=axvlines_kwds, + sort_labels=sort_labels, **kwds) def lag_plot(series, lag=1, ax=None, **kwds): @@ -581,20 +359,8 @@ def lag_plot(series, lag=1, ax=None, **kwds): ------- class:`matplotlib.axis.Axes` """ - import matplotlib.pyplot as plt - - # workaround because `c='b'` is hardcoded in matplotlibs scatter method - kwds.setdefault('c', plt.rcParams['patch.facecolor']) - - data = series.values - y1 = data[:-lag] - y2 = data[lag:] - if ax is None: - ax = plt.gca() - ax.set_xlabel("y(t)") - ax.set_ylabel("y(t + {lag})".format(lag=lag)) - ax.scatter(y1, y2, **kwds) - return ax + plot_backend = _get_plot_backend() + return plot_backend.lag_plot(series=series, lag=lag, ax=ax, **kwds) def autocorrelation_plot(series, ax=None, **kwds): @@ -612,30 +378,98 @@ def autocorrelation_plot(series, ax=None, **kwds): ------- class:`matplotlib.axis.Axes` """ - import matplotlib.pyplot as plt - n = len(series) - data = np.asarray(series) - if ax is None: - ax = plt.gca(xlim=(1, n), ylim=(-1.0, 1.0)) - mean = np.mean(data) - c0 = np.sum((data - mean) ** 2) / float(n) - - def r(h): - return ((data[:n - h] - mean) * - (data[h:] - mean)).sum() / float(n) / c0 - x = np.arange(n) + 1 - y = [r(loc) for loc in x] - z95 = 1.959963984540054 - z99 = 2.5758293035489004 - ax.axhline(y=z99 / np.sqrt(n), linestyle='--', color='grey') - ax.axhline(y=z95 / np.sqrt(n), color='grey') - ax.axhline(y=0.0, color='black') - ax.axhline(y=-z95 / np.sqrt(n), color='grey') - ax.axhline(y=-z99 / np.sqrt(n), linestyle='--', color='grey') - ax.set_xlabel("Lag") - ax.set_ylabel("Autocorrelation") - ax.plot(x, y, **kwds) - if 'label' in kwds: - ax.legend() - ax.grid() - return ax + plot_backend = _get_plot_backend() + return plot_backend.autocorrelation_plot(series=series, ax=ax, **kwds) + + +def tsplot(series, plotf, ax=None, **kwargs): + """ + Plots a Series on the given Matplotlib axes or the current axes + + Parameters + ---------- + axes : Axes + series : Series + + Notes + _____ + Supports same kwargs as Axes.plot + + + .. deprecated:: 0.23.0 + Use Series.plot() instead + """ + warnings.warn("'tsplot' is deprecated and will be removed in a " + "future version. Please use Series.plot() instead.", + FutureWarning, stacklevel=2) + plot_backend = _get_plot_backend() + return plot_backend.tsplot(series=series, plotf=plotf, ax=ax, **kwargs) + + +class _Options(dict): + """ + Stores pandas plotting options. + Allows for parameter aliasing so you can just use parameter names that are + the same as the plot function parameters, but is stored in a canonical + format that makes it easy to breakdown into groups later + """ + + # alias so the names are same as plotting method parameter names + _ALIASES = {'x_compat': 'xaxis.compat'} + _DEFAULT_KEYS = ['xaxis.compat'] + + def __init__(self, deprecated=False): + self._deprecated = deprecated + # self['xaxis.compat'] = False + super().__setitem__('xaxis.compat', False) + + def __getitem__(self, key): + key = self._get_canonical_key(key) + if key not in self: + raise ValueError( + '{key} is not a valid pandas plotting option'.format(key=key)) + return super().__getitem__(key) + + def __setitem__(self, key, value): + key = self._get_canonical_key(key) + return super().__setitem__(key, value) + + def __delitem__(self, key): + key = self._get_canonical_key(key) + if key in self._DEFAULT_KEYS: + raise ValueError( + 'Cannot remove default parameter {key}'.format(key=key)) + return super().__delitem__(key) + + def __contains__(self, key): + key = self._get_canonical_key(key) + return super().__contains__(key) + + def reset(self): + """ + Reset the option store to its initial state + + Returns + ------- + None + """ + self.__init__() + + def _get_canonical_key(self, key): + return self._ALIASES.get(key, key) + + @contextmanager + def use(self, key, value): + """ + Temporarily set a parameter value using the with statement. + Aliasing allowed. + """ + old_value = self[key] + try: + self[key] = value + yield self + finally: + self[key] = old_value + + +plot_params = _Options() diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 73fc59389181a..f789e61bc0df1 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -17,9 +17,6 @@ from pandas.util.testing import ( assert_is_valid_plot_return_object, ensure_clean) -import pandas.plotting as plotting -from pandas.plotting._tools import _flatten - """ This is a common base class used for various plotting tests @@ -32,11 +29,12 @@ class TestPlotBase: def setup_method(self, method): import matplotlib as mpl + from pandas.plotting._matplotlib import compat mpl.rcdefaults() - self.mpl_ge_2_2_3 = plotting._compat._mpl_ge_2_2_3() - self.mpl_ge_3_0_0 = plotting._compat._mpl_ge_3_0_0() - self.mpl_ge_3_1_0 = plotting._compat._mpl_ge_3_1_0() + self.mpl_ge_2_2_3 = compat._mpl_ge_2_2_3() + self.mpl_ge_3_0_0 = compat._mpl_ge_3_0_0() + self.mpl_ge_3_1_0 = compat._mpl_ge_3_1_0() self.bp_n_objects = 7 self.polycollection_factor = 2 @@ -309,6 +307,8 @@ def _check_axes_shape(self, axes, axes_num=None, layout=None, figsize : tuple expected figsize. default is matplotlib default """ + from pandas.plotting._matplotlib.tools import _flatten + if figsize is None: figsize = self.default_figsize visible_axes = self._flatten_visible(axes) @@ -346,6 +346,8 @@ def _flatten_visible(self, axes): axes : matplotlib Axes object, or its list-like """ + from pandas.plotting._matplotlib.tools import _flatten + axes = _flatten(axes) axes = [ax for ax in axes if ax.get_visible()] return axes diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index db5316eb3c15d..10743ca95e29e 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -161,7 +161,7 @@ def test_high_freq(self): _check_plot_works(ser.plot, ax=ax) def test_get_datevalue(self): - from pandas.plotting._converter import get_datevalue + from pandas.plotting._matplotlib.converter import get_datevalue assert get_datevalue(None, 'D') is None assert get_datevalue(1987, 'A') == 1987 assert (get_datevalue(Period(1987, 'A'), 'M') == @@ -285,7 +285,7 @@ def test_plot_multiple_inferred_freq(self): @pytest.mark.slow def test_uhf(self): - import pandas.plotting._converter as conv + import pandas.plotting._matplotlib.converter as conv idx = date_range('2012-6-22 21:59:51.960928', freq='L', periods=500) df = DataFrame(np.random.randn(len(idx), 2), index=idx) @@ -416,7 +416,7 @@ def _test(ax): _test(ax) def test_get_finder(self): - import pandas.plotting._converter as conv + import pandas.plotting._matplotlib.converter as conv assert conv.get_finder('B') == conv._daily_finder assert conv.get_finder('D') == conv._daily_finder diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 9c7a0abf981d1..4ee918fa48dab 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -23,7 +23,6 @@ from pandas.io.formats.printing import pprint_thing import pandas.plotting as plotting -from pandas.plotting._compat import _mpl_ge_3_1_0 @td.skip_if_no_mpl @@ -50,6 +49,8 @@ def _assert_xtickslabels_visibility(self, axes, expected): @pytest.mark.slow def test_plot(self): + from pandas.plotting._matplotlib.compat import _mpl_ge_3_1_0 + df = self.tdf _check_plot_works(df.plot, grid=False) # _check_plot_works adds an ax so catch warning. see GH #13188 @@ -2737,7 +2738,7 @@ def test_memory_leak(self): import gc results = {} - for kind in plotting._core._plot_klass.keys(): + for kind in plotting._core._plot_classes().keys(): args = {} if kind in ['hexbin', 'scatter', 'pie']: diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index f3f6c9c7fc2d4..79ce418768044 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -12,8 +12,6 @@ from pandas.tests.plotting.common import TestPlotBase, _check_plot_works import pandas.util.testing as tm -from pandas.plotting._core import grouped_hist - @td.skip_if_no_mpl class TestSeriesPlots(TestPlotBase): @@ -266,12 +264,13 @@ class TestDataFrameGroupByPlots(TestPlotBase): @pytest.mark.slow def test_grouped_hist_legacy(self): from matplotlib.patches import Rectangle + from pandas.plotting._matplotlib.hist import _grouped_hist df = DataFrame(randn(500, 2), columns=['A', 'B']) df['C'] = np.random.randint(0, 4, 500) df['D'] = ['X'] * 500 - axes = grouped_hist(df.A, by=df.C) + axes = _grouped_hist(df.A, by=df.C) self._check_axes_shape(axes, axes_num=4, layout=(2, 2)) tm.close() @@ -289,9 +288,9 @@ def test_grouped_hist_legacy(self): xf, yf = 20, 18 xrot, yrot = 30, 40 - axes = grouped_hist(df.A, by=df.C, cumulative=True, - bins=4, xlabelsize=xf, xrot=xrot, - ylabelsize=yf, yrot=yrot, density=True) + axes = _grouped_hist(df.A, by=df.C, cumulative=True, + bins=4, xlabelsize=xf, xrot=xrot, + ylabelsize=yf, yrot=yrot, density=True) # height of last bin (index 5) must be 1.0 for ax in axes.ravel(): rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] @@ -301,14 +300,14 @@ def test_grouped_hist_legacy(self): ylabelsize=yf, yrot=yrot) tm.close() - axes = grouped_hist(df.A, by=df.C, log=True) + axes = _grouped_hist(df.A, by=df.C, log=True) # scale of y must be 'log' self._check_ax_scales(axes, yaxis='log') tm.close() # propagate attr exception from matplotlib.Axes.hist with pytest.raises(AttributeError): - grouped_hist(df.A, by=df.C, foo='bar') + _grouped_hist(df.A, by=df.C, foo='bar') with tm.assert_produces_warning(FutureWarning): df.hist(by='C', figsize='default') diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 5ffbe9ec8fd47..a1c12fc73362a 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -308,13 +308,13 @@ def test_get_standard_colors_random_seed(self): assert rand1 != rand2 # Make sure it produces the same colors every time it's called - from pandas.plotting._style import _get_standard_colors + from pandas.plotting._matplotlib.style import _get_standard_colors color1 = _get_standard_colors(1, color_type='random') color2 = _get_standard_colors(1, color_type='random') assert color1 == color2 def test_get_standard_colors_default_num_colors(self): - from pandas.plotting._style import _get_standard_colors + from pandas.plotting._matplotlib.style import _get_standard_colors # Make sure the default color_types returns the specified amount color1 = _get_standard_colors(1, color_type='default') @@ -345,9 +345,9 @@ def test_get_standard_colors_no_appending(self): # Make sure not to add more colors so that matplotlib can cycle # correctly. from matplotlib import cm + from pandas.plotting._matplotlib.style import _get_standard_colors color_before = cm.gnuplot(range(5)) - color_after = plotting._style._get_standard_colors( - 1, color=color_before) + color_after = _get_standard_colors(1, color=color_before) assert len(color_after) == len(color_before) df = DataFrame(np.random.randn(48, 4), columns=list("ABCD")) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 737a69a06af5a..9dabb35196741 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -786,7 +786,7 @@ def test_series_grid_settings(self): @pytest.mark.slow def test_standard_colors(self): - from pandas.plotting._style import _get_standard_colors + from pandas.plotting._matplotlib.style import _get_standard_colors for c in ['r', 'red', 'green', '#FF0000']: result = _get_standard_colors(1, color=c) @@ -804,7 +804,7 @@ def test_standard_colors(self): @pytest.mark.slow def test_standard_colors_all(self): import matplotlib.colors as colors - from pandas.plotting._style import _get_standard_colors + from pandas.plotting._matplotlib.style import _get_standard_colors # multiple colors like mediumaquamarine for c in colors.cnames: diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index 05dd7cea1bd2f..e1e4dd4cf4b8a 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -1,7 +1,11 @@ # flake8: noqa import warnings -from pandas.plotting._converter import ( +# TODO `_matplotlib` module should be private, so the plotting backend +# can be change. Decide whether all these should be public and exponsed +# in `pandas.plotting`, or remove from here (I guess they are here for +# legacy reasons +from pandas.plotting._matplotlib.converter import ( DatetimeConverter, MilliSecondLocator, PandasAutoDateFormatter, PandasAutoDateLocator, PeriodConverter, TimeConverter, TimeFormatter, TimeSeries_DateFormatter, TimeSeries_DateLocator, get_datevalue, @@ -9,8 +13,8 @@ def register(): - from pandas.plotting._converter import register as register_ + from pandas.plotting import register_matplotlib_converters msg = ("'pandas.tseries.converter.register' has been moved and renamed to " "'pandas.plotting.register_matplotlib_converters'. ") warnings.warn(msg, FutureWarning, stacklevel=2) - register_() + register_matplotlib_converters() diff --git a/pandas/tseries/plotting.py b/pandas/tseries/plotting.py index 302016907635d..df41b4b5b40d9 100644 --- a/pandas/tseries/plotting.py +++ b/pandas/tseries/plotting.py @@ -1,3 +1,3 @@ # flake8: noqa -from pandas.plotting._timeseries import tsplot +from pandas.plotting._matplotlib.timeseries import tsplot