From d1587eb98aa49d9fd437257eb652d4b3875c3d14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Fri, 22 Jul 2022 21:07:22 -0400 Subject: [PATCH 1/2] TYP: pandas/plotting annotations from pandas-stubs --- pandas/core/arrays/arrow/array.py | 2 +- pandas/plotting/_core.py | 58 +++++++++--------- pandas/plotting/_matplotlib/boxplot.py | 22 +++---- pandas/plotting/_matplotlib/converter.py | 42 +++++++------- pandas/plotting/_matplotlib/core.py | 25 ++++---- pandas/plotting/_matplotlib/hist.py | 20 ++++--- pandas/plotting/_matplotlib/misc.py | 12 ++-- pandas/plotting/_matplotlib/timeseries.py | 2 +- pandas/plotting/_matplotlib/tools.py | 2 +- pandas/plotting/_misc.py | 71 +++++++++++++++-------- 10 files changed, 148 insertions(+), 108 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index b0e4d46564ba4..a882d3a955469 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -444,7 +444,7 @@ def dropna(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT: else: return type(self)(pc.drop_null(self._data)) - def isin(self: ArrowExtensionArrayT, values) -> npt.NDArray[np.bool_]: + def isin(self, values) -> npt.NDArray[np.bool_]: if pa_version_under2p0: fallback_performancewarning(version="2") return super().isin(values) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index bc39d1f619f49..0d69a52eb15f1 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -27,6 +27,8 @@ from pandas.core.base import PandasObject if TYPE_CHECKING: + from matplotlib.axes import Axes + from pandas import DataFrame @@ -463,16 +465,16 @@ def hist_frame( @Substitution(backend="") @Appender(_boxplot_doc) def boxplot( - data, - column=None, - by=None, - ax=None, - fontsize=None, - rot=0, - grid=True, - figsize=None, - layout=None, - return_type=None, + data: DataFrame, + column: str | list[str] | None = None, + by: str | list[str] | None = None, + ax: Axes | None = None, + fontsize: float | str | None = None, + rot: int = 0, + grid: bool = True, + figsize: tuple[float, float] | None = None, + layout: tuple[int, int] | None = None, + return_type: str | None = None, **kwargs, ): plot_backend = _get_plot_backend("matplotlib") @@ -499,8 +501,8 @@ def boxplot_frame( by=None, ax=None, fontsize=None, - rot=0, - grid=True, + rot: int = 0, + grid: bool = True, figsize=None, layout=None, return_type=None, @@ -525,16 +527,16 @@ def boxplot_frame( def boxplot_frame_groupby( grouped, - subplots=True, + subplots: bool = True, column=None, fontsize=None, - rot=0, - grid=True, + rot: int = 0, + grid: bool = True, ax=None, figsize=None, layout=None, - sharex=False, - sharey=True, + sharex: bool = False, + sharey: bool = True, backend=None, **kwargs, ): @@ -1041,7 +1043,7 @@ def __call__(self, *args, **kwargs): ) @Substitution(kind="line") @Appender(_bar_or_line_doc) - def line(self, x=None, y=None, **kwargs): + def line(self, x=None, y=None, **kwargs) -> PlotAccessor: """ Plot Series or DataFrame as lines. @@ -1128,7 +1130,7 @@ def line(self, x=None, y=None, **kwargs): ) @Substitution(kind="bar") @Appender(_bar_or_line_doc) - def bar(self, x=None, y=None, **kwargs): + def bar(self, x=None, y=None, **kwargs) -> PlotAccessor: """ Vertical bar plot. @@ -1214,7 +1216,7 @@ def bar(self, x=None, y=None, **kwargs): ) @Substitution(kind="bar") @Appender(_bar_or_line_doc) - def barh(self, x=None, y=None, **kwargs): + def barh(self, x=None, y=None, **kwargs) -> PlotAccessor: """ Make a horizontal bar plot. @@ -1226,7 +1228,7 @@ def barh(self, x=None, y=None, **kwargs): """ return self(kind="barh", x=x, y=y, **kwargs) - def box(self, by=None, **kwargs): + def box(self, by=None, **kwargs) -> PlotAccessor: r""" Make a box plot of the DataFrame columns. @@ -1293,7 +1295,7 @@ def box(self, by=None, **kwargs): """ return self(kind="box", by=by, **kwargs) - def hist(self, by=None, bins=10, **kwargs): + def hist(self, by=None, bins: int = 10, **kwargs) -> PlotAccessor: """ Draw one histogram of the DataFrame's columns. @@ -1355,7 +1357,7 @@ def hist(self, by=None, bins=10, **kwargs): """ return self(kind="hist", by=by, bins=bins, **kwargs) - def kde(self, bw_method=None, ind=None, **kwargs): + def kde(self, bw_method=None, ind=None, **kwargs) -> PlotAccessor: """ Generate Kernel Density Estimate plot using Gaussian kernels. @@ -1465,7 +1467,7 @@ def kde(self, bw_method=None, ind=None, **kwargs): density = kde - def area(self, x=None, y=None, **kwargs): + def area(self, x=None, y=None, **kwargs) -> PlotAccessor: """ Draw a stacked area plot. @@ -1538,7 +1540,7 @@ def area(self, x=None, y=None, **kwargs): """ return self(kind="area", x=x, y=y, **kwargs) - def pie(self, **kwargs): + def pie(self, **kwargs) -> PlotAccessor: """ Generate a pie plot. @@ -1593,7 +1595,7 @@ def pie(self, **kwargs): raise ValueError("pie requires either y column or 'subplots=True'") return self(kind="pie", **kwargs) - def scatter(self, x, y, s=None, c=None, **kwargs): + def scatter(self, x, y, s=None, c=None, **kwargs) -> PlotAccessor: """ Create a scatter plot with varying marker point size and color. @@ -1699,7 +1701,9 @@ def scatter(self, x, y, s=None, c=None, **kwargs): return self(kind="scatter", x=x, y=y, **kwargs) - def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs): + def hexbin( + self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs + ) -> PlotAccessor: """ Generate a hexagonal binning plot. diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index a49b035b1aaf1..045c27bb8fe56 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -48,7 +48,7 @@ class BP(NamedTuple): ax: Axes lines: dict[str, list[Line2D]] - def __init__(self, data, return_type="axes", **kwargs) -> None: + def __init__(self, data, return_type: str = "axes", **kwargs) -> None: # Do not call LinePlot.__init__ which may fill nan if return_type not in self._valid_return_types: raise ValueError("return_type must be {None, 'axes', 'dict', 'both'}") @@ -117,7 +117,7 @@ def _validate_color_args(self): def _get_colors(self, num_colors=None, color_kwds="color"): pass - def maybe_color_bp(self, bp): + def maybe_color_bp(self, bp) -> None: if isinstance(self.color, dict): boxes = self.color.get("boxes", self._boxes_c) whiskers = self.color.get("whiskers", self._whiskers_c) @@ -292,8 +292,8 @@ def boxplot( by=None, ax=None, fontsize=None, - rot=0, - grid=True, + rot: int = 0, + grid: bool = True, figsize=None, layout=None, return_type=None, @@ -443,8 +443,8 @@ def boxplot_frame( by=None, ax=None, fontsize=None, - rot=0, - grid=True, + rot: int = 0, + grid: bool = True, figsize=None, layout=None, return_type=None, @@ -471,16 +471,16 @@ def boxplot_frame( def boxplot_frame_groupby( grouped, - subplots=True, + subplots: bool = True, column=None, fontsize=None, - rot=0, - grid=True, + rot: int = 0, + grid: bool = True, ax=None, figsize=None, layout=None, - sharex=False, - sharey=True, + sharex: bool = False, + sharey: bool = True, **kwds, ): if subplots is True: diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index 873084393371c..8510a7acac117 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -10,6 +10,8 @@ import functools from typing import ( Any, + Final, + Iterator, cast, ) @@ -56,14 +58,14 @@ import pandas.core.tools.datetimes as tools # constants -HOURS_PER_DAY = 24.0 -MIN_PER_HOUR = 60.0 -SEC_PER_MIN = 60.0 +HOURS_PER_DAY: Final = 24.0 +MIN_PER_HOUR: Final = 60.0 +SEC_PER_MIN: Final = 60.0 -SEC_PER_HOUR = SEC_PER_MIN * MIN_PER_HOUR -SEC_PER_DAY = SEC_PER_HOUR * HOURS_PER_DAY +SEC_PER_HOUR: Final = SEC_PER_MIN * MIN_PER_HOUR +SEC_PER_DAY: Final = SEC_PER_HOUR * HOURS_PER_DAY -MUSEC_PER_DAY = 10**6 * SEC_PER_DAY +MUSEC_PER_DAY: Final = 10**6 * SEC_PER_DAY _mpl_units = {} # Cache for units overwritten by us @@ -94,7 +96,7 @@ def wrapper(*args, **kwargs): @contextlib.contextmanager -def pandas_converters(): +def pandas_converters() -> Iterator[None]: """ Context manager registering pandas' converters for a plot. @@ -115,7 +117,7 @@ def pandas_converters(): deregister() -def register(): +def register() -> None: pairs = get_pairs() for type_, cls in pairs: # Cache previous converter if present @@ -126,7 +128,7 @@ def register(): units.registry[type_] = cls() -def deregister(): +def deregister() -> None: # Renamed in pandas.plotting.__init__ for type_, cls in get_pairs(): # We use type to catch our classes directly, no inheritance @@ -187,7 +189,7 @@ class TimeFormatter(Formatter): def __init__(self, locs) -> None: self.locs = locs - def __call__(self, x, pos=0) -> str: + def __call__(self, x, pos: int = 0) -> str: """ Return the time of day as a formatted string. @@ -339,7 +341,7 @@ def axisinfo(unit: tzinfo | None, axis) -> units.AxisInfo: class PandasAutoDateFormatter(dates.AutoDateFormatter): - def __init__(self, locator, tz=None, defaultfmt="%Y-%m-%d") -> None: + def __init__(self, locator, tz=None, defaultfmt: str = "%Y-%m-%d") -> None: dates.AutoDateFormatter.__init__(self, locator, tz, defaultfmt) @@ -937,12 +939,12 @@ class TimeSeries_DateLocator(Locator): def __init__( self, freq: BaseOffset, - minor_locator=False, - dynamic_mode=True, - base=1, - quarter=1, - month=1, - day=1, + minor_locator: bool = False, + dynamic_mode: bool = True, + base: int = 1, + quarter: int = 1, + month: int = 1, + day: int = 1, plot_obj=None, ) -> None: freq = to_offset(freq) @@ -1053,7 +1055,7 @@ def _set_default_format(self, vmin, vmax): self.formatdict = {x: f for (x, _, _, f) in format} return self.formatdict - def set_locs(self, locs): + def set_locs(self, locs) -> None: """Sets the locations of the ticks""" # don't actually use the locs. This is just needed to work with # matplotlib. Force to use vmin, vmax @@ -1068,7 +1070,7 @@ def set_locs(self, locs): (vmin, vmax) = (vmax, vmin) self._set_default_format(vmin, vmax) - def __call__(self, x, pos=0) -> str: + def __call__(self, x, pos: int = 0) -> str: if self.formatdict is None: return "" @@ -1103,7 +1105,7 @@ def format_timedelta_ticks(x, pos, n_decimals: int) -> str: s = f"{int(d):d} days {s}" return s - def __call__(self, x, pos=0) -> str: + def __call__(self, x, pos: int = 0) -> str: (vmin, vmax) = tuple(self.axis.get_view_interval()) n_decimals = int(np.ceil(np.log10(100 * 10**9 / abs(vmax - vmin)))) if n_decimals > 9: diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 301474edc6a8e..ee7493813f13a 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -118,11 +118,11 @@ def __init__( by: IndexLabel | None = None, subplots: bool | Sequence[Sequence[str]] = False, sharex=None, - sharey=False, - use_index=True, + sharey: bool = False, + use_index: bool = True, figsize=None, grid=None, - legend=True, + legend: bool | str = True, rot=None, ax=None, fig=None, @@ -133,13 +133,13 @@ def __init__( yticks=None, xlabel: Hashable | None = None, ylabel: Hashable | None = None, - sort_columns=False, + sort_columns: bool = False, fontsize=None, - secondary_y=False, + secondary_y: bool | tuple | list | np.ndarray = False, colormap=None, - table=False, + table: bool = False, layout=None, - include_bool=False, + include_bool: bool = False, column: IndexLabel | None = None, **kwds, ) -> None: @@ -437,10 +437,10 @@ def nseries(self) -> int: else: return self.data.shape[1] - def draw(self): + def draw(self) -> None: self.plt.draw_if_interactive() - def generate(self): + def generate(self) -> None: self._args_adjust() self._compute_plot_data() self._setup_subplots() @@ -547,8 +547,11 @@ def result(self): return self.axes else: sec_true = isinstance(self.secondary_y, bool) and self.secondary_y + # error: Argument 1 to "len" has incompatible type "Union[bool, + # Tuple[Any, ...], List[Any], ndarray[Any, Any]]"; expected "Sized" all_sec = ( - is_list_like(self.secondary_y) and len(self.secondary_y) == self.nseries + is_list_like(self.secondary_y) + and len(self.secondary_y) == self.nseries # type: ignore[arg-type] ) if sec_true or all_sec: # if all data is plotted on secondary, return right axes @@ -937,7 +940,7 @@ def _get_ax(self, i: int): return ax @classmethod - def get_default_ax(cls, ax): + def get_default_ax(cls, ax) -> None: import matplotlib.pyplot as plt if ax is None and len(plt.get_fignums()) > 0: diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 77496cf049f3d..3b151d67c70be 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -49,7 +49,13 @@ class HistPlot(LinePlot): def _kind(self) -> Literal["hist", "kde"]: return "hist" - def __init__(self, data, bins=10, bottom=0, **kwargs) -> None: + def __init__( + self, + data, + bins: int | np.ndarray | list[np.ndarray] = 10, + bottom: int | np.ndarray = 0, + **kwargs, + ) -> None: self.bins = bins # use mpl default self.bottom = bottom # Do not call LinePlot.__init__ which may fill nan @@ -369,13 +375,13 @@ def hist_series( self, by=None, ax=None, - grid=True, + grid: bool = True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, figsize=None, - bins=10, + bins: int = 10, legend: bool = False, **kwds, ): @@ -441,17 +447,17 @@ def hist_frame( data, column=None, by=None, - grid=True, + grid: bool = True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, ax=None, - sharex=False, - sharey=False, + sharex: bool = False, + sharey: bool = False, figsize=None, layout=None, - bins=10, + bins: int = 10, legend: bool = False, **kwds, ): diff --git a/pandas/plotting/_matplotlib/misc.py b/pandas/plotting/_matplotlib/misc.py index 083d85ef0876d..e2a0d50544f22 100644 --- a/pandas/plotting/_matplotlib/misc.py +++ b/pandas/plotting/_matplotlib/misc.py @@ -34,15 +34,15 @@ def scatter_matrix( frame: DataFrame, - alpha=0.5, + alpha: float = 0.5, figsize=None, ax=None, - grid=False, - diagonal="hist", - marker=".", + grid: bool = False, + diagonal: str = "hist", + marker: str = ".", density_kwds=None, hist_kwds=None, - range_padding=0.05, + range_padding: float = 0.05, **kwds, ): df = frame._get_numeric_data() @@ -352,7 +352,7 @@ def parallel_coordinates( cols=None, ax: Axes | None = None, color=None, - use_columns=False, + use_columns: bool = False, xticks=None, colormap=None, axvlines: bool = True, diff --git a/pandas/plotting/_matplotlib/timeseries.py b/pandas/plotting/_matplotlib/timeseries.py index ca6cccb0f98eb..06aac478bfb11 100644 --- a/pandas/plotting/_matplotlib/timeseries.py +++ b/pandas/plotting/_matplotlib/timeseries.py @@ -291,7 +291,7 @@ def _format_coord(freq, t, y) -> str: return f"t = {time_period} y = {y:8f}" -def format_dateaxis(subplot, freq, index): +def format_dateaxis(subplot, freq, index) -> None: """ Pretty-formats the date axis (x-axis). diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index 94357e5002ffd..a8b1e4c572c43 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -50,7 +50,7 @@ def maybe_adjust_figure(fig: Figure, *args, **kwargs): fig.subplots_adjust(*args, **kwargs) -def format_date_labels(ax: Axes, rot): +def format_date_labels(ax: Axes, rot) -> None: # mini version of autofmt_xdate for label in ax.get_xticklabels(): label.set_ha("right") diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 0e82a0fc924fb..e575f248f08c5 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -4,6 +4,7 @@ from typing import ( TYPE_CHECKING, Iterator, + Sequence, ) from pandas.plotting._core import _get_plot_backend @@ -13,6 +14,11 @@ from matplotlib.figure import Figure import numpy as np + from pandas import ( + DataFrame, + Series, + ) + def table(ax, data, rowLabels=None, colLabels=None, **kwargs): """ @@ -81,16 +87,16 @@ def deregister() -> None: def scatter_matrix( - frame, - alpha=0.5, - figsize=None, - ax=None, - grid=False, - diagonal="hist", - marker=".", + frame: DataFrame, + alpha: float = 0.5, + figsize: tuple[float, float] | None = None, + ax: Axes | None = None, + grid: bool = False, + diagonal: str = "hist", + marker: str = ".", density_kwds=None, hist_kwds=None, - range_padding=0.05, + range_padding: float = 0.05, **kwargs, ) -> np.ndarray: """ @@ -167,7 +173,14 @@ def scatter_matrix( ) -def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds) -> Axes: +def radviz( + frame: DataFrame, + class_column: str, + ax: Axes | None = None, + color: list[str] | tuple[str, ...] | None = None, + colormap=None, + **kwds, +) -> Axes: """ Plot a multidimensional dataset in 2D. @@ -249,7 +262,13 @@ def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds) -> A def andrews_curves( - frame, class_column, ax=None, samples=200, color=None, colormap=None, **kwargs + frame: DataFrame, + class_column: str, + ax: Axes | None = None, + samples: int = 200, + color: list[str] | tuple[str, ...] | None = None, + colormap=None, + **kwargs, ) -> Axes: """ Generate a matplotlib plot of Andrews curves, for visualising clusters of @@ -308,7 +327,13 @@ def andrews_curves( ) -def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds) -> Figure: +def bootstrap_plot( + series: Series, + fig: Figure | None = None, + size: int = 50, + samples: int = 500, + **kwds, +) -> Figure: """ Bootstrap plot on mean, median and mid-range statistics. @@ -363,17 +388,17 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds) -> Figure: def parallel_coordinates( - frame, - class_column, - cols=None, - ax=None, - color=None, - use_columns=False, - xticks=None, + frame: DataFrame, + class_column: str, + cols: list[str] | None = None, + ax: Axes | None = None, + color: list[str] | tuple[str, ...] | None = None, + use_columns: bool = False, + xticks: Sequence | None = None, colormap=None, - axvlines=True, + axvlines: bool = True, axvlines_kwds=None, - sort_labels=False, + sort_labels: bool = False, **kwargs, ) -> Axes: """ @@ -441,7 +466,7 @@ def parallel_coordinates( ) -def lag_plot(series, lag=1, ax=None, **kwds) -> Axes: +def lag_plot(series: Series, lag: int = 1, ax: Axes | None = None, **kwds) -> Axes: """ Lag plot for time series. @@ -485,7 +510,7 @@ def lag_plot(series, lag=1, ax=None, **kwds) -> Axes: return plot_backend.lag_plot(series=series, lag=lag, ax=ax, **kwds) -def autocorrelation_plot(series, ax=None, **kwargs) -> Axes: +def autocorrelation_plot(series: Series, ax: Axes | None = None, **kwargs) -> Axes: """ Autocorrelation plot for time series. @@ -532,7 +557,7 @@ class _Options(dict): _ALIASES = {"x_compat": "xaxis.compat"} _DEFAULT_KEYS = ["xaxis.compat"] - def __init__(self, deprecated=False) -> None: + def __init__(self, deprecated: bool = False) -> None: self._deprecated = deprecated super().__setitem__("xaxis.compat", False) From dae557c359b3010b8ccf2179f1a42355ff76c21b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sat, 23 Jul 2022 18:53:21 -0400 Subject: [PATCH 2/2] xticks + pyright --- .pre-commit-config.yaml | 2 +- pandas/plotting/_misc.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 92f3b3ce83297..f8cb869e6ed89 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -93,7 +93,7 @@ repos: types: [python] stages: [manual] additional_dependencies: &pyright_dependencies - - pyright@1.1.258 + - pyright@1.1.262 - id: pyright_reportGeneralTypeIssues name: pyright reportGeneralTypeIssues entry: pyright --skipunannotated -p pyright_reportGeneralTypeIssues.json diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index e575f248f08c5..17763b25329ab 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -4,7 +4,6 @@ from typing import ( TYPE_CHECKING, Iterator, - Sequence, ) from pandas.plotting._core import _get_plot_backend @@ -394,7 +393,7 @@ def parallel_coordinates( ax: Axes | None = None, color: list[str] | tuple[str, ...] | None = None, use_columns: bool = False, - xticks: Sequence | None = None, + xticks: list | tuple | None = None, colormap=None, axvlines: bool = True, axvlines_kwds=None,