From 6381f068b6f20781b707d0106a8f9fcc4259b28f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Fri, 17 Feb 2023 19:48:48 -0500 Subject: [PATCH 1/5] TYP: misc typing from pandas-stubs in io and plotting --- pandas/core/generic.py | 13 ++- pandas/io/formats/excel.py | 8 +- pandas/io/formats/style.py | 18 ++-- pandas/io/json/_json.py | 12 +-- pandas/io/parsers/readers.py | 190 ++++++++++++++++++---------------- pandas/plotting/_core.py | 42 ++++++-- pandas/plotting/_misc.py | 18 ++-- pandas/tseries/frequencies.py | 19 +++- 8 files changed, 193 insertions(+), 127 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 821e41db6b065..135927292be92 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -80,6 +80,7 @@ TimestampConvertibleTypes, ValueKeyFunc, WriteBuffer, + WriteExcelBuffer, npt, ) from pandas.compat._optional import import_optional_dependency @@ -189,12 +190,14 @@ if TYPE_CHECKING: from pandas._libs.tslibs import BaseOffset - from pandas.core.frame import DataFrame + from pandas import ( + DataFrame, + ExcelWriter, + HDFStore, + Series, + ) from pandas.core.indexers.objects import BaseIndexer from pandas.core.resample import Resampler - from pandas.core.series import Series - - from pandas.io.pytables import HDFStore # goal is to be able to define the docs close to function, while still being @@ -2089,7 +2092,7 @@ def _repr_data_resource_(self): ) def to_excel( self, - excel_writer, + excel_writer: FilePath | WriteExcelBuffer | ExcelWriter, sheet_name: str = "Sheet1", na_rep: str = "", float_format: str | None = None, diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 34c4d330761f5..0aa7a8f9e5da7 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -10,6 +10,7 @@ import itertools import re from typing import ( + TYPE_CHECKING, Any, Callable, Hashable, @@ -24,8 +25,10 @@ from pandas._libs.lib import is_list_like from pandas._typing import ( + 
FilePath, IndexLabel, StorageOptions, + WriteExcelBuffer, ) from pandas.util._decorators import doc from pandas.util._exceptions import find_stack_level @@ -53,6 +56,9 @@ from pandas.io.formats.format import get_level_lengths from pandas.io.formats.printing import pprint_thing +if TYPE_CHECKING: + from pandas import ExcelWriter + class ExcelCell: __fields__ = ("row", "col", "val", "style", "mergestart", "mergeend") @@ -887,7 +893,7 @@ def get_formatted_cells(self) -> Iterable[ExcelCell]: @doc(storage_options=_shared_docs["storage_options"]) def write( self, - writer, + writer: FilePath | WriteExcelBuffer | ExcelWriter, sheet_name: str = "Sheet1", startrow: int = 0, startcol: int = 0, diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 8ee73e77f5b11..2fa04d3627c41 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -31,6 +31,7 @@ Scalar, StorageOptions, WriteBuffer, + WriteExcelBuffer, ) from pandas.compat._optional import import_optional_dependency from pandas.util._decorators import ( @@ -71,6 +72,8 @@ if TYPE_CHECKING: from matplotlib.colors import Colormap + from pandas import ExcelWriter + try: import matplotlib as mpl import matplotlib.pyplot as plt @@ -493,7 +496,7 @@ def set_tooltips( ) def to_excel( self, - excel_writer, + excel_writer: FilePath | WriteExcelBuffer | ExcelWriter, sheet_name: str = "Sheet1", na_rep: str = "", float_format: str | None = None, @@ -1326,7 +1329,7 @@ def to_string( self, buf: FilePath | WriteBuffer[str], *, - encoding=..., + encoding: str | None = ..., sparse_index: bool | None = ..., sparse_columns: bool | None = ..., max_rows: int | None = ..., @@ -1340,7 +1343,7 @@ def to_string( self, buf: None = ..., *, - encoding=..., + encoding: str | None = ..., sparse_index: bool | None = ..., sparse_columns: bool | None = ..., max_rows: int | None = ..., @@ -1354,7 +1357,7 @@ def to_string( self, buf: FilePath | WriteBuffer[str] | None = None, *, - encoding=None, + encoding: str | None = 
None, sparse_index: bool | None = None, sparse_columns: bool | None = None, max_rows: int | None = None, @@ -3384,8 +3387,11 @@ def highlight_quantile( @classmethod def from_custom_template( - cls, searchpath, html_table: str | None = None, html_style: str | None = None - ): + cls, + searchpath: Sequence[str], + html_table: str | None = None, + html_style: str | None = None, + ) -> type[Styler]: """ Factory function for creating a subclass of ``Styler``. diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 80d2f9eda7ce5..f0f5287412629 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -392,7 +392,7 @@ def read_json( orient: str | None = ..., typ: Literal["frame"] = ..., dtype: DtypeArg | None = ..., - convert_axes=..., + convert_axes: bool | None = ..., convert_dates: bool | list[str] = ..., keep_default_dates: bool = ..., precise_float: bool = ..., @@ -417,7 +417,7 @@ def read_json( orient: str | None = ..., typ: Literal["series"], dtype: DtypeArg | None = ..., - convert_axes=..., + convert_axes: bool | None = ..., convert_dates: bool | list[str] = ..., keep_default_dates: bool = ..., precise_float: bool = ..., @@ -442,7 +442,7 @@ def read_json( orient: str | None = ..., typ: Literal["series"], dtype: DtypeArg | None = ..., - convert_axes=..., + convert_axes: bool | None = ..., convert_dates: bool | list[str] = ..., keep_default_dates: bool = ..., precise_float: bool = ..., @@ -467,7 +467,7 @@ def read_json( orient: str | None = ..., typ: Literal["frame"] = ..., dtype: DtypeArg | None = ..., - convert_axes=..., + convert_axes: bool | None = ..., convert_dates: bool | list[str] = ..., keep_default_dates: bool = ..., precise_float: bool = ..., @@ -495,7 +495,7 @@ def read_json( orient: str | None = None, typ: Literal["frame", "series"] = "frame", dtype: DtypeArg | None = None, - convert_axes=None, + convert_axes: bool | None = None, convert_dates: bool | list[str] = True, keep_default_dates: bool = True, precise_float: bool = 
False, @@ -811,7 +811,7 @@ def __init__( orient, typ: FrameSeriesStrT, dtype, - convert_axes, + convert_axes: bool | None, convert_dates, keep_default_dates: bool, precise_float: bool, diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 635c98e38da16..4f1b2a7399200 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -7,6 +7,7 @@ from collections import abc import csv +from enum import Enum import sys from textwrap import fill from types import TracebackType @@ -16,6 +17,7 @@ Callable, Hashable, Literal, + Mapping, NamedTuple, Sequence, overload, @@ -434,15 +436,19 @@ ) -_c_parser_defaults = { - "delim_whitespace": False, - "na_filter": True, - "low_memory": True, - "memory_map": False, - "float_precision": None, -} +class _c_parser_defaults(Enum): + delim_whitespace = False + na_filter = True + low_memory = True + memory_map = False + float_precision = None + + +class _fwf_defaults(Enum): + colspecs = "infer" + infer_nrows = 100 + widths = None -_fwf_defaults = {"colspecs": "infer", "infer_nrows": 100, "widths": None} _c_unsupported = {"skipfooter"} _python_unsupported = {"low_memory", "float_precision"} @@ -599,17 +605,17 @@ def read_csv( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols=..., + usecols: Sequence[Hashable] | Callable[[Hashable], bool] | None = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., - converters=..., - true_values=..., - false_values=..., + converters: Mapping[Hashable, Callable] | None = ..., + true_values: list | None = ..., + false_values: list | None = ..., skipinitialspace: bool = ..., - skiprows=..., + skiprows: list[int] | int | Callable[[Hashable], bool] | None = ..., skipfooter: int = ..., nrows: int | None = ..., - na_values=..., + na_values: Sequence[str] | Mapping[str, Sequence[str]] | None = ..., keep_default_na: bool = ..., 
na_filter: bool = ..., verbose: bool = ..., @@ -617,7 +623,7 @@ def read_csv( parse_dates: bool | Sequence[Hashable] | None = ..., infer_datetime_format: bool | lib.NoDefault = ..., keep_date_col: bool = ..., - date_parser=..., + date_parser: Callable | lib.NoDefault = ..., date_format: str | None = ..., dayfirst: bool = ..., cache_dates: bool = ..., @@ -637,7 +643,7 @@ def read_csv( dialect: str | csv.Dialect | None = ..., on_bad_lines=..., delim_whitespace: bool = ..., - low_memory=..., + low_memory: bool = ..., memory_map: bool = ..., float_precision: Literal["high", "legacy"] | None = ..., storage_options: StorageOptions = ..., @@ -656,17 +662,17 @@ def read_csv( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols=..., + usecols: Sequence[Hashable] | Callable[[Hashable], bool] | None = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., - converters=..., - true_values=..., - false_values=..., + converters: Mapping[Hashable, Callable] | None = ..., + true_values: list | None = ..., + false_values: list | None = ..., skipinitialspace: bool = ..., - skiprows=..., + skiprows: list[int] | int | Callable[[Hashable], bool] | None = ..., skipfooter: int = ..., nrows: int | None = ..., - na_values=..., + na_values: Sequence[str] | Mapping[str, Sequence[str]] | None = ..., keep_default_na: bool = ..., na_filter: bool = ..., verbose: bool = ..., @@ -674,7 +680,7 @@ def read_csv( parse_dates: bool | Sequence[Hashable] | None = ..., infer_datetime_format: bool | lib.NoDefault = ..., keep_date_col: bool = ..., - date_parser=..., + date_parser: Callable | lib.NoDefault = ..., date_format: str | None = ..., dayfirst: bool = ..., cache_dates: bool = ..., @@ -694,7 +700,7 @@ def read_csv( dialect: str | csv.Dialect | None = ..., on_bad_lines=..., delim_whitespace: bool = ..., - low_memory=..., + low_memory: bool = ..., memory_map: bool = 
..., float_precision: Literal["high", "legacy"] | None = ..., storage_options: StorageOptions = ..., @@ -713,17 +719,17 @@ def read_csv( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols=..., + usecols: Sequence[Hashable] | Callable[[Hashable], bool] | None = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., - converters=..., - true_values=..., - false_values=..., + converters: Mapping[Hashable, Callable] | None = ..., + true_values: list | None = ..., + false_values: list | None = ..., skipinitialspace: bool = ..., - skiprows=..., + skiprows: list[int] | int | Callable[[Hashable], bool] | None = ..., skipfooter: int = ..., nrows: int | None = ..., - na_values=..., + na_values: Sequence[str] | Mapping[str, Sequence[str]] | None = ..., keep_default_na: bool = ..., na_filter: bool = ..., verbose: bool = ..., @@ -731,7 +737,7 @@ def read_csv( parse_dates: bool | Sequence[Hashable] | None = ..., infer_datetime_format: bool | lib.NoDefault = ..., keep_date_col: bool = ..., - date_parser=..., + date_parser: Callable | lib.NoDefault = ..., date_format: str | None = ..., dayfirst: bool = ..., cache_dates: bool = ..., @@ -751,7 +757,7 @@ def read_csv( dialect: str | csv.Dialect | None = ..., on_bad_lines=..., delim_whitespace: bool = ..., - low_memory=..., + low_memory: bool = ..., memory_map: bool = ..., float_precision: Literal["high", "legacy"] | None = ..., storage_options: StorageOptions = ..., @@ -770,17 +776,17 @@ def read_csv( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols=..., + usecols: Sequence[Hashable] | Callable[[Hashable], bool] | None = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., - converters=..., - true_values=..., - false_values=..., + converters: 
Mapping[Hashable, Callable] | None = ..., + true_values: list | None = ..., + false_values: list | None = ..., skipinitialspace: bool = ..., - skiprows=..., + skiprows: list[int] | int | Callable[[Hashable], bool] | None = ..., skipfooter: int = ..., nrows: int | None = ..., - na_values=..., + na_values: Sequence[str] | Mapping[str, Sequence[str]] | None = ..., keep_default_na: bool = ..., na_filter: bool = ..., verbose: bool = ..., @@ -788,7 +794,7 @@ def read_csv( parse_dates: bool | Sequence[Hashable] | None = ..., infer_datetime_format: bool | lib.NoDefault = ..., keep_date_col: bool = ..., - date_parser=..., + date_parser: Callable | lib.NoDefault = ..., date_format: str | None = ..., dayfirst: bool = ..., cache_dates: bool = ..., @@ -808,7 +814,7 @@ def read_csv( dialect: str | csv.Dialect | None = ..., on_bad_lines=..., delim_whitespace: bool = ..., - low_memory=..., + low_memory: bool = ..., memory_map: bool = ..., float_precision: Literal["high", "legacy"] | None = ..., storage_options: StorageOptions = ..., @@ -836,19 +842,19 @@ def read_csv( header: int | Sequence[int] | None | Literal["infer"] = "infer", names: Sequence[Hashable] | None | lib.NoDefault = lib.no_default, index_col: IndexLabel | Literal[False] | None = None, - usecols=None, + usecols: Sequence[Hashable] | Callable[[Hashable], bool] | None = None, # General Parsing Configuration dtype: DtypeArg | None = None, engine: CSVEngine | None = None, - converters=None, - true_values=None, - false_values=None, + converters: Mapping[Hashable, Callable] | None = None, + true_values: list | None = None, + false_values: list | None = None, skipinitialspace: bool = False, - skiprows=None, + skiprows: list[int] | int | Callable[[Hashable], bool] | None = None, skipfooter: int = 0, nrows: int | None = None, # NA and Missing Data Handling - na_values=None, + na_values: Sequence[str] | Mapping[str, Sequence[str]] | None = None, keep_default_na: bool = True, na_filter: bool = True, verbose: bool = False, @@ 
-857,7 +863,7 @@ def read_csv( parse_dates: bool | Sequence[Hashable] | None = None, infer_datetime_format: bool | lib.NoDefault = lib.no_default, keep_date_col: bool = False, - date_parser=lib.no_default, + date_parser: Callable | lib.NoDefault = lib.no_default, date_format: str | None = None, dayfirst: bool = False, cache_dates: bool = True, @@ -881,7 +887,7 @@ def read_csv( on_bad_lines: str = "error", # Internal delim_whitespace: bool = False, - low_memory=_c_parser_defaults["low_memory"], + low_memory: bool = _c_parser_defaults.low_memory.value, memory_map: bool = False, float_precision: Literal["high", "legacy"] | None = None, storage_options: StorageOptions = None, @@ -928,17 +934,17 @@ def read_table( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols=..., + usecols: Sequence[Hashable] | Callable[[Hashable], bool] | None = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., - converters=..., - true_values=..., - false_values=..., + converters: Mapping[Hashable, Callable] | None = ..., + true_values: list | None = ..., + false_values: list | None = ..., skipinitialspace: bool = ..., - skiprows=..., + skiprows: list[int] | int | Callable[[Hashable], bool] | None = ..., skipfooter: int = ..., nrows: int | None = ..., - na_values=..., + na_values: Sequence[str] | Mapping[str, Sequence[str]] | None = ..., keep_default_na: bool = ..., na_filter: bool = ..., verbose: bool = ..., @@ -946,7 +952,7 @@ def read_table( parse_dates: bool | Sequence[Hashable] = ..., infer_datetime_format: bool | lib.NoDefault = ..., keep_date_col: bool = ..., - date_parser=..., + date_parser: Callable | lib.NoDefault = ..., date_format: str | None = ..., dayfirst: bool = ..., cache_dates: bool = ..., @@ -966,7 +972,7 @@ def read_table( dialect: str | csv.Dialect | None = ..., on_bad_lines=..., delim_whitespace: bool = ..., - low_memory=..., + 
low_memory: bool = ..., memory_map: bool = ..., float_precision: str | None = ..., storage_options: StorageOptions = ..., @@ -985,17 +991,17 @@ def read_table( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols=..., + usecols: Sequence[Hashable] | Callable[[Hashable], bool] | None = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., - converters=..., - true_values=..., - false_values=..., + converters: Mapping[Hashable, Callable] | None = ..., + true_values: list | None = ..., + false_values: list | None = ..., skipinitialspace: bool = ..., - skiprows=..., + skiprows: list[int] | int | Callable[[Hashable], bool] | None = ..., skipfooter: int = ..., nrows: int | None = ..., - na_values=..., + na_values: Sequence[str] | Mapping[str, Sequence[str]] | None = ..., keep_default_na: bool = ..., na_filter: bool = ..., verbose: bool = ..., @@ -1003,7 +1009,7 @@ def read_table( parse_dates: bool | Sequence[Hashable] = ..., infer_datetime_format: bool | lib.NoDefault = ..., keep_date_col: bool = ..., - date_parser=..., + date_parser: Callable | lib.NoDefault = ..., date_format: str | None = ..., dayfirst: bool = ..., cache_dates: bool = ..., @@ -1023,7 +1029,7 @@ def read_table( dialect: str | csv.Dialect | None = ..., on_bad_lines=..., delim_whitespace: bool = ..., - low_memory=..., + low_memory: bool = ..., memory_map: bool = ..., float_precision: str | None = ..., storage_options: StorageOptions = ..., @@ -1042,17 +1048,17 @@ def read_table( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols=..., + usecols: Sequence[Hashable] | Callable[[Hashable], bool] | None = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., - converters=..., - true_values=..., - false_values=..., + converters: 
Mapping[Hashable, Callable] | None = ..., + true_values: list | None = ..., + false_values: list | None = ..., skipinitialspace: bool = ..., - skiprows=..., + skiprows: list[int] | int | Callable[[Hashable], bool] | None = ..., skipfooter: int = ..., nrows: int | None = ..., - na_values=..., + na_values: Sequence[str] | Mapping[str, Sequence[str]] | None = ..., keep_default_na: bool = ..., na_filter: bool = ..., verbose: bool = ..., @@ -1060,7 +1066,7 @@ def read_table( parse_dates: bool | Sequence[Hashable] = ..., infer_datetime_format: bool | lib.NoDefault = ..., keep_date_col: bool = ..., - date_parser=..., + date_parser: Callable | lib.NoDefault = ..., date_format: str | None = ..., dayfirst: bool = ..., cache_dates: bool = ..., @@ -1080,7 +1086,7 @@ def read_table( dialect: str | csv.Dialect | None = ..., on_bad_lines=..., delim_whitespace: bool = ..., - low_memory=..., + low_memory: bool = ..., memory_map: bool = ..., float_precision: str | None = ..., storage_options: StorageOptions = ..., @@ -1099,17 +1105,17 @@ def read_table( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols=..., + usecols: Sequence[Hashable] | Callable[[Hashable], bool] | None = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., - converters=..., - true_values=..., - false_values=..., + converters: Mapping[Hashable, Callable] | None = ..., + true_values: list | None = ..., + false_values: list | None = ..., skipinitialspace: bool = ..., - skiprows=..., + skiprows: list[int] | int | Callable[[Hashable], bool] | None = ..., skipfooter: int = ..., nrows: int | None = ..., - na_values=..., + na_values: Sequence[str] | Mapping[str, Sequence[str]] | None = ..., keep_default_na: bool = ..., na_filter: bool = ..., verbose: bool = ..., @@ -1117,7 +1123,7 @@ def read_table( parse_dates: bool | Sequence[Hashable] = ..., infer_datetime_format: bool | 
lib.NoDefault = ..., keep_date_col: bool = ..., - date_parser=..., + date_parser: Callable | lib.NoDefault = ..., date_format: str | None = ..., dayfirst: bool = ..., cache_dates: bool = ..., @@ -1137,7 +1143,7 @@ def read_table( dialect: str | csv.Dialect | None = ..., on_bad_lines=..., delim_whitespace: bool = ..., - low_memory=..., + low_memory: bool = ..., memory_map: bool = ..., float_precision: str | None = ..., storage_options: StorageOptions = ..., @@ -1165,19 +1171,19 @@ def read_table( header: int | Sequence[int] | None | Literal["infer"] = "infer", names: Sequence[Hashable] | None | lib.NoDefault = lib.no_default, index_col: IndexLabel | Literal[False] | None = None, - usecols=None, + usecols: Sequence[Hashable] | Callable[[Hashable], bool] | None = None, # General Parsing Configuration dtype: DtypeArg | None = None, engine: CSVEngine | None = None, - converters=None, - true_values=None, - false_values=None, + converters: Mapping[Hashable, Callable] | None = None, + true_values: list | None = None, + false_values: list | None = None, skipinitialspace: bool = False, - skiprows=None, + skiprows: list[int] | int | Callable[[Hashable], bool] | None = None, skipfooter: int = 0, nrows: int | None = None, # NA and Missing Data Handling - na_values=None, + na_values: Sequence[str] | Mapping[str, Sequence[str]] | None = None, keep_default_na: bool = True, na_filter: bool = True, verbose: bool = False, @@ -1186,7 +1192,7 @@ def read_table( parse_dates: bool | Sequence[Hashable] = False, infer_datetime_format: bool | lib.NoDefault = lib.no_default, keep_date_col: bool = False, - date_parser=lib.no_default, + date_parser: Callable | lib.NoDefault = lib.no_default, date_format: str | None = None, dayfirst: bool = False, cache_dates: bool = True, @@ -1210,7 +1216,7 @@ def read_table( on_bad_lines: str = "error", # Internal delim_whitespace: bool = False, - low_memory=_c_parser_defaults["low_memory"], + low_memory: bool = _c_parser_defaults.low_memory.value, 
memory_map: bool = False, float_precision: str | None = None, storage_options: StorageOptions = None, @@ -1450,7 +1456,9 @@ def _get_options_with_defaults(self, engine: CSVEngine) -> dict[str, Any]: ) options[argname] = value - for argname, default in _c_parser_defaults.items(): + # __members__ also yields aliased (equal-valued) members, e.g. memory_map + for argname, c_enum in _c_parser_defaults.__members__.items(): + default = c_enum.value if argname in kwds: value = kwds[argname] @@ -1467,8 +1475,8 @@ def _get_options_with_defaults(self, engine: CSVEngine) -> dict[str, Any]: options[argname] = value if engine == "python-fwf": - for argname, default in _fwf_defaults.items(): - options[argname] = kwds.get(argname, default) + for fwf_enum in _fwf_defaults: + options[fwf_enum.name] = kwds.get(fwf_enum.name, fwf_enum.value) return options diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 51c95808506a8..40c5a5b6f4d8b 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -4,6 +4,9 @@ import types from typing import ( TYPE_CHECKING, + Callable, + Hashable, + Literal, Sequence, ) @@ -28,6 +31,7 @@ if TYPE_CHECKING: from matplotlib.axes import Axes + import numpy as np from pandas import DataFrame @@ -1034,7 +1038,7 @@ def __call__(self, *args, **kwargs): ) @Substitution(kind="line") @Appender(_bar_or_line_doc) - def line(self, x=None, y=None, **kwargs) -> PlotAccessor: + def line(self, x: Hashable = None, y: Hashable = None, **kwargs) -> PlotAccessor: """ Plot Series or DataFrame as lines. @@ -1122,7 +1126,7 @@ def line(self, x=None, y=None, **kwargs) -> PlotAccessor: @Substitution(kind="bar") @Appender(_bar_or_line_doc) def bar( # pylint: disable=disallowed-name - self, x=None, y=None, **kwargs + self, x: Hashable = None, y: Hashable = None, **kwargs ) -> PlotAccessor: """ Vertical bar plot. 
@@ -1209,7 +1213,7 @@ def bar( # pylint: disable=disallowed-name ) @Substitution(kind="bar") @Appender(_bar_or_line_doc) - def barh(self, x=None, y=None, **kwargs) -> PlotAccessor: + def barh(self, x: Hashable = None, y: Hashable = None, **kwargs) -> PlotAccessor: """ Make a horizontal bar plot. @@ -1221,7 +1225,7 @@ def barh(self, x=None, y=None, **kwargs) -> PlotAccessor: """ return self(kind="barh", x=x, y=y, **kwargs) - def box(self, by=None, **kwargs) -> PlotAccessor: + def box(self, by: IndexLabel = None, **kwargs) -> PlotAccessor: r""" Make a box plot of the DataFrame columns. @@ -1288,7 +1292,7 @@ def box(self, by=None, **kwargs) -> PlotAccessor: """ return self(kind="box", by=by, **kwargs) - def hist(self, by=None, bins: int = 10, **kwargs) -> PlotAccessor: + def hist(self, by: IndexLabel = None, bins: int = 10, **kwargs) -> PlotAccessor: """ Draw one histogram of the DataFrame's columns. @@ -1350,7 +1354,12 @@ def hist(self, by=None, bins: int = 10, **kwargs) -> PlotAccessor: """ return self(kind="hist", by=by, bins=bins, **kwargs) - def kde(self, bw_method=None, ind=None, **kwargs) -> PlotAccessor: + def kde( + self, + bw_method: Literal["scott", "silverman"] | float | Callable | None = None, + ind: np.ndarray | int | None = None, + **kwargs, + ) -> PlotAccessor: """ Generate Kernel Density Estimate plot using Gaussian kernels. @@ -1460,7 +1469,9 @@ def kde(self, bw_method=None, ind=None, **kwargs) -> PlotAccessor: density = kde - def area(self, x=None, y=None, stacked: bool = True, **kwargs) -> PlotAccessor: + def area( + self, x: Hashable = None, y: Hashable = None, stacked: bool = True, **kwargs + ) -> PlotAccessor: """ Draw a stacked area plot. 
@@ -1588,7 +1599,14 @@ def pie(self, **kwargs) -> PlotAccessor: raise ValueError("pie requires either y column or 'subplots=True'") return self(kind="pie", **kwargs) - def scatter(self, x, y, s=None, c=None, **kwargs) -> PlotAccessor: + def scatter( + self, + x: Hashable, + y: Hashable, + s: Hashable | Sequence[str] = None, + c: Hashable | Sequence[str] = None, + **kwargs, + ) -> PlotAccessor: """ Create a scatter plot with varying marker point size and color. @@ -1674,7 +1692,13 @@ def scatter(self, x, y, s=None, c=None, **kwargs) -> PlotAccessor: return self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs) def hexbin( - self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs + self, + x: Hashable, + y: Hashable, + C: Hashable = None, + reduce_C_function: Callable | None = None, + gridsize: int | tuple[int, int] | None = None, + **kwargs, ) -> PlotAccessor: """ Generate a hexagonal binning plot. diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 791e7c0214b58..9f8673b5b15de 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -3,14 +3,18 @@ from contextlib import contextmanager from typing import ( TYPE_CHECKING, + Any, Generator, + Mapping, ) from pandas.plotting._core import _get_plot_backend if TYPE_CHECKING: from matplotlib.axes import Axes + from matplotlib.colors import Colormap from matplotlib.figure import Figure + from matplotlib.table import Table import numpy as np from pandas import ( @@ -19,7 +23,7 @@ ) -def table(ax, data, **kwargs): +def table(ax: Axes, data: DataFrame | Series, **kwargs) -> Table: """ Helper function to convert DataFrame and Series to matplotlib.table. 
@@ -93,8 +97,8 @@ def scatter_matrix( grid: bool = False, diagonal: str = "hist", marker: str = ".", - density_kwds=None, - hist_kwds=None, + density_kwds: Mapping[str, Any] | None = None, + hist_kwds: Mapping[str, Any] | None = None, range_padding: float = 0.05, **kwargs, ) -> np.ndarray: @@ -177,7 +181,7 @@ def radviz( class_column: str, ax: Axes | None = None, color: list[str] | tuple[str, ...] | None = None, - colormap=None, + colormap: Colormap | str | None = None, **kwds, ) -> Axes: """ @@ -265,7 +269,7 @@ def andrews_curves( ax: Axes | None = None, samples: int = 200, color: list[str] | tuple[str, ...] | None = None, - colormap=None, + colormap: Colormap | str | None = None, **kwargs, ) -> Axes: """ @@ -396,9 +400,9 @@ def parallel_coordinates( color: list[str] | tuple[str, ...] | None = None, use_columns: bool = False, xticks: list | tuple | None = None, - colormap=None, + colormap: Colormap | str | None = None, axvlines: bool = True, - axvlines_kwds=None, + axvlines_kwds: Mapping[str, Any] | None = None, sort_labels: bool = False, **kwargs, ) -> Axes: diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index e1af8c0b48c2f..42c22ae0539a4 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np from pandas._libs.algos import unique_deltas @@ -42,6 +44,14 @@ from pandas.core.algorithms import unique +if TYPE_CHECKING: + from pandas import ( + DatetimeIndex, + Series, + TimedeltaIndex, + ) + from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin + # --------------------------------------------------------------------- # Offset names ("time rules") and related functions @@ -102,7 +112,9 @@ def get_period_alias(offset_str: str) -> str | None: # Period codes -def infer_freq(index) -> str | None: +def infer_freq( + index: DatetimeIndex | TimedeltaIndex | Series | DatetimeLikeArrayMixin, +) -> str | None: 
""" Infer the most likely frequency given the input index. @@ -166,7 +178,10 @@ def infer_freq(index) -> str | None: raise TypeError( f"cannot infer freq from a non-convertible index of dtype {index.dtype}" ) - index = index._values + # error: Incompatible types in assignment (expression has type + # "Union[ExtensionArray, ndarray[Any, Any]]", variable has type + # "Union[DatetimeIndex, TimedeltaIndex, Series, DatetimeLikeArrayMixin]") + index = index._values # type: ignore[assignment] if not isinstance(index, DatetimeIndex): index = DatetimeIndex(index) From 190f7fd48e47e3f28b2c3642ca260ed971d5f292 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sat, 18 Feb 2023 18:59:30 -0500 Subject: [PATCH 2/5] fix enum --- pandas/io/parsers/readers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 4f1b2a7399200..c4ef8e387d9a7 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -887,7 +887,7 @@ def read_csv( on_bad_lines: str = "error", # Internal delim_whitespace: bool = False, - low_memory: bool = _c_parser_defaults.low_memory.value, + low_memory: bool = _c_parser_defaults["low_memory"].value, memory_map: bool = False, float_precision: Literal["high", "legacy"] | None = None, storage_options: StorageOptions = None, @@ -1216,7 +1216,7 @@ def read_table( on_bad_lines: str = "error", # Internal delim_whitespace: bool = False, - low_memory: bool = _c_parser_defaults.low_memory.value, + low_memory: bool = _c_parser_defaults["low_memory"].value, memory_map: bool = False, float_precision: str | None = None, storage_options: StorageOptions = None, @@ -1568,7 +1568,7 @@ def _clean_options( if "python" in engine: for arg in _python_unsupported: - if fallback_reason and result[arg] != _c_parser_defaults[arg]: + if fallback_reason and result[arg] != _c_parser_defaults[arg].value: raise ValueError( "Falling back to the 'python' engine because " 
f"{fallback_reason}, but this causes {repr(arg)} to be " From 13835d665aa5b103f8e3378e349ede88aeb873d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sat, 18 Feb 2023 21:57:38 -0500 Subject: [PATCH 3/5] go with TypedDict --- pandas/io/parsers/readers.py | 46 +++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index c4ef8e387d9a7..38c37a306ef0b 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -7,7 +7,6 @@ from collections import abc import csv -from enum import Enum import sys from textwrap import fill from types import TracebackType @@ -20,6 +19,7 @@ Mapping, NamedTuple, Sequence, + TypedDict, overload, ) import warnings @@ -436,20 +436,30 @@ ) -class _c_parser_defaults(Enum): - delim_whitespace = False - na_filter = True - low_memory = True - memory_map = False - float_precision = None +class _C_Parser_Defaults(TypedDict): + delim_whitespace: Literal[False] + na_filter: Literal[True] + low_memory: Literal[True] + memory_map: Literal[False] + float_precision: None -class _fwf_defaults(Enum): - colspecs = "infer" - infer_nrows = 100 - widths = None +_c_parser_defaults: _C_Parser_Defaults = { + "delim_whitespace": False, + "na_filter": True, + "low_memory": True, + "memory_map": False, + "float_precision": None, +} + + +class _Fwf_Defaults(TypedDict): + colspecs: Literal["infer"] + infer_nrows: Literal[100] + widths: None +_fwf_defaults: _Fwf_Defaults = {"colspecs": "infer", "infer_nrows": 100, "widths": None} _c_unsupported = {"skipfooter"} _python_unsupported = {"low_memory", "float_precision"} _pyarrow_unsupported = { @@ -887,7 +897,7 @@ def read_csv( on_bad_lines: str = "error", # Internal delim_whitespace: bool = False, - low_memory: bool = _c_parser_defaults["low_memory"].value, + low_memory: bool = _c_parser_defaults["low_memory"], memory_map: bool = False, float_precision: Literal["high", 
"legacy"] | None = None, storage_options: StorageOptions = None, @@ -1216,7 +1226,7 @@ def read_table( on_bad_lines: str = "error", # Internal delim_whitespace: bool = False, - low_memory: bool = _c_parser_defaults["low_memory"].value, + low_memory: bool = _c_parser_defaults["low_memory"], memory_map: bool = False, float_precision: str | None = None, storage_options: StorageOptions = None, @@ -1456,9 +1466,7 @@ def _get_options_with_defaults(self, engine: CSVEngine) -> dict[str, Any]: ) options[argname] = value - for c_enum in _c_parser_defaults: - argname = c_enum.name - default = c_enum.value + for argname, default in _c_parser_defaults.items(): if argname in kwds: value = kwds[argname] @@ -1475,8 +1483,8 @@ def _get_options_with_defaults(self, engine: CSVEngine) -> dict[str, Any]: options[argname] = value if engine == "python-fwf": - for fwf_enum in _fwf_defaults: - options[argname] = kwds.get(fwf_enum.name, fwf_enum.value) + for argname, default in _fwf_defaults.items(): + options[argname] = kwds.get(argname, default) return options @@ -1568,7 +1576,7 @@ def _clean_options( if "python" in engine: for arg in _python_unsupported: - if fallback_reason and result[arg] != _c_parser_defaults[arg].value: + if fallback_reason and result[arg] != _c_parser_defaults[arg]: raise ValueError( "Falling back to the 'python' engine because " f"{fallback_reason}, but this causes {repr(arg)} to be " From e414d661ee99aea048a8859c11eb55fde17cf0bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sat, 18 Feb 2023 22:14:29 -0500 Subject: [PATCH 4/5] avoid mypy error --- pandas/io/parsers/readers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 38c37a306ef0b..7e848d667cd3d 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1576,7 +1576,7 @@ def _clean_options( if "python" in engine: for arg in _python_unsupported: - if fallback_reason and 
result[arg] != _c_parser_defaults[arg]: + if fallback_reason and result[arg] != _c_parser_defaults.get(arg): raise ValueError( "Falling back to the 'python' engine because " f"{fallback_reason}, but this causes {repr(arg)} to be " From 84a4dd4f19a434a24217d4f238aa7e6f89b87cab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Wed, 1 Mar 2023 18:37:49 -0500 Subject: [PATCH 5/5] fix some annotations --- pandas/io/parsers/readers.py | 21 +++++++++++---------- pandas/plotting/_core.py | 4 ++-- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 7e848d667cd3d..b89b969929689 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -35,6 +35,7 @@ CSVEngine, DtypeArg, FilePath, + HashableT, IndexLabel, ReadCsvBuffer, StorageOptions, @@ -615,7 +616,7 @@ def read_csv( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols: Sequence[Hashable] | Callable[[Hashable], bool] | None = ..., + usecols: list[HashableT] | Callable[[Hashable], bool] | None = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., converters: Mapping[Hashable, Callable] | None = ..., @@ -672,7 +673,7 @@ def read_csv( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols: Sequence[Hashable] | Callable[[Hashable], bool] | None = ..., + usecols: list[HashableT] | Callable[[Hashable], bool] | None = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., converters: Mapping[Hashable, Callable] | None = ..., @@ -729,7 +730,7 @@ def read_csv( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols: 
Sequence[Hashable] | Callable[[Hashable], bool] | None = ..., + usecols: list[HashableT] | Callable[[Hashable], bool] | None = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., converters: Mapping[Hashable, Callable] | None = ..., @@ -786,7 +787,7 @@ def read_csv( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols: Sequence[Hashable] | Callable[[Hashable], bool] | None = ..., + usecols: list[HashableT] | Callable[[Hashable], bool] | None = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., converters: Mapping[Hashable, Callable] | None = ..., @@ -852,7 +853,7 @@ def read_csv( header: int | Sequence[int] | None | Literal["infer"] = "infer", names: Sequence[Hashable] | None | lib.NoDefault = lib.no_default, index_col: IndexLabel | Literal[False] | None = None, - usecols: Sequence[Hashable] | Callable[[Hashable], bool] | None = None, + usecols: list[HashableT] | Callable[[Hashable], bool] | None = None, # General Parsing Configuration dtype: DtypeArg | None = None, engine: CSVEngine | None = None, @@ -944,7 +945,7 @@ def read_table( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols: Sequence[Hashable] | Callable[[Hashable], bool] | None = ..., + usecols: list[HashableT] | Callable[[Hashable], bool] | None = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., converters: Mapping[Hashable, Callable] | None = ..., @@ -1001,7 +1002,7 @@ def read_table( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols: Sequence[Hashable] | Callable[[Hashable], bool] | None = ..., + usecols: list[HashableT] | Callable[[Hashable], bool] | None = ..., dtype: 
DtypeArg | None = ..., engine: CSVEngine | None = ..., converters: Mapping[Hashable, Callable] | None = ..., @@ -1058,7 +1059,7 @@ def read_table( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols: Sequence[Hashable] | Callable[[Hashable], bool] | None = ..., + usecols: list[HashableT] | Callable[[Hashable], bool] | None = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., converters: Mapping[Hashable, Callable] | None = ..., @@ -1115,7 +1116,7 @@ def read_table( header: int | Sequence[int] | None | Literal["infer"] = ..., names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., - usecols: Sequence[Hashable] | Callable[[Hashable], bool] | None = ..., + usecols: list[HashableT] | Callable[[Hashable], bool] | None = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., converters: Mapping[Hashable, Callable] | None = ..., @@ -1181,7 +1182,7 @@ def read_table( header: int | Sequence[int] | None | Literal["infer"] = "infer", names: Sequence[Hashable] | None | lib.NoDefault = lib.no_default, index_col: IndexLabel | Literal[False] | None = None, - usecols: Sequence[Hashable] | Callable[[Hashable], bool] | None = None, + usecols: list[HashableT] | Callable[[Hashable], bool] | None = None, # General Parsing Configuration dtype: DtypeArg | None = None, engine: CSVEngine | None = None, diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 40c5a5b6f4d8b..4d909c911ecd2 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1603,8 +1603,8 @@ def scatter( self, x: Hashable, y: Hashable, - s: Hashable | Sequence[str] = None, - c: Hashable | Sequence[str] = None, + s: Hashable | Sequence[Hashable] = None, + c: Hashable | Sequence[Hashable] = None, **kwargs, ) -> PlotAccessor: """