diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3253f9ab87363..e683fc50c1c5d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -127,7 +127,7 @@ repos: types: [python] stages: [manual] additional_dependencies: &pyright_dependencies - - pyright@1.1.350 + - pyright@1.1.351 - id: pyright # note: assumes python env is setup and activated name: pyright reportGeneralTypeIssues diff --git a/pandas/_libs/tslibs/nattype.pyi b/pandas/_libs/tslibs/nattype.pyi index bd86a6fdc2174..f49e894a0bfec 100644 --- a/pandas/_libs/tslibs/nattype.pyi +++ b/pandas/_libs/tslibs/nattype.pyi @@ -1,20 +1,30 @@ from datetime import ( + date as date_, datetime, + time as time_, timedelta, tzinfo as _tzinfo, ) -import typing +from typing import ( + Literal, + NoReturn, + TypeAlias, +) import numpy as np from pandas._libs.tslibs.period import Period -from pandas._typing import Self +from pandas._typing import ( + Frequency, + Self, + TimestampNonexistent, +) NaT: NaTType iNaT: int nat_strings: set[str] -_NaTComparisonTypes: typing.TypeAlias = ( +_NaTComparisonTypes: TypeAlias = ( datetime | timedelta | Period | np.datetime64 | np.timedelta64 ) @@ -61,18 +71,38 @@ class NaTType: def week(self) -> float: ... @property def weekofyear(self) -> float: ... + @property + def fold(self) -> int: ... def day_name(self) -> float: ... def month_name(self) -> float: ... def weekday(self) -> float: ... def isoweekday(self) -> float: ... + def isoformat(self, sep: str = ..., timespec: str = ...) -> str: ... + def strftime(self, format: str) -> NoReturn: ... def total_seconds(self) -> float: ... def today(self, *args, **kwargs) -> NaTType: ... def now(self, *args, **kwargs) -> NaTType: ... def to_pydatetime(self) -> NaTType: ... def date(self) -> NaTType: ... - def round(self) -> NaTType: ... - def floor(self) -> NaTType: ... - def ceil(self) -> NaTType: ... + def round( + self, + freq: Frequency, + ambiguous: bool | Literal["raise"] | NaTType = ..., + nonexistent: TimestampNonexistent = ..., + ) -> NaTType: ... + def floor( + self, + freq: Frequency, + ambiguous: bool | Literal["raise"] | NaTType = ..., + nonexistent: TimestampNonexistent = ..., + ) -> NaTType: ... + def ceil( + self, + freq: Frequency, + ambiguous: bool | Literal["raise"] | NaTType = ..., + nonexistent: TimestampNonexistent = ..., + ) -> NaTType: ... + def combine(cls, date: date_, time: time_) -> NoReturn: ... @property def tzinfo(self) -> None: ... @property @@ -81,8 +111,8 @@ class NaTType: def tz_localize( self, tz: _tzinfo | str | None, - ambiguous: str = ..., - nonexistent: str = ..., + ambiguous: bool | Literal["raise"] | NaTType = ..., + nonexistent: TimestampNonexistent = ..., ) -> NaTType: ... def replace( self, @@ -121,6 +151,8 @@ class NaTType: @property def days(self) -> float: ... @property + def seconds(self) -> float: ... + @property def microseconds(self) -> float: ... @property def nanoseconds(self) -> float: ... diff --git a/pandas/_libs/tslibs/offsets.pyi b/pandas/_libs/tslibs/offsets.pyi index 8f1e34522c026..791ebc0fbb245 100644 --- a/pandas/_libs/tslibs/offsets.pyi +++ b/pandas/_libs/tslibs/offsets.pyi @@ -196,7 +196,10 @@ class WeekOfMonth(WeekOfMonthMixin): self, n: int = ..., normalize: bool = ..., week: int = ..., weekday: int = ... ) -> None: ... -class LastWeekOfMonth(WeekOfMonthMixin): ... +class LastWeekOfMonth(WeekOfMonthMixin): + def __init__( + self, n: int = ..., normalize: bool = ..., weekday: int = ... + ) -> None: ... class FY5253Mixin(SingleConstructorOffset): def __init__( diff --git a/pandas/_typing.py b/pandas/_typing.py index 0bcf5284315d2..d7325fed93d62 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -207,7 +207,7 @@ def __reversed__(self) -> Iterator[_T_co]: IndexLabel = Union[Hashable, Sequence[Hashable]] Level = Hashable Shape = tuple[int, ...] -Suffixes = tuple[Optional[str], Optional[str]] +Suffixes = Sequence[Optional[str]] Ordered = Optional[bool] JSONSerializable = Optional[Union[PythonScalar, list, dict]] Frequency = Union[str, "BaseOffset"] @@ -226,7 +226,7 @@ def __reversed__(self) -> Iterator[_T_co]: Dtype = Union["ExtensionDtype", NpDtype] AstypeArg = Union["ExtensionDtype", "npt.DTypeLike"] # DtypeArg specifies all allowable dtypes in a functions its dtype argument -DtypeArg = Union[Dtype, dict[Hashable, Dtype]] +DtypeArg = Union[Dtype, Mapping[Hashable, Dtype]] DtypeObj = Union[np.dtype, "ExtensionDtype"] # converters diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index cb751a2b44b59..ddfb7ea7f3696 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -45,6 +45,8 @@ if TYPE_CHECKING: from re import Pattern + from pandas._libs.missing import NAType + from pandas._libs.tslibs import NaTType from pandas._typing import ( ArrayLike, DtypeObj, @@ -66,7 +68,7 @@ @overload -def isna(obj: Scalar | Pattern) -> bool: +def isna(obj: Scalar | Pattern | NAType | NaTType) -> bool: ... @@ -283,7 +285,7 @@ def _isna_recarray_dtype(values: np.rec.recarray) -> npt.NDArray[np.bool_]: @overload -def notna(obj: Scalar) -> bool: +def notna(obj: Scalar | Pattern | NAType | NaTType) -> bool: ... diff --git a/pandas/core/frame.py b/pandas/core/frame.py index df413fda0255a..e5d424b15e69e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4799,7 +4799,7 @@ def insert( self, loc: int, column: Hashable, - value: Scalar | AnyArrayLike, + value: object, allow_duplicates: bool | lib.NoDefault = lib.no_default, ) -> None: """ @@ -6266,7 +6266,7 @@ def dropna( axis: Axis = 0, how: AnyAll | lib.NoDefault = lib.no_default, thresh: int | lib.NoDefault = lib.no_default, - subset: IndexLabel | None = None, + subset: IndexLabel | AnyArrayLike | None = None, inplace: bool = False, ignore_index: bool = False, ) -> DataFrame | None: @@ -6390,7 +6390,7 @@ def dropna( if subset is not None: # subset needs to be list if not is_list_like(subset): - subset = [subset] + subset = [cast(Hashable, subset)] ax = self._get_axis(agg_axis) indices = ax.get_indexer_for(subset) check = indices == -1 diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 3011fa235b22d..3b7963693bcae 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1326,7 +1326,7 @@ def hist( xrot: float | None = None, ylabelsize: int | None = None, yrot: float | None = None, - figsize: tuple[int, int] | None = None, + figsize: tuple[float, float] | None = None, bins: int | Sequence[int] = 10, backend: str | None = None, legend: bool = False, @@ -2599,7 +2599,7 @@ def hist( ax=None, sharex: bool = False, sharey: bool = False, - figsize: tuple[int, int] | None = None, + figsize: tuple[float, float] | None = None, layout: tuple[int, int] | None = None, bins: int | Sequence[int] = 10, backend: str | None = None, diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 0494138d1e16f..d54bfec389a38 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -445,7 +445,7 @@ def merge_asof( left_by=None, right_by=None, suffixes: Suffixes = ("_x", "_y"), - tolerance: int | Timedelta | None = None, + tolerance: int | datetime.timedelta | None = None, allow_exact_matches: bool = True, direction: str = "backward", ) -> DataFrame: @@ -494,7 +494,7 @@ def merge_asof( suffixes : 2-length sequence (tuple, list, ...) Suffix to apply to overlapping column names in the left and right side, respectively. - tolerance : int or Timedelta, optional, default None + tolerance : int or timedelta, optional, default None Select asof tolerance within this range; must be compatible with the merge index. allow_exact_matches : bool, default True diff --git a/pandas/core/series.py b/pandas/core/series.py index 5c9bc428e256f..bae95418c7641 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2891,7 +2891,7 @@ def autocorr(self, lag: int = 1) -> float: """ return self.corr(cast(Series, self.shift(lag))) - def dot(self, other: AnyArrayLike) -> Series | np.ndarray: + def dot(self, other: AnyArrayLike | DataFrame) -> Series | np.ndarray: """ Compute the dot product between the Series and the columns of other. @@ -6346,7 +6346,7 @@ def mean( skipna: bool = True, numeric_only: bool = False, **kwargs, - ): + ) -> Any: return NDFrame.mean( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) @@ -6358,7 +6358,7 @@ def median( skipna: bool = True, numeric_only: bool = False, **kwargs, - ): + ) -> Any: return NDFrame.median( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index cf9c3be97ee5c..f8d950db6642a 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -76,6 +76,7 @@ DtypeBackend, ExcelWriterIfSheetExists, FilePath, + HashableT, IntStrT, ReadBuffer, Self, @@ -382,7 +383,7 @@ def read_excel( | str | Sequence[int] | Sequence[str] - | Callable[[str], bool] + | Callable[[HashableT], bool] | None = ..., dtype: DtypeArg | None = ..., engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb", "calamine"] | None = ..., @@ -421,7 +422,7 @@ def read_excel( | str | Sequence[int] | Sequence[str] - | Callable[[str], bool] + | Callable[[HashableT], bool] | None = ..., dtype: DtypeArg | None = ..., engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb", "calamine"] | None = ..., @@ -460,7 +461,7 @@ def read_excel( | str | Sequence[int] | Sequence[str] - | Callable[[str], bool] + | Callable[[HashableT], bool] | None = None, dtype: DtypeArg | None = None, engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb", "calamine"] | None = None, diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 2a42b4115d1a2..8995faa7ad346 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -17,6 +17,7 @@ TYPE_CHECKING, Any, Callable, + Generic, Literal, NamedTuple, TypedDict, @@ -83,6 +84,7 @@ DtypeArg, DtypeBackend, FilePath, + HashableT, IndexLabel, ReadCsvBuffer, Self, @@ -90,6 +92,62 @@ Unpack, UsecolsArgType, ) + + class _read_shared(TypedDict, Generic[HashableT], total=False): + # annotations shared between read_csv/fwf/table's overloads + # NOTE: Keep in sync with the annotations of the implementation + sep: str | None | lib.NoDefault + delimiter: str | None | lib.NoDefault + header: int | Sequence[int] | None | Literal["infer"] + names: Sequence[Hashable] | None | lib.NoDefault + index_col: IndexLabel | Literal[False] | None + usecols: UsecolsArgType + dtype: DtypeArg | None + engine: CSVEngine | None + converters: Mapping[HashableT, Callable] | None + true_values: list | None + false_values: list | None + skipinitialspace: bool + skiprows: list[int] | int | Callable[[Hashable], bool] | None + skipfooter: int + nrows: int | None + na_values: Hashable | Iterable[Hashable] | Mapping[ + Hashable, Iterable[Hashable] + ] | None + keep_default_na: bool + na_filter: bool + verbose: bool | lib.NoDefault + skip_blank_lines: bool + parse_dates: bool | Sequence[Hashable] | None + infer_datetime_format: bool | lib.NoDefault + keep_date_col: bool | lib.NoDefault + date_parser: Callable | lib.NoDefault + date_format: str | dict[Hashable, str] | None + dayfirst: bool + cache_dates: bool + compression: CompressionOptions + thousands: str | None + decimal: str + lineterminator: str | None + quotechar: str + quoting: int + doublequote: bool + escapechar: str | None + comment: str | None + encoding: str | None + encoding_errors: str | None + dialect: str | csv.Dialect | None + on_bad_lines: str + delim_whitespace: bool | lib.NoDefault + low_memory: bool + memory_map: bool + float_precision: Literal["high", "legacy", "round_trip"] | None + storage_options: StorageOptions | None + dtype_backend: DtypeBackend | lib.NoDefault +else: + _read_shared = dict + + _doc_read_csv_and_table = ( r""" {summary} @@ -480,59 +538,6 @@ class _Fwf_Defaults(TypedDict): widths: None -class _read_shared(TypedDict, total=False): - # annotations shared between read_csv/fwf/table's overloads - # NOTE: Keep in sync with the annotations of the implementation - sep: str | None | lib.NoDefault - delimiter: str | None | lib.NoDefault - header: int | Sequence[int] | None | Literal["infer"] - names: Sequence[Hashable] | None | lib.NoDefault - index_col: IndexLabel | Literal[False] | None - usecols: UsecolsArgType - dtype: DtypeArg | None - engine: CSVEngine | None - converters: Mapping[Hashable, Callable] | None - true_values: list | None - false_values: list | None - skipinitialspace: bool - skiprows: list[int] | int | Callable[[Hashable], bool] | None - skipfooter: int - nrows: int | None - na_values: Hashable | Iterable[Hashable] | Mapping[ - Hashable, Iterable[Hashable] - ] | None - keep_default_na: bool - na_filter: bool - verbose: bool | lib.NoDefault - skip_blank_lines: bool - parse_dates: bool | Sequence[Hashable] | None - infer_datetime_format: bool | lib.NoDefault - keep_date_col: bool | lib.NoDefault - date_parser: Callable | lib.NoDefault - date_format: str | dict[Hashable, str] | None - dayfirst: bool - cache_dates: bool - compression: CompressionOptions - thousands: str | None - decimal: str - lineterminator: str | None - quotechar: str - quoting: int - doublequote: bool - escapechar: str | None - comment: str | None - encoding: str | None - encoding_errors: str | None - dialect: str | csv.Dialect | None - on_bad_lines: str - delim_whitespace: bool | lib.NoDefault - low_memory: bool - memory_map: bool - float_precision: Literal["high", "legacy", "round_trip"] | None - storage_options: StorageOptions | None - dtype_backend: DtypeBackend | lib.NoDefault - - _fwf_defaults: _Fwf_Defaults = {"colspecs": "infer", "infer_nrows": 100, "widths": None} _c_unsupported = {"skipfooter"} _python_unsupported = {"low_memory", "float_precision"} @@ -685,7 +690,7 @@ def read_csv( *, iterator: Literal[True], chunksize: int | None = ..., - **kwds: Unpack[_read_shared], + **kwds: Unpack[_read_shared[HashableT]], ) -> TextFileReader: ... @@ -696,7 +701,7 @@ def read_csv( *, iterator: bool = ..., chunksize: int, - **kwds: Unpack[_read_shared], + **kwds: Unpack[_read_shared[HashableT]], ) -> TextFileReader: ... @@ -707,7 +712,7 @@ def read_csv( *, iterator: Literal[False] = ..., chunksize: None = ..., - **kwds: Unpack[_read_shared], + **kwds: Unpack[_read_shared[HashableT]], ) -> DataFrame: ... @@ -718,7 +723,7 @@ def read_csv( *, iterator: bool = ..., chunksize: int | None = ..., - **kwds: Unpack[_read_shared], + **kwds: Unpack[_read_shared[HashableT]], ) -> DataFrame | TextFileReader: ... @@ -748,7 +753,7 @@ def read_csv( # General Parsing Configuration dtype: DtypeArg | None = None, engine: CSVEngine | None = None, - converters: Mapping[Hashable, Callable] | None = None, + converters: Mapping[HashableT, Callable] | None = None, true_values: list | None = None, false_values: list | None = None, skipinitialspace: bool = False, @@ -890,7 +895,7 @@ def read_table( *, iterator: Literal[True], chunksize: int | None = ..., - **kwds: Unpack[_read_shared], + **kwds: Unpack[_read_shared[HashableT]], ) -> TextFileReader: ... @@ -901,7 +906,7 @@ def read_table( *, iterator: bool = ..., chunksize: int, - **kwds: Unpack[_read_shared], + **kwds: Unpack[_read_shared[HashableT]], ) -> TextFileReader: ... @@ -912,7 +917,7 @@ def read_table( *, iterator: Literal[False] = ..., chunksize: None = ..., - **kwds: Unpack[_read_shared], + **kwds: Unpack[_read_shared[HashableT]], ) -> DataFrame: ... @@ -923,7 +928,7 @@ def read_table( *, iterator: bool = ..., chunksize: int | None = ..., - **kwds: Unpack[_read_shared], + **kwds: Unpack[_read_shared[HashableT]], ) -> DataFrame | TextFileReader: ... @@ -955,7 +960,7 @@ def read_table( # General Parsing Configuration dtype: DtypeArg | None = None, engine: CSVEngine | None = None, - converters: Mapping[Hashable, Callable] | None = None, + converters: Mapping[HashableT, Callable] | None = None, true_values: list | None = None, false_values: list | None = None, skipinitialspace: bool = False, @@ -1091,7 +1096,7 @@ def read_fwf( infer_nrows: int = ..., iterator: Literal[True], chunksize: int | None = ..., - **kwds: Unpack[_read_shared], + **kwds: Unpack[_read_shared[HashableT]], ) -> TextFileReader: ... @@ -1105,7 +1110,7 @@ def read_fwf( infer_nrows: int = ..., iterator: bool = ..., chunksize: int, - **kwds: Unpack[_read_shared], + **kwds: Unpack[_read_shared[HashableT]], ) -> TextFileReader: ... @@ -1119,7 +1124,7 @@ def read_fwf( infer_nrows: int = ..., iterator: Literal[False] = ..., chunksize: None = ..., - **kwds: Unpack[_read_shared], + **kwds: Unpack[_read_shared[HashableT]], ) -> DataFrame: ... @@ -1132,7 +1137,7 @@ def read_fwf( infer_nrows: int = 100, iterator: bool = False, chunksize: int | None = None, - **kwds: Unpack[_read_shared], + **kwds: Unpack[_read_shared[HashableT]], ) -> DataFrame | TextFileReader: r""" Read a table of fixed-width formatted lines into DataFrame.