diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index fbc577712d294..31d4274bb5f8d 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -1,6 +1,6 @@ # TODO(npdtypes): Many types specified here can be made more specific/accurate; # the more specific versions are specified in comments - +from decimal import Decimal from typing import ( Any, Callable, @@ -13,9 +13,12 @@ from typing import ( import numpy as np +from pandas._libs.interval import Interval +from pandas._libs.tslibs import Period from pandas._typing import ( ArrayLike, DtypeObj, + TypeGuard, npt, ) @@ -38,13 +41,13 @@ def infer_dtype(value: object, skipna: bool = ...) -> str: ... def is_iterator(obj: object) -> bool: ... def is_scalar(val: object) -> bool: ... def is_list_like(obj: object, allow_sets: bool = ...) -> bool: ... -def is_period(val: object) -> bool: ... -def is_interval(val: object) -> bool: ... -def is_decimal(val: object) -> bool: ... -def is_complex(val: object) -> bool: ... -def is_bool(val: object) -> bool: ... -def is_integer(val: object) -> bool: ... -def is_float(val: object) -> bool: ... +def is_period(val: object) -> TypeGuard[Period]: ... +def is_interval(val: object) -> TypeGuard[Interval]: ... +def is_decimal(val: object) -> TypeGuard[Decimal]: ... +def is_complex(val: object) -> TypeGuard[complex]: ... +def is_bool(val: object) -> TypeGuard[bool | np.bool_]: ... +def is_integer(val: object) -> TypeGuard[int | np.integer]: ... +def is_float(val: object) -> TypeGuard[float]: ... def is_interval_array(values: np.ndarray) -> bool: ... def is_datetime64_array(values: np.ndarray) -> bool: ... def is_timedelta_or_timedelta64_array(values: np.ndarray) -> bool: ... diff --git a/pandas/_typing.py b/pandas/_typing.py index 9d64842373573..e299d5309a6b9 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -84,6 +84,11 @@ # Name "npt._ArrayLikeInt_co" is not defined [name-defined] NumpySorter = Optional[npt._ArrayLikeInt_co] # type: ignore[name-defined] + if sys.version_info >= (3, 10): + from typing import TypeGuard + else: + from typing_extensions import TypeGuard # pyright: reportUnusedImport = false + if sys.version_info >= (3, 11): from typing import Self else: @@ -91,6 +96,7 @@ else: npt: Any = None Self: Any = None + TypeGuard: Any = None HashableT = TypeVar("HashableT", bound=Hashable) diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index bdd26b315ed83..8b2916bf1ded9 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -25,6 +25,7 @@ overload, ) +import numpy as np from numpy import ndarray from pandas._libs.lib import ( @@ -215,7 +216,7 @@ def validate_clip_with_axis( ) -def validate_cum_func_with_skipna(skipna, args, kwargs, name) -> bool: +def validate_cum_func_with_skipna(skipna: bool, args, kwargs, name) -> bool: """ If this function is called via the 'numpy' library, the third parameter in its signature is 'dtype', which takes either a 'numpy' dtype or 'None', so @@ -224,6 +225,8 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name) -> bool: if not is_bool(skipna): args = (skipna,) + args skipna = True + elif isinstance(skipna, np.bool_): + skipna = bool(skipna) validate_cum_func(args, kwargs, fname=name) return skipna diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 8c537a5082585..bd3d4b57c6d63 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -2182,7 +2182,6 @@ def validate_periods(periods: int | float | None) -> int | None: periods = int(periods) elif not lib.is_integer(periods): raise TypeError(f"periods must be a number, got {periods}") - periods = cast(int, periods) return periods diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index e9da7598d1ebc..aa43c53d3c56c 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -194,15 +194,9 @@ def maybe_box_native(value: Scalar | None | NAType) -> Scalar | None | NAType: scalar or Series """ if is_float(value): - # error: Argument 1 to "float" has incompatible type - # "Union[Union[str, int, float, bool], Union[Any, Timestamp, Timedelta, Any]]"; - # expected "Union[SupportsFloat, _SupportsIndex, str]" - value = float(value) # type: ignore[arg-type] + value = float(value) elif is_integer(value): - # error: Argument 1 to "int" has incompatible type - # "Union[Union[str, int, float, bool], Union[Any, Timestamp, Timedelta, Any]]"; - # expected "Union[str, SupportsInt, _SupportsIndex, _SupportsTrunc]" - value = int(value) # type: ignore[arg-type] + value = int(value) elif is_bool(value): value = bool(value) elif isinstance(value, (np.datetime64, np.timedelta64)): diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index 28e034de869f4..af4f0a1c0aa05 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -5,12 +5,19 @@ from collections import abc from numbers import Number import re -from typing import Pattern +from typing import ( + TYPE_CHECKING, + Hashable, + Pattern, +) import numpy as np from pandas._libs import lib +if TYPE_CHECKING: + from pandas._typing import TypeGuard + is_bool = lib.is_bool is_integer = lib.is_integer @@ -30,7 +37,7 @@ is_iterator = lib.is_iterator -def is_number(obj) -> bool: +def is_number(obj) -> TypeGuard[Number | np.number]: """ Check if the object is a number. @@ -132,7 +139,7 @@ def is_file_like(obj) -> bool: return bool(hasattr(obj, "__iter__")) -def is_re(obj) -> bool: +def is_re(obj) -> TypeGuard[Pattern]: """ Check if the object is a regex pattern instance. @@ -325,7 +332,7 @@ def is_named_tuple(obj) -> bool: return isinstance(obj, abc.Sequence) and hasattr(obj, "_fields") -def is_hashable(obj) -> bool: +def is_hashable(obj) -> TypeGuard[Hashable]: """ Return True if hash(obj) will succeed, False otherwise. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ce4c3d81c4f90..93edb93781ba3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9501,11 +9501,7 @@ def melt( ) def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame: if not lib.is_integer(periods): - if not ( - is_float(periods) - # error: "int" has no attribute "is_integer" - and periods.is_integer() # type: ignore[attr-defined] - ): + if not (is_float(periods) and periods.is_integer()): raise ValueError("periods must be an integer") periods = int(periods) @@ -10397,8 +10393,13 @@ def _series_round(ser: Series, decimals: int) -> Series: new_cols = list(_dict_round(self, decimals)) elif is_integer(decimals): # Dispatch to Block.round + # Argument "decimals" to "round" of "BaseBlockManager" has incompatible + # type "Union[int, integer[Any]]"; expected "int" return self._constructor( - self._mgr.round(decimals=decimals, using_cow=using_copy_on_write()), + self._mgr.round( + decimals=decimals, # type: ignore[arg-type] + using_cow=using_copy_on_write(), + ), ).__finalize__(self, method="round") else: raise TypeError("decimals must be an integer, a dict-like or a Series") diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 17e4a4c142f66..fa19aae674621 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4089,7 +4089,8 @@ class animal locomotion loc, new_index = index._get_loc_level(key, level=0) if not drop_level: if lib.is_integer(loc): - new_index = index[loc : loc + 1] + # Slice index must be an integer or None + new_index = index[loc : loc + 1] # type: ignore[misc] else: new_index = index[loc] else: diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index f880e1f10106d..4070b25767912 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -70,7 +70,11 @@ def get_objs_combined_axis( - objs, intersect: bool = False, axis: Axis = 0, sort: bool = True, copy: bool = False + objs, + intersect: bool = False, + axis: Axis = 0, + sort: bool = True, + copy: bool = False, ) -> Index: """ Extract combined index: return intersection or union (depending on the diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 37df3a7024626..c2dcd7389db5c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2699,6 +2699,7 @@ def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left" for k, (lab, lev, level_codes) in enumerate(zipped): section = level_codes[start:end] + loc: npt.NDArray[np.intp] | np.intp | int if lab not in lev and not isna(lab): # short circuit try: @@ -2930,7 +2931,8 @@ def get_loc_level(self, key, level: IndexLabel = 0, drop_level: bool = True): loc, mi = self._get_loc_level(key, level=level) if not drop_level: if lib.is_integer(loc): - mi = self[loc : loc + 1] + # Slice index must be an integer or None + mi = self[loc : loc + 1] # type: ignore[misc] else: mi = self[loc] return loc, mi diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index faa1e9658fa80..d92a3fe13e42f 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -298,9 +298,7 @@ def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]: raise raise_on_incompatible(self, other) elif is_integer(other): - # integer is passed to .shift via - # _add_datetimelike_methods basically - # but ufunc may pass integer to _add_delta + assert isinstance(other, int) return other # raise when input doesn't have freq diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 8db08fc15c0f4..1dac439ad9aea 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1562,7 +1562,7 @@ def _is_scalar_access(self, key: tuple) -> bool: return all(is_integer(k) for k in key) - def _validate_integer(self, key: int, axis: AxisInt) -> None: + def _validate_integer(self, key: int | np.integer, axis: AxisInt) -> None: """ Check that 'key' is a valid position in the desired axis. @@ -2171,7 +2171,7 @@ def _ensure_iterable_column_indexer(self, column_indexer): """ Ensure that our column indexer is something that can be iterated over. """ - ilocs: Sequence[int] | np.ndarray + ilocs: Sequence[int | np.integer] | np.ndarray if is_integer(column_indexer): ilocs = [column_indexer] elif isinstance(column_indexer, slice): diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 650d51b896dc5..395db8060ce0e 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -391,6 +391,8 @@ class _Concatenator: Orchestrates a concatenation operation for BlockManagers """ + sort: bool + def __init__( self, objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame], @@ -555,7 +557,9 @@ def __init__( raise ValueError( f"The 'sort' keyword only accepts boolean values; {sort} was passed." ) - self.sort = sort + # Incompatible types in assignment (expression has type "Union[bool, bool_]", + # variable has type "bool") + self.sort = sort # type: ignore[assignment] self.ignore_index = ignore_index self.verify_integrity = verify_integrity diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 21ce1d3c96379..d2b022214167f 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2026,7 +2026,8 @@ def _get_merge_keys( elif is_float_dtype(lt): if not is_number(self.tolerance): raise MergeError(msg) - if self.tolerance < 0: + # error: Unsupported operand types for > ("int" and "Number") + if self.tolerance < 0: # type: ignore[operator] raise MergeError("tolerance must be positive") else: diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index b11ff11421ed4..ed0de80e381c3 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1402,7 +1402,7 @@ def _generate_cython_apply_func( self, args: tuple[Any, ...], kwargs: dict[str, Any], - raw: bool, + raw: bool | np.bool_, function: Callable[..., Any], ) -> Callable[[np.ndarray, np.ndarray, np.ndarray, int], np.ndarray]: from pandas import Series diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 315d18d052d9f..d8c9ece3b2cce 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -1348,4 +1348,5 @@ def _validate_skipfooter_arg(skipfooter: int) -> int: if skipfooter < 0: raise ValueError("skipfooter cannot be negative") - return skipfooter + # Incompatible return value type (got "Union[int, integer[Any]]", expected "int") + return skipfooter # type: ignore[return-value] diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 8d48d04c738e8..ec04a9ce81d92 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -44,7 +44,6 @@ from pandas.core.dtypes.common import ( is_datetime64tz_dtype, is_dict_like, - is_integer, is_list_like, ) from pandas.core.dtypes.dtypes import DatetimeTZDtype @@ -1022,7 +1021,7 @@ def insert( chunk_iter = zip(*(arr[start_i:end_i] for arr in data_list)) num_inserted = exec_insert(conn, keys, chunk_iter) # GH 46891 - if is_integer(num_inserted): + if num_inserted is not None: if total_inserted is None: total_inserted = num_inserted else: diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index dc51a5b0a77fb..3d79d483038ee 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -775,7 +775,7 @@ def psql_insert_copy(table, conn, keys, data_iter): "test_frame", conn, index=False, method=psql_insert_copy ) # GH 46891 - if not isinstance(expected_count, int): + if expected_count is None: assert result_count is None else: assert result_count == expected_count diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 17ef583febc24..f03d1ceb507fd 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -250,7 +250,7 @@ def validate_bool_kwarg( """ good_value = is_bool(value) if none_allowed: - good_value = good_value or value is None + good_value = good_value or (value is None) if int_allowed: good_value = good_value or isinstance(value, int) @@ -260,7 +260,7 @@ def validate_bool_kwarg( f'For argument "{arg_name}" expected type bool, received ' f"type {type(value).__name__}." ) - return value + return value # pyright: ignore[reportGeneralTypeIssues] def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True): @@ -438,7 +438,7 @@ def validate_insert_loc(loc: int, length: int) -> int: loc += length if not 0 <= loc <= length: raise IndexError(f"loc must be an integer between -{length} and {length}") - return loc + return loc # pyright: ignore[reportGeneralTypeIssues] def check_dtype_backend(dtype_backend) -> None: