pandas-dev · WillAyd · Mar 16, 2023 · Feb 10, 2023 · Feb 10, 2023 · Feb 11, 2023
diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
@@ -1,6 +1,6 @@
 # TODO(npdtypes): Many types specified here can be made more specific/accurate;
 #  the more specific versions are specified in comments
-
+from decimal import Decimal
 from typing import (
     Any,
     Callable,
@@ -13,9 +13,12 @@ from typing import (
 
 import numpy as np
 
+from pandas._libs.interval import Interval
+from pandas._libs.tslibs import Period
 from pandas._typing import (
     ArrayLike,
     DtypeObj,
+    TypeGuard,
     npt,
 )
 
@@ -38,13 +41,13 @@ def infer_dtype(value: object, skipna: bool = ...) -> str: ...
 def is_iterator(obj: object) -> bool: ...
 def is_scalar(val: object) -> bool: ...
 def is_list_like(obj: object, allow_sets: bool = ...) -> bool: ...
-def is_period(val: object) -> bool: ...
-def is_interval(val: object) -> bool: ...
-def is_decimal(val: object) -> bool: ...
-def is_complex(val: object) -> bool: ...
-def is_bool(val: object) -> bool: ...
-def is_integer(val: object) -> bool: ...
-def is_float(val: object) -> bool: ...
+def is_period(val: object) -> TypeGuard[Period]: ...
+def is_interval(val: object) -> TypeGuard[Interval]: ...
+def is_decimal(val: object) -> TypeGuard[Decimal]: ...
+def is_complex(val: object) -> TypeGuard[complex]: ...
+def is_bool(val: object) -> TypeGuard[bool | np.bool_]: ...
+def is_integer(val: object) -> TypeGuard[int | np.integer]: ...
+def is_float(val: object) -> TypeGuard[float]: ...
 def is_interval_array(values: np.ndarray) -> bool: ...
 def is_datetime64_array(values: np.ndarray) -> bool: ...
 def is_timedelta_or_timedelta64_array(values: np.ndarray) -> bool: ...

@@ -84,13 +84,19 @@
     # Name "npt._ArrayLikeInt_co" is not defined  [name-defined]
     NumpySorter = Optional[npt._ArrayLikeInt_co]  # type: ignore[name-defined]
 
+    if sys.version_info >= (3, 10):
+        from typing import TypeGuard
+    else:
+        from typing_extensions import TypeGuard  # pyright: reportUnusedImport = false
+
     if sys.version_info >= (3, 11):
         from typing import Self
     else:
         from typing_extensions import Self  # pyright: reportUnusedImport = false
 else:
     npt: Any = None
     Self: Any = None
+    TypeGuard: Any = None
 
 HashableT = TypeVar("HashableT", bound=Hashable)
 

diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py
@@ -25,6 +25,7 @@
     overload,
 )
 
+import numpy as np
 from numpy import ndarray
 
 from pandas._libs.lib import (
@@ -215,7 +216,7 @@ def validate_clip_with_axis(
 )
 
 
-def validate_cum_func_with_skipna(skipna, args, kwargs, name) -> bool:
+def validate_cum_func_with_skipna(skipna: bool, args, kwargs, name) -> bool:
     """
     If this function is called via the 'numpy' library, the third parameter in
     its signature is 'dtype', which takes either a 'numpy' dtype or 'None', so
@@ -224,6 +225,8 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name) -> bool:
     if not is_bool(skipna):
         args = (skipna,) + args
         skipna = True
+    elif isinstance(skipna, np.bool_):
+        skipna = bool(skipna)
 
     validate_cum_func(args, kwargs, fname=name)
     return skipna

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -2182,7 +2182,6 @@ def validate_periods(periods: int | float | None) -> int | None:
             periods = int(periods)
         elif not lib.is_integer(periods):
             raise TypeError(f"periods must be a number, got {periods}")
-        periods = cast(int, periods)
     return periods
 
 

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -194,15 +194,9 @@ def maybe_box_native(value: Scalar | None | NAType) -> Scalar | None | NAType:
     scalar or Series
     """
     if is_float(value):
-        # error: Argument 1 to "float" has incompatible type
-        # "Union[Union[str, int, float, bool], Union[Any, Timestamp, Timedelta, Any]]";
-        # expected "Union[SupportsFloat, _SupportsIndex, str]"
-        value = float(value)  # type: ignore[arg-type]
+        value = float(value)
     elif is_integer(value):
-        # error: Argument 1 to "int" has incompatible type
-        # "Union[Union[str, int, float, bool], Union[Any, Timestamp, Timedelta, Any]]";
-        # expected "Union[str, SupportsInt, _SupportsIndex, _SupportsTrunc]"
-        value = int(value)  # type: ignore[arg-type]
+        value = int(value)
     elif is_bool(value):
         value = bool(value)
     elif isinstance(value, (np.datetime64, np.timedelta64)):

diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py
@@ -5,12 +5,19 @@
 from collections import abc
 from numbers import Number
 import re
-from typing import Pattern
+from typing import (
+    TYPE_CHECKING,
+    Hashable,
+    Pattern,
+)
 
 import numpy as np
 
 from pandas._libs import lib
 
+if TYPE_CHECKING:
+    from pandas._typing import TypeGuard
+
 is_bool = lib.is_bool
 
 is_integer = lib.is_integer
@@ -30,7 +37,7 @@
 is_iterator = lib.is_iterator
 
 
-def is_number(obj) -> bool:
+def is_number(obj) -> TypeGuard[Number | np.number]:
     """
     Check if the object is a number.
 
@@ -132,7 +139,7 @@ def is_file_like(obj) -> bool:
     return bool(hasattr(obj, "__iter__"))
 
 
-def is_re(obj) -> bool:
+def is_re(obj) -> TypeGuard[Pattern]:
     """
     Check if the object is a regex pattern instance.
 
@@ -325,7 +332,7 @@ def is_named_tuple(obj) -> bool:
     return isinstance(obj, abc.Sequence) and hasattr(obj, "_fields")
 
 
-def is_hashable(obj) -> bool:
+def is_hashable(obj) -> TypeGuard[Hashable]:
     """
     Return True if hash(obj) will succeed, False otherwise.
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -9501,11 +9501,7 @@ def melt(
     )
     def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame:
         if not lib.is_integer(periods):
-            if not (
-                is_float(periods)
-                # error: "int" has no attribute "is_integer"
-                and periods.is_integer()  # type: ignore[attr-defined]
-            ):
+            if not (is_float(periods) and periods.is_integer()):
                 raise ValueError("periods must be an integer")
             periods = int(periods)
 
@@ -10397,8 +10393,13 @@ def _series_round(ser: Series, decimals: int) -> Series:
             new_cols = list(_dict_round(self, decimals))
         elif is_integer(decimals):
             # Dispatch to Block.round
+            # Argument "decimals" to "round" of "BaseBlockManager" has incompatible
+            # type "Union[int, integer[Any]]"; expected "int"
             return self._constructor(
-                self._mgr.round(decimals=decimals, using_cow=using_copy_on_write()),
+                self._mgr.round(
+                    decimals=decimals,  # type: ignore[arg-type]
+                    using_cow=using_copy_on_write(),
+                ),
             ).__finalize__(self, method="round")
         else:
             raise TypeError("decimals must be an integer, a dict-like or a Series")

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -4089,7 +4089,8 @@ class   animal   locomotion
             loc, new_index = index._get_loc_level(key, level=0)
             if not drop_level:
                 if lib.is_integer(loc):
-                    new_index = index[loc : loc + 1]
+                    # Slice index must be an integer or None
+                    new_index = index[loc : loc + 1]  # type: ignore[misc]
                 else:
                     new_index = index[loc]
         else:

diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
@@ -70,7 +70,11 @@
 
 
 def get_objs_combined_axis(
-    objs, intersect: bool = False, axis: Axis = 0, sort: bool = True, copy: bool = False
+    objs,
+    intersect: bool = False,
+    axis: Axis = 0,
+    sort: bool = True,
+    copy: bool = False,
 ) -> Index:
     """
     Extract combined index: return intersection or union (depending on the

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -2699,6 +2699,7 @@ def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left"
         for k, (lab, lev, level_codes) in enumerate(zipped):
             section = level_codes[start:end]
 
+            loc: npt.NDArray[np.intp] | np.intp | int
             if lab not in lev and not isna(lab):
                 # short circuit
                 try:
@@ -2930,7 +2931,8 @@ def get_loc_level(self, key, level: IndexLabel = 0, drop_level: bool = True):
         loc, mi = self._get_loc_level(key, level=level)
         if not drop_level:
             if lib.is_integer(loc):
-                mi = self[loc : loc + 1]
+                # Slice index must be an integer or None
+                mi = self[loc : loc + 1]  # type: ignore[misc]
             else:
                 mi = self[loc]
         return loc, mi

diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
@@ -298,9 +298,7 @@ def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]:
 
             raise raise_on_incompatible(self, other)
         elif is_integer(other):
-            # integer is passed to .shift via
-            # _add_datetimelike_methods basically
-            # but ufunc may pass integer to _add_delta
+            assert isinstance(other, int)
             return other
 
         # raise when input doesn't have freq

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -1562,7 +1562,7 @@ def _is_scalar_access(self, key: tuple) -> bool:
 
         return all(is_integer(k) for k in key)
 
-    def _validate_integer(self, key: int, axis: AxisInt) -> None:
+    def _validate_integer(self, key: int | np.integer, axis: AxisInt) -> None:
         """
         Check that 'key' is a valid position in the desired axis.
 
@@ -2171,7 +2171,7 @@ def _ensure_iterable_column_indexer(self, column_indexer):
         """
         Ensure that our column indexer is something that can be iterated over.
         """
-        ilocs: Sequence[int] | np.ndarray
+        ilocs: Sequence[int | np.integer] | np.ndarray
         if is_integer(column_indexer):
             ilocs = [column_indexer]
         elif isinstance(column_indexer, slice):

diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
@@ -391,6 +391,8 @@ class _Concatenator:
     Orchestrates a concatenation operation for BlockManagers
     """
 
+    sort: bool
+
     def __init__(
         self,
         objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame],
@@ -555,7 +557,9 @@ def __init__(
             raise ValueError(
                 f"The 'sort' keyword only accepts boolean values; {sort} was passed."
             )
-        self.sort = sort
+        # Incompatible types in assignment (expression has type "Union[bool, bool_]",
+        # variable has type "bool")
+        self.sort = sort  # type: ignore[assignment]
 
         self.ignore_index = ignore_index
         self.verify_integrity = verify_integrity

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -2026,7 +2026,8 @@ def _get_merge_keys(
             elif is_float_dtype(lt):
                 if not is_number(self.tolerance):
                     raise MergeError(msg)
-                if self.tolerance < 0:
+                # error: Unsupported operand types for > ("int" and "Number")
+                if self.tolerance < 0:  # type: ignore[operator]
                     raise MergeError("tolerance must be positive")
 
             else:

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
@@ -1402,7 +1402,7 @@ def _generate_cython_apply_func(
         self,
         args: tuple[Any, ...],
         kwargs: dict[str, Any],
-        raw: bool,
+        raw: bool | np.bool_,
         function: Callable[..., Any],
     ) -> Callable[[np.ndarray, np.ndarray, np.ndarray, int], np.ndarray]:
         from pandas import Series

diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
@@ -1348,4 +1348,5 @@ def _validate_skipfooter_arg(skipfooter: int) -> int:
     if skipfooter < 0:
         raise ValueError("skipfooter cannot be negative")
 
-    return skipfooter
+    # Incompatible return value type (got "Union[int, integer[Any]]", expected "int")
+    return skipfooter  # type: ignore[return-value]
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
@@ -44,7 +44,6 @@
 from pandas.core.dtypes.common import (
     is_datetime64tz_dtype,
     is_dict_like,
-    is_integer,
     is_list_like,
 )
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
@@ -1022,7 +1021,7 @@ def insert(
                 chunk_iter = zip(*(arr[start_i:end_i] for arr in data_list))
                 num_inserted = exec_insert(conn, keys, chunk_iter)
                 # GH 46891
-                if is_integer(num_inserted):
+                if num_inserted is not None:
                     if total_inserted is None:
                         total_inserted = num_inserted
                     else:

diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
@@ -775,7 +775,7 @@ def psql_insert_copy(table, conn, keys, data_iter):
         "test_frame", conn, index=False, method=psql_insert_copy
     )
     # GH 46891
-    if not isinstance(expected_count, int):
+    if expected_count is None:
         assert result_count is None
     else:
         assert result_count == expected_count

diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py
@@ -250,7 +250,7 @@ def validate_bool_kwarg(
     """
     good_value = is_bool(value)
     if none_allowed:
-        good_value = good_value or value is None
+        good_value = good_value or (value is None)
 
     if int_allowed:
         good_value = good_value or isinstance(value, int)
@@ -260,7 +260,7 @@ def validate_bool_kwarg(
             f'For argument "{arg_name}" expected type bool, received '
             f"type {type(value).__name__}."
         )
-    return value
+    return value  # pyright: ignore[reportGeneralTypeIssues]
 
 
 def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
@@ -438,7 +438,7 @@ def validate_insert_loc(loc: int, length: int) -> int:
         loc += length
     if not 0 <= loc <= length:
         raise IndexError(f"loc must be an integer between -{length} and {length}")
-    return loc
+    return loc  # pyright: ignore[reportGeneralTypeIssues]
 
 
 def check_dtype_backend(dtype_backend) -> None: