diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 966fda1ed710a..8df6d70a3bed4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -268,7 +268,7 @@ repos: |/_testing/ - id: autotyping name: autotyping - entry: python -m libcst.tool codemod autotyping.AutotypeCommand --none-return --scalar-return --annotate-magics --annotate-imprecise-magics --bool-param --bytes-param --str-param --float-param + entry: python -m libcst.tool codemod autotyping.AutotypeCommand --aggressive types_or: [python, pyi] files: ^pandas exclude: ^(pandas/tests|pandas/_version.py|pandas/io/clipboard) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index e1a6cf04a435e..3b17d6953d61a 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -704,7 +704,7 @@ def _build_option_description(k: str) -> str: return s -def pp_options_list(keys: Iterable[str], width=80, _print: bool = False): +def pp_options_list(keys: Iterable[str], width: int = 80, _print: bool = False): """Builds a concise listing of available options, grouped by prefix""" from itertools import groupby from textwrap import wrap diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index e9bf19899f31e..f5f2aa23459e4 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -341,11 +341,13 @@ def getCols(k) -> str: # make index -def makeStringIndex(k=10, name=None) -> Index: +def makeStringIndex(k: int = 10, name=None) -> Index: return Index(rands_array(nchars=10, size=k), name=name) -def makeCategoricalIndex(k=10, n=3, name=None, **kwargs) -> CategoricalIndex: +def makeCategoricalIndex( + k: int = 10, n: int = 3, name=None, **kwargs +) -> CategoricalIndex: """make a length k index or n categories""" x = rands_array(nchars=4, size=n, replace=False) return CategoricalIndex( @@ -353,13 +355,13 @@ def makeCategoricalIndex(k=10, n=3, name=None, **kwargs) -> CategoricalIndex: ) -def makeIntervalIndex(k=10, name=None, **kwargs) -> IntervalIndex: +def makeIntervalIndex(k: int = 10, name=None, **kwargs) -> IntervalIndex: """make a length k IntervalIndex""" x = np.linspace(0, 100, num=(k + 1)) return IntervalIndex.from_breaks(x, name=name, **kwargs) -def makeBoolIndex(k=10, name=None) -> Index: +def makeBoolIndex(k: int = 10, name=None) -> Index: if k == 1: return Index([True], name=name) elif k == 2: @@ -367,7 +369,7 @@ def makeBoolIndex(k=10, name=None) -> Index: return Index([False, True] + [False] * (k - 2), name=name) -def makeNumericIndex(k=10, name=None, *, dtype) -> NumericIndex: +def makeNumericIndex(k: int = 10, name=None, *, dtype) -> NumericIndex: dtype = pandas_dtype(dtype) assert isinstance(dtype, np.dtype) @@ -385,21 +387,21 @@ def makeNumericIndex(k=10, name=None, *, dtype) -> NumericIndex: return NumericIndex(values, dtype=dtype, name=name) -def makeIntIndex(k=10, name=None) -> Int64Index: +def makeIntIndex(k: int = 10, name=None) -> Int64Index: base_idx = makeNumericIndex(k, name=name, dtype="int64") return Int64Index(base_idx) -def makeUIntIndex(k=10, name=None) -> UInt64Index: +def makeUIntIndex(k: int = 10, name=None) -> UInt64Index: base_idx = makeNumericIndex(k, name=name, dtype="uint64") return UInt64Index(base_idx) -def makeRangeIndex(k=10, name=None, **kwargs) -> RangeIndex: +def makeRangeIndex(k: int = 10, name=None, **kwargs) -> RangeIndex: return RangeIndex(0, k, 1, name=name, **kwargs) -def makeFloatIndex(k=10, name=None) -> Float64Index: +def makeFloatIndex(k: int = 10, name=None) -> Float64Index: base_idx = makeNumericIndex(k, name=name, dtype="float64") return Float64Index(base_idx) @@ -423,7 +425,7 @@ def makePeriodIndex(k: int = 10, name=None, **kwargs) -> PeriodIndex: return pd.period_range(start=dt, periods=k, freq="B", name=name, **kwargs) -def makeMultiIndex(k=10, names=None, **kwargs): +def makeMultiIndex(k: int = 10, names=None, **kwargs): N = (k // 2) + 1 rng = range(N) mi = MultiIndex.from_product([("foo", "bar"), rng], names=names, **kwargs) @@ -665,8 +667,8 @@ def makeCustomDataframe( ncols, c_idx_names: bool | list[str] = True, r_idx_names: bool | list[str] = True, - c_idx_nlevels=1, - r_idx_nlevels=1, + c_idx_nlevels: int = 1, + r_idx_nlevels: int = 1, data_gen_f=None, c_ndupe_l=None, r_ndupe_l=None, diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 6860ba291bf73..74fc15a6fad63 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -250,7 +250,7 @@ def _validate_searchsorted_value( return value @doc(ExtensionArray.shift) - def shift(self, periods=1, fill_value=None, axis=0): + def shift(self, periods: int = 1, fill_value=None, axis: AxisInt = 0): fill_value = self._validate_shift_value(fill_value) new_values = shift(self._ndarray, periods, axis, fill_value) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index b04a26120cabb..043e0baf3ec0e 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1062,7 +1062,12 @@ def _wrap_reduction_result(self, name: str, result, skipna, **kwargs): return result def sum( - self, *, skipna: bool = True, min_count=0, axis: AxisInt | None = 0, **kwargs + self, + *, + skipna: bool = True, + min_count: int = 0, + axis: AxisInt | None = 0, + **kwargs, ): nv.validate_sum((), kwargs) @@ -1085,7 +1090,12 @@ def sum( ) def prod( - self, *, skipna: bool = True, min_count=0, axis: AxisInt | None = 0, **kwargs + self, + *, + skipna: bool = True, + min_count: int = 0, + axis: AxisInt | None = 0, + **kwargs, ): nv.validate_prod((), kwargs) result = masked_reductions.prod( diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 290860d897ea2..4d5286e7364f5 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -243,7 +243,12 @@ def max( return self._wrap_reduction_result(axis, result) def sum( - self, *, axis: AxisInt | None = None, skipna: bool = True, min_count=0, **kwargs + self, + *, + axis: AxisInt | None = None, + skipna: bool = True, + min_count: int = 0, + **kwargs, ) -> Scalar: nv.validate_sum((), kwargs) result = nanops.nansum( @@ -252,7 +257,12 @@ def sum( return self._wrap_reduction_result(axis, result) def prod( - self, *, axis: AxisInt | None = None, skipna: bool = True, min_count=0, **kwargs + self, + *, + axis: AxisInt | None = None, + skipna: bool = True, + min_count: int = 0, + **kwargs, ) -> Scalar: nv.validate_prod((), kwargs) result = nanops.nanprod( @@ -294,7 +304,7 @@ def std( axis: AxisInt | None = None, dtype: NpDtype | None = None, out=None, - ddof=1, + ddof: int = 1, keepdims: bool = False, skipna: bool = True, ): @@ -310,7 +320,7 @@ def var( axis: AxisInt | None = None, dtype: NpDtype | None = None, out=None, - ddof=1, + ddof: int = 1, keepdims: bool = False, skipna: bool = True, ): @@ -326,7 +336,7 @@ def sem( axis: AxisInt | None = None, dtype: NpDtype | None = None, out=None, - ddof=1, + ddof: int = 1, keepdims: bool = False, skipna: bool = True, ): diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index c0d476c2452b8..5e1b0c4b18718 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -1067,7 +1067,7 @@ def dt64arr_to_periodarr( return c_dt64arr_to_periodarr(data.view("i8"), base, tz, reso=reso), freq -def _get_ordinal_range(start, end, periods, freq, mult=1): +def _get_ordinal_range(start, end, periods, freq, mult: int = 1): if com.count_not_none(start, end, periods) != 2: raise ValueError( "Of the three parameters: start, end, and periods, " diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index c6d2cf9e25fa9..40a56e27c63f5 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -32,6 +32,7 @@ from pandas._typing import ( ArrayLike, AstypeArg, + Axis, AxisInt, Dtype, NpDtype, @@ -1479,7 +1480,7 @@ def all(self, axis=None, *args, **kwargs): return values.all() - def any(self, axis=0, *args, **kwargs): + def any(self, axis: AxisInt = 0, *args, **kwargs): """ Tests whether at least one of elements evaluate True @@ -1576,7 +1577,7 @@ def cumsum(self, axis: AxisInt = 0, *args, **kwargs) -> SparseArray: fill_value=self.fill_value, ) - def mean(self, axis=0, *args, **kwargs): + def mean(self, axis: Axis = 0, *args, **kwargs): """ Mean of non-NA/null values diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index cfa61abcaa2a7..faa662611c0e1 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -310,7 +310,7 @@ def _str_map( return lib.map_infer_mask(arr, f, mask.view("uint8")) def _str_contains( - self, pat, case: bool = True, flags=0, na=np.nan, regex: bool = True + self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True ): if flags: fallback_performancewarning() diff --git a/pandas/core/base.py b/pandas/core/base.py index a1ca8b59dcffa..4e2f6a77930a1 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -25,6 +25,7 @@ import pandas._libs.lib as lib from pandas._typing import ( ArrayLike, + Axis, AxisInt, DtypeObj, IndexLabel, @@ -784,7 +785,7 @@ def _reduce( op, name: str, *, - axis=0, + axis: Axis = 0, skipna: bool = True, numeric_only=None, filter_type=None, diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index bcd39eab88590..2ed3ece05ebba 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -176,7 +176,7 @@ def eval( local_dict=None, global_dict=None, resolvers=(), - level=0, + level: int = 0, target=None, inplace: bool = False, ): diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 8ea78ff68a291..004b1751b917f 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -337,7 +337,7 @@ def _concatenate_2d(to_concat, axis: AxisInt): return np.concatenate(to_concat, axis=axis) -def _concat_datetime(to_concat, axis=0): +def _concat_datetime(to_concat, axis: AxisInt = 0): """ provide concatenation of an datetimelike array of arrays each of which is a single M8[ns], datetime64[ns, tz] or m8[ns] dtype diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7064848233581..17bd50b9ad8f0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10867,7 +10867,7 @@ def func(values: np.ndarray): # We only use this in the case that operates on self.values return op(values, axis=axis, skipna=skipna, **kwds) - def blk_func(values, axis=1): + def blk_func(values, axis: Axis = 1): if isinstance(values, ExtensionArray): if not is_1d_only_ea_dtype(values.dtype) and not isinstance( self._mgr, ArrayManager diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b520aab2cfa5b..d5f3a4ad080e5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1327,7 +1327,7 @@ class name return None @final - def _set_axis_name(self, name, axis=0, inplace: bool_t = False): + def _set_axis_name(self, name, axis: Axis = 0, inplace: bool_t = False): """ Set the name(s) of the axis. @@ -1672,7 +1672,7 @@ def __round__(self: NDFrameT, decimals: int = 0) -> NDFrameT: # have consistent precedence and validation logic throughout the library. @final - def _is_level_reference(self, key: Level, axis=0) -> bool_t: + def _is_level_reference(self, key: Level, axis: Axis = 0) -> bool_t: """ Test whether a key is a level reference for a given axis. @@ -1693,17 +1693,17 @@ def _is_level_reference(self, key: Level, axis=0) -> bool_t: ------- is_level : bool """ - axis = self._get_axis_number(axis) + axis_int = self._get_axis_number(axis) return ( key is not None and is_hashable(key) - and key in self.axes[axis].names - and not self._is_label_reference(key, axis=axis) + and key in self.axes[axis_int].names + and not self._is_label_reference(key, axis=axis_int) ) @final - def _is_label_reference(self, key: Level, axis=0) -> bool_t: + def _is_label_reference(self, key: Level, axis: int = 0) -> bool_t: """ Test whether a key is a label reference for a given axis. @@ -1758,7 +1758,7 @@ def _is_label_or_level_reference(self, key: Level, axis: AxisInt = 0) -> bool_t: ) @final - def _check_label_or_level_ambiguity(self, key: Level, axis: AxisInt = 0) -> None: + def _check_label_or_level_ambiguity(self, key: Level, axis: Axis = 0) -> None: """ Check whether `key` is ambiguous. @@ -1777,23 +1777,23 @@ def _check_label_or_level_ambiguity(self, key: Level, axis: AxisInt = 0) -> None ValueError: `key` is ambiguous """ - axis = self._get_axis_number(axis) - other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis) + axis_int = self._get_axis_number(axis) + other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis_int) if ( key is not None and is_hashable(key) - and key in self.axes[axis].names + and key in self.axes[axis_int].names and any(key in self.axes[ax] for ax in other_axes) ): # Build an informative and grammatical warning level_article, level_type = ( - ("an", "index") if axis == 0 else ("a", "column") + ("an", "index") if axis_int == 0 else ("a", "column") ) label_article, label_type = ( - ("a", "column") if axis == 0 else ("an", "index") + ("a", "column") if axis_int == 0 else ("an", "index") ) msg = ( @@ -3784,7 +3784,7 @@ def _clear_item_cache(self) -> None: # Indexing Methods def take( - self: NDFrameT, indices, axis=0, is_copy: bool_t | None = None, **kwargs + self: NDFrameT, indices, axis: Axis = 0, is_copy: bool_t | None = None, **kwargs ) -> NDFrameT: """ Return the elements in the given *positional* indices along an axis. @@ -3881,7 +3881,7 @@ class max_speed def _take( self: NDFrameT, indices, - axis=0, + axis: Axis = 0, convert_indices: bool_t = True, ) -> NDFrameT: """ @@ -3899,7 +3899,7 @@ def _take( ) return self._constructor(new_data).__finalize__(self, method="take") - def _take_with_is_copy(self: NDFrameT, indices, axis=0) -> NDFrameT: + def _take_with_is_copy(self: NDFrameT, indices, axis: Axis = 0) -> NDFrameT: """ Internal version of the `take` method that sets the `_is_copy` attribute to keep track of the parent dataframe (using in indexing @@ -4102,7 +4102,7 @@ class animal locomotion def __getitem__(self, item): raise AbstractMethodError(self) - def _slice(self: NDFrameT, slobj: slice, axis=0) -> NDFrameT: + def _slice(self: NDFrameT, slobj: slice, axis: Axis = 0) -> NDFrameT: """ Construct a slice of this container. @@ -9052,7 +9052,7 @@ def last(self: NDFrameT, offset) -> NDFrameT: @final def rank( self: NDFrameT, - axis=0, + axis: Axis = 0, method: str = "average", numeric_only: bool_t | None | lib.NoDefault = lib.no_default, na_option: str = "keep", @@ -9168,7 +9168,7 @@ def rank( elif numeric_only is lib.no_default: numeric_only = None - axis = self._get_axis_number(axis) + axis_int = self._get_axis_number(axis) if na_option not in {"keep", "top", "bottom"}: msg = "na_option must be one of 'keep', 'top', or 'bottom'" @@ -9184,7 +9184,7 @@ def ranker(data): if isinstance(values, ExtensionArray): ranks = values._rank( - axis=axis, + axis=axis_int, method=method, ascending=ascending, na_option=na_option, @@ -9193,7 +9193,7 @@ def ranker(data): else: ranks = algos.rank( values, - axis=axis, + axis=axis_int, method=method, ascending=ascending, na_option=na_option, @@ -9515,7 +9515,7 @@ def _align_frame( fill_value=None, method=None, limit=None, - fill_axis=0, + fill_axis: Axis = 0, ): # defaults join_index, join_columns = None, None @@ -9579,7 +9579,7 @@ def _align_series( fill_value=None, method=None, limit=None, - fill_axis=0, + fill_axis: Axis = 0, ): is_series = isinstance(self, ABCSeries) @@ -10247,7 +10247,7 @@ def shift( return result.__finalize__(self, method="shift") @final - def slice_shift(self: NDFrameT, periods: int = 1, axis=0) -> NDFrameT: + def slice_shift(self: NDFrameT, periods: int = 1, axis: Axis = 0) -> NDFrameT: """ Equivalent to `shift` without copying data. @@ -10506,7 +10506,7 @@ def truncate( @final @doc(klass=_shared_doc_kwargs["klass"]) def tz_convert( - self: NDFrameT, tz, axis=0, level=None, copy: bool_t = True + self: NDFrameT, tz, axis: Axis = 0, level=None, copy: bool_t = True ) -> NDFrameT: """ Convert tz-aware axis to target time zone. @@ -10567,7 +10567,7 @@ def _tz_convert(ax, tz): def tz_localize( self: NDFrameT, tz, - axis=0, + axis: Axis = 0, level=None, copy: bool_t = True, ambiguous: TimeAmbiguous = "raise", @@ -10995,7 +10995,7 @@ def describe( @final def pct_change( self: NDFrameT, - periods=1, + periods: int = 1, fill_method: Literal["backfill", "bfill", "pad", "ffill"] = "pad", limit=None, freq=None, @@ -11542,7 +11542,7 @@ def sum( skipna: bool_t = True, level: Level | None = None, numeric_only: bool_t | None = None, - min_count=0, + min_count: int = 0, **kwargs, ): return self._min_count_stat_function( @@ -11660,7 +11660,12 @@ def _add_numeric_operations(cls) -> None: empty_value=False, ) def any( - self, axis=0, bool_only=None, skipna: bool_t = True, level=None, **kwargs + self, + axis: Axis = 0, + bool_only=None, + skipna: bool_t = True, + level=None, + **kwargs, ): return NDFrame.any(self, axis, bool_only, skipna, level, **kwargs) @@ -11677,7 +11682,12 @@ def any( empty_value=True, ) def all( - self, axis=0, bool_only=None, skipna: bool_t = True, level=None, **kwargs + self, + axis: Axis = 0, + bool_only=None, + skipna: bool_t = True, + level=None, + **kwargs, ): return NDFrame.all(self, axis, bool_only, skipna, level, **kwargs) @@ -11714,7 +11724,7 @@ def sem( axis: Axis | None = None, skipna: bool_t = True, level=None, - ddof=1, + ddof: int = 1, numeric_only=None, **kwargs, ): @@ -11737,7 +11747,7 @@ def var( axis: Axis | None = None, skipna: bool_t = True, level=None, - ddof=1, + ddof: int = 1, numeric_only=None, **kwargs, ): @@ -11761,7 +11771,7 @@ def std( axis: Axis | None = None, skipna: bool_t = True, level=None, - ddof=1, + ddof: int = 1, numeric_only=None, **kwargs, ): @@ -11851,7 +11861,7 @@ def sum( skipna: bool_t = True, level=None, numeric_only=None, - min_count=0, + min_count: int = 0, **kwargs, ): return NDFrame.sum( @@ -11876,7 +11886,7 @@ def prod( skipna: bool_t = True, level=None, numeric_only=None, - min_count=0, + min_count: int = 0, **kwargs, ): return NDFrame.prod( diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 4e17276a4eb1b..c81091ca36b52 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1765,7 +1765,7 @@ def nunique(self, dropna: bool = True) -> DataFrame: ) def idxmax( self, - axis=0, + axis: Axis = 0, skipna: bool = True, numeric_only: bool | lib.NoDefault = lib.no_default, ) -> DataFrame: @@ -1806,7 +1806,7 @@ def func(df): ) def idxmin( self, - axis=0, + axis: Axis = 0, skipna: bool = True, numeric_only: bool | lib.NoDefault = lib.no_default, ) -> DataFrame: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 77b0d2e486bc2..4ad5b0d927450 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -46,6 +46,7 @@ class providing the base-class of operations. from pandas._typing import ( AnyArrayLike, ArrayLike, + Axis, AxisInt, Dtype, FillnaOptions, @@ -920,7 +921,7 @@ def __init__( self, obj: NDFrameT, keys: _KeysArgType | None = None, - axis: AxisInt = 0, + axis: Axis = 0, level: IndexLabel | None = None, grouper: ops.BaseGrouper | None = None, exclusions: frozenset[Hashable] | None = None, @@ -3677,7 +3678,7 @@ def rank( @final @Substitution(name="groupby") @Appender(_common_see_also) - def cumprod(self, axis=0, *args, **kwargs) -> NDFrameT: + def cumprod(self, axis: Axis = 0, *args, **kwargs) -> NDFrameT: """ Cumulative product for each group. @@ -3695,7 +3696,7 @@ def cumprod(self, axis=0, *args, **kwargs) -> NDFrameT: @final @Substitution(name="groupby") @Appender(_common_see_also) - def cumsum(self, axis=0, *args, **kwargs) -> NDFrameT: + def cumsum(self, axis: Axis = 0, *args, **kwargs) -> NDFrameT: """ Cumulative sum for each group. @@ -3713,7 +3714,9 @@ def cumsum(self, axis=0, *args, **kwargs) -> NDFrameT: @final @Substitution(name="groupby") @Appender(_common_see_also) - def cummin(self, axis=0, numeric_only: bool = False, **kwargs) -> NDFrameT: + def cummin( + self, axis: AxisInt = 0, numeric_only: bool = False, **kwargs + ) -> NDFrameT: """ Cumulative min for each group. @@ -3737,7 +3740,9 @@ def cummin(self, axis=0, numeric_only: bool = False, **kwargs) -> NDFrameT: @final @Substitution(name="groupby") @Appender(_common_see_also) - def cummax(self, axis=0, numeric_only: bool = False, **kwargs) -> NDFrameT: + def cummax( + self, axis: AxisInt = 0, numeric_only: bool = False, **kwargs + ) -> NDFrameT: """ Cumulative max for each group. @@ -3907,7 +3912,7 @@ def blk_func(values: ArrayLike) -> ArrayLike: @final @Substitution(name="groupby") - def shift(self, periods=1, freq=None, axis=0, fill_value=None): + def shift(self, periods: int = 1, freq=None, axis: Axis = 0, fill_value=None): """ Shift each group by periods observations. @@ -3999,11 +4004,11 @@ def diff(self, periods: int = 1, axis: AxisInt = 0) -> NDFrameT: @Appender(_common_see_also) def pct_change( self, - periods=1, + periods: int = 1, fill_method: FillnaOptions = "ffill", limit=None, freq=None, - axis=0, + axis: Axis = 0, ): """ Calculate pct_change of each value to previous entry in group. diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index c54954e61096b..11eb12dbacbc7 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -18,7 +18,7 @@ from pandas._typing import ( ArrayLike, - AxisInt, + Axis, NDFrameT, npt, ) @@ -260,7 +260,6 @@ class Grouper: Freq: 17T, dtype: int64 """ - axis: AxisInt sort: bool dropna: bool _gpr_index: Index | None @@ -281,7 +280,7 @@ def __init__( key=None, level=None, freq=None, - axis: AxisInt = 0, + axis: Axis = 0, sort: bool = False, dropna: bool = True, ) -> None: @@ -705,7 +704,7 @@ def groups(self) -> dict[Hashable, np.ndarray]: def get_grouper( obj: NDFrameT, key=None, - axis: AxisInt = 0, + axis: Axis = 0, level=None, sort: bool = True, observed: bool = False, diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index b4f47f70c5a84..48d367b86b157 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -9,6 +9,7 @@ NaT, lib, ) +from pandas._typing import Axis from pandas.errors import InvalidIndexError from pandas.core.dtypes.cast import find_common_type @@ -76,7 +77,7 @@ def get_objs_combined_axis( - objs, intersect: bool = False, axis=0, sort: bool = True, copy: bool = False + objs, intersect: bool = False, axis: Axis = 0, sort: bool = True, copy: bool = False ) -> Index: """ Extract combined index: return intersection or union (depending on the diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7dc04474cbcd8..0582d691ad584 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -49,12 +49,13 @@ AnyAll, ArrayLike, Axes, - AxisInt, + Axis, DropKeep, Dtype, DtypeObj, F, IgnoreRaise, + IndexLabel, Level, Shape, npt, @@ -1180,7 +1181,7 @@ def astype(self, dtype, copy: bool = True): def take( self, indices, - axis: AxisInt = 0, + axis: Axis = 0, allow_fill: bool = True, fill_value=None, **kwargs, @@ -2167,7 +2168,7 @@ def _get_level_values(self, level) -> Index: get_level_values = _get_level_values @final - def droplevel(self, level=0): + def droplevel(self, level: IndexLabel = 0): """ Return index with requested level(s) removed. @@ -5867,7 +5868,7 @@ def sort(self, *args, **kwargs): """ raise TypeError("cannot sort an Index object in-place, use sort_values instead") - def shift(self, periods=1, freq=None): + def shift(self, periods: int = 1, freq=None): """ Shift index by desired number of time frequency increments. diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index f4077449a7907..147d1649e5e1b 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -30,6 +30,7 @@ parsing, to_offset, ) +from pandas._typing import Axis from pandas.compat.numpy import function as nv from pandas.util._decorators import ( Appender, @@ -695,7 +696,14 @@ def insert(self, loc: int, item): # NDArray-Like Methods @Appender(_index_shared_docs["take"] % _index_doc_kwargs) - def take(self, indices, axis=0, allow_fill: bool = True, fill_value=None, **kwargs): + def take( + self, + indices, + axis: Axis = 0, + allow_fill: bool = True, + fill_value=None, + **kwargs, + ): nv.validate_take((), kwargs) indices = np.asarray(indices, dtype=np.intp) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 26dd957ff4d57..4b8842227db8c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -32,11 +32,12 @@ from pandas._typing import ( AnyAll, AnyArrayLike, - AxisInt, + Axis, DropKeep, DtypeObj, F, IgnoreRaise, + IndexLabel, Scalar, Shape, npt, @@ -2176,7 +2177,7 @@ def _getitem_slice(self: MultiIndex, slobj: slice) -> MultiIndex: def take( self: MultiIndex, indices, - axis: AxisInt = 0, + axis: Axis = 0, allow_fill: bool = True, fill_value=None, **kwargs, @@ -2472,7 +2473,10 @@ def cats(level_codes): ] def sortlevel( - self, level=0, ascending: bool | list[bool] = True, sort_remaining: bool = True + self, + level: IndexLabel = 0, + ascending: bool | list[bool] = True, + sort_remaining: bool = True, ) -> tuple[MultiIndex, npt.NDArray[np.intp]]: """ Sort MultiIndex at the requested level. @@ -2525,9 +2529,13 @@ def sortlevel( (0, 1)], ), array([0, 1])) """ - if isinstance(level, (str, int)): + if not is_list_like(level): level = [level] - level = [self._get_level_number(lev) for lev in level] + # error: Item "Hashable" of "Union[Hashable, Sequence[Hashable]]" has + # no attribute "__iter__" (not iterable) + level = [ + self._get_level_number(lev) for lev in level # type: ignore[union-attr] + ] sortorder = None # we have a directed ordering via ascending @@ -3007,7 +3015,7 @@ def _maybe_to_slice(loc): return _maybe_to_slice(loc) if len(loc) != stop - start else slice(start, stop) - def get_loc_level(self, key, level=0, drop_level: bool = True): + def get_loc_level(self, key, level: IndexLabel = 0, drop_level: bool = True): """ Get location and sliced index for requested label(s)/level(s). diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 9a55076c71f5f..203260f99e547 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -543,7 +543,9 @@ def _interpolate_scipy_wrapper( return new_y -def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate: bool = False): +def _from_derivatives( + xi, yi, x, order=None, der: int | list[int] | None = 0, extrapolate: bool = False +): """ Convenience function for interpolate.BPoly.from_derivatives. @@ -586,7 +588,7 @@ def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate: bool = False): return m(x) -def _akima_interpolate(xi, yi, x, der=0, axis=0): +def _akima_interpolate(xi, yi, x, der: int | list[int] | None = 0, axis: AxisInt = 0): """ Convenience function for akima interpolation. xi and yi are arrays of values used to approximate some function f, @@ -630,7 +632,12 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0): def _cubicspline_interpolate( - xi, yi, x, axis=0, bc_type: str | tuple[Any, Any] = "not-a-knot", extrapolate=None + xi, + yi, + x, + axis: AxisInt = 0, + bc_type: str | tuple[Any, Any] = "not-a-knot", + extrapolate=None, ): """ Convenience function for cubic spline data interpolator. diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 8e162a8249fd9..36a3281b7b931 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -872,7 +872,12 @@ def _get_counts_nanvar( @bottleneck_switch(ddof=1) def nanstd( - values, *, axis: AxisInt | None = None, skipna: bool = True, ddof=1, mask=None + values, + *, + axis: AxisInt | None = None, + skipna: bool = True, + ddof: int = 1, + mask=None, ): """ Compute the standard deviation along given axis while ignoring NaNs @@ -914,7 +919,12 @@ def nanstd( @disallow("M8", "m8") @bottleneck_switch(ddof=1) def nanvar( - values, *, axis: AxisInt | None = None, skipna: bool = True, ddof=1, mask=None + values, + *, + axis: AxisInt | None = None, + skipna: bool = True, + ddof: int = 1, + mask=None, ): """ Compute the variance along given axis while ignoring NaNs diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index c2a76b9a9ae19..0dcccdbd3bb8b 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -14,6 +14,7 @@ from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op from pandas._typing import ( + Axis, AxisInt, Level, ) @@ -178,7 +179,7 @@ def flex_method_SERIES(op): doc = make_flex_doc(name, "series") @Appender(doc) - def flex_wrapper(self, other, level=None, fill_value=None, axis=0): + def flex_wrapper(self, other, level=None, fill_value=None, axis: Axis = 0): # validate axis if axis is not None: self._get_axis_number(axis) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 0e0aa9e272c89..d752cb0b5b23f 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -28,6 +28,7 @@ ) from pandas._typing import ( AnyArrayLike, + Axis, AxisInt, Frequency, IndexLabel, @@ -152,7 +153,7 @@ def __init__( self, obj: DataFrame | Series, groupby: TimeGrouper, - axis: AxisInt = 0, + axis: Axis = 0, kind=None, *, group_keys: bool | lib.NoDefault = lib.no_default, @@ -162,7 +163,9 @@ def __init__( self.groupby = groupby self.keys = None self.sort = True - self.axis = axis + # error: Incompatible types in assignment (expression has type "Union + # [int, Literal['index', 'columns', 'rows']]", variable has type "int") + self.axis = axis # type: ignore[assignment] self.kind = kind self.squeeze = False self.group_keys = group_keys @@ -926,7 +929,7 @@ def fillna(self, method, limit=None): def interpolate( self, method: QuantileInterpolation = "linear", - axis=0, + axis: Axis = 0, limit=None, inplace: bool = False, limit_direction: Literal["forward", "backward", "both"] = "forward", @@ -973,7 +976,7 @@ def asfreq(self, fill_value=None): def std( self, - ddof=1, + ddof: int = 1, numeric_only: bool | lib.NoDefault = lib.no_default, *args, **kwargs, @@ -1000,7 +1003,7 @@ def std( def var( self, - ddof=1, + ddof: int = 1, numeric_only: bool | lib.NoDefault = lib.no_default, *args, **kwargs, @@ -1597,7 +1600,7 @@ def __init__( closed: Literal["left", "right"] | None = None, label: Literal["left", "right"] | None = None, how: str = "mean", - axis=0, + axis: Axis = 0, fill_method=None, limit=None, loffset=None, diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 33f164efdd0c7..3c855b8291303 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -390,7 +390,7 @@ class _Concatenator: def __init__( self, objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame], - axis=0, + axis: Axis = 0, join: str = "outer", keys=None, levels=None, diff --git a/pandas/core/series.py b/pandas/core/series.py index b91d5540335f1..25ce6ea739d0e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -942,7 +942,7 @@ def take( result = self._constructor(new_values, index=new_index, fastpath=True) return result.__finalize__(self, method="take") - def _take_with_is_copy(self, indices, axis=0) -> Series: + def _take_with_is_copy(self, indices, axis: Axis = 0) -> Series: """ Internal version of the `take` method that sets the `_is_copy` attribute to keep track of the parent dataframe (using in indexing @@ -967,7 +967,7 @@ def _ixs(self, i: int, axis: AxisInt = 0) -> Any: """ return self._values[i] - def _slice(self, slobj: slice, axis: AxisInt = 0) -> Series: + def _slice(self, slobj: slice, axis: Axis = 0) -> Series: # axis kwarg is retained for compat with NDFrame method # _slice is *always* positional return self._get_values(slobj) @@ -4784,7 +4784,7 @@ def _reduce( op, name: str, *, - axis=0, + axis: Axis = 0, skipna: bool = True, numeric_only=None, filter_type=None, diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 3f1176f42e44c..acefb8c76c0d4 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1133,7 +1133,9 @@ def join(self, sep): return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) - def contains(self, pat, case: bool = True, flags=0, na=None, regex: bool = True): + def contains( + self, pat, case: bool = True, flags: int = 0, na=None, regex: bool = True + ): r""" Test if pattern or regex is contained within a string of a Series or Index. @@ -1269,7 +1271,7 @@ def contains(self, pat, case: bool = True, flags=0, na=None, regex: bool = True) return self._wrap_result(result, fill_value=na, returns_string=False) @forbid_nonstring_types(["bytes"]) - def match(self, pat, case: bool = True, flags=0, na=None): + def match(self, pat, case: bool = True, flags: int = 0, na=None): """ Determine if each string starts with a match of a regular expression. @@ -1301,7 +1303,7 @@ def match(self, pat, case: bool = True, flags=0, na=None): return self._wrap_result(result, fill_value=na, returns_string=False) @forbid_nonstring_types(["bytes"]) - def fullmatch(self, pat, case: bool = True, flags=0, na=None): + def fullmatch(self, pat, case: bool = True, flags: int = 0, na=None): """ Determine if each string entirely matches a regular expression. @@ -2232,7 +2234,7 @@ def translate(self, table): return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) - def count(self, pat, flags=0): + def count(self, pat, flags: int = 0): r""" Count occurrences of pattern in each string of the Series/Index. @@ -2440,7 +2442,7 @@ def endswith( return self._wrap_result(result, returns_string=False) @forbid_nonstring_types(["bytes"]) - def findall(self, pat, flags=0): + def findall(self, pat, flags: int = 0): """ Find all occurrences of pattern or regular expression in the Series/Index. @@ -2661,7 +2663,7 @@ def extract( return self._wrap_result(result, name=name) @forbid_nonstring_types(["bytes"]) - def extractall(self, pat, flags=0): + def extractall(self, pat, flags: int = 0): r""" Extract capture groups in the regex `pat` as columns in DataFrame. @@ -2774,7 +2776,7 @@ def extractall(self, pat, flags=0): } ) @forbid_nonstring_types(["bytes"]) - def find(self, sub, start=0, end=None): + def find(self, sub, start: int = 0, end=None): if not isinstance(sub, str): msg = f"expected a string object, not {type(sub).__name__}" raise TypeError(msg) @@ -2791,7 +2793,7 @@ def find(self, sub, start=0, end=None): } ) @forbid_nonstring_types(["bytes"]) - def rfind(self, sub, start=0, end=None): + def rfind(self, sub, start: int = 0, end=None): if not isinstance(sub, str): msg = f"expected a string object, not {type(sub).__name__}" raise TypeError(msg) @@ -2858,7 +2860,7 @@ def normalize(self, form): } ) @forbid_nonstring_types(["bytes"]) - def index(self, sub, start=0, end=None): + def index(self, sub, start: int = 0, end=None): if not isinstance(sub, str): msg = f"expected a string object, not {type(sub).__name__}" raise TypeError(msg) @@ -2876,7 +2878,7 @@ def index(self, sub, start=0, end=None): } ) @forbid_nonstring_types(["bytes"]) - def rindex(self, sub, start=0, end=None): + def rindex(self, sub, start: int = 0, end=None): if not isinstance(sub, str): msg = f"expected a string object, not {type(sub).__name__}" raise TypeError(msg) @@ -3345,7 +3347,7 @@ def _get_group_names(regex: re.Pattern) -> list[Hashable]: return [names.get(1 + i, i) for i in range(regex.groups)] -def str_extractall(arr, pat, flags=0): +def str_extractall(arr, pat, flags: int = 0): regex = re.compile(pat, flags=flags) # the regex must contain capture groups. if regex.groups == 0: diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py index ae39526871d63..b5618207ab9d8 100644 --- a/pandas/core/strings/base.py +++ b/pandas/core/strings/base.py @@ -39,7 +39,7 @@ def _str_getitem(self, key): return self._str_get(key) @abc.abstractmethod - def _str_count(self, pat, flags=0): + def _str_count(self, pat, flags: int = 0): pass @abc.abstractmethod @@ -53,7 +53,7 @@ def _str_pad( @abc.abstractmethod def _str_contains( - self, pat, case: bool = True, flags=0, na=None, regex: bool = True + self, pat, case: bool = True, flags: int = 0, na=None, regex: bool = True ): pass @@ -102,15 +102,15 @@ def _str_encode(self, encoding, errors: str = "strict"): pass @abc.abstractmethod - def _str_find(self, sub, start=0, end=None): + def _str_find(self, sub, start: int = 0, end=None): pass @abc.abstractmethod - def _str_rfind(self, sub, start=0, end=None): + def _str_rfind(self, sub, start: int = 0, end=None): pass @abc.abstractmethod - def _str_findall(self, pat, flags=0): + def _str_findall(self, pat, flags: int = 0): pass @abc.abstractmethod @@ -118,11 +118,11 @@ def _str_get(self, i): pass @abc.abstractmethod - def _str_index(self, sub, start=0, end=None): + def _str_index(self, sub, start: int = 0, end=None): pass @abc.abstractmethod - def _str_rindex(self, sub, start=0, end=None): + def _str_rindex(self, sub, start: int = 0, end=None): pass @abc.abstractmethod diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index e9b05f0d4e0e4..3e8cdc12e7216 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -101,7 +101,7 @@ def g(x): result = lib.maybe_convert_objects(result) return result - def _str_count(self, pat, flags=0): + def _str_count(self, pat, flags: int = 0): regex = re.compile(pat, flags=flags) f = lambda x: len(regex.findall(x)) return self._str_map(f, dtype="int64") @@ -123,7 +123,7 @@ def _str_pad( return self._str_map(f) def _str_contains( - self, pat, case: bool = True, flags=0, na=np.nan, regex: bool = True + self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True ): if regex: if not case: @@ -232,10 +232,10 @@ def _str_encode(self, encoding, errors: str = "strict"): f = lambda x: x.encode(encoding, errors=errors) return self._str_map(f, dtype=object) - def _str_find(self, sub, start=0, end=None): + def _str_find(self, sub, start: int = 0, end=None): return self._str_find_(sub, start, end, side="left") - def _str_rfind(self, sub, start=0, end=None): + def _str_rfind(self, sub, start: int = 0, end=None): return self._str_find_(sub, start, end, side="right") def _str_find_(self, sub, start, end, side): @@ -252,7 +252,7 @@ def _str_find_(self, sub, start, end, side): f = lambda x: getattr(x, method)(sub, start, end) return self._str_map(f, dtype="int64") - def _str_findall(self, pat, flags=0): + def _str_findall(self, pat, flags: int = 0): regex = re.compile(pat, flags=flags) return self._str_map(regex.findall, dtype="object") @@ -266,14 +266,14 @@ def f(x): return self._str_map(f) - def _str_index(self, sub, start=0, end=None): + def _str_index(self, sub, start: int = 0, end=None): if end: f = lambda x: x.index(sub, start, end) else: f = lambda x: x.index(sub, start, end) return self._str_map(f, dtype="int64") - def _str_rindex(self, sub, start=0, end=None): + def _str_rindex(self, sub, start: int = 0, end=None): if end: f = lambda x: x.rindex(sub, start, end) else: diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 627092f632042..03afdcda35021 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -485,7 +485,7 @@ def _pull_records(js: dict[str, Any], spec: list | str) -> list: meta_vals: DefaultDict = defaultdict(list) meta_keys = [sep.join(val) for val in _meta] - def _recursive_extract(data, path, seen_meta, level=0) -> None: + def _recursive_extract(data, path, seen_meta, level: int = 0) -> None: if isinstance(data, dict): data = [data] if len(path) > 1: diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 754200d2628ad..bacdbbecc3011 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -502,21 +502,21 @@ class _DeprecationConfig(NamedTuple): @overload -def validate_integer(name, val: None, min_val=...) -> None: +def validate_integer(name, val: None, min_val: int = ...) -> None: ... @overload -def validate_integer(name, val: float, min_val=...) -> int: +def validate_integer(name, val: float, min_val: int = ...) -> int: ... @overload -def validate_integer(name, val: int | None, min_val=...) -> int | None: +def validate_integer(name, val: int | None, min_val: int = ...) -> int | None: ... -def validate_integer(name, val: int | float | None, min_val=0) -> int | None: +def validate_integer(name, val: int | float | None, min_val: int = 0) -> int | None: """ Checks whether the 'name' parameter for parsing is either an integer OR float that can SAFELY be cast to an integer diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index f8862dd661ba1..201fdff249d67 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -21,6 +21,7 @@ from pandas._typing import ( IndexLabel, PlottingOrientation, + npt, ) from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly @@ -1657,7 +1658,14 @@ def _args_adjust(self) -> None: # error: Signature of "_plot" incompatible with supertype "MPLPlot" @classmethod def _plot( # type: ignore[override] - cls, ax: Axes, x, y, w, start=0, log: bool = False, **kwds + cls, + ax: Axes, + x, + y, + w, + start: int | npt.NDArray[np.intp] = 0, + log: bool = False, + **kwds, ): return ax.bar(x, y, w, bottom=start, log=log, **kwds) @@ -1783,7 +1791,14 @@ def _start_base(self): # error: Signature of "_plot" incompatible with supertype "MPLPlot" @classmethod def _plot( # type: ignore[override] - cls, ax: Axes, x, y, w, start=0, log: bool = False, **kwds + cls, + ax: Axes, + x, + y, + w, + start: int | npt.NDArray[np.intp] = 0, + log: bool = False, + **kwds, ): return ax.barh(x, y, w, left=start, log=log, **kwds) diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 1bebea357ad68..9aad516d308c3 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -87,16 +87,18 @@ def _calculate_bins(self, data: DataFrame) -> np.ndarray: ) return bins + # error: Signature of "_plot" incompatible with supertype "LinePlot" @classmethod - def _plot( + def _plot( # type: ignore[override] cls, ax, y, style=None, - bins=None, - bottom=0, - column_num=0, + bottom: int | np.ndarray = 0, + column_num: int = 0, stacking_id=None, + *, + bins, **kwds, ): if column_num == 0: @@ -257,7 +259,7 @@ def _grouped_plot( sharex: bool = True, sharey: bool = True, layout=None, - rot=0, + rot: float = 0, ax=None, **kwargs, ): @@ -295,12 +297,12 @@ def _grouped_hist( column=None, by=None, ax=None, - bins=50, + bins: int = 50, figsize=None, layout=None, sharex: bool = False, sharey: bool = False, - rot=90, + rot: float = 90, grid: bool = True, xlabelsize=None, xrot=None,