diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 3b472b162cdff..301644274111b 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -411,7 +411,7 @@ cdef class {{name}}HashTable(HashTable): k = kh_get_{{dtype}}(self.table, ckey) return k != self.table.n_buckets - def sizeof(self, deep=False): + def sizeof(self, deep: bool = False) -> int: """ return the size of my table in bytes """ overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*) for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t) @@ -419,7 +419,7 @@ cdef class {{name}}HashTable(HashTable): sizeof(Py_ssize_t)) # vals return overhead + for_flags + for_pairs - def get_state(self): + def get_state(self) -> dict[str, int]: """ returns infos about the state of the hashtable""" return { 'n_buckets' : self.table.n_buckets, @@ -747,14 +747,14 @@ cdef class StringHashTable(HashTable): kh_destroy_str(self.table) self.table = NULL - def sizeof(self, deep=False): + def sizeof(self, deep: bool = False) -> int: overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*) for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t) for_pairs = self.table.n_buckets * (sizeof(char *) + # keys sizeof(Py_ssize_t)) # vals return overhead + for_flags + for_pairs - def get_state(self): + def get_state(self) -> dict[str, int]: """ returns infos about the state of the hashtable""" return { 'n_buckets' : self.table.n_buckets, @@ -1079,7 +1079,7 @@ cdef class PyObjectHashTable(HashTable): k = kh_get_pymap(self.table, key) return k != self.table.n_buckets - def sizeof(self, deep=False): + def sizeof(self, deep: bool = False) -> int: """ return the size of my table in bytes """ overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*) for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t) @@ -1087,7 +1087,7 @@ cdef class PyObjectHashTable(HashTable): sizeof(Py_ssize_t)) # vals return overhead + for_flags + for_pairs - def get_state(self): + def get_state(self) -> dict[str, int]: """ returns infos about the current state of the hashtable like size, number of buckets and so on. diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index ecb7041fb2c5a..7951bb5c093ef 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -258,7 +258,7 @@ def vec_binop(object[:] left, object[:] right, object op) -> ndarray: def maybe_convert_bool(ndarray[object] arr, - true_values=None, false_values=None): + true_values=None, false_values=None) -> ndarray: cdef: Py_ssize_t i, n ndarray[uint8_t] result diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 1d99ebba3b9f0..f536c8dd76f0d 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -311,7 +311,9 @@ cdef convert_to_timedelta64(object ts, str unit): @cython.boundscheck(False) @cython.wraparound(False) -def array_to_timedelta64(ndarray[object] values, str unit=None, str errors="raise"): +def array_to_timedelta64( + ndarray[object] values, str unit=None, str errors="raise" +) -> ndarray: """ Convert an ndarray to an array of timedeltas. If errors == 'coerce', coerce non-convertible objects to NaT. Otherwise, raise. diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 541dd8abee3c3..0c8a5bbc33c91 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1619,7 +1619,7 @@ def searchsorted(arr, value, side="left", sorter=None) -> np.ndarray: _diff_special = {"float64", "float32", "int64", "int32", "int16", "int8"} -def diff(arr, n: int, axis: int = 0, stacklevel=3): +def diff(arr, n: int, axis: int = 0, stacklevel: int = 3): """ difference of n between self, analogous to s-s.shift(n) @@ -1865,7 +1865,7 @@ def safe_sort( return ordered, ensure_platform_int(new_codes) -def _sort_mixed(values): +def _sort_mixed(values) -> np.ndarray: """ order ints before strings in 1d arrays, safe in py3 """ str_pos = np.array([isinstance(x, str) for x in values], dtype=bool) nums = np.sort(values[~str_pos]) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 427b3106ea10c..08061eb1ec28c 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -157,8 +157,7 @@ def ndim(self) -> int: @cache_readonly def size(self) -> int: - # error: Incompatible return value type (got "number", expected "int") - return np.prod(self.shape) # type: ignore[return-value] + return self._ndarray.size @cache_readonly def nbytes(self) -> int: @@ -190,7 +189,7 @@ def equals(self, other) -> bool: return False return bool(array_equivalent(self._ndarray, other._ndarray)) - def _values_for_argsort(self): + def _values_for_argsort(self) -> np.ndarray: return self._ndarray # Signature of "argmin" incompatible with supertype "ExtensionArray" diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 9aafea4b998a1..32c3095c3e6ee 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -606,7 +606,9 @@ def _from_inferred_categories( if true_values is None: true_values = ["True", "TRUE", "true"] - cats = cats.isin(true_values) + # error: Incompatible types in assignment (expression has type + # "ndarray", variable has type "Index") + cats = cats.isin(true_values) # type: ignore[assignment] if known_categories: # Recode from observation order to dtype.categories order. @@ -1444,7 +1446,7 @@ def memory_usage(self, deep: bool = False) -> int: """ return self._codes.nbytes + self.dtype.categories.memory_usage(deep=deep) - def isna(self): + def isna(self) -> np.ndarray: """ Detect missing values @@ -1465,7 +1467,7 @@ def isna(self): isnull = isna - def notna(self): + def notna(self) -> np.ndarray: """ Inverse of isna @@ -1731,7 +1733,7 @@ def view(self, dtype=None): raise NotImplementedError(dtype) return self._from_backing_data(self._ndarray) - def to_dense(self): + def to_dense(self) -> np.ndarray: """ Return my 'dense' representation @@ -1804,14 +1806,14 @@ def __contains__(self, key) -> bool: """ # if key is a NaN, check if any NaN is in self. if is_valid_na_for_dtype(key, self.categories.dtype): - return self.isna().any() + return bool(self.isna().any()) return contains(self, key, container=self._codes) # ------------------------------------------------------------------ # Rendering Methods - def _formatter(self, boxed=False): + def _formatter(self, boxed: bool = False): # Defer to CategoricalFormatter's formatter. return None @@ -1889,7 +1891,7 @@ def _repr_footer(self) -> str: info = self._repr_categories_info() return f"Length: {len(self)}\n{info}" - def _get_repr(self, length=True, na_rep="NaN", footer=True) -> str: + def _get_repr(self, length: bool = True, na_rep="NaN", footer: bool = True) -> str: from pandas.io.formats import format as fmt formatter = fmt.CategoricalFormatter( diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index cefb9bfa51280..7be06fe92c418 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -327,7 +327,7 @@ def _format_native_types(self, na_rep="NaT", date_format=None): """ raise AbstractMethodError(self) - def _formatter(self, boxed=False): + def _formatter(self, boxed: bool = False): # TODO: Remove Datetime & DatetimeTZ formatters. return "'{}'".format @@ -354,7 +354,7 @@ def __getitem__( result._freq = self._get_getitem_freq(key) return result - def _get_getitem_freq(self, key): + def _get_getitem_freq(self, key) -> Optional[BaseOffset]: """ Find the `freq` attribute to assign to the result of a __getitem__ lookup. """ @@ -406,7 +406,7 @@ def _maybe_clear_freq(self): # DatetimeArray and TimedeltaArray pass - def astype(self, dtype, copy=True): + def astype(self, dtype, copy: bool = True): # Some notes on cases we don't have to handle here in the base class: # 1. PeriodArray.astype handles period -> period # 2. DatetimeArray.astype handles conversion between tz. @@ -545,7 +545,7 @@ def _values_for_factorize(self): @classmethod def _from_factorized( - cls: Type[DatetimeLikeArrayT], values, original + cls: Type[DatetimeLikeArrayT], values, original: DatetimeLikeArrayT ) -> DatetimeLikeArrayT: return cls(values, dtype=original.dtype) @@ -939,7 +939,7 @@ def freq(self, value): self._freq = value @property - def freqstr(self): + def freqstr(self) -> Optional[str]: """ Return the frequency object as a string if its set, otherwise None. """ @@ -948,7 +948,7 @@ def freqstr(self): return self.freq.freqstr @property # NB: override with cache_readonly in immutable subclasses - def inferred_freq(self): + def inferred_freq(self) -> Optional[str]: """ Tries to return a string representing a frequency guess, generated by infer_freq. Returns None if it can't autodetect the @@ -963,8 +963,11 @@ def inferred_freq(self): @property # NB: override with cache_readonly in immutable subclasses def _resolution_obj(self) -> Optional[Resolution]: + freqstr = self.freqstr + if freqstr is None: + return None try: - return Resolution.get_reso_from_freq(self.freqstr) + return Resolution.get_reso_from_freq(freqstr) except KeyError: return None @@ -1241,7 +1244,7 @@ def _addsub_object_array(self, other: np.ndarray, op): ) return result - def _time_shift(self, periods, freq=None): + def _time_shift(self, periods: int, freq=None): """ Shift each value by `periods`. @@ -1440,7 +1443,7 @@ def __isub__(self, other): # -------------------------------------------------------------- # Reductions - def min(self, *, axis=None, skipna=True, **kwargs): + def min(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs): """ Return the minimum value of the Array or minimum along an axis. @@ -1469,7 +1472,7 @@ def min(self, *, axis=None, skipna=True, **kwargs): result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) - def max(self, *, axis=None, skipna=True, **kwargs): + def max(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs): """ Return the maximum value of the Array or maximum along an axis. @@ -1500,7 +1503,7 @@ def max(self, *, axis=None, skipna=True, **kwargs): result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) - def mean(self, *, skipna=True, axis: Optional[int] = 0): + def mean(self, *, skipna: bool = True, axis: Optional[int] = 0): """ Return the mean value of the Array. @@ -1568,7 +1571,7 @@ class DatelikeOps(DatetimeLikeArrayMixin): URL="https://docs.python.org/3/library/datetime.html" "#strftime-and-strptime-behavior" ) - def strftime(self, date_format): + def strftime(self, date_format: str) -> np.ndarray: """ Convert to Index using specified date_format. @@ -1760,7 +1763,7 @@ def all(self, *, axis: Optional[int] = None, skipna: bool = True): # -------------------------------------------------------------- # Frequency Methods - def _maybe_clear_freq(self): + def _maybe_clear_freq(self) -> None: self._freq = None def _with_freq(self, freq): diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index d1f0f506766a8..956a93a142afe 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -84,6 +84,11 @@ if TYPE_CHECKING: from typing import Literal + from pandas.core.arrays import ( + PeriodArray, + TimedeltaArray, + ) + _midnight = time(0, 0) @@ -244,7 +249,7 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): _dtype: Union[np.dtype, DatetimeTZDtype] _freq = None - def __init__(self, values, dtype=DT64NS_DTYPE, freq=None, copy=False): + def __init__(self, values, dtype=DT64NS_DTYPE, freq=None, copy: bool = False): values = extract_array(values, extract_numpy=True) if isinstance(values, IntegerArray): values = values.to_numpy("int64", na_value=iNaT) @@ -319,7 +324,7 @@ def __init__(self, values, dtype=DT64NS_DTYPE, freq=None, copy=False): @classmethod def _simple_new( - cls, values, freq: Optional[BaseOffset] = None, dtype=DT64NS_DTYPE + cls, values: np.ndarray, freq: Optional[BaseOffset] = None, dtype=DT64NS_DTYPE ) -> DatetimeArray: assert isinstance(values, np.ndarray) assert values.dtype == DT64NS_DTYPE @@ -339,11 +344,11 @@ def _from_sequence_not_strict( cls, data, dtype=None, - copy=False, + copy: bool = False, tz=None, freq=lib.no_default, - dayfirst=False, - yearfirst=False, + dayfirst: bool = False, + yearfirst: bool = False, ambiguous="raise", ): explicit_none = freq is None @@ -492,7 +497,7 @@ def _unbox_scalar(self, value, setitem: bool = False) -> np.datetime64: self._check_compatible_with(value, setitem=setitem) return value.asm8 - def _scalar_from_string(self, value): + def _scalar_from_string(self, value) -> Union[Timestamp, NaTType]: return Timestamp(value, tz=self.tz) def _check_compatible_with(self, other, setitem: bool = False): @@ -536,7 +541,7 @@ def dtype(self) -> Union[np.dtype, DatetimeTZDtype]: # type: ignore[override] return self._dtype @property - def tz(self): + def tz(self) -> Optional[tzinfo]: """ Return timezone, if any. @@ -557,14 +562,14 @@ def tz(self, value): ) @property - def tzinfo(self): + def tzinfo(self) -> Optional[tzinfo]: """ Alias for tz attribute """ return self.tz @property # NB: override with cache_readonly in immutable subclasses - def is_normalized(self): + def is_normalized(self) -> bool: """ Returns True if all of the dates are at midnight ("no time") """ @@ -609,7 +614,7 @@ def __iter__(self): ) yield from converted - def astype(self, dtype, copy=True): + def astype(self, dtype, copy: bool = True): # We handle # --> datetime # --> period @@ -636,7 +641,9 @@ def astype(self, dtype, copy=True): # Rendering Methods @dtl.ravel_compat - def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs): + def _format_native_types( + self, na_rep="NaT", date_format=None, **kwargs + ) -> np.ndarray: from pandas.io.formats.format import get_format_datetime64_from_values fmt = get_format_datetime64_from_values(self, date_format) @@ -660,7 +667,7 @@ def _has_same_tz(self, other) -> bool: other_tz = other.tzinfo return timezones.tz_compare(self.tzinfo, other_tz) - def _assert_tzawareness_compat(self, other): + def _assert_tzawareness_compat(self, other) -> None: # adapted from _Timestamp._assert_tzawareness_compat other_tz = getattr(other, "tzinfo", None) other_dtype = getattr(other, "dtype", None) @@ -708,7 +715,7 @@ def _sub_datetime_arraylike(self, other): np.putmask(new_values, arr_mask, iNaT) return new_values.view("timedelta64[ns]") - def _add_offset(self, offset): + def _add_offset(self, offset) -> DatetimeArray: if self.ndim == 2: return self.ravel()._add_offset(offset).reshape(self.shape) @@ -756,7 +763,7 @@ def _sub_datetimelike_scalar(self, other): # ----------------------------------------------------------------- # Timezone Conversion and Localization Methods - def _local_timestamps(self): + def _local_timestamps(self) -> np.ndarray: """ Convert to an i8 (unix-like nanosecond timestamp) representation while keeping the local timezone and not using UTC. @@ -767,7 +774,7 @@ def _local_timestamps(self): return self.asi8 return tzconversion.tz_convert_from_utc(self.asi8, self.tz) - def tz_convert(self, tz): + def tz_convert(self, tz) -> DatetimeArray: """ Convert tz-aware Datetime Array/Index from one time zone to another. @@ -844,7 +851,7 @@ def tz_convert(self, tz): return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq) @dtl.ravel_compat - def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): + def tz_localize(self, tz, ambiguous="raise", nonexistent="raise") -> DatetimeArray: """ Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index. @@ -1035,7 +1042,7 @@ def to_pydatetime(self) -> np.ndarray: """ return ints_to_pydatetime(self.asi8, tz=self.tz) - def normalize(self): + def normalize(self) -> DatetimeArray: """ Convert times to midnight. @@ -1077,7 +1084,7 @@ def normalize(self): return type(self)(new_values)._with_freq("infer").tz_localize(self.tz) @dtl.ravel_compat - def to_period(self, freq=None): + def to_period(self, freq=None) -> PeriodArray: """ Cast to PeriodArray/Index at a particular frequency. @@ -1148,7 +1155,7 @@ def to_period(self, freq=None): return PeriodArray._from_datetime64(self._ndarray, freq, tz=self.tz) - def to_perioddelta(self, freq): + def to_perioddelta(self, freq) -> TimedeltaArray: """ Calculate TimedeltaArray of difference between index values and index converted to PeriodArray at specified diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 45656459792ba..a824e27e3e36a 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -205,38 +205,60 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, int]: # ------------------------------------------------------------------------ # Reductions - def any(self, *, axis=None, out=None, keepdims=False, skipna=True): + def any( + self, + *, + axis: Optional[int] = None, + out=None, + keepdims: bool = False, + skipna: bool = True, + ): nv.validate_any((), {"out": out, "keepdims": keepdims}) result = nanops.nanany(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) - def all(self, *, axis=None, out=None, keepdims=False, skipna=True): + def all( + self, + *, + axis: Optional[int] = None, + out=None, + keepdims: bool = False, + skipna: bool = True, + ): nv.validate_all((), {"out": out, "keepdims": keepdims}) result = nanops.nanall(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) - def min(self, *, axis=None, skipna: bool = True, **kwargs) -> Scalar: + def min( + self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs + ) -> Scalar: nv.validate_min((), kwargs) result = nanops.nanmin( values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna ) return self._wrap_reduction_result(axis, result) - def max(self, *, axis=None, skipna: bool = True, **kwargs) -> Scalar: + def max( + self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs + ) -> Scalar: nv.validate_max((), kwargs) result = nanops.nanmax( values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna ) return self._wrap_reduction_result(axis, result) - def sum(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar: + def sum( + self, *, axis: Optional[int] = None, skipna: bool = True, min_count=0, **kwargs + ) -> Scalar: nv.validate_sum((), kwargs) result = nanops.nansum( self._ndarray, axis=axis, skipna=skipna, min_count=min_count ) return self._wrap_reduction_result(axis, result) - def prod(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar: + def prod( + self, *, axis: Optional[int] = None, skipna: bool = True, min_count=0, **kwargs + ) -> Scalar: nv.validate_prod((), kwargs) result = nanops.nanprod( self._ndarray, axis=axis, skipna=skipna, min_count=min_count @@ -246,18 +268,24 @@ def prod(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar: def mean( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, - keepdims=False, - skipna=True, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_mean((), {"dtype": dtype, "out": out, "keepdims": keepdims}) result = nanops.nanmean(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) def median( - self, *, axis=None, out=None, overwrite_input=False, keepdims=False, skipna=True + self, + *, + axis: Optional[int] = None, + out=None, + overwrite_input: bool = False, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_median( (), {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims} @@ -268,12 +296,12 @@ def median( def std( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, ddof=1, - keepdims=False, - skipna=True, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_stat_ddof_func( (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std" @@ -284,12 +312,12 @@ def std( def var( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, ddof=1, - keepdims=False, - skipna=True, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_stat_ddof_func( (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="var" @@ -300,12 +328,12 @@ def var( def sem( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, ddof=1, - keepdims=False, - skipna=True, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_stat_ddof_func( (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="sem" @@ -316,11 +344,11 @@ def sem( def kurt( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, - keepdims=False, - skipna=True, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_stat_ddof_func( (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="kurt" @@ -331,11 +359,11 @@ def kurt( def skew( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, - keepdims=False, - skipna=True, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_stat_ddof_func( (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="skew" @@ -368,7 +396,7 @@ def to_numpy( # type: ignore[override] # ------------------------------------------------------------------------ # Ops - def __invert__(self): + def __invert__(self) -> PandasArray: return type(self)(~self._ndarray) def _cmp_method(self, other, op): diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index d91522a9e1bb6..2355999933a7a 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -3,6 +3,7 @@ from datetime import timedelta import operator from typing import ( + TYPE_CHECKING, Any, Callable, List, @@ -76,6 +77,9 @@ from pandas.core.arrays import datetimelike as dtl import pandas.core.common as com +if TYPE_CHECKING: + from pandas.core.arrays import DatetimeArray + _shared_doc_kwargs = { "klass": "PeriodArray", } @@ -186,7 +190,9 @@ class PeriodArray(PeriodMixin, dtl.DatelikeOps): # -------------------------------------------------------------------- # Constructors - def __init__(self, values, dtype: Optional[Dtype] = None, freq=None, copy=False): + def __init__( + self, values, dtype: Optional[Dtype] = None, freq=None, copy: bool = False + ): freq = validate_dtype_freq(dtype, freq) if freq is not None: @@ -250,7 +256,7 @@ def _from_sequence( @classmethod def _from_sequence_of_strings( - cls, strings, *, dtype: Optional[Dtype] = None, copy=False + cls, strings, *, dtype: Optional[Dtype] = None, copy: bool = False ) -> PeriodArray: return cls._from_sequence(strings, dtype=dtype, copy=copy) @@ -448,7 +454,7 @@ def is_leap_year(self) -> np.ndarray: """ return isleapyear_arr(np.asarray(self.year)) - def to_timestamp(self, freq=None, how="start"): + def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray: """ Cast to DatetimeArray/Index. @@ -492,7 +498,7 @@ def to_timestamp(self, freq=None, how="start"): # -------------------------------------------------------------------- - def _time_shift(self, periods, freq=None): + def _time_shift(self, periods: int, freq=None) -> PeriodArray: """ Shift each value by `periods`. @@ -597,7 +603,9 @@ def _formatter(self, boxed: bool = False): return "'{}'".format @dtl.ravel_compat - def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs): + def _format_native_types( + self, na_rep="NaT", date_format=None, **kwargs + ) -> np.ndarray: """ actually format my specific types """ diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 7251faee333bb..e1262d691128f 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -222,7 +222,7 @@ def _chk_pyarrow_available(cls) -> None: raise ImportError(msg) @classmethod - def _from_sequence(cls, scalars, dtype: Optional[Dtype] = None, copy=False): + def _from_sequence(cls, scalars, dtype: Optional[Dtype] = None, copy: bool = False): cls._chk_pyarrow_available() # convert non-na-likes to str, and nan-likes to ArrowStringDtype.na_value scalars = lib.ensure_string_array(scalars, copy=False) @@ -230,7 +230,7 @@ def _from_sequence(cls, scalars, dtype: Optional[Dtype] = None, copy=False): @classmethod def _from_sequence_of_strings( - cls, strings, dtype: Optional[Dtype] = None, copy=False + cls, strings, dtype: Optional[Dtype] = None, copy: bool = False ): return cls._from_sequence(strings, dtype=dtype, copy=copy) @@ -431,7 +431,7 @@ def fillna(self, value=None, method=None, limit=None): new_values = self.copy() return new_values - def _reduce(self, name, skipna=True, **kwargs): + def _reduce(self, name: str, skipna: bool = True, **kwargs): if name in ["min", "max"]: return getattr(self, name)(skipna=skipna) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 1bf822c1ae3e5..59077bfceaa4a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -2,6 +2,7 @@ from datetime import timedelta from typing import ( + TYPE_CHECKING, List, Optional, Tuple, @@ -35,7 +36,10 @@ ints_to_pytimedelta, parse_timedelta_unit, ) -from pandas._typing import NpDtype +from pandas._typing import ( + DtypeObj, + NpDtype, +) from pandas.compat.numpy import function as nv from pandas.core.dtypes.cast import astype_td64_unit_conversion @@ -70,6 +74,12 @@ from pandas.core.construction import extract_array from pandas.core.ops.common import unpack_zerodim_and_defer +if TYPE_CHECKING: + from pandas.core.arrays import ( + DatetimeArray, + PeriodArray, + ) + def _field_accessor(name: str, alias: str, docstring: str): def f(self) -> np.ndarray: @@ -171,7 +181,9 @@ def dtype(self) -> np.dtype: # type: ignore[override] _freq = None - def __init__(self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy=False): + def __init__( + self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy: bool = False + ): values = extract_array(values, extract_numpy=True) if isinstance(values, IntegerArray): values = values.to_numpy("int64", na_value=tslibs.iNaT) @@ -230,7 +242,7 @@ def __init__(self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy=False): @classmethod def _simple_new( - cls, values, freq: Optional[BaseOffset] = None, dtype=TD64NS_DTYPE + cls, values: np.ndarray, freq: Optional[BaseOffset] = None, dtype=TD64NS_DTYPE ) -> TimedeltaArray: assert dtype == TD64NS_DTYPE, dtype assert isinstance(values, np.ndarray), type(values) @@ -331,10 +343,10 @@ def _unbox_scalar(self, value, setitem: bool = False) -> np.timedelta64: self._check_compatible_with(value, setitem=setitem) return np.timedelta64(value.value, "ns") - def _scalar_from_string(self, value): + def _scalar_from_string(self, value) -> Union[Timedelta, NaTType]: return Timedelta(value) - def _check_compatible_with(self, other, setitem: bool = False): + def _check_compatible_with(self, other, setitem: bool = False) -> None: # we don't have anything to validate. pass @@ -375,7 +387,7 @@ def __iter__(self): def sum( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, keepdims: bool = False, @@ -395,7 +407,7 @@ def sum( def std( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, ddof: int = 1, @@ -414,13 +426,15 @@ def std( # ---------------------------------------------------------------- # Rendering Methods - def _formatter(self, boxed=False): + def _formatter(self, boxed: bool = False): from pandas.io.formats.format import get_format_timedelta64 return get_format_timedelta64(self, box=True) @dtl.ravel_compat - def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs): + def _format_native_types( + self, na_rep="NaT", date_format=None, **kwargs + ) -> np.ndarray: from pandas.io.formats.format import get_format_timedelta64 formatter = get_format_timedelta64(self._ndarray, na_rep) @@ -435,7 +449,7 @@ def _add_offset(self, other): f"cannot add the type {type(other).__name__} to a {type(self).__name__}" ) - def _add_period(self, other: Period): + def _add_period(self, other: Period) -> PeriodArray: """ Add a Period object. """ @@ -459,7 +473,7 @@ def _add_datetime_arraylike(self, other): # defer to implementation in DatetimeArray return other + self - def _add_datetimelike_scalar(self, other): + def _add_datetimelike_scalar(self, other) -> DatetimeArray: # adding a timedeltaindex to a datetimelike from pandas.core.arrays import DatetimeArray @@ -919,7 +933,7 @@ def f(x): def sequence_to_td64ns( - data, copy=False, unit=None, errors="raise" + data, copy: bool = False, unit=None, errors="raise" ) -> Tuple[np.ndarray, Optional[Tick]]: """ Parameters @@ -1095,7 +1109,7 @@ def objects_to_td64ns(data, unit=None, errors="raise"): return result.view("timedelta64[ns]") -def _validate_td64_dtype(dtype): +def _validate_td64_dtype(dtype) -> DtypeObj: dtype = pandas_dtype(dtype) if is_dtype_equal(dtype, np.dtype("timedelta64")): # no precision disallowed GH#24806 diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 29c2f7cfcf00d..23632922adc29 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2376,7 +2376,7 @@ def __reduce__(self): """The expected NA value to use with this index.""" @cache_readonly - def _isnan(self): + def _isnan(self) -> np.ndarray: """ Return if each value is NaN. """ @@ -5238,7 +5238,7 @@ def get_indexer_non_unique(self, target): return ensure_platform_int(indexer), missing @final - def get_indexer_for(self, target, **kwargs): + def get_indexer_for(self, target, **kwargs) -> np.ndarray: """ Guaranteed return of an indexer even when non-unique. @@ -5306,7 +5306,7 @@ def _index_as_unique(self) -> bool: _requires_unique_msg = "Reindexing only valid with uniquely valued Index objects" @final - def _maybe_promote(self, other: Index): + def _maybe_promote(self, other: Index) -> Tuple[Index, Index]: """ When dealing with an object-dtype Index and a non-object Index, see if we can upcast the object-dtype one to improve performance. @@ -5481,7 +5481,7 @@ def _transform_index(self, func, level=None) -> Index: items = [func(x) for x in self] return Index(items, name=self.name, tupleize_cols=False) - def isin(self, values, level=None): + def isin(self, values, level=None) -> np.ndarray: """ Return a boolean array where the index values are in `values`. @@ -5849,7 +5849,7 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): return start_slice, end_slice - def delete(self, loc): + def delete(self, loc) -> Index: """ Make new Index with passed location(-s) deleted. @@ -5881,7 +5881,7 @@ def delete(self, loc): res_values = np.delete(self._data, loc) return type(self)._simple_new(res_values, name=self.name) - def insert(self, loc: int, item): + def insert(self, loc: int, item) -> Index: """ Make new Index inserting new item at location. @@ -5916,7 +5916,7 @@ def insert(self, loc: int, item): idx = np.concatenate((arr[:loc], item, arr[loc:])) return Index(idx, name=self.name) - def drop(self: _IndexT, labels, errors: str_t = "raise") -> _IndexT: + def drop(self, labels, errors: str_t = "raise") -> Index: """ Make new Index with passed list of labels deleted. @@ -5929,6 +5929,7 @@ def drop(self: _IndexT, labels, errors: str_t = "raise") -> _IndexT: Returns ------- dropped : Index + Will be same type as self, except for RangeIndex. Raises ------ diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index f37faa4ab844b..c1ed00820a376 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -402,7 +402,7 @@ def _get_indexer( method: Optional[str] = None, limit: Optional[int] = None, tolerance=None, - ): + ) -> np.ndarray: if com.any_not_none(method, tolerance, limit): return super()._get_indexer( target, method=method, tolerance=tolerance, limit=limit @@ -436,10 +436,11 @@ def repeat(self, repeats, axis=None) -> Int64Index: return self._int64index.repeat(repeats, axis=axis) def delete(self, loc) -> Int64Index: - return self._int64index.delete(loc) + # error: Incompatible return value type (got "Index", expected "Int64Index") + return self._int64index.delete(loc) # type: ignore[return-value] def take( - self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs + self, indices, axis: int = 0, allow_fill: bool = True, fill_value=None, **kwargs ) -> Int64Index: with rewrite_exception("Int64Index", type(self).__name__): return self._int64index.take( @@ -471,7 +472,13 @@ def _view(self: RangeIndex) -> RangeIndex: return result @doc(Int64Index.copy) - def copy(self, name=None, deep=False, dtype: Optional[Dtype] = None, names=None): + def copy( + self, + name: Hashable = None, + deep: bool = False, + dtype: Optional[Dtype] = None, + names=None, + ): name = self._validate_names(name=name, names=names, deep=deep)[0] new_index = self._rename(name=name) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 292c459919398..5550da7421e00 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1413,7 +1413,10 @@ def hide_columns(self, subset) -> Styler: """ subset = _non_reducing_slice(subset) hidden_df = self.data.loc[subset] - self.hidden_columns = self.columns.get_indexer_for(hidden_df.columns) + hcols = self.columns.get_indexer_for(hidden_df.columns) + # error: Incompatible types in assignment (expression has type + # "ndarray", variable has type "Sequence[int]") + self.hidden_columns = hcols # type: ignore[assignment] return self # -----------------------------------------------------------------------