From 647a3939c89f236033c06afe120c8e879bf1f853 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 20 Mar 2021 10:26:16 -0700 Subject: [PATCH 1/5] DOC: suppress warnings from CategoricalBlock deprecation --- doc/source/user_guide/io.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index cf153ddd2cbbd..3b7a6037a9715 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -5240,6 +5240,7 @@ Write to a feather file. Read from a feather file. .. ipython:: python + :okwarning: result = pd.read_feather("example.feather") result @@ -5323,6 +5324,7 @@ Write to a parquet file. Read from a parquet file. .. ipython:: python + :okwarning: result = pd.read_parquet("example_fp.parquet", engine="fastparquet") result = pd.read_parquet("example_pa.parquet", engine="pyarrow") From d348888a13220318fad63907554a927eba3ee00d Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 20 Mar 2021 13:17:25 -0700 Subject: [PATCH 2/5] TYP: annotate all the things --- pandas/_libs/hashtable_class_helper.pxi.in | 11 +++-- pandas/_libs/ops.pyx | 2 +- pandas/_libs/tslibs/timedeltas.pyx | 8 +++- pandas/core/algorithms.py | 8 ++-- pandas/core/arrays/_mixins.py | 5 +-- pandas/core/arrays/categorical.py | 20 ++++----- pandas/core/arrays/datetimelike.py | 24 +++++------ pandas/core/arrays/datetimes.py | 47 +++++++++++++--------- pandas/core/arrays/numpy_.py | 42 ++++++++++--------- pandas/core/arrays/period.py | 18 ++++++--- pandas/core/arrays/string_arrow.py | 6 +-- pandas/core/arrays/timedeltas.py | 42 ++++++++++++------- pandas/core/indexes/base.py | 21 +++++----- pandas/core/indexes/range.py | 15 +++++-- pandas/io/formats/style.py | 5 ++- 15 files changed, 163 insertions(+), 111 deletions(-) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 6ace327ca3599..12256a8774f0a 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -411,7 +411,7 @@ cdef class {{name}}HashTable(HashTable): k = kh_get_{{dtype}}(self.table, ckey) return k != self.table.n_buckets - def sizeof(self, deep=False): + def sizeof(self, deep: bool = False) -> int: """ return the size of my table in bytes """ overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*) for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t) @@ -419,7 +419,7 @@ cdef class {{name}}HashTable(HashTable): sizeof(Py_ssize_t)) # vals return overhead + for_flags + for_pairs - def get_state(self): + def get_state(self) -> dict[str, int]: """ returns infos about the state of the hashtable""" return { 'n_buckets' : self.table.n_buckets, @@ -747,14 +747,13 @@ cdef class StringHashTable(HashTable): kh_destroy_str(self.table) self.table = NULL - def sizeof(self, deep=False): overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*) for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t) for_pairs = self.table.n_buckets * (sizeof(char *) + # keys sizeof(Py_ssize_t)) # vals return overhead + for_flags + for_pairs - def get_state(self): + def get_state(self) -> dict[str, int]: """ returns infos about the state of the hashtable""" return { 'n_buckets' : self.table.n_buckets, @@ -1079,7 +1078,7 @@ cdef class PyObjectHashTable(HashTable): k = kh_get_pymap(self.table, key) return k != self.table.n_buckets - def sizeof(self, deep=False): + def sizeof(self, deep: bool = False) -> int: """ return the size of my table in bytes """ overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*) for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t) @@ -1087,7 +1086,7 @@ cdef class PyObjectHashTable(HashTable): sizeof(Py_ssize_t)) # vals return overhead + for_flags + for_pairs - def get_state(self): + def get_state(self) -> dict[str, int]: """ returns infos about the current state of the hashtable like size, number of buckets and so on. diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index ecb7041fb2c5a..7951bb5c093ef 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -258,7 +258,7 @@ def vec_binop(object[:] left, object[:] right, object op) -> ndarray: def maybe_convert_bool(ndarray[object] arr, - true_values=None, false_values=None): + true_values=None, false_values=None) -> ndarray: cdef: Py_ssize_t i, n ndarray[uint8_t] result diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 3cdb654642b9c..f4a1e7cc9b2fa 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -311,10 +311,16 @@ cdef convert_to_timedelta64(object ts, str unit): @cython.boundscheck(False) @cython.wraparound(False) -def array_to_timedelta64(ndarray[object] values, str unit=None, str errors="raise"): +def array_to_timedelta64( + ndarray[object] values, str unit=None, str errors="raise" +) -> ndarray: """ Convert an ndarray to an array of timedeltas. If errors == 'coerce', coerce non-convertible objects to NaT. Otherwise, raise. + + Returns + ------- + np.ndarray[timedelta64ns] """ cdef: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 15f54c11be0a0..f09bcec9c5763 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -941,7 +941,7 @@ def duplicated(values: ArrayLike, keep: Union[str, bool] = "first") -> np.ndarra Returns ------- - duplicated : ndarray + duplicated : ndarray[bool] """ values, _ = _ensure_data(values) ndtype = values.dtype.name @@ -1613,7 +1613,7 @@ def searchsorted(arr, value, side="left", sorter=None) -> np.ndarray: _diff_special = {"float64", "float32", "int64", "int32", "int16", "int8"} -def diff(arr, n: int, axis: int = 0, stacklevel=3): +def diff(arr, n: int, axis: int = 0, stacklevel: int = 3): """ difference of n between self, analogous to s-s.shift(n) @@ -1625,7 +1625,7 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3): number of periods axis : {0, 1} axis to shift on - stacklevel : int + stacklevel : int, default 3 The stacklevel for the lost dtype warning. Returns @@ -1859,7 +1859,7 @@ def safe_sort( return ordered, ensure_platform_int(new_codes) -def _sort_mixed(values): +def _sort_mixed(values) -> np.ndarray: """ order ints before strings in 1d arrays, safe in py3 """ str_pos = np.array([isinstance(x, str) for x in values], dtype=bool) nums = np.sort(values[~str_pos]) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 678e532f05772..ade3320513eea 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -153,8 +153,7 @@ def ndim(self) -> int: @cache_readonly def size(self) -> int: - # error: Incompatible return value type (got "number", expected "int") - return np.prod(self.shape) # type: ignore[return-value] + return self._ndarray.size @cache_readonly def nbytes(self) -> int: @@ -186,7 +185,7 @@ def equals(self, other) -> bool: return False return bool(array_equivalent(self._ndarray, other._ndarray)) - def _values_for_argsort(self): + def _values_for_argsort(self) -> np.ndarray: return self._ndarray def copy(self: NDArrayBackedExtensionArrayT) -> NDArrayBackedExtensionArrayT: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 769ae52744c74..36ced762712aa 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -605,7 +605,9 @@ def _from_inferred_categories( if true_values is None: true_values = ["True", "TRUE", "true"] - cats = cats.isin(true_values) + # error: Incompatible types in assignment (expression has type + # "ndarray", variable has type "Index") + cats = cats.isin(true_values) # type: ignore[assignment] if known_categories: # Recode from observation order to dtype.categories order. @@ -1443,7 +1445,7 @@ def memory_usage(self, deep: bool = False) -> int: """ return self._codes.nbytes + self.dtype.categories.memory_usage(deep=deep) - def isna(self): + def isna(self) -> np.ndarray: """ Detect missing values @@ -1451,7 +1453,7 @@ def isna(self): Returns ------- - a boolean array of whether my values are null + np.ndarray[bool] of whether my values are null See Also -------- @@ -1464,7 +1466,7 @@ def isna(self): isnull = isna - def notna(self): + def notna(self) -> np.ndarray: """ Inverse of isna @@ -1473,7 +1475,7 @@ def notna(self): Returns ------- - a boolean array of whether my values are not null + np.ndarray[bool] of whether my values are not null See Also -------- @@ -1730,7 +1732,7 @@ def view(self, dtype=None): raise NotImplementedError(dtype) return self._from_backing_data(self._ndarray) - def to_dense(self): + def to_dense(self) -> np.ndarray: """ Return my 'dense' representation @@ -1799,14 +1801,14 @@ def __contains__(self, key) -> bool: """ # if key is a NaN, check if any NaN is in self. if is_valid_na_for_dtype(key, self.categories.dtype): - return self.isna().any() + return bool(self.isna().any()) return contains(self, key, container=self._codes) # ------------------------------------------------------------------ # Rendering Methods - def _formatter(self, boxed=False): + def _formatter(self, boxed: bool = False): # Defer to CategoricalFormatter's formatter. return None @@ -1884,7 +1886,7 @@ def _repr_footer(self) -> str: info = self._repr_categories_info() return f"Length: {len(self)}\n{info}" - def _get_repr(self, length=True, na_rep="NaN", footer=True) -> str: + def _get_repr(self, length: bool = True, na_rep="NaN", footer: bool = True) -> str: from pandas.io.formats import format as fmt formatter = fmt.CategoricalFormatter( diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 0900688e04374..d91bc998b2c69 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -327,7 +327,7 @@ def _format_native_types(self, na_rep="NaT", date_format=None): """ raise AbstractMethodError(self) - def _formatter(self, boxed=False): + def _formatter(self, boxed: bool = False): # TODO: Remove Datetime & DatetimeTZ formatters. return "'{}'".format @@ -354,7 +354,7 @@ def __getitem__( result._freq = self._get_getitem_freq(key) return result - def _get_getitem_freq(self, key): + def _get_getitem_freq(self, key) -> Optional[BaseOffset]: """ Find the `freq` attribute to assign to the result of a __getitem__ lookup. """ @@ -406,7 +406,7 @@ def _maybe_clear_freq(self): # DatetimeArray and TimedeltaArray pass - def astype(self, dtype, copy=True): + def astype(self, dtype, copy: bool = True): # Some notes on cases we don't have to handle here in the base class: # 1. PeriodArray.astype handles period -> period # 2. DatetimeArray.astype handles conversion between tz. @@ -545,7 +545,7 @@ def _values_for_factorize(self): @classmethod def _from_factorized( - cls: Type[DatetimeLikeArrayT], values, original + cls: Type[DatetimeLikeArrayT], values, original: DatetimeLikeArrayT ) -> DatetimeLikeArrayT: return cls(values, dtype=original.dtype) @@ -943,7 +943,7 @@ def freq(self, value): self._freq = value @property - def freqstr(self): + def freqstr(self) -> Optional[str]: """ Return the frequency object as a string if its set, otherwise None. """ @@ -952,7 +952,7 @@ def freqstr(self): return self.freq.freqstr @property # NB: override with cache_readonly in immutable subclasses - def inferred_freq(self): + def inferred_freq(self) -> Optional[str]: """ Tries to return a string representing a frequency guess, generated by infer_freq. Returns None if it can't autodetect the @@ -1245,7 +1245,7 @@ def _addsub_object_array(self, other: np.ndarray, op): ) return result - def _time_shift(self, periods, freq=None): + def _time_shift(self, periods: int, freq=None): """ Shift each value by `periods`. @@ -1444,7 +1444,7 @@ def __isub__(self, other): # -------------------------------------------------------------- # Reductions - def min(self, *, axis=None, skipna=True, **kwargs): + def min(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs): """ Return the minimum value of the Array or minimum along an axis. @@ -1473,7 +1473,7 @@ def min(self, *, axis=None, skipna=True, **kwargs): result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) - def max(self, *, axis=None, skipna=True, **kwargs): + def max(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs): """ Return the maximum value of the Array or maximum along an axis. @@ -1504,7 +1504,7 @@ def max(self, *, axis=None, skipna=True, **kwargs): result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) - def mean(self, *, skipna=True, axis: Optional[int] = 0): + def mean(self, *, skipna: bool = True, axis: Optional[int] = 0): """ Return the mean value of the Array. @@ -1572,7 +1572,7 @@ class DatelikeOps(DatetimeLikeArrayMixin): URL="https://docs.python.org/3/library/datetime.html" "#strftime-and-strptime-behavior" ) - def strftime(self, date_format): + def strftime(self, date_format: str) -> np.ndarray: """ Convert to Index using specified date_format. @@ -1763,7 +1763,7 @@ def all(self, *, axis: Optional[int] = None, skipna: bool = True): # -------------------------------------------------------------- # Frequency Methods - def _maybe_clear_freq(self): + def _maybe_clear_freq(self) -> None: self._freq = None def _with_freq(self, freq): diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index c0a8c20832fa8..956a93a142afe 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -84,6 +84,11 @@ if TYPE_CHECKING: from typing import Literal + from pandas.core.arrays import ( + PeriodArray, + TimedeltaArray, + ) + _midnight = time(0, 0) @@ -244,7 +249,7 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): _dtype: Union[np.dtype, DatetimeTZDtype] _freq = None - def __init__(self, values, dtype=DT64NS_DTYPE, freq=None, copy=False): + def __init__(self, values, dtype=DT64NS_DTYPE, freq=None, copy: bool = False): values = extract_array(values, extract_numpy=True) if isinstance(values, IntegerArray): values = values.to_numpy("int64", na_value=iNaT) @@ -319,7 +324,7 @@ def __init__(self, values, dtype=DT64NS_DTYPE, freq=None, copy=False): @classmethod def _simple_new( - cls, values, freq: Optional[BaseOffset] = None, dtype=DT64NS_DTYPE + cls, values: np.ndarray, freq: Optional[BaseOffset] = None, dtype=DT64NS_DTYPE ) -> DatetimeArray: assert isinstance(values, np.ndarray) assert values.dtype == DT64NS_DTYPE @@ -339,11 +344,11 @@ def _from_sequence_not_strict( cls, data, dtype=None, - copy=False, + copy: bool = False, tz=None, freq=lib.no_default, - dayfirst=False, - yearfirst=False, + dayfirst: bool = False, + yearfirst: bool = False, ambiguous="raise", ): explicit_none = freq is None @@ -492,7 +497,7 @@ def _unbox_scalar(self, value, setitem: bool = False) -> np.datetime64: self._check_compatible_with(value, setitem=setitem) return value.asm8 - def _scalar_from_string(self, value): + def _scalar_from_string(self, value) -> Union[Timestamp, NaTType]: return Timestamp(value, tz=self.tz) def _check_compatible_with(self, other, setitem: bool = False): @@ -536,7 +541,7 @@ def dtype(self) -> Union[np.dtype, DatetimeTZDtype]: # type: ignore[override] return self._dtype @property - def tz(self): + def tz(self) -> Optional[tzinfo]: """ Return timezone, if any. @@ -557,14 +562,14 @@ def tz(self, value): ) @property - def tzinfo(self): + def tzinfo(self) -> Optional[tzinfo]: """ Alias for tz attribute """ return self.tz @property # NB: override with cache_readonly in immutable subclasses - def is_normalized(self): + def is_normalized(self) -> bool: """ Returns True if all of the dates are at midnight ("no time") """ @@ -609,7 +614,7 @@ def __iter__(self): ) yield from converted - def astype(self, dtype, copy=True): + def astype(self, dtype, copy: bool = True): # We handle # --> datetime # --> period @@ -636,7 +641,9 @@ def astype(self, dtype, copy=True): # Rendering Methods @dtl.ravel_compat - def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs): + def _format_native_types( + self, na_rep="NaT", date_format=None, **kwargs + ) -> np.ndarray: from pandas.io.formats.format import get_format_datetime64_from_values fmt = get_format_datetime64_from_values(self, date_format) @@ -660,7 +667,7 @@ def _has_same_tz(self, other) -> bool: other_tz = other.tzinfo return timezones.tz_compare(self.tzinfo, other_tz) - def _assert_tzawareness_compat(self, other): + def _assert_tzawareness_compat(self, other) -> None: # adapted from _Timestamp._assert_tzawareness_compat other_tz = getattr(other, "tzinfo", None) other_dtype = getattr(other, "dtype", None) @@ -708,7 +715,7 @@ def _sub_datetime_arraylike(self, other): np.putmask(new_values, arr_mask, iNaT) return new_values.view("timedelta64[ns]") - def _add_offset(self, offset): + def _add_offset(self, offset) -> DatetimeArray: if self.ndim == 2: return self.ravel()._add_offset(offset).reshape(self.shape) @@ -756,7 +763,7 @@ def _sub_datetimelike_scalar(self, other): # ----------------------------------------------------------------- # Timezone Conversion and Localization Methods - def _local_timestamps(self): + def _local_timestamps(self) -> np.ndarray: """ Convert to an i8 (unix-like nanosecond timestamp) representation while keeping the local timezone and not using UTC. @@ -767,7 +774,7 @@ def _local_timestamps(self): return self.asi8 return tzconversion.tz_convert_from_utc(self.asi8, self.tz) - def tz_convert(self, tz): + def tz_convert(self, tz) -> DatetimeArray: """ Convert tz-aware Datetime Array/Index from one time zone to another. @@ -844,7 +851,7 @@ def tz_convert(self, tz): return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq) @dtl.ravel_compat - def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): + def tz_localize(self, tz, ambiguous="raise", nonexistent="raise") -> DatetimeArray: """ Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index. @@ -1031,11 +1038,11 @@ def to_pydatetime(self) -> np.ndarray: Returns ------- - datetimes : ndarray + datetimes : ndarray[object] """ return ints_to_pydatetime(self.asi8, tz=self.tz) - def normalize(self): + def normalize(self) -> DatetimeArray: """ Convert times to midnight. @@ -1077,7 +1084,7 @@ def normalize(self): return type(self)(new_values)._with_freq("infer").tz_localize(self.tz) @dtl.ravel_compat - def to_period(self, freq=None): + def to_period(self, freq=None) -> PeriodArray: """ Cast to PeriodArray/Index at a particular frequency. @@ -1148,7 +1155,7 @@ def to_period(self, freq=None): return PeriodArray._from_datetime64(self._ndarray, freq, tz=self.tz) - def to_perioddelta(self, freq): + def to_perioddelta(self, freq) -> TimedeltaArray: """ Calculate TimedeltaArray of difference between index values and index converted to PeriodArray at specified diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 45656459792ba..bb2a7ca2da26c 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -205,12 +205,12 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, int]: # ------------------------------------------------------------------------ # Reductions - def any(self, *, axis=None, out=None, keepdims=False, skipna=True): + def any(self, *, axis=None, out=None, keepdims: bool = False, skipna: bool = True): nv.validate_any((), {"out": out, "keepdims": keepdims}) result = nanops.nanany(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) - def all(self, *, axis=None, out=None, keepdims=False, skipna=True): + def all(self, *, axis=None, out=None, keepdims: bool = False, skipna: bool = True): nv.validate_all((), {"out": out, "keepdims": keepdims}) result = nanops.nanall(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) @@ -229,14 +229,14 @@ def max(self, *, axis=None, skipna: bool = True, **kwargs) -> Scalar: ) return self._wrap_reduction_result(axis, result) - def sum(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar: + def sum(self, *, axis=None, skipna: bool = True, min_count=0, **kwargs) -> Scalar: nv.validate_sum((), kwargs) result = nanops.nansum( self._ndarray, axis=axis, skipna=skipna, min_count=min_count ) return self._wrap_reduction_result(axis, result) - def prod(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar: + def prod(self, *, axis=None, skipna: bool = True, min_count=0, **kwargs) -> Scalar: nv.validate_prod((), kwargs) result = nanops.nanprod( self._ndarray, axis=axis, skipna=skipna, min_count=min_count @@ -249,15 +249,21 @@ def mean( axis=None, dtype: Optional[NpDtype] = None, out=None, - keepdims=False, - skipna=True, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_mean((), {"dtype": dtype, "out": out, "keepdims": keepdims}) result = nanops.nanmean(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) def median( - self, *, axis=None, out=None, overwrite_input=False, keepdims=False, skipna=True + self, + *, + axis=None, + out=None, + overwrite_input: bool = False, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_median( (), {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims} @@ -272,8 +278,8 @@ def std( dtype: Optional[NpDtype] = None, out=None, ddof=1, - keepdims=False, - skipna=True, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_stat_ddof_func( (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std" @@ -288,8 +294,8 @@ def var( dtype: Optional[NpDtype] = None, out=None, ddof=1, - keepdims=False, - skipna=True, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_stat_ddof_func( (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="var" @@ -304,8 +310,8 @@ def sem( dtype: Optional[NpDtype] = None, out=None, ddof=1, - keepdims=False, - skipna=True, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_stat_ddof_func( (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="sem" @@ -319,8 +325,8 @@ def kurt( axis=None, dtype: Optional[NpDtype] = None, out=None, - keepdims=False, - skipna=True, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_stat_ddof_func( (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="kurt" @@ -334,8 +340,8 @@ def skew( axis=None, dtype: Optional[NpDtype] = None, out=None, - keepdims=False, - skipna=True, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_stat_ddof_func( (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="skew" @@ -368,7 +374,7 @@ def to_numpy( # type: ignore[override] # ------------------------------------------------------------------------ # Ops - def __invert__(self): + def __invert__(self) -> PandasArray: return type(self)(~self._ndarray) def _cmp_method(self, other, op): diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index d91522a9e1bb6..2355999933a7a 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -3,6 +3,7 @@ from datetime import timedelta import operator from typing import ( + TYPE_CHECKING, Any, Callable, List, @@ -76,6 +77,9 @@ from pandas.core.arrays import datetimelike as dtl import pandas.core.common as com +if TYPE_CHECKING: + from pandas.core.arrays import DatetimeArray + _shared_doc_kwargs = { "klass": "PeriodArray", } @@ -186,7 +190,9 @@ class PeriodArray(PeriodMixin, dtl.DatelikeOps): # -------------------------------------------------------------------- # Constructors - def __init__(self, values, dtype: Optional[Dtype] = None, freq=None, copy=False): + def __init__( + self, values, dtype: Optional[Dtype] = None, freq=None, copy: bool = False + ): freq = validate_dtype_freq(dtype, freq) if freq is not None: @@ -250,7 +256,7 @@ def _from_sequence( @classmethod def _from_sequence_of_strings( - cls, strings, *, dtype: Optional[Dtype] = None, copy=False + cls, strings, *, dtype: Optional[Dtype] = None, copy: bool = False ) -> PeriodArray: return cls._from_sequence(strings, dtype=dtype, copy=copy) @@ -448,7 +454,7 @@ def is_leap_year(self) -> np.ndarray: """ return isleapyear_arr(np.asarray(self.year)) - def to_timestamp(self, freq=None, how="start"): + def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray: """ Cast to DatetimeArray/Index. @@ -492,7 +498,7 @@ def to_timestamp(self, freq=None, how="start"): # -------------------------------------------------------------------- - def _time_shift(self, periods, freq=None): + def _time_shift(self, periods: int, freq=None) -> PeriodArray: """ Shift each value by `periods`. @@ -597,7 +603,9 @@ def _formatter(self, boxed: bool = False): return "'{}'".format @dtl.ravel_compat - def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs): + def _format_native_types( + self, na_rep="NaT", date_format=None, **kwargs + ) -> np.ndarray: """ actually format my specific types """ diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 7251faee333bb..e1262d691128f 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -222,7 +222,7 @@ def _chk_pyarrow_available(cls) -> None: raise ImportError(msg) @classmethod - def _from_sequence(cls, scalars, dtype: Optional[Dtype] = None, copy=False): + def _from_sequence(cls, scalars, dtype: Optional[Dtype] = None, copy: bool = False): cls._chk_pyarrow_available() # convert non-na-likes to str, and nan-likes to ArrowStringDtype.na_value scalars = lib.ensure_string_array(scalars, copy=False) @@ -230,7 +230,7 @@ def _from_sequence(cls, scalars, dtype: Optional[Dtype] = None, copy=False): @classmethod def _from_sequence_of_strings( - cls, strings, dtype: Optional[Dtype] = None, copy=False + cls, strings, dtype: Optional[Dtype] = None, copy: bool = False ): return cls._from_sequence(strings, dtype=dtype, copy=copy) @@ -431,7 +431,7 @@ def fillna(self, value=None, method=None, limit=None): new_values = self.copy() return new_values - def _reduce(self, name, skipna=True, **kwargs): + def _reduce(self, name: str, skipna: bool = True, **kwargs): if name in ["min", "max"]: return getattr(self, name)(skipna=skipna) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index f3889ff360aa8..033271957da29 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -2,6 +2,7 @@ from datetime import timedelta from typing import ( + TYPE_CHECKING, List, Optional, Tuple, @@ -35,7 +36,10 @@ ints_to_pytimedelta, parse_timedelta_unit, ) -from pandas._typing import NpDtype +from pandas._typing import ( + DtypeObj, + NpDtype, +) from pandas.compat.numpy import function as nv from pandas.core.dtypes.cast import astype_td64_unit_conversion @@ -70,6 +74,12 @@ from pandas.core.construction import extract_array from pandas.core.ops.common import unpack_zerodim_and_defer +if TYPE_CHECKING: + from pandas.core.arrays import ( + DatetimeArray, + PeriodArray, + ) + def _field_accessor(name: str, alias: str, docstring: str): def f(self) -> np.ndarray: @@ -171,7 +181,9 @@ def dtype(self) -> np.dtype: # type: ignore[override] _freq = None - def __init__(self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy=False): + def __init__( + self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy: bool = False + ): values = extract_array(values, extract_numpy=True) if isinstance(values, IntegerArray): values = values.to_numpy("int64", na_value=tslibs.iNaT) @@ -230,7 +242,7 @@ def __init__(self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy=False): @classmethod def _simple_new( - cls, values, freq: Optional[BaseOffset] = None, dtype=TD64NS_DTYPE + cls, values: np.ndarray, freq: Optional[BaseOffset] = None, dtype=TD64NS_DTYPE ) -> TimedeltaArray: assert dtype == TD64NS_DTYPE, dtype assert isinstance(values, np.ndarray), type(values) @@ -331,10 +343,10 @@ def _unbox_scalar(self, value, setitem: bool = False) -> np.timedelta64: self._check_compatible_with(value, setitem=setitem) return np.timedelta64(value.value, "ns") - def _scalar_from_string(self, value): + def _scalar_from_string(self, value) -> Union[Timedelta, NaTType]: return Timedelta(value) - def _check_compatible_with(self, other, setitem: bool = False): + def _check_compatible_with(self, other, setitem: bool = False) -> None: # we don't have anything to validate. pass @@ -375,7 +387,7 @@ def __iter__(self): def sum( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, keepdims: bool = False, @@ -395,7 +407,7 @@ def sum( def std( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, ddof: int = 1, @@ -414,13 +426,15 @@ def std( # ---------------------------------------------------------------- # Rendering Methods - def _formatter(self, boxed=False): + def _formatter(self, boxed: bool = False): from pandas.io.formats.format import get_format_timedelta64 return get_format_timedelta64(self, box=True) @dtl.ravel_compat - def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs): + def _format_native_types( + self, na_rep="NaT", date_format=None, **kwargs + ) -> np.ndarray: from pandas.io.formats.format import get_format_timedelta64 formatter = get_format_timedelta64(self._ndarray, na_rep) @@ -435,7 +449,7 @@ def _add_offset(self, other): f"cannot add the type {type(other).__name__} to a {type(self).__name__}" ) - def _add_period(self, other: Period): + def _add_period(self, other: Period) -> PeriodArray: """ Add a Period object. """ @@ -459,7 +473,7 @@ def _add_datetime_arraylike(self, other): # defer to implementation in DatetimeArray return other + self - def _add_datetimelike_scalar(self, other): + def _add_datetimelike_scalar(self, other) -> DatetimeArray: # adding a timedeltaindex to a datetimelike from pandas.core.arrays import DatetimeArray @@ -853,7 +867,7 @@ def to_pytimedelta(self) -> np.ndarray: Returns ------- - datetimes : ndarray + datetimes : ndarray[object] """ return tslibs.ints_to_pytimedelta(self.asi8) @@ -919,7 +933,7 @@ def f(x): def sequence_to_td64ns( - data, copy=False, unit=None, errors="raise" + data, copy: bool = False, unit=None, errors="raise" ) -> Tuple[np.ndarray, Optional[Tick]]: """ Parameters @@ -1095,7 +1109,7 @@ def objects_to_td64ns(data, unit=None, errors="raise"): return result.view("timedelta64[ns]") -def _validate_td64_dtype(dtype): +def _validate_td64_dtype(dtype) -> DtypeObj: dtype = pandas_dtype(dtype) if is_dtype_equal(dtype, np.dtype("timedelta64")): # no precision disallowed GH#24806 diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e8b83af16254a..f2ab82823443e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2375,7 +2375,7 @@ def __reduce__(self): """The expected NA value to use with this index.""" @cache_readonly - def _isnan(self): + def _isnan(self) -> np.ndarray: """ Return if each value is NaN. """ @@ -2411,7 +2411,7 @@ def isna(self) -> np.ndarray: Returns ------- - numpy.ndarray + numpy.ndarray[bool] A boolean array of whether my values are NA. See Also @@ -2469,7 +2469,7 @@ def notna(self) -> np.ndarray: Returns ------- - numpy.ndarray + numpy.ndarray[bool] Boolean array to indicate which entries are not NA. See Also @@ -5239,7 +5239,7 @@ def get_indexer_non_unique(self, target): return ensure_platform_int(indexer), missing @final - def get_indexer_for(self, target, **kwargs): + def get_indexer_for(self, target, **kwargs) -> np.ndarray: """ Guaranteed return of an indexer even when non-unique. @@ -5307,7 +5307,7 @@ def _index_as_unique(self) -> bool: _requires_unique_msg = "Reindexing only valid with uniquely valued Index objects" @final - def _maybe_promote(self, other: Index): + def _maybe_promote(self, other: Index) -> Tuple[Index, Index]: """ When dealing with an object-dtype Index and a non-object Index, see if we can upcast the object-dtype one to improve performance. @@ -5482,7 +5482,7 @@ def _transform_index(self, func, level=None) -> Index: items = [func(x) for x in self] return Index(items, name=self.name, tupleize_cols=False) - def isin(self, values, level=None): + def isin(self, values, level=None) -> np.ndarray: """ Return a boolean array where the index values are in `values`. @@ -5500,7 +5500,7 @@ def isin(self, values, level=None): Returns ------- - is_contained : ndarray + is_contained : ndarray[bool] NumPy array of boolean values. See Also @@ -5850,7 +5850,7 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): return start_slice, end_slice - def delete(self, loc): + def delete(self, loc) -> Index: """ Make new Index with passed location(-s) deleted. @@ -5882,7 +5882,7 @@ def delete(self, loc): res_values = np.delete(self._data, loc) return type(self)._simple_new(res_values, name=self.name) - def insert(self, loc: int, item): + def insert(self, loc: int, item) -> Index: """ Make new Index inserting new item at location. @@ -5917,7 +5917,7 @@ def insert(self, loc: int, item): idx = np.concatenate((arr[:loc], item, arr[loc:])) return Index(idx, name=self.name) - def drop(self: _IndexT, labels, errors: str_t = "raise") -> _IndexT: + def drop(self, labels, errors: str_t = "raise") -> Index: """ Make new Index with passed list of labels deleted. @@ -5930,6 +5930,7 @@ def drop(self: _IndexT, labels, errors: str_t = "raise") -> _IndexT: Returns ------- dropped : Index + Will be same type as self, except for RangeIndex. Raises ------ diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index f37faa4ab844b..c1ed00820a376 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -402,7 +402,7 @@ def _get_indexer( method: Optional[str] = None, limit: Optional[int] = None, tolerance=None, - ): + ) -> np.ndarray: if com.any_not_none(method, tolerance, limit): return super()._get_indexer( target, method=method, tolerance=tolerance, limit=limit @@ -436,10 +436,11 @@ def repeat(self, repeats, axis=None) -> Int64Index: return self._int64index.repeat(repeats, axis=axis) def delete(self, loc) -> Int64Index: - return self._int64index.delete(loc) + # error: Incompatible return value type (got "Index", expected "Int64Index") + return self._int64index.delete(loc) # type: ignore[return-value] def take( - self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs + self, indices, axis: int = 0, allow_fill: bool = True, fill_value=None, **kwargs ) -> Int64Index: with rewrite_exception("Int64Index", type(self).__name__): return self._int64index.take( @@ -471,7 +472,13 @@ def _view(self: RangeIndex) -> RangeIndex: return result @doc(Int64Index.copy) - def copy(self, name=None, deep=False, dtype: Optional[Dtype] = None, names=None): + def copy( + self, + name: Hashable = None, + deep: bool = False, + dtype: Optional[Dtype] = None, + names=None, + ): name = self._validate_names(name=name, names=names, deep=deep)[0] new_index = self._rename(name=name) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 5ec2141028fa4..11814d4aa85c2 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1392,7 +1392,10 @@ def hide_columns(self, subset) -> Styler: """ subset = _non_reducing_slice(subset) hidden_df = self.data.loc[subset] - self.hidden_columns = self.columns.get_indexer_for(hidden_df.columns) + hcols = self.columns.get_indexer_for(hidden_df.columns) + # error: Incompatible types in assignment (expression has type + # "ndarray", variable has type "Sequence[int]") + self.hidden_columns = hcols # type: ignore[assignment] return self # ----------------------------------------------------------------------- From ffde5a45545a03e01ef6a3be41633730bdd6bda1 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 20 Mar 2021 15:35:57 -0700 Subject: [PATCH 3/5] typo fixup --- pandas/_libs/hashtable_class_helper.pxi.in | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 12256a8774f0a..d814b97c4b5fa 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -747,6 +747,7 @@ cdef class StringHashTable(HashTable): kh_destroy_str(self.table) self.table = NULL + def sizeof(self, deep: bool = False) -> int: overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*) for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t) for_pairs = self.table.n_buckets * (sizeof(char *) + # keys From 80b53c19778a5a54c110e872adec6aed1ec52209 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 31 Mar 2021 07:59:43 -0700 Subject: [PATCH 4/5] annotate PandasArray axis kwargs --- pandas/core/arrays/numpy_.py | 48 ++++++++++++++++++++++++++---------- pandas/core/missing.py | 5 +--- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index bb2a7ca2da26c..a824e27e3e36a 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -205,38 +205,60 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, int]: # ------------------------------------------------------------------------ # Reductions - def any(self, *, axis=None, out=None, keepdims: bool = False, skipna: bool = True): + def any( + self, + *, + axis: Optional[int] = None, + out=None, + keepdims: bool = False, + skipna: bool = True, + ): nv.validate_any((), {"out": out, "keepdims": keepdims}) result = nanops.nanany(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) - def all(self, *, axis=None, out=None, keepdims: bool = False, skipna: bool = True): + def all( + self, + *, + axis: Optional[int] = None, + out=None, + keepdims: bool = False, + skipna: bool = True, + ): nv.validate_all((), {"out": out, "keepdims": keepdims}) result = nanops.nanall(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) - def min(self, *, axis=None, skipna: bool = True, **kwargs) -> Scalar: + def min( + self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs + ) -> Scalar: nv.validate_min((), kwargs) result = nanops.nanmin( values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna ) return self._wrap_reduction_result(axis, result) - def max(self, *, axis=None, skipna: bool = True, **kwargs) -> Scalar: + def max( + self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs + ) -> Scalar: nv.validate_max((), kwargs) result = nanops.nanmax( values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna ) return self._wrap_reduction_result(axis, result) - def sum(self, *, axis=None, skipna: bool = True, min_count=0, **kwargs) -> Scalar: + def sum( + self, *, axis: Optional[int] = None, skipna: bool = True, min_count=0, **kwargs + ) -> Scalar: nv.validate_sum((), kwargs) result = nanops.nansum( self._ndarray, axis=axis, skipna=skipna, min_count=min_count ) return self._wrap_reduction_result(axis, result) - def prod(self, *, axis=None, skipna: bool = True, min_count=0, **kwargs) -> Scalar: + def prod( + self, *, axis: Optional[int] = None, skipna: bool = True, min_count=0, **kwargs + ) -> Scalar: nv.validate_prod((), kwargs) result = nanops.nanprod( self._ndarray, axis=axis, skipna=skipna, min_count=min_count @@ -246,7 +268,7 @@ def prod(self, *, axis=None, skipna: bool = True, min_count=0, **kwargs) -> Scal def mean( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, keepdims: bool = False, @@ -259,7 +281,7 @@ def mean( def median( self, *, - axis=None, + axis: Optional[int] = None, out=None, overwrite_input: bool = False, keepdims: bool = False, @@ -274,7 +296,7 @@ def median( def std( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, ddof=1, @@ -290,7 +312,7 @@ def std( def var( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, ddof=1, @@ -306,7 +328,7 @@ def var( def sem( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, ddof=1, @@ -322,7 +344,7 @@ def sem( def kurt( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, keepdims: bool = False, @@ -337,7 +359,7 @@ def kurt( def skew( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, keepdims: bool = False, diff --git a/pandas/core/missing.py b/pandas/core/missing.py index c2193056cc974..b3d5aaa41d99c 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -848,7 +848,4 @@ def _rolling_window(a: np.ndarray, window: int): # https://stackoverflow.com/a/6811241 shape = a.shape[:-1] + (a.shape[-1] - window + 1, window) strides = a.strides + (a.strides[-1],) - # error: Module has no attribute "stride_tricks" - return np.lib.stride_tricks.as_strided( # type: ignore[attr-defined] - a, shape=shape, strides=strides - ) + return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides) From 267b34b26669164dc8ecbfd29dd8bb3c5e97b4d1 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 31 Mar 2021 09:34:05 -0700 Subject: [PATCH 5/5] mypy fixup --- pandas/core/arrays/datetimelike.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 92fc56569d05d..7be06fe92c418 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -963,8 +963,11 @@ def inferred_freq(self) -> Optional[str]: @property # NB: override with cache_readonly in immutable subclasses def _resolution_obj(self) -> Optional[Resolution]: + freqstr = self.freqstr + if freqstr is None: + return None try: - return Resolution.get_reso_from_freq(self.freqstr) + return Resolution.get_reso_from_freq(freqstr) except KeyError: return None