diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 41c4de51fc2e1..9db22df20e66d 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -288,6 +288,7 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi
     # tolist is not actually deprecated, just suppressed in the __dir__
     _deprecations = PandasObject._deprecations | frozenset(["tolist"])
     _typ = "categorical"
+    _can_hold_na = True

     def __init__(
         self, values, categories=None, ordered=None, dtype=None, fastpath=False
@@ -1268,10 +1269,10 @@ def __setstate__(self, state):
             setattr(self, k, v)

     @property
-    def nbytes(self):
+    def nbytes(self) -> int:
         return self._codes.nbytes + self.dtype.categories.values.nbytes

-    def memory_usage(self, deep=False):
+    def memory_usage(self, deep: bool = False) -> int:
         """
         Memory usage of my values

@@ -2144,10 +2145,6 @@ def equals(self, other: object) -> bool:
             return np.array_equal(self._codes, other_codes)
         return False

-    @property
-    def _can_hold_na(self):
-        return True
-
     @classmethod
     def _concat_same_type(self, to_concat):
         from pandas.core.dtypes.concat import union_categoricals
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 0f723546fb4c2..83a9c0ba61c2d 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -117,7 +117,9 @@ class AttributesMixin:
     _data: np.ndarray

     @classmethod
-    def _simple_new(cls, values: np.ndarray, **kwargs):
+    def _simple_new(
+        cls, values: np.ndarray, freq: Optional[BaseOffset] = None, dtype=None
+    ):
         raise AbstractMethodError(cls)

     @property
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 6b051f1f73467..cd5449058fb33 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -6,6 +6,7 @@

 from pandas._libs import lib, tslib
 from pandas._libs.tslibs import (
+    BaseOffset,
     NaT,
     NaTType,
     Resolution,
@@ -283,7 +284,9 @@ def __init__(self, values, dtype=DT64NS_DTYPE, freq=None, copy=False):
             type(self)._validate_frequency(self, freq)

     @classmethod
-    def _simple_new(cls, values, freq=None, dtype=DT64NS_DTYPE):
+    def _simple_new(
+        cls, values, freq: Optional[BaseOffset] = None, dtype=DT64NS_DTYPE
+    ) -> "DatetimeArray":
         assert isinstance(values, np.ndarray)
         if values.dtype != DT64NS_DTYPE:
             assert values.dtype == "i8"
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
index 372ef7df9dc3a..15f2842e39875 100644
--- a/pandas/core/arrays/period.py
+++ b/pandas/core/arrays/period.py
@@ -174,11 +174,13 @@ def __init__(self, values, freq=None, dtype=None, copy=False):
         self._dtype = PeriodDtype(freq)

     @classmethod
-    def _simple_new(cls, values: np.ndarray, freq=None, **kwargs) -> "PeriodArray":
+    def _simple_new(
+        cls, values: np.ndarray, freq: Optional[BaseOffset] = None, dtype=None
+    ) -> "PeriodArray":
         # alias for PeriodArray.__init__
         assertion_msg = "Should be numpy array of type i8"
         assert isinstance(values, np.ndarray) and values.dtype == "i8", assertion_msg
-        return cls(values, freq=freq, **kwargs)
+        return cls(values, freq=freq, dtype=dtype)

     @classmethod
     def _from_sequence(
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index bf8b93b5a4164..9ea34d4680748 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -308,7 +308,7 @@ def value_counts(self, dropna=False):

         return value_counts(self._ndarray, dropna=dropna).astype("Int64")

-    def memory_usage(self, deep=False):
+    def memory_usage(self, deep: bool = False) -> int:
         result = self._ndarray.nbytes
         if deep:
             return result + lib.memory_usage_of_objects(self._ndarray)
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index 145380ecce9fd..6ca57e7872910 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -1,10 +1,11 @@
 from datetime import timedelta
-from typing import List, Union
+from typing import List, Optional, Union

 import numpy as np

 from pandas._libs import lib, tslibs
 from pandas._libs.tslibs import (
+    BaseOffset,
     NaT,
     NaTType,
     Period,
@@ -45,8 +46,8 @@
 from pandas.core.ops.common import unpack_zerodim_and_defer


-def _field_accessor(name, alias, docstring=None):
-    def f(self):
+def _field_accessor(name: str, alias: str, docstring: str):
+    def f(self) -> np.ndarray:
         values = self.asi8
         result = get_timedelta_field(values, alias)
         if self._hasnans:
@@ -121,7 +122,7 @@ def _box_func(self, x) -> Union[Timedelta, NaTType]:
         return Timedelta(x, unit="ns")

     @property
-    def dtype(self):
+    def dtype(self) -> np.dtype:
         """
         The dtype for the TimedeltaArray.

@@ -196,7 +197,9 @@ def __init__(self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy=False):
             type(self)._validate_frequency(self, freq)

     @classmethod
-    def _simple_new(cls, values, freq=None, dtype=TD64NS_DTYPE):
+    def _simple_new(
+        cls, values, freq: Optional[BaseOffset] = None, dtype=TD64NS_DTYPE
+    ) -> "TimedeltaArray":
         assert dtype == TD64NS_DTYPE, dtype
         assert isinstance(values, np.ndarray), type(values)
         if values.dtype != TD64NS_DTYPE:
@@ -211,8 +214,13 @@ def _simple_new(cls, values, freq=None, dtype=TD64NS_DTYPE):

     @classmethod
     def _from_sequence(
-        cls, data, dtype=TD64NS_DTYPE, copy=False, freq=lib.no_default, unit=None
-    ):
+        cls,
+        data,
+        dtype=TD64NS_DTYPE,
+        copy: bool = False,
+        freq=lib.no_default,
+        unit=None,
+    ) -> "TimedeltaArray":
         if dtype:
             _validate_td64_dtype(dtype)

@@ -240,7 +248,9 @@ def _from_sequence(
         return result

     @classmethod
-    def _generate_range(cls, start, end, periods, freq, closed=None):
+    def _generate_range(
+        cls, start, end, periods, freq, closed=None
+    ) -> "TimedeltaArray":
         periods = dtl.validate_periods(periods)

         if freq is None and any(x is None for x in [periods, start, end]):
@@ -298,7 +308,7 @@ def _maybe_clear_freq(self):
     # ----------------------------------------------------------------
     # Array-Like / EA-Interface Methods

-    def astype(self, dtype, copy=True):
+    def astype(self, dtype, copy: bool = True):
         # We handle
         #   --> timedelta64[ns]
         #   --> timedelta64
@@ -461,7 +471,7 @@ def _addsub_object_array(self, other, op):
             ) from err

     @unpack_zerodim_and_defer("__mul__")
-    def __mul__(self, other):
+    def __mul__(self, other) -> "TimedeltaArray":
         if is_scalar(other):
             # numpy will accept float and int, raise TypeError for others
             result = self._data * other
@@ -737,22 +747,22 @@ def __rdivmod__(self, other):
         res2 = other - res1 * self
         return res1, res2

-    def __neg__(self):
+    def __neg__(self) -> "TimedeltaArray":
         if self.freq is not None:
             return type(self)(-self._data, freq=-self.freq)
         return type(self)(-self._data)

-    def __pos__(self):
+    def __pos__(self) -> "TimedeltaArray":
         return type(self)(self._data, freq=self.freq)

-    def __abs__(self):
+    def __abs__(self) -> "TimedeltaArray":
         # Note: freq is not preserved
         return type(self)(np.abs(self._data))

     # ----------------------------------------------------------------
     # Conversion Methods - Vectorized analogues of Timedelta methods

-    def total_seconds(self):
+    def total_seconds(self) -> np.ndarray:
         """
         Return total duration of each element expressed in seconds.

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 4d5cddc086b2a..a50181c1be2f0 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1347,7 +1347,7 @@ def memory_usage(self, deep=False):

         Parameters
         ----------
-        deep : bool
+        deep : bool, default False
             Introspect the data deeply, interrogate
             `object` dtypes for system-level memory consumption.

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index 3d2820976a6af..23cc93b9ecb33 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -105,7 +105,7 @@ def _is_all_dates(self) -> bool:

     # Abstract data attributes

     @property
-    def values(self):
+    def values(self) -> np.ndarray:
         # Note: PeriodArray overrides this to return an ndarray of objects.
         return self._data._data
diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
index 900d3f9f1866b..27b60747015de 100644
--- a/pandas/core/indexes/period.py
+++ b/pandas/core/indexes/period.py
@@ -60,7 +60,7 @@ def _new_PeriodIndex(cls, **d):


 @inherit_names(
-    ["strftime", "to_timestamp", "start_time", "end_time"] + PeriodArray._field_ops,
+    ["strftime", "start_time", "end_time"] + PeriodArray._field_ops,
     PeriodArray,
     wrap=True,
 )
@@ -149,12 +149,18 @@ class PeriodIndex(DatetimeIndexOpsMixin, Int64Index):

     # --------------------------------------------------------------------
     # methods that dispatch to array and wrap result in PeriodIndex
+    # These are defined here instead of via inherit_names for mypy

     @doc(PeriodArray.asfreq)
     def asfreq(self, freq=None, how: str = "E") -> "PeriodIndex":
         arr = self._data.asfreq(freq, how)
         return type(self)._simple_new(arr, name=self.name)

+    @doc(PeriodArray.to_timestamp)
+    def to_timestamp(self, freq=None, how="start") -> DatetimeIndex:
+        arr = self._data.to_timestamp(freq, how)
+        return DatetimeIndex._simple_new(arr, name=self.name)
+
     # ------------------------------------------------------------------------
     # Index Constructors

@@ -244,11 +250,11 @@ def _simple_new(cls, values: PeriodArray, name: Label = None):
     # Data

     @property
-    def values(self):
+    def values(self) -> np.ndarray:
         return np.asarray(self)

     @property
-    def _has_complex_internals(self):
+    def _has_complex_internals(self) -> bool:
         # used to avoid libreduction code paths, which raise or require conversion
         return True

@@ -402,7 +408,7 @@ def asof_locs(self, where, mask: np.ndarray) -> np.ndarray:
         return result

     @doc(Index.astype)
-    def astype(self, dtype, copy=True, how="start"):
+    def astype(self, dtype, copy: bool = True, how="start"):
         dtype = pandas_dtype(dtype)

         if is_datetime64_any_dtype(dtype):
@@ -421,7 +427,7 @@ def is_full(self) -> bool:
         """
         if len(self) == 0:
             return True
-        if not self.is_monotonic:
+        if not self.is_monotonic_increasing:
             raise ValueError("Index is not monotonic")
         values = self.asi8
         return ((values[1:] - values[:-1]) < 2).all()
@@ -432,7 +438,7 @@ def inferred_type(self) -> str:
         # indexing
         return "period"

-    def insert(self, loc, item):
+    def insert(self, loc: int, item):
         if not isinstance(item, Period) or self.freq != item.freq:
             return self.astype(object).insert(loc, item)

@@ -706,7 +712,7 @@ def _union(self, other, sort):

     # ------------------------------------------------------------------------

-    def memory_usage(self, deep=False):
+    def memory_usage(self, deep: bool = False) -> int:
         result = super().memory_usage(deep=deep)
         if hasattr(self, "_cache") and "_int64index" in self._cache:
             result += self._int64index.memory_usage(deep=deep)
diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py
index 20ebc80c7e0af..854c4e33eca01 100644
--- a/pandas/core/indexes/timedeltas.py
+++ b/pandas/core/indexes/timedeltas.py
@@ -184,7 +184,7 @@ def _formatter_func(self):
     # -------------------------------------------------------------------

     @doc(Index.astype)
-    def astype(self, dtype, copy=True):
+    def astype(self, dtype, copy: bool = True):
         dtype = pandas_dtype(dtype)
         if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype):
             # Have to repeat the check for 'timedelta64' (not ns) dtype
diff --git a/pandas/core/series.py b/pandas/core/series.py
index d2c702d924136..fca1b6b08b434 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -394,7 +394,7 @@ def _constructor_expanddim(self) -> Type["DataFrame"]:

     # types
     @property
-    def _can_hold_na(self):
+    def _can_hold_na(self) -> bool:
         return self._mgr._can_hold_na

     _index = None
@@ -4904,10 +4904,7 @@ def to_timestamp(self, freq=None, how="start", copy=True) -> "Series":
         if not isinstance(self.index, PeriodIndex):
             raise TypeError(f"unsupported Type {type(self.index).__name__}")

-        # error: "PeriodIndex" has no attribute "to_timestamp"
-        new_index = self.index.to_timestamp(  # type: ignore[attr-defined]
-            freq=freq, how=how
-        )
+        new_index = self.index.to_timestamp(freq=freq, how=how)
         return self._constructor(new_values, index=new_index).__finalize__(
             self, method="to_timestamp"
         )
diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py
index 8a18f505058bc..5e930b7b22f30 100644
--- a/pandas/tests/extension/arrow/arrays.py
+++ b/pandas/tests/extension/arrow/arrays.py
@@ -68,6 +68,8 @@ def construct_array_type(cls) -> Type["ArrowStringArray"]:


 class ArrowExtensionArray(ExtensionArray):
+    _data: pa.ChunkedArray
+
     @classmethod
     def from_scalars(cls, values):
         arr = pa.chunked_array([pa.array(np.asarray(values))])
@@ -129,7 +131,7 @@ def __or__(self, other):
         return self._boolean_op(other, operator.or_)

     @property
-    def nbytes(self):
+    def nbytes(self) -> int:
         return sum(
             x.size
             for chunk in self._data.chunks
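Note (not part of the diff): a minimal usage sketch of the behavior the annotations above describe, assuming a pandas build that includes this change. PeriodIndex.to_timestamp is now declared on the index itself (so mypy can see it) and returns a DatetimeIndex, which is why the type: ignore in Series.to_timestamp can be dropped.

    import pandas as pd

    pi = pd.period_range("2020-01", periods=3, freq="M")

    # PeriodIndex.to_timestamp returns a DatetimeIndex
    dti = pi.to_timestamp(how="start")
    print(type(dti).__name__)  # DatetimeIndex

    # Series.to_timestamp dispatches to it without any type: ignore
    ser = pd.Series([1, 2, 3], index=pi)
    print(ser.to_timestamp(how="start").index.dtype)  # datetime64[ns]

    # memory_usage is annotated as returning int
    print(pi.memory_usage(deep=True))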