diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index db62136947250..590b40b0434e5 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -1,10 +1,11 @@ import numbers -from typing import TYPE_CHECKING, Any, List, Tuple, Type, Union +from typing import TYPE_CHECKING, List, Tuple, Type, Union import warnings import numpy as np from pandas._libs import lib, missing as libmissing +from pandas._typing import ArrayLike from pandas.compat import set_function_name from pandas.compat.numpy import function as nv @@ -281,20 +282,15 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): if not mask.ndim == 1: raise ValueError("mask must be a 1D array") - if copy: - values = values.copy() - mask = mask.copy() - - self._data = values - self._mask = mask self._dtype = BooleanDtype() + super().__init__(values, mask, copy=copy) @property - def dtype(self): + def dtype(self) -> BooleanDtype: return self._dtype @classmethod - def _from_sequence(cls, scalars, dtype=None, copy: bool = False): + def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "BooleanArray": if dtype: assert dtype == "boolean" values, mask = coerce_to_array(scalars, copy=copy) @@ -303,7 +299,7 @@ def _from_sequence(cls, scalars, dtype=None, copy: bool = False): @classmethod def _from_sequence_of_strings( cls, strings: List[str], dtype=None, copy: bool = False - ): + ) -> "BooleanArray": def map_string(s): if isna(s): return s @@ -317,18 +313,18 @@ def map_string(s): scalars = [map_string(x) for x in strings] return cls._from_sequence(scalars, dtype, copy) - def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: + def _values_for_factorize(self) -> Tuple[np.ndarray, int]: data = self._data.astype("int8") data[self._mask] = -1 return data, -1 @classmethod - def _from_factorized(cls, values, original: "BooleanArray"): + def _from_factorized(cls, values, original: "BooleanArray") -> "BooleanArray": return cls._from_sequence(values, dtype=original.dtype) _HANDLED_TYPES = (np.ndarray, numbers.Number, bool, np.bool_) - def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + def __array_ufunc__(self, ufunc, method: str, *inputs, **kwargs): # For BooleanArray inputs, we apply the ufunc to ._data # and mask the result. if method == "reduce": @@ -373,7 +369,7 @@ def reconstruct(x): else: return reconstruct(result) - def __setitem__(self, key, value): + def __setitem__(self, key, value) -> None: _is_scalar = is_scalar(value) if _is_scalar: value = [value] @@ -387,7 +383,7 @@ def __setitem__(self, key, value): self._data[key] = value self._mask[key] = mask - def astype(self, dtype, copy=True): + def astype(self, dtype, copy: bool = True) -> ArrayLike: """ Cast to a NumPy array or ExtensionArray with 'dtype'. @@ -402,8 +398,8 @@ def astype(self, dtype, copy=True): Returns ------- - array : ndarray or ExtensionArray - NumPy ndarray, BooleanArray or IntergerArray with 'dtype' for its dtype. + ndarray or ExtensionArray + NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype. Raises ------ @@ -693,7 +689,7 @@ def cmp_method(self, other): name = f"__{op.__name__}" return set_function_name(cmp_method, name, cls) - def _reduce(self, name, skipna=True, **kwargs): + def _reduce(self, name: str, skipna: bool = True, **kwargs): if name in {"any", "all"}: return getattr(self, name)(skipna=skipna, **kwargs) @@ -722,7 +718,7 @@ def _reduce(self, name, skipna=True, **kwargs): return result - def _maybe_mask_result(self, result, mask, other, op_name): + def _maybe_mask_result(self, result, mask, other, op_name: str): """ Parameters ---------- diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 4bfd5f5770b69..19ab43fc1c248 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -1,10 +1,11 @@ import numbers -from typing import TYPE_CHECKING, Any, Dict, Tuple, Type, Union +from typing import TYPE_CHECKING, Tuple, Type, Union import warnings import numpy as np from pandas._libs import lib, missing as libmissing +from pandas._typing import ArrayLike from pandas.compat import set_function_name from pandas.util._decorators import cache_readonly @@ -347,13 +348,7 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): "mask should be boolean numpy array. Use " "the 'integer_array' function instead" ) - - if copy: - values = values.copy() - mask = mask.copy() - - self._data = values - self._mask = mask + super().__init__(values, mask, copy=copy) @classmethod def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "IntegerArray": @@ -417,7 +412,7 @@ def reconstruct(x): else: return reconstruct(result) - def __setitem__(self, key, value): + def __setitem__(self, key, value) -> None: _is_scalar = is_scalar(value) if _is_scalar: value = [value] @@ -431,9 +426,9 @@ def __setitem__(self, key, value): self._data[key] = value self._mask[key] = mask - def astype(self, dtype, copy=True): + def astype(self, dtype, copy: bool = True) -> ArrayLike: """ - Cast to a NumPy array or IntegerArray with 'dtype'. + Cast to a NumPy array or ExtensionArray with 'dtype'. Parameters ---------- @@ -446,8 +441,8 @@ def astype(self, dtype, copy=True): Returns ------- - array : ndarray or IntegerArray - NumPy ndarray or IntergerArray with 'dtype' for its dtype. + ndarray or ExtensionArray + NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype. Raises ------ @@ -488,7 +483,7 @@ def _ndarray_values(self) -> np.ndarray: """ return self._data - def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: + def _values_for_factorize(self) -> Tuple[np.ndarray, float]: # TODO: https://github.com/pandas-dev/pandas/issues/30037 # use masked algorithms, rather than object-dtype / np.nan. return self.to_numpy(na_value=np.nan), np.nan @@ -565,7 +560,7 @@ def cmp_method(self, other): name = f"__{op.__name__}__" return set_function_name(cmp_method, name, cls) - def _reduce(self, name, skipna=True, **kwargs): + def _reduce(self, name: str, skipna: bool = True, **kwargs): data = self._data mask = self._mask @@ -592,7 +587,7 @@ def _reduce(self, name, skipna=True, **kwargs): return result - def _maybe_mask_result(self, result, mask, other, op_name): + def _maybe_mask_result(self, result, mask, other, op_name: str): """ Parameters ---------- @@ -768,7 +763,7 @@ class UInt64Dtype(_IntegerDtype): __doc__ = _dtype_docstring.format(dtype="uint64") -_dtypes: Dict[str, _IntegerDtype] = { +_dtypes = { "int8": Int8Dtype(), "int16": Int16Dtype(), "int32": Int32Dtype(), diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 80e317123126a..47892b55b3ce8 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1,8 +1,9 @@ -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Optional, Type, TypeVar import numpy as np from pandas._libs import lib, missing as libmissing +from pandas._typing import Scalar from pandas.core.dtypes.common import is_integer, is_object_dtype, is_string_dtype from pandas.core.dtypes.missing import isna, notna @@ -12,7 +13,10 @@ from pandas.core.indexers import check_array_indexer if TYPE_CHECKING: - from pandas._typing import Scalar + from pandas import Series + + +BaseMaskedArrayT = TypeVar("BaseMaskedArrayT", bound="BaseMaskedArray") class BaseMaskedArray(ExtensionArray, ExtensionOpsMixin): @@ -22,11 +26,16 @@ class BaseMaskedArray(ExtensionArray, ExtensionOpsMixin): numpy based """ - _data: np.ndarray - _mask: np.ndarray - # The value used to fill '_data' to avoid upcasting - _internal_fill_value: "Scalar" + _internal_fill_value: Scalar + + def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): + if copy: + values = values.copy() + mask = mask.copy() + + self._data = values + self._mask = mask def __getitem__(self, item): if is_integer(item): @@ -48,12 +57,12 @@ def __iter__(self): def __len__(self) -> int: return len(self._data) - def __invert__(self): + def __invert__(self: BaseMaskedArrayT) -> BaseMaskedArrayT: return type(self)(~self._data, self._mask) def to_numpy( - self, dtype=None, copy=False, na_value: "Scalar" = lib.no_default, - ): + self, dtype=None, copy: bool = False, na_value: Scalar = lib.no_default, + ) -> np.ndarray: """ Convert to a NumPy Array. @@ -159,7 +168,7 @@ def _hasna(self) -> bool: # source code using it.. return self._mask.any() - def isna(self): + def isna(self) -> np.ndarray: return self._mask @property @@ -167,16 +176,21 @@ def _na_value(self): return self.dtype.na_value @property - def nbytes(self): + def nbytes(self) -> int: return self._data.nbytes + self._mask.nbytes @classmethod - def _concat_same_type(cls, to_concat): + def _concat_same_type(cls: Type[BaseMaskedArrayT], to_concat) -> BaseMaskedArrayT: data = np.concatenate([x._data for x in to_concat]) mask = np.concatenate([x._mask for x in to_concat]) return cls(data, mask) - def take(self, indexer, allow_fill=False, fill_value=None): + def take( + self: BaseMaskedArrayT, + indexer, + allow_fill: bool = False, + fill_value: Optional[Scalar] = None, + ) -> BaseMaskedArrayT: # we always fill with 1 internally # to avoid upcasting data_fill_value = self._internal_fill_value if isna(fill_value) else fill_value @@ -197,13 +211,13 @@ def take(self, indexer, allow_fill=False, fill_value=None): return type(self)(result, mask, copy=False) - def copy(self): + def copy(self: BaseMaskedArrayT) -> BaseMaskedArrayT: data, mask = self._data, self._mask data = data.copy() mask = mask.copy() return type(self)(data, mask, copy=False) - def value_counts(self, dropna=True): + def value_counts(self, dropna: bool = True) -> "Series": """ Returns a Series containing counts of each unique value.