TYP: partial typing of masked array (#31728)

simonjayhawkins · web-flow · commit 35174ae224e3 · 2020-02-11T17:32:20.000-08:00
diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
@@ -1,10 +1,11 @@
 import numbers
-from typing import TYPE_CHECKING, Any, List, Tuple, Type, Union
+from typing import TYPE_CHECKING, List, Tuple, Type, Union
 import warnings
 
 import numpy as np
 
 from pandas._libs import lib, missing as libmissing
+from pandas._typing import ArrayLike
 from pandas.compat import set_function_name
 from pandas.compat.numpy import function as nv
 
@@ -281,20 +282,15 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
         if not mask.ndim == 1:
             raise ValueError("mask must be a 1D array")
 
-        if copy:
-            values = values.copy()
-            mask = mask.copy()
-
-        self._data = values
-        self._mask = mask
         self._dtype = BooleanDtype()
+        super().__init__(values, mask, copy=copy)
 
     @property
-    def dtype(self):
+    def dtype(self) -> BooleanDtype:
         return self._dtype
 
     @classmethod
-    def _from_sequence(cls, scalars, dtype=None, copy: bool = False):
+    def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "BooleanArray":
         if dtype:
             assert dtype == "boolean"
         values, mask = coerce_to_array(scalars, copy=copy)
@@ -303,7 +299,7 @@ def _from_sequence(cls, scalars, dtype=None, copy: bool = False):
     @classmethod
     def _from_sequence_of_strings(
         cls, strings: List[str], dtype=None, copy: bool = False
-    ):
+    ) -> "BooleanArray":
         def map_string(s):
             if isna(s):
                 return s
@@ -317,18 +313,18 @@ def map_string(s):
         scalars = [map_string(x) for x in strings]
         return cls._from_sequence(scalars, dtype, copy)
 
-    def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
+    def _values_for_factorize(self) -> Tuple[np.ndarray, int]:
         data = self._data.astype("int8")
         data[self._mask] = -1
         return data, -1
 
     @classmethod
-    def _from_factorized(cls, values, original: "BooleanArray"):
+    def _from_factorized(cls, values, original: "BooleanArray") -> "BooleanArray":
         return cls._from_sequence(values, dtype=original.dtype)
 
     _HANDLED_TYPES = (np.ndarray, numbers.Number, bool, np.bool_)
 
-    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+    def __array_ufunc__(self, ufunc, method: str, *inputs, **kwargs):
         # For BooleanArray inputs, we apply the ufunc to ._data
         # and mask the result.
         if method == "reduce":
@@ -373,7 +369,7 @@ def reconstruct(x):
         else:
             return reconstruct(result)
 
-    def __setitem__(self, key, value):
+    def __setitem__(self, key, value) -> None:
         _is_scalar = is_scalar(value)
         if _is_scalar:
             value = [value]
@@ -387,7 +383,7 @@ def __setitem__(self, key, value):
         self._data[key] = value
         self._mask[key] = mask
 
-    def astype(self, dtype, copy=True):
+    def astype(self, dtype, copy: bool = True) -> ArrayLike:
         """
         Cast to a NumPy array or ExtensionArray with 'dtype'.
 
@@ -402,8 +398,8 @@ def astype(self, dtype, copy=True):
 
         Returns
         -------
-        array : ndarray or ExtensionArray
-            NumPy ndarray, BooleanArray or IntergerArray with 'dtype' for its dtype.
+        ndarray or ExtensionArray
+            NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype.
 
         Raises
         ------
@@ -693,7 +689,7 @@ def cmp_method(self, other):
         name = f"__{op.__name__}"
         return set_function_name(cmp_method, name, cls)
 
-    def _reduce(self, name, skipna=True, **kwargs):
+    def _reduce(self, name: str, skipna: bool = True, **kwargs):
 
         if name in {"any", "all"}:
             return getattr(self, name)(skipna=skipna, **kwargs)
@@ -722,7 +718,7 @@ def _reduce(self, name, skipna=True, **kwargs):
 
         return result
 
-    def _maybe_mask_result(self, result, mask, other, op_name):
+    def _maybe_mask_result(self, result, mask, other, op_name: str):
         """
         Parameters
         ----------
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -1,10 +1,11 @@
 import numbers
-from typing import TYPE_CHECKING, Any, Dict, Tuple, Type, Union
+from typing import TYPE_CHECKING, Tuple, Type, Union
 import warnings
 
 import numpy as np
 
 from pandas._libs import lib, missing as libmissing
+from pandas._typing import ArrayLike
 from pandas.compat import set_function_name
 from pandas.util._decorators import cache_readonly
 
@@ -347,13 +348,7 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
                 "mask should be boolean numpy array. Use "
                 "the 'integer_array' function instead"
             )
-
-        if copy:
-            values = values.copy()
-            mask = mask.copy()
-
-        self._data = values
-        self._mask = mask
+        super().__init__(values, mask, copy=copy)
 
     @classmethod
     def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "IntegerArray":
@@ -417,7 +412,7 @@ def reconstruct(x):
         else:
             return reconstruct(result)
 
-    def __setitem__(self, key, value):
+    def __setitem__(self, key, value) -> None:
         _is_scalar = is_scalar(value)
         if _is_scalar:
             value = [value]
@@ -431,9 +426,9 @@ def __setitem__(self, key, value):
         self._data[key] = value
         self._mask[key] = mask
 
-    def astype(self, dtype, copy=True):
+    def astype(self, dtype, copy: bool = True) -> ArrayLike:
         """
-        Cast to a NumPy array or IntegerArray with 'dtype'.
+        Cast to a NumPy array or ExtensionArray with 'dtype'.
 
         Parameters
         ----------
@@ -446,8 +441,8 @@ def astype(self, dtype, copy=True):
 
         Returns
         -------
-        array : ndarray or IntegerArray
-            NumPy ndarray or IntergerArray with 'dtype' for its dtype.
+        ndarray or ExtensionArray
+            NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype.
 
         Raises
         ------
@@ -488,7 +483,7 @@ def _ndarray_values(self) -> np.ndarray:
         """
         return self._data
 
-    def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
+    def _values_for_factorize(self) -> Tuple[np.ndarray, float]:
         # TODO: https://github.com/pandas-dev/pandas/issues/30037
         # use masked algorithms, rather than object-dtype / np.nan.
         return self.to_numpy(na_value=np.nan), np.nan
@@ -565,7 +560,7 @@ def cmp_method(self, other):
         name = f"__{op.__name__}__"
         return set_function_name(cmp_method, name, cls)
 
-    def _reduce(self, name, skipna=True, **kwargs):
+    def _reduce(self, name: str, skipna: bool = True, **kwargs):
         data = self._data
         mask = self._mask
 
@@ -592,7 +587,7 @@ def _reduce(self, name, skipna=True, **kwargs):
 
         return result
 
-    def _maybe_mask_result(self, result, mask, other, op_name):
+    def _maybe_mask_result(self, result, mask, other, op_name: str):
         """
         Parameters
         ----------
@@ -768,7 +763,7 @@ class UInt64Dtype(_IntegerDtype):
     __doc__ = _dtype_docstring.format(dtype="uint64")
 
 
-_dtypes: Dict[str, _IntegerDtype] = {
+_dtypes = {
     "int8": Int8Dtype(),
     "int16": Int16Dtype(),
     "int32": Int32Dtype(),
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
@@ -1,8 +1,9 @@
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Optional, Type, TypeVar
 
 import numpy as np
 
 from pandas._libs import lib, missing as libmissing
+from pandas._typing import Scalar
 
 from pandas.core.dtypes.common import is_integer, is_object_dtype, is_string_dtype
 from pandas.core.dtypes.missing import isna, notna
@@ -12,7 +13,10 @@
 from pandas.core.indexers import check_array_indexer
 
 if TYPE_CHECKING:
-    from pandas._typing import Scalar
+    from pandas import Series
+
+
+BaseMaskedArrayT = TypeVar("BaseMaskedArrayT", bound="BaseMaskedArray")
 
 
 class BaseMaskedArray(ExtensionArray, ExtensionOpsMixin):
@@ -22,11 +26,16 @@ class BaseMaskedArray(ExtensionArray, ExtensionOpsMixin):
     numpy based
     """
 
-    _data: np.ndarray
-    _mask: np.ndarray
-
     # The value used to fill '_data' to avoid upcasting
-    _internal_fill_value: "Scalar"
+    _internal_fill_value: Scalar
+
+    def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
+        if copy:
+            values = values.copy()
+            mask = mask.copy()
+
+        self._data = values
+        self._mask = mask
 
     def __getitem__(self, item):
         if is_integer(item):
@@ -48,12 +57,12 @@ def __iter__(self):
     def __len__(self) -> int:
         return len(self._data)
 
-    def __invert__(self):
+    def __invert__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
         return type(self)(~self._data, self._mask)
 
     def to_numpy(
-        self, dtype=None, copy=False, na_value: "Scalar" = lib.no_default,
-    ):
+        self, dtype=None, copy: bool = False, na_value: Scalar = lib.no_default,
+    ) -> np.ndarray:
         """
         Convert to a NumPy Array.
 
@@ -159,24 +168,29 @@ def _hasna(self) -> bool:
         # source code using it..
         return self._mask.any()
 
-    def isna(self):
+    def isna(self) -> np.ndarray:
         return self._mask
 
     @property
     def _na_value(self):
         return self.dtype.na_value
 
     @property
-    def nbytes(self):
+    def nbytes(self) -> int:
         return self._data.nbytes + self._mask.nbytes
 
     @classmethod
-    def _concat_same_type(cls, to_concat):
+    def _concat_same_type(cls: Type[BaseMaskedArrayT], to_concat) -> BaseMaskedArrayT:
         data = np.concatenate([x._data for x in to_concat])
         mask = np.concatenate([x._mask for x in to_concat])
         return cls(data, mask)
 
-    def take(self, indexer, allow_fill=False, fill_value=None):
+    def take(
+        self: BaseMaskedArrayT,
+        indexer,
+        allow_fill: bool = False,
+        fill_value: Optional[Scalar] = None,
+    ) -> BaseMaskedArrayT:
         # we always fill with 1 internally
         # to avoid upcasting
         data_fill_value = self._internal_fill_value if isna(fill_value) else fill_value
@@ -197,13 +211,13 @@ def take(self, indexer, allow_fill=False, fill_value=None):
 
         return type(self)(result, mask, copy=False)
 
-    def copy(self):
+    def copy(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
         data, mask = self._data, self._mask
         data = data.copy()
         mask = mask.copy()
         return type(self)(data, mask, copy=False)
 
-    def value_counts(self, dropna=True):
+    def value_counts(self, dropna: bool = True) -> "Series":
         """
         Returns a Series containing counts of each unique value.