-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
TYP: partial typing of masked array #31728
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,11 @@ | ||
import numbers | ||
from typing import TYPE_CHECKING, Any, List, Tuple, Type, Union | ||
from typing import TYPE_CHECKING, List, Tuple, Type, Union | ||
import warnings | ||
|
||
import numpy as np | ||
|
||
from pandas._libs import lib, missing as libmissing | ||
from pandas._typing import ArrayLike | ||
from pandas.compat import set_function_name | ||
from pandas.compat.numpy import function as nv | ||
|
||
|
@@ -281,20 +282,15 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): | |
if not mask.ndim == 1: | ||
raise ValueError("mask must be a 1D array") | ||
|
||
if copy: | ||
values = values.copy() | ||
mask = mask.copy() | ||
|
||
self._data = values | ||
self._mask = mask | ||
self._dtype = BooleanDtype() | ||
super().__init__(values, mask, copy=copy) | ||
|
||
@property | ||
def dtype(self): | ||
def dtype(self) -> BooleanDtype: | ||
return self._dtype | ||
|
||
@classmethod | ||
def _from_sequence(cls, scalars, dtype=None, copy: bool = False): | ||
def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "BooleanArray": | ||
if dtype: | ||
assert dtype == "boolean" | ||
values, mask = coerce_to_array(scalars, copy=copy) | ||
|
@@ -303,7 +299,7 @@ def _from_sequence(cls, scalars, dtype=None, copy: bool = False): | |
@classmethod | ||
def _from_sequence_of_strings( | ||
cls, strings: List[str], dtype=None, copy: bool = False | ||
): | ||
) -> "BooleanArray": | ||
def map_string(s): | ||
if isna(s): | ||
return s | ||
|
@@ -317,18 +313,18 @@ def map_string(s): | |
scalars = [map_string(x) for x in strings] | ||
return cls._from_sequence(scalars, dtype, copy) | ||
|
||
def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: | ||
def _values_for_factorize(self) -> Tuple[np.ndarray, int]: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In general, though, the "Any" is correct (I don't know what the typing should do here, but the signature of the base class would use "Any", so any place where |
||
data = self._data.astype("int8") | ||
data[self._mask] = -1 | ||
return data, -1 | ||
|
||
@classmethod | ||
def _from_factorized(cls, values, original: "BooleanArray"): | ||
def _from_factorized(cls, values, original: "BooleanArray") -> "BooleanArray": | ||
return cls._from_sequence(values, dtype=original.dtype) | ||
|
||
_HANDLED_TYPES = (np.ndarray, numbers.Number, bool, np.bool_) | ||
|
||
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): | ||
def __array_ufunc__(self, ufunc, method: str, *inputs, **kwargs): | ||
# For BooleanArray inputs, we apply the ufunc to ._data | ||
# and mask the result. | ||
if method == "reduce": | ||
|
@@ -373,7 +369,7 @@ def reconstruct(x): | |
else: | ||
return reconstruct(result) | ||
|
||
def __setitem__(self, key, value): | ||
def __setitem__(self, key, value) -> None: | ||
_is_scalar = is_scalar(value) | ||
if _is_scalar: | ||
value = [value] | ||
|
@@ -387,7 +383,7 @@ def __setitem__(self, key, value): | |
self._data[key] = value | ||
self._mask[key] = mask | ||
|
||
def astype(self, dtype, copy=True): | ||
def astype(self, dtype, copy: bool = True) -> ArrayLike: | ||
""" | ||
Cast to a NumPy array or ExtensionArray with 'dtype'. | ||
|
||
|
@@ -402,8 +398,8 @@ def astype(self, dtype, copy=True): | |
|
||
Returns | ||
------- | ||
array : ndarray or ExtensionArray | ||
NumPy ndarray, BooleanArray or IntergerArray with 'dtype' for its dtype. | ||
ndarray or ExtensionArray | ||
NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype. | ||
|
||
Raises | ||
------ | ||
|
@@ -693,7 +689,7 @@ def cmp_method(self, other): | |
name = f"__{op.__name__}" | ||
return set_function_name(cmp_method, name, cls) | ||
|
||
def _reduce(self, name, skipna=True, **kwargs): | ||
def _reduce(self, name: str, skipna: bool = True, **kwargs): | ||
|
||
if name in {"any", "all"}: | ||
return getattr(self, name)(skipna=skipna, **kwargs) | ||
|
@@ -722,7 +718,7 @@ def _reduce(self, name, skipna=True, **kwargs): | |
|
||
return result | ||
|
||
def _maybe_mask_result(self, result, mask, other, op_name): | ||
def _maybe_mask_result(self, result, mask, other, op_name: str): | ||
""" | ||
Parameters | ||
---------- | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,11 @@ | ||
import numbers | ||
from typing import TYPE_CHECKING, Any, Dict, Tuple, Type, Union | ||
from typing import TYPE_CHECKING, Tuple, Type, Union | ||
import warnings | ||
|
||
import numpy as np | ||
|
||
from pandas._libs import lib, missing as libmissing | ||
from pandas._typing import ArrayLike | ||
from pandas.compat import set_function_name | ||
from pandas.util._decorators import cache_readonly | ||
|
||
|
@@ -347,13 +348,7 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): | |
"mask should be boolean numpy array. Use " | ||
"the 'integer_array' function instead" | ||
) | ||
|
||
if copy: | ||
values = values.copy() | ||
mask = mask.copy() | ||
|
||
self._data = values | ||
self._mask = mask | ||
super().__init__(values, mask, copy=copy) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Which superclass performs this logic? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. BaseMaskedArray? |
||
|
||
@classmethod | ||
def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "IntegerArray": | ||
|
@@ -417,7 +412,7 @@ def reconstruct(x): | |
else: | ||
return reconstruct(result) | ||
|
||
def __setitem__(self, key, value): | ||
def __setitem__(self, key, value) -> None: | ||
_is_scalar = is_scalar(value) | ||
if _is_scalar: | ||
value = [value] | ||
|
@@ -431,9 +426,9 @@ def __setitem__(self, key, value): | |
self._data[key] = value | ||
self._mask[key] = mask | ||
|
||
def astype(self, dtype, copy=True): | ||
def astype(self, dtype, copy: bool = True) -> ArrayLike: | ||
""" | ||
Cast to a NumPy array or IntegerArray with 'dtype'. | ||
Cast to a NumPy array or ExtensionArray with 'dtype'. | ||
|
||
Parameters | ||
---------- | ||
|
@@ -446,8 +441,8 @@ def astype(self, dtype, copy=True): | |
|
||
Returns | ||
------- | ||
array : ndarray or IntegerArray | ||
NumPy ndarray or IntergerArray with 'dtype' for its dtype. | ||
ndarray or ExtensionArray | ||
NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype. | ||
|
||
Raises | ||
------ | ||
|
@@ -488,7 +483,7 @@ def _ndarray_values(self) -> np.ndarray: | |
""" | ||
return self._data | ||
|
||
def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: | ||
def _values_for_factorize(self) -> Tuple[np.ndarray, float]: | ||
# TODO: https://github.com/pandas-dev/pandas/issues/30037 | ||
# use masked algorithms, rather than object-dtype / np.nan. | ||
return self.to_numpy(na_value=np.nan), np.nan | ||
|
@@ -565,7 +560,7 @@ def cmp_method(self, other): | |
name = f"__{op.__name__}__" | ||
return set_function_name(cmp_method, name, cls) | ||
|
||
def _reduce(self, name, skipna=True, **kwargs): | ||
def _reduce(self, name: str, skipna: bool = True, **kwargs): | ||
data = self._data | ||
mask = self._mask | ||
|
||
|
@@ -592,7 +587,7 @@ def _reduce(self, name, skipna=True, **kwargs): | |
|
||
return result | ||
|
||
def _maybe_mask_result(self, result, mask, other, op_name): | ||
def _maybe_mask_result(self, result, mask, other, op_name: str): | ||
""" | ||
Parameters | ||
---------- | ||
|
@@ -768,7 +763,7 @@ class UInt64Dtype(_IntegerDtype): | |
__doc__ = _dtype_docstring.format(dtype="uint64") | ||
|
||
|
||
_dtypes: Dict[str, _IntegerDtype] = { | ||
_dtypes = { | ||
"int8": Int8Dtype(), | ||
"int16": Int16Dtype(), | ||
"int32": Int32Dtype(), | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,9 @@ | ||
from typing import TYPE_CHECKING | ||
from typing import TYPE_CHECKING, Optional, Type, TypeVar | ||
|
||
import numpy as np | ||
|
||
from pandas._libs import lib, missing as libmissing | ||
from pandas._typing import Scalar | ||
|
||
from pandas.core.dtypes.common import is_integer, is_object_dtype, is_string_dtype | ||
from pandas.core.dtypes.missing import isna, notna | ||
|
@@ -12,7 +13,10 @@ | |
from pandas.core.indexers import check_array_indexer | ||
|
||
if TYPE_CHECKING: | ||
from pandas._typing import Scalar | ||
from pandas import Series | ||
|
||
|
||
BaseMaskedArrayT = TypeVar("BaseMaskedArrayT", bound="BaseMaskedArray") | ||
|
||
|
||
class BaseMaskedArray(ExtensionArray, ExtensionOpsMixin): | ||
|
@@ -22,11 +26,16 @@ class BaseMaskedArray(ExtensionArray, ExtensionOpsMixin): | |
numpy based | ||
""" | ||
|
||
_data: np.ndarray | ||
_mask: np.ndarray | ||
|
||
# The value used to fill '_data' to avoid upcasting | ||
_internal_fill_value: "Scalar" | ||
_internal_fill_value: Scalar | ||
|
||
def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. __init__ needs to be declared in the base class... pandas\core\arrays\masked.py:56: error: Too many arguments for "BaseMaskedArray" also creating this ensures that the subclasses have the correct signature for the constructor to work with we could just use a AbstractMethodError but I think it makes sense to put the shared functionality here. BooleanArray has checking for values.ndim and mask.ndim. IntegerArray does not. It may make sense to have that check here also if applicable to IntegerArray. |
||
if copy: | ||
values = values.copy() | ||
mask = mask.copy() | ||
|
||
self._data = values | ||
self._mask = mask | ||
|
||
def __getitem__(self, item): | ||
if is_integer(item): | ||
|
@@ -48,12 +57,12 @@ def __iter__(self): | |
def __len__(self) -> int: | ||
return len(self._data) | ||
|
||
def __invert__(self): | ||
def __invert__(self: BaseMaskedArrayT) -> BaseMaskedArrayT: | ||
return type(self)(~self._data, self._mask) | ||
|
||
def to_numpy( | ||
self, dtype=None, copy=False, na_value: "Scalar" = lib.no_default, | ||
): | ||
self, dtype=None, copy: bool = False, na_value: Scalar = lib.no_default, | ||
) -> np.ndarray: | ||
""" | ||
Convert to a NumPy Array. | ||
|
||
|
@@ -159,24 +168,29 @@ def _hasna(self) -> bool: | |
# source code using it.. | ||
return self._mask.any() | ||
|
||
def isna(self): | ||
def isna(self) -> np.ndarray: | ||
return self._mask | ||
|
||
@property | ||
def _na_value(self): | ||
return self.dtype.na_value | ||
|
||
@property | ||
def nbytes(self): | ||
def nbytes(self) -> int: | ||
return self._data.nbytes + self._mask.nbytes | ||
|
||
@classmethod | ||
def _concat_same_type(cls, to_concat): | ||
def _concat_same_type(cls: Type[BaseMaskedArrayT], to_concat) -> BaseMaskedArrayT: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we need to add the typevar to avoid... pandas\core\arrays\integer.py:117: error: Incompatible return value type (got "BaseMaskedArray", expected "IntegerArray") we can't use the unbound typevar from pandas._typing here otherwise we get... pandas\core\arrays\masked.py:183: error: Too many arguments for "object" since the typevar is needed here, it is also used for the other methods that return type(self) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As I said in another issue, I don't have problems with it if there is no way around it, but it needs to be documented then There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this is a problem; it's documented in PEP 484 https://www.python.org/dev/peps/pep-0484/#annotating-instance-and-class-methods |
||
data = np.concatenate([x._data for x in to_concat]) | ||
mask = np.concatenate([x._mask for x in to_concat]) | ||
return cls(data, mask) | ||
|
||
def take(self, indexer, allow_fill=False, fill_value=None): | ||
def take( | ||
self: BaseMaskedArrayT, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We otherwise don't type self, or do we? |
||
indexer, | ||
allow_fill: bool = False, | ||
fill_value: Optional[Scalar] = None, | ||
) -> BaseMaskedArrayT: | ||
# we always fill with 1 internally | ||
# to avoid upcasting | ||
data_fill_value = self._internal_fill_value if isna(fill_value) else fill_value | ||
|
@@ -197,13 +211,13 @@ def take(self, indexer, allow_fill=False, fill_value=None): | |
|
||
return type(self)(result, mask, copy=False) | ||
|
||
def copy(self): | ||
def copy(self: BaseMaskedArrayT) -> BaseMaskedArrayT: | ||
data, mask = self._data, self._mask | ||
data = data.copy() | ||
mask = mask.copy() | ||
return type(self)(data, mask, copy=False) | ||
|
||
def value_counts(self, dropna=True): | ||
def value_counts(self, dropna: bool = True) -> "Series": | ||
""" | ||
Returns a Series containing counts of each unique value. | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this could be a class attribute. no need to be assigned in constructor?