Skip to content

Commit 35174ae

Browse files
TYP: partial typing of masked array (#31728)
1 parent 1e6cf96 commit 35174ae

File tree

3 files changed

+56
-51
lines changed

3 files changed

+56
-51
lines changed

pandas/core/arrays/boolean.py

+15-19
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,11 @@
11
import numbers
2-
from typing import TYPE_CHECKING, Any, List, Tuple, Type, Union
2+
from typing import TYPE_CHECKING, List, Tuple, Type, Union
33
import warnings
44

55
import numpy as np
66

77
from pandas._libs import lib, missing as libmissing
8+
from pandas._typing import ArrayLike
89
from pandas.compat import set_function_name
910
from pandas.compat.numpy import function as nv
1011

@@ -281,20 +282,15 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
281282
if not mask.ndim == 1:
282283
raise ValueError("mask must be a 1D array")
283284

284-
if copy:
285-
values = values.copy()
286-
mask = mask.copy()
287-
288-
self._data = values
289-
self._mask = mask
290285
self._dtype = BooleanDtype()
286+
super().__init__(values, mask, copy=copy)
291287

292288
@property
293-
def dtype(self):
289+
def dtype(self) -> BooleanDtype:
294290
return self._dtype
295291

296292
@classmethod
297-
def _from_sequence(cls, scalars, dtype=None, copy: bool = False):
293+
def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "BooleanArray":
298294
if dtype:
299295
assert dtype == "boolean"
300296
values, mask = coerce_to_array(scalars, copy=copy)
@@ -303,7 +299,7 @@ def _from_sequence(cls, scalars, dtype=None, copy: bool = False):
303299
@classmethod
304300
def _from_sequence_of_strings(
305301
cls, strings: List[str], dtype=None, copy: bool = False
306-
):
302+
) -> "BooleanArray":
307303
def map_string(s):
308304
if isna(s):
309305
return s
@@ -317,18 +313,18 @@ def map_string(s):
317313
scalars = [map_string(x) for x in strings]
318314
return cls._from_sequence(scalars, dtype, copy)
319315

320-
def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
316+
def _values_for_factorize(self) -> Tuple[np.ndarray, int]:
321317
data = self._data.astype("int8")
322318
data[self._mask] = -1
323319
return data, -1
324320

325321
@classmethod
326-
def _from_factorized(cls, values, original: "BooleanArray"):
322+
def _from_factorized(cls, values, original: "BooleanArray") -> "BooleanArray":
327323
return cls._from_sequence(values, dtype=original.dtype)
328324

329325
_HANDLED_TYPES = (np.ndarray, numbers.Number, bool, np.bool_)
330326

331-
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
327+
def __array_ufunc__(self, ufunc, method: str, *inputs, **kwargs):
332328
# For BooleanArray inputs, we apply the ufunc to ._data
333329
# and mask the result.
334330
if method == "reduce":
@@ -373,7 +369,7 @@ def reconstruct(x):
373369
else:
374370
return reconstruct(result)
375371

376-
def __setitem__(self, key, value):
372+
def __setitem__(self, key, value) -> None:
377373
_is_scalar = is_scalar(value)
378374
if _is_scalar:
379375
value = [value]
@@ -387,7 +383,7 @@ def __setitem__(self, key, value):
387383
self._data[key] = value
388384
self._mask[key] = mask
389385

390-
def astype(self, dtype, copy=True):
386+
def astype(self, dtype, copy: bool = True) -> ArrayLike:
391387
"""
392388
Cast to a NumPy array or ExtensionArray with 'dtype'.
393389
@@ -402,8 +398,8 @@ def astype(self, dtype, copy=True):
402398
403399
Returns
404400
-------
405-
array : ndarray or ExtensionArray
406-
NumPy ndarray, BooleanArray or IntergerArray with 'dtype' for its dtype.
401+
ndarray or ExtensionArray
402+
NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype.
407403
408404
Raises
409405
------
@@ -693,7 +689,7 @@ def cmp_method(self, other):
693689
name = f"__{op.__name__}"
694690
return set_function_name(cmp_method, name, cls)
695691

696-
def _reduce(self, name, skipna=True, **kwargs):
692+
def _reduce(self, name: str, skipna: bool = True, **kwargs):
697693

698694
if name in {"any", "all"}:
699695
return getattr(self, name)(skipna=skipna, **kwargs)
@@ -722,7 +718,7 @@ def _reduce(self, name, skipna=True, **kwargs):
722718

723719
return result
724720

725-
def _maybe_mask_result(self, result, mask, other, op_name):
721+
def _maybe_mask_result(self, result, mask, other, op_name: str):
726722
"""
727723
Parameters
728724
----------

pandas/core/arrays/integer.py

+12-17
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,11 @@
11
import numbers
2-
from typing import TYPE_CHECKING, Any, Dict, Tuple, Type, Union
2+
from typing import TYPE_CHECKING, Tuple, Type, Union
33
import warnings
44

55
import numpy as np
66

77
from pandas._libs import lib, missing as libmissing
8+
from pandas._typing import ArrayLike
89
from pandas.compat import set_function_name
910
from pandas.util._decorators import cache_readonly
1011

@@ -347,13 +348,7 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
347348
"mask should be boolean numpy array. Use "
348349
"the 'integer_array' function instead"
349350
)
350-
351-
if copy:
352-
values = values.copy()
353-
mask = mask.copy()
354-
355-
self._data = values
356-
self._mask = mask
351+
super().__init__(values, mask, copy=copy)
357352

358353
@classmethod
359354
def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "IntegerArray":
@@ -417,7 +412,7 @@ def reconstruct(x):
417412
else:
418413
return reconstruct(result)
419414

420-
def __setitem__(self, key, value):
415+
def __setitem__(self, key, value) -> None:
421416
_is_scalar = is_scalar(value)
422417
if _is_scalar:
423418
value = [value]
@@ -431,9 +426,9 @@ def __setitem__(self, key, value):
431426
self._data[key] = value
432427
self._mask[key] = mask
433428

434-
def astype(self, dtype, copy=True):
429+
def astype(self, dtype, copy: bool = True) -> ArrayLike:
435430
"""
436-
Cast to a NumPy array or IntegerArray with 'dtype'.
431+
Cast to a NumPy array or ExtensionArray with 'dtype'.
437432
438433
Parameters
439434
----------
@@ -446,8 +441,8 @@ def astype(self, dtype, copy=True):
446441
447442
Returns
448443
-------
449-
array : ndarray or IntegerArray
450-
NumPy ndarray or IntergerArray with 'dtype' for its dtype.
444+
ndarray or ExtensionArray
445+
NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype.
451446
452447
Raises
453448
------
@@ -488,7 +483,7 @@ def _ndarray_values(self) -> np.ndarray:
488483
"""
489484
return self._data
490485

491-
def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
486+
def _values_for_factorize(self) -> Tuple[np.ndarray, float]:
492487
# TODO: https://github.com/pandas-dev/pandas/issues/30037
493488
# use masked algorithms, rather than object-dtype / np.nan.
494489
return self.to_numpy(na_value=np.nan), np.nan
@@ -565,7 +560,7 @@ def cmp_method(self, other):
565560
name = f"__{op.__name__}__"
566561
return set_function_name(cmp_method, name, cls)
567562

568-
def _reduce(self, name, skipna=True, **kwargs):
563+
def _reduce(self, name: str, skipna: bool = True, **kwargs):
569564
data = self._data
570565
mask = self._mask
571566

@@ -592,7 +587,7 @@ def _reduce(self, name, skipna=True, **kwargs):
592587

593588
return result
594589

595-
def _maybe_mask_result(self, result, mask, other, op_name):
590+
def _maybe_mask_result(self, result, mask, other, op_name: str):
596591
"""
597592
Parameters
598593
----------
@@ -768,7 +763,7 @@ class UInt64Dtype(_IntegerDtype):
768763
__doc__ = _dtype_docstring.format(dtype="uint64")
769764

770765

771-
_dtypes: Dict[str, _IntegerDtype] = {
766+
_dtypes = {
772767
"int8": Int8Dtype(),
773768
"int16": Int16Dtype(),
774769
"int32": Int32Dtype(),

pandas/core/arrays/masked.py

+29-15
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
1-
from typing import TYPE_CHECKING
1+
from typing import TYPE_CHECKING, Optional, Type, TypeVar
22

33
import numpy as np
44

55
from pandas._libs import lib, missing as libmissing
6+
from pandas._typing import Scalar
67

78
from pandas.core.dtypes.common import is_integer, is_object_dtype, is_string_dtype
89
from pandas.core.dtypes.missing import isna, notna
@@ -12,7 +13,10 @@
1213
from pandas.core.indexers import check_array_indexer
1314

1415
if TYPE_CHECKING:
15-
from pandas._typing import Scalar
16+
from pandas import Series
17+
18+
19+
BaseMaskedArrayT = TypeVar("BaseMaskedArrayT", bound="BaseMaskedArray")
1620

1721

1822
class BaseMaskedArray(ExtensionArray, ExtensionOpsMixin):
@@ -22,11 +26,16 @@ class BaseMaskedArray(ExtensionArray, ExtensionOpsMixin):
2226
numpy based
2327
"""
2428

25-
_data: np.ndarray
26-
_mask: np.ndarray
27-
2829
# The value used to fill '_data' to avoid upcasting
29-
_internal_fill_value: "Scalar"
30+
_internal_fill_value: Scalar
31+
32+
def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
33+
if copy:
34+
values = values.copy()
35+
mask = mask.copy()
36+
37+
self._data = values
38+
self._mask = mask
3039

3140
def __getitem__(self, item):
3241
if is_integer(item):
@@ -48,12 +57,12 @@ def __iter__(self):
4857
def __len__(self) -> int:
4958
return len(self._data)
5059

51-
def __invert__(self):
60+
def __invert__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
5261
return type(self)(~self._data, self._mask)
5362

5463
def to_numpy(
55-
self, dtype=None, copy=False, na_value: "Scalar" = lib.no_default,
56-
):
64+
self, dtype=None, copy: bool = False, na_value: Scalar = lib.no_default,
65+
) -> np.ndarray:
5766
"""
5867
Convert to a NumPy Array.
5968
@@ -159,24 +168,29 @@ def _hasna(self) -> bool:
159168
# source code using it..
160169
return self._mask.any()
161170

162-
def isna(self):
171+
def isna(self) -> np.ndarray:
163172
return self._mask
164173

165174
@property
166175
def _na_value(self):
167176
return self.dtype.na_value
168177

169178
@property
170-
def nbytes(self):
179+
def nbytes(self) -> int:
171180
return self._data.nbytes + self._mask.nbytes
172181

173182
@classmethod
174-
def _concat_same_type(cls, to_concat):
183+
def _concat_same_type(cls: Type[BaseMaskedArrayT], to_concat) -> BaseMaskedArrayT:
175184
data = np.concatenate([x._data for x in to_concat])
176185
mask = np.concatenate([x._mask for x in to_concat])
177186
return cls(data, mask)
178187

179-
def take(self, indexer, allow_fill=False, fill_value=None):
188+
def take(
189+
self: BaseMaskedArrayT,
190+
indexer,
191+
allow_fill: bool = False,
192+
fill_value: Optional[Scalar] = None,
193+
) -> BaseMaskedArrayT:
180194
# we always fill with 1 internally
181195
# to avoid upcasting
182196
data_fill_value = self._internal_fill_value if isna(fill_value) else fill_value
@@ -197,13 +211,13 @@ def take(self, indexer, allow_fill=False, fill_value=None):
197211

198212
return type(self)(result, mask, copy=False)
199213

200-
def copy(self):
214+
def copy(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
201215
data, mask = self._data, self._mask
202216
data = data.copy()
203217
mask = mask.copy()
204218
return type(self)(data, mask, copy=False)
205219

206-
def value_counts(self, dropna=True):
220+
def value_counts(self, dropna: bool = True) -> "Series":
207221
"""
208222
Returns a Series containing counts of each unique value.
209223

0 commit comments

Comments
 (0)