Skip to content

Commit a2bce66

Browse files
authored
REF: move MaskedArray subclass attributes to dtypes (#58423)
1 parent cf0014a commit a2bce66

File tree

7 files changed

+53
-63
lines changed

7 files changed

+53
-63
lines changed

pandas/_libs/lib.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -2808,14 +2808,14 @@ def maybe_convert_objects(ndarray[object] objects,
28082808
from pandas.core.arrays import IntegerArray
28092809

28102810
# Set these values to 1 to be deterministic, match
2811-
# IntegerArray._internal_fill_value
2811+
# IntegerDtype._internal_fill_value
28122812
result[mask] = 1
28132813
result = IntegerArray(result, mask)
28142814
elif result is floats and convert_to_nullable_dtype:
28152815
from pandas.core.arrays import FloatingArray
28162816

28172817
# Set these values to 1.0 to be deterministic, match
2818-
# FloatingArray._internal_fill_value
2818+
# FloatingDtype._internal_fill_value
28192819
result[mask] = 1.0
28202820
result = FloatingArray(result, mask)
28212821

pandas/core/arrays/boolean.py

+3-7
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ class BooleanDtype(BaseMaskedDtype):
6868

6969
name: ClassVar[str] = "boolean"
7070

71+
# The value used to fill '_data' to avoid upcasting
72+
_internal_fill_value = False
73+
7174
# https://github.com/python/mypy/issues/4125
7275
# error: Signature of "type" incompatible with supertype "BaseMaskedDtype"
7376
@property
@@ -293,13 +296,6 @@ class BooleanArray(BaseMaskedArray):
293296
Length: 3, dtype: boolean
294297
"""
295298

296-
# The value used to fill '_data' to avoid upcasting
297-
_internal_fill_value = False
298-
# Fill values used for any/all
299-
# Incompatible types in assignment (expression has type "bool", base class
300-
# "BaseMaskedArray" defined the type as "<typing special form>")
301-
_truthy_value = True # type: ignore[assignment]
302-
_falsey_value = False # type: ignore[assignment]
303299
_TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"}
304300
_FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"}
305301

pandas/core/arrays/floating.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ class FloatingDtype(NumericDtype):
2323
The attributes name & type are set when these subclasses are created.
2424
"""
2525

26+
# The value used to fill '_data' to avoid upcasting
27+
_internal_fill_value = np.nan
2628
_default_np_dtype = np.dtype(np.float64)
2729
_checker = is_float_dtype
2830

@@ -113,14 +115,6 @@ class FloatingArray(NumericArray):
113115

114116
_dtype_cls = FloatingDtype
115117

116-
# The value used to fill '_data' to avoid upcasting
117-
_internal_fill_value = np.nan
118-
# Fill values used for any/all
119-
# Incompatible types in assignment (expression has type "float", base class
120-
# "BaseMaskedArray" defined the type as "<typing special form>")
121-
_truthy_value = 1.0 # type: ignore[assignment]
122-
_falsey_value = 0.0 # type: ignore[assignment]
123-
124118

125119
_dtype_docstring = """
126120
An ExtensionDtype for {dtype} data.

pandas/core/arrays/integer.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ class IntegerDtype(NumericDtype):
2323
The attributes name & type are set when these subclasses are created.
2424
"""
2525

26+
# The value used to fill '_data' to avoid upcasting
27+
_internal_fill_value = 1
2628
_default_np_dtype = np.dtype(np.int64)
2729
_checker = is_integer_dtype
2830

@@ -128,14 +130,6 @@ class IntegerArray(NumericArray):
128130

129131
_dtype_cls = IntegerDtype
130132

131-
# The value used to fill '_data' to avoid upcasting
132-
_internal_fill_value = 1
133-
# Fill values used for any/all
134-
# Incompatible types in assignment (expression has type "int", base class
135-
# "BaseMaskedArray" defined the type as "<typing special form>")
136-
_truthy_value = 1 # type: ignore[assignment]
137-
_falsey_value = 0 # type: ignore[assignment]
138-
139133

140134
_dtype_docstring = """
141135
An ExtensionDtype for {dtype} integer data.

pandas/core/arrays/masked.py

+23-37
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
Any,
66
Callable,
77
Literal,
8+
cast,
89
overload,
910
)
1011
import warnings
@@ -16,22 +17,6 @@
1617
missing as libmissing,
1718
)
1819
from pandas._libs.tslibs import is_supported_dtype
19-
from pandas._typing import (
20-
ArrayLike,
21-
AstypeArg,
22-
AxisInt,
23-
DtypeObj,
24-
FillnaOptions,
25-
InterpolateOptions,
26-
NpDtype,
27-
PositionalIndexer,
28-
Scalar,
29-
ScalarIndexer,
30-
Self,
31-
SequenceIndexer,
32-
Shape,
33-
npt,
34-
)
3520
from pandas.compat import (
3621
IS64,
3722
is_platform_windows,
@@ -97,6 +82,20 @@
9782
from pandas._typing import (
9883
NumpySorter,
9984
NumpyValueArrayLike,
85+
ArrayLike,
86+
AstypeArg,
87+
AxisInt,
88+
DtypeObj,
89+
FillnaOptions,
90+
InterpolateOptions,
91+
NpDtype,
92+
PositionalIndexer,
93+
Scalar,
94+
ScalarIndexer,
95+
Self,
96+
SequenceIndexer,
97+
Shape,
98+
npt,
10099
)
101100
from pandas._libs.missing import NAType
102101
from pandas.core.arrays import FloatingArray
@@ -111,16 +110,10 @@ class BaseMaskedArray(OpsMixin, ExtensionArray):
111110
numpy based
112111
"""
113112

114-
# The value used to fill '_data' to avoid upcasting
115-
_internal_fill_value: Scalar
116113
# our underlying data and mask are each ndarrays
117114
_data: np.ndarray
118115
_mask: npt.NDArray[np.bool_]
119116

120-
# Fill values used for any/all
121-
_truthy_value = Scalar # bool(_truthy_value) = True
122-
_falsey_value = Scalar # bool(_falsey_value) = False
123-
124117
@classmethod
125118
def _simple_new(cls, values: np.ndarray, mask: npt.NDArray[np.bool_]) -> Self:
126119
result = BaseMaskedArray.__new__(cls)
@@ -155,8 +148,9 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self:
155148
@classmethod
156149
@doc(ExtensionArray._empty)
157150
def _empty(cls, shape: Shape, dtype: ExtensionDtype) -> Self:
158-
values = np.empty(shape, dtype=dtype.type)
159-
values.fill(cls._internal_fill_value)
151+
dtype = cast(BaseMaskedDtype, dtype)
152+
values: np.ndarray = np.empty(shape, dtype=dtype.type)
153+
values.fill(dtype._internal_fill_value)
160154
mask = np.ones(shape, dtype=bool)
161155
result = cls(values, mask)
162156
if not isinstance(result, cls) or dtype != result.dtype:
@@ -917,7 +911,9 @@ def take(
917911
) -> Self:
918912
# we fill masked slots with the dtype's _internal_fill_value
919913
# to avoid upcasting
920-
data_fill_value = self._internal_fill_value if isna(fill_value) else fill_value
914+
data_fill_value = (
915+
self.dtype._internal_fill_value if isna(fill_value) else fill_value
916+
)
921917
result = take(
922918
self._data,
923919
indexer,
@@ -1397,12 +1393,7 @@ def any(
13971393
nv.validate_any((), kwargs)
13981394

13991395
values = self._data.copy()
1400-
# error: Argument 3 to "putmask" has incompatible type "object";
1401-
# expected "Union[_SupportsArray[dtype[Any]],
1402-
# _NestedSequence[_SupportsArray[dtype[Any]]],
1403-
# bool, int, float, complex, str, bytes,
1404-
# _NestedSequence[Union[bool, int, float, complex, str, bytes]]]"
1405-
np.putmask(values, self._mask, self._falsey_value) # type: ignore[arg-type]
1396+
np.putmask(values, self._mask, self.dtype._falsey_value)
14061397
result = values.any()
14071398
if skipna:
14081399
return result
@@ -1490,12 +1481,7 @@ def all(
14901481
nv.validate_all((), kwargs)
14911482

14921483
values = self._data.copy()
1493-
# error: Argument 3 to "putmask" has incompatible type "object";
1494-
# expected "Union[_SupportsArray[dtype[Any]],
1495-
# _NestedSequence[_SupportsArray[dtype[Any]]],
1496-
# bool, int, float, complex, str, bytes,
1497-
# _NestedSequence[Union[bool, int, float, complex, str, bytes]]]"
1498-
np.putmask(values, self._mask, self._truthy_value) # type: ignore[arg-type]
1484+
np.putmask(values, self._mask, self.dtype._truthy_value)
14991485
result = values.all(axis=axis)
15001486

15011487
if skipna:

pandas/core/arrays/numeric.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ def _coerce_to_data_and_mask(
221221
# we copy as need to coerce here
222222
if mask.any():
223223
values = values.copy()
224-
values[mask] = cls._internal_fill_value
224+
values[mask] = dtype_cls._internal_fill_value
225225
if inferred_type in ("string", "unicode"):
226226
# casts from str are always safe since they raise
227227
# a ValueError if the str cannot be parsed into a float

pandas/core/dtypes/dtypes.py

+20
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@
7979
DtypeObj,
8080
IntervalClosedType,
8181
Ordered,
82+
Scalar,
8283
Self,
8384
npt,
8485
type_t,
@@ -1551,6 +1552,25 @@ class BaseMaskedDtype(ExtensionDtype):
15511552

15521553
base = None
15531554
type: type
1555+
_internal_fill_value: Scalar
1556+
1557+
@property
1558+
def _truthy_value(self):
1559+
# Fill value put into masked slots by 'all' (a truthy fill cannot turn the result False)
1560+
if self.kind == "f":
1561+
return 1.0
1562+
if self.kind in "iu":
1563+
return 1
1564+
return True
1565+
1566+
@property
1567+
def _falsey_value(self):
1568+
# Fill value put into masked slots by 'any' (a falsey fill cannot turn the result True)
1569+
if self.kind == "f":
1570+
return 0.0
1571+
if self.kind in "iu":
1572+
return 0
1573+
return False
15541574

15551575
@property
15561576
def na_value(self) -> libmissing.NAType:

0 commit comments

Comments
 (0)