Skip to content

Commit efa85af

Browse files
PERF: improve IntegerArray fast constructor (#33359)
1 parent 716689a commit efa85af

File tree

5 files changed

+33
-19
lines changed

5 files changed

+33
-19
lines changed

asv_bench/benchmarks/array.py

+18
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,11 @@ def setup(self):
99
self.values_float = np.array([1.0, 0.0, 1.0, 0.0])
1010
self.values_integer = np.array([1, 0, 1, 0])
1111
self.values_integer_like = [1, 0, 1, 0]
12+
self.data = np.array([True, False, True, False])
13+
self.mask = np.array([False, False, True, False])
14+
15+
def time_constructor(self):
16+
pd.arrays.BooleanArray(self.data, self.mask)
1217

1318
def time_from_bool_array(self):
1419
pd.array(self.values_bool, dtype="boolean")
@@ -21,3 +26,16 @@ def time_from_integer_like(self):
2126

2227
def time_from_float_array(self):
2328
pd.array(self.values_float, dtype="boolean")
29+
30+
31+
class IntegerArray:
32+
def setup(self):
33+
self.values_integer = np.array([1, 0, 1, 0])
34+
self.data = np.array([1, 2, 3, 4], dtype="int64")
35+
self.mask = np.array([False, False, True, False])
36+
37+
def time_constructor(self):
38+
pd.arrays.IntegerArray(self.data, self.mask)
39+
40+
def time_from_integer_array(self):
41+
pd.array(self.values_integer, dtype="Int64")

pandas/core/arrays/boolean.py

+1 -11
Original file line number | Diff line number | Diff line change
@@ -271,18 +271,8 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
271271
if not (isinstance(values, np.ndarray) and values.dtype == np.bool_):
272272
raise TypeError(
273273
"values should be boolean numpy array. Use "
274-
"the 'array' function instead"
274+
"the 'pd.array' function instead"
275275
)
276-
if not (isinstance(mask, np.ndarray) and mask.dtype == np.bool_):
277-
raise TypeError(
278-
"mask should be boolean numpy array. Use "
279-
"the 'array' function instead"
280-
)
281-
if not values.ndim == 1:
282-
raise ValueError("values must be a 1D array")
283-
if not mask.ndim == 1:
284-
raise ValueError("mask must be a 1D array")
285-
286276
self._dtype = BooleanDtype()
287277
super().__init__(values, mask, copy=copy)
288278

pandas/core/arrays/integer.py

+2 -7
Original file line number | Diff line number | Diff line change
@@ -343,15 +343,10 @@ def dtype(self) -> _IntegerDtype:
343343
return _dtypes[str(self._data.dtype)]
344344

345345
def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
346-
if not (isinstance(values, np.ndarray) and is_integer_dtype(values.dtype)):
346+
if not (isinstance(values, np.ndarray) and values.dtype.kind in ["i", "u"]):
347347
raise TypeError(
348348
"values should be integer numpy array. Use "
349-
"the 'integer_array' function instead"
350-
)
351-
if not (isinstance(mask, np.ndarray) and is_bool_dtype(mask.dtype)):
352-
raise TypeError(
353-
"mask should be boolean numpy array. Use "
354-
"the 'integer_array' function instead"
349+
"the 'pd.array' function instead"
355350
)
356351
super().__init__(values, mask, copy=copy)
357352

pandas/core/arrays/masked.py

+11
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,17 @@ class BaseMaskedArray(ExtensionArray, ExtensionOpsMixin):
3030
_internal_fill_value: Scalar
3131

3232
def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
33+
# values is supposed to already be validated in the subclass
34+
if not (isinstance(mask, np.ndarray) and mask.dtype == np.bool_):
35+
raise TypeError(
36+
"mask should be boolean numpy array. Use "
37+
"the 'pd.array' function instead"
38+
)
39+
if not values.ndim == 1:
40+
raise ValueError("values must be a 1D array")
41+
if not mask.ndim == 1:
42+
raise ValueError("mask must be a 1D array")
43+
3344
if copy:
3445
values = values.copy()
3546
mask = mask.copy()

pandas/tests/arrays/integer/test_construction.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,7 @@ def test_integer_array_constructor():
7070
expected = integer_array([1, 2, 3, np.nan], dtype="int64")
7171
tm.assert_extension_array_equal(result, expected)
7272

73-
msg = r".* should be .* numpy array. Use the 'integer_array' function instead"
73+
msg = r".* should be .* numpy array. Use the 'pd.array' function instead"
7474
with pytest.raises(TypeError, match=msg):
7575
IntegerArray(values.tolist(), mask)
7676

0 commit comments

Comments
 (0)