diff --git a/asv_bench/benchmarks/array.py b/asv_bench/benchmarks/array.py index 8cbf8c8592661..103df0fd94847 100644 --- a/asv_bench/benchmarks/array.py +++ b/asv_bench/benchmarks/array.py @@ -9,6 +9,11 @@ def setup(self): self.values_float = np.array([1.0, 0.0, 1.0, 0.0]) self.values_integer = np.array([1, 0, 1, 0]) self.values_integer_like = [1, 0, 1, 0] + self.data = np.array([True, False, True, False]) + self.mask = np.array([False, False, True, False]) + + def time_constructor(self): + pd.arrays.BooleanArray(self.data, self.mask) def time_from_bool_array(self): pd.array(self.values_bool, dtype="boolean") @@ -21,3 +26,16 @@ def time_from_integer_like(self): def time_from_float_array(self): pd.array(self.values_float, dtype="boolean") + + +class IntegerArray: + def setup(self): + self.values_integer = np.array([1, 0, 1, 0]) + self.data = np.array([1, 2, 3, 4], dtype="int64") + self.mask = np.array([False, False, True, False]) + + def time_constructor(self): + pd.arrays.IntegerArray(self.data, self.mask) + + def time_from_integer_array(self): + pd.array(self.values_integer, dtype="Int64") diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index e85534def6b97..b78a10efa04a0 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -271,18 +271,8 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): if not (isinstance(values, np.ndarray) and values.dtype == np.bool_): raise TypeError( "values should be boolean numpy array. Use " - "the 'array' function instead" + "the 'pd.array' function instead" ) - if not (isinstance(mask, np.ndarray) and mask.dtype == np.bool_): - raise TypeError( - "mask should be boolean numpy array. Use " - "the 'array' function instead" - ) - if not values.ndim == 1: - raise ValueError("values must be a 1D array") - if not mask.ndim == 1: - raise ValueError("mask must be a 1D array") - self._dtype = BooleanDtype() super().__init__(values, mask, copy=copy) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index d47a396bbb14e..5d6f49852e696 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -343,15 +343,10 @@ def dtype(self) -> _IntegerDtype: return _dtypes[str(self._data.dtype)] def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): - if not (isinstance(values, np.ndarray) and is_integer_dtype(values.dtype)): + if not (isinstance(values, np.ndarray) and values.dtype.kind in ["i", "u"]): raise TypeError( "values should be integer numpy array. Use " - "the 'integer_array' function instead" - ) - if not (isinstance(mask, np.ndarray) and is_bool_dtype(mask.dtype)): - raise TypeError( - "mask should be boolean numpy array. Use " - "the 'integer_array' function instead" + "the 'pd.array' function instead" ) super().__init__(values, mask, copy=copy) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index d23d26d870f75..fc5b307bd5754 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -30,6 +30,17 @@ class BaseMaskedArray(ExtensionArray, ExtensionOpsMixin): _internal_fill_value: Scalar def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): + # values is supposed to already be validated in the subclass + if not (isinstance(mask, np.ndarray) and mask.dtype == np.bool_): + raise TypeError( + "mask should be boolean numpy array. Use " + "the 'pd.array' function instead" + ) + if not values.ndim == 1: + raise ValueError("values must be a 1D array") + if not mask.ndim == 1: + raise ValueError("mask must be a 1D array") + if copy: values = values.copy() mask = mask.copy() diff --git a/pandas/tests/arrays/integer/test_construction.py b/pandas/tests/arrays/integer/test_construction.py index 4a62a35e23d93..43936d8b95bd6 100644 --- a/pandas/tests/arrays/integer/test_construction.py +++ b/pandas/tests/arrays/integer/test_construction.py @@ -70,7 +70,7 @@ def test_integer_array_constructor(): expected = integer_array([1, 2, 3, np.nan], dtype="int64") tm.assert_extension_array_equal(result, expected) - msg = r".* should be .* numpy array. Use the 'integer_array' function instead" + msg = r".* should be .* numpy array. Use the 'pd.array' function instead" with pytest.raises(TypeError, match=msg): IntegerArray(values.tolist(), mask)