diff --git a/asv_bench/benchmarks/array.py b/asv_bench/benchmarks/array.py new file mode 100644 index 0000000000000..8cbf8c8592661 --- /dev/null +++ b/asv_bench/benchmarks/array.py @@ -0,0 +1,23 @@ +import numpy as np + +import pandas as pd + + +class BooleanArray: + def setup(self): + self.values_bool = np.array([True, False, True, False]) + self.values_float = np.array([1.0, 0.0, 1.0, 0.0]) + self.values_integer = np.array([1, 0, 1, 0]) + self.values_integer_like = [1, 0, 1, 0] + + def time_from_bool_array(self): + pd.array(self.values_bool, dtype="boolean") + + def time_from_integer_array(self): + pd.array(self.values_integer, dtype="boolean") + + def time_from_integer_like(self): + pd.array(self.values_integer_like, dtype="boolean") + + def time_from_float_array(self): + pd.array(self.values_float, dtype="boolean") diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 1a07b424fa884..5261b30ef9c1e 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -156,7 +156,7 @@ type dedicated to boolean data that can hold missing values. With the default ``'bool`` data type based on a numpy bool array, the column can only hold True or False values and not missing values. This new :class:`BooleanDtype` can store missing values as well by keeping track of this in a separate mask. -(:issue:`29555`) +(:issue:`29555`, :issue:`30095`) .. ipython:: python diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index aec3397bddd16..349cbd1919e76 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -17,6 +17,7 @@ is_integer, is_integer_dtype, is_list_like, + is_numeric_dtype, is_scalar, pandas_dtype, ) @@ -130,9 +131,19 @@ def coerce_to_array(values, mask=None, copy: bool = False): if isinstance(values, np.ndarray) and values.dtype == np.bool_: if copy: values = values.copy() + elif isinstance(values, np.ndarray) and is_numeric_dtype(values.dtype): + mask_values = isna(values) + + values_bool = np.zeros(len(values), dtype=bool) + values_bool[~mask_values] = values[~mask_values].astype(bool) + + if not np.all( + values_bool[~mask_values].astype(values.dtype) == values[~mask_values] + ): + raise TypeError("Need to pass bool-like values") + + values = values_bool else: - # TODO conversion from integer/float ndarray can be done more efficiently - # (avoid roundtrip through object) values_object = np.asarray(values, dtype=object) inferred_dtype = lib.infer_dtype(values_object, skipna=True) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index a13bb8edc8e48..2b946a2a925d5 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -124,6 +124,8 @@ def test_to_boolean_array_missing_indicators(a, b): [1.0, 2.0], pd.date_range("20130101", periods=2), np.array(["foo"]), + np.array([1, 2]), + np.array([1.0, 2.0]), [np.nan, {"a": 1}], ], ) @@ -133,24 +135,37 @@ def test_to_boolean_array_error(values): pd.array(values, dtype="boolean") -def test_to_boolean_array_integer_like(): - # integers of 0's and 1's - result = pd.array([1, 0, 1, 0], dtype="boolean") +def test_to_boolean_array_from_integer_array(): + result = pd.array(np.array([1, 0, 1, 0]), dtype="boolean") expected = pd.array([True, False, True, False], dtype="boolean") tm.assert_extension_array_equal(result, expected) - result = pd.array(np.array([1, 0, 1, 0]), dtype="boolean") + # with missing values + result = pd.array(np.array([1, 0, 1, None]), dtype="boolean") + expected = pd.array([True, False, True, None], dtype="boolean") tm.assert_extension_array_equal(result, expected) + +def test_to_boolean_array_from_float_array(): result = pd.array(np.array([1.0, 0.0, 1.0, 0.0]), dtype="boolean") + expected = pd.array([True, False, True, False], dtype="boolean") tm.assert_extension_array_equal(result, expected) # with missing values - result = pd.array([1, 0, 1, None], dtype="boolean") + result = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean") expected = pd.array([True, False, True, None], dtype="boolean") tm.assert_extension_array_equal(result, expected) - result = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean") + +def test_to_boolean_array_integer_like(): + # integers of 0's and 1's + result = pd.array([1, 0, 1, 0], dtype="boolean") + expected = pd.array([True, False, True, False], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + # with missing values + result = pd.array([1, 0, 1, None], dtype="boolean") + expected = pd.array([True, False, True, None], dtype="boolean") tm.assert_extension_array_equal(result, expected)