Skip to content

PERF: improve conversion to BooleanArray from int/float array #30095

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Dec 9, 2019
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ type dedicated to boolean data that can hold missing values. With the default
``bool`` data type based on a NumPy bool array, the column can only hold
True or False values and not missing values. This new :class:`BooleanDtype`
can store missing values as well by keeping track of this in a separate mask.
(:issue:`29555`)
(:issue:`29555`, :issue:`30095`)

.. ipython:: python

Expand Down
15 changes: 13 additions & 2 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
is_integer,
is_integer_dtype,
is_list_like,
is_numeric_dtype,
is_scalar,
pandas_dtype,
)
Expand Down Expand Up @@ -130,9 +131,19 @@ def coerce_to_array(values, mask=None, copy: bool = False):
if isinstance(values, np.ndarray) and values.dtype == np.bool_:
if copy:
values = values.copy()
elif isinstance(values, np.ndarray) and is_numeric_dtype(values.dtype):
mask_values = isna(values)

values_bool = np.zeros(len(values), dtype=bool)
values_bool[~mask_values] = values[~mask_values].astype(bool)

if not np.all(
values_bool[~mask_values].astype(values.dtype) == values[~mask_values]
):
raise TypeError("Need to pass bool-like values")

values = values_bool
else:
# TODO conversion from integer/float ndarray can be done more efficiently
# (avoid roundtrip through object)
values_object = np.asarray(values, dtype=object)

inferred_dtype = lib.infer_dtype(values_object, skipna=True)
Expand Down
25 changes: 19 additions & 6 deletions pandas/tests/arrays/test_boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,24 +133,37 @@ def test_to_boolean_array_error(values):
pd.array(values, dtype="boolean")


def test_to_boolean_array_integer_like():
# integers of 0's and 1's
result = pd.array([1, 0, 1, 0], dtype="boolean")
def test_to_boolean_array_from_integer_array():
result = pd.array(np.array([1, 0, 1, 0]), dtype="boolean")
expected = pd.array([True, False, True, False], dtype="boolean")
tm.assert_extension_array_equal(result, expected)

result = pd.array(np.array([1, 0, 1, 0]), dtype="boolean")
# with missing values
result = pd.array(np.array([1, 0, 1, None]), dtype="boolean")
expected = pd.array([True, False, True, None], dtype="boolean")
tm.assert_extension_array_equal(result, expected)


def test_to_boolean_array_from_float_array():
result = pd.array(np.array([1.0, 0.0, 1.0, 0.0]), dtype="boolean")
expected = pd.array([True, False, True, False], dtype="boolean")
tm.assert_extension_array_equal(result, expected)

# with missing values
result = pd.array([1, 0, 1, None], dtype="boolean")
result = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean")
expected = pd.array([True, False, True, None], dtype="boolean")
tm.assert_extension_array_equal(result, expected)

result = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean")

def test_to_boolean_array_integer_like():
# integers of 0's and 1's
result = pd.array([1, 0, 1, 0], dtype="boolean")
expected = pd.array([True, False, True, False], dtype="boolean")
tm.assert_extension_array_equal(result, expected)

# with missing values
result = pd.array([1, 0, 1, None], dtype="boolean")
expected = pd.array([True, False, True, None], dtype="boolean")
tm.assert_extension_array_equal(result, expected)


Expand Down