Skip to content

Commit 02770f4

Browse files
Yu Wang, proost
Yu Wang
authored and committed
PERF: improve conversion to BooleanArray from int/float array (pandas-dev#30095)
1 parent 3ab8d2f commit 02770f4

File tree

4 files changed

+58
-9
lines changed

4 files changed

+58
-9
lines changed

asv_bench/benchmarks/array.py

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import numpy as np
2+
3+
import pandas as pd
4+
5+
6+
class BooleanArray:
    """Benchmarks for building a ``"boolean"``-dtype array with ``pd.array``.

    ``setup`` prepares four inputs that all encode the same sequence
    (True, False, True, False); each ``time_*`` method converts one of them
    with ``pd.array(..., dtype="boolean")``.
    """

    def setup(self):
        """Create the input containers used by the timing methods."""
        self.values_bool = np.array([True, False, True, False])
        self.values_float = np.array([1.0, 0.0, 1.0, 0.0])
        self.values_integer = np.array([1, 0, 1, 0])
        # A plain Python list (not an ndarray) of 0/1 integers.
        self.values_integer_like = [1, 0, 1, 0]

    def time_from_bool_array(self):
        """Convert the NumPy bool array."""
        pd.array(self.values_bool, dtype="boolean")

    def time_from_integer_array(self):
        """Convert the NumPy integer array of 0s and 1s."""
        pd.array(self.values_integer, dtype="boolean")

    def time_from_integer_like(self):
        """Convert the Python list of 0/1 integers."""
        pd.array(self.values_integer_like, dtype="boolean")

    def time_from_float_array(self):
        """Convert the NumPy float array of 0.0s and 1.0s."""
        pd.array(self.values_float, dtype="boolean")

doc/source/whatsnew/v1.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ type dedicated to boolean data that can hold missing values. With the default
156156
``'bool'`` data type based on a numpy bool array, the column can only hold
157157
True or False values and not missing values. This new :class:`BooleanDtype`
158158
can store missing values as well by keeping track of this in a separate mask.
159-
(:issue:`29555`)
159+
(:issue:`29555`, :issue:`30095`)
160160

161161
.. ipython:: python
162162

pandas/core/arrays/boolean.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
is_integer,
1818
is_integer_dtype,
1919
is_list_like,
20+
is_numeric_dtype,
2021
is_scalar,
2122
pandas_dtype,
2223
)
@@ -130,9 +131,19 @@ def coerce_to_array(values, mask=None, copy: bool = False):
130131
if isinstance(values, np.ndarray) and values.dtype == np.bool_:
131132
if copy:
132133
values = values.copy()
134+
elif isinstance(values, np.ndarray) and is_numeric_dtype(values.dtype):
135+
mask_values = isna(values)
136+
137+
values_bool = np.zeros(len(values), dtype=bool)
138+
values_bool[~mask_values] = values[~mask_values].astype(bool)
139+
140+
if not np.all(
141+
values_bool[~mask_values].astype(values.dtype) == values[~mask_values]
142+
):
143+
raise TypeError("Need to pass bool-like values")
144+
145+
values = values_bool
133146
else:
134-
# TODO conversion from integer/float ndarray can be done more efficiently
135-
# (avoid roundtrip through object)
136147
values_object = np.asarray(values, dtype=object)
137148

138149
inferred_dtype = lib.infer_dtype(values_object, skipna=True)

pandas/tests/arrays/test_boolean.py

+21-6
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ def test_to_boolean_array_missing_indicators(a, b):
124124
[1.0, 2.0],
125125
pd.date_range("20130101", periods=2),
126126
np.array(["foo"]),
127+
np.array([1, 2]),
128+
np.array([1.0, 2.0]),
127129
[np.nan, {"a": 1}],
128130
],
129131
)
@@ -133,24 +135,37 @@ def test_to_boolean_array_error(values):
133135
pd.array(values, dtype="boolean")
134136

135137

136-
def test_to_boolean_array_integer_like():
137-
# integers of 0's and 1's
138-
result = pd.array([1, 0, 1, 0], dtype="boolean")
138+
def test_to_boolean_array_from_integer_array():
139+
result = pd.array(np.array([1, 0, 1, 0]), dtype="boolean")
139140
expected = pd.array([True, False, True, False], dtype="boolean")
140141
tm.assert_extension_array_equal(result, expected)
141142

142-
result = pd.array(np.array([1, 0, 1, 0]), dtype="boolean")
143+
# with missing values
144+
result = pd.array(np.array([1, 0, 1, None]), dtype="boolean")
145+
expected = pd.array([True, False, True, None], dtype="boolean")
143146
tm.assert_extension_array_equal(result, expected)
144147

148+
149+
def test_to_boolean_array_from_float_array():
145150
result = pd.array(np.array([1.0, 0.0, 1.0, 0.0]), dtype="boolean")
151+
expected = pd.array([True, False, True, False], dtype="boolean")
146152
tm.assert_extension_array_equal(result, expected)
147153

148154
# with missing values
149-
result = pd.array([1, 0, 1, None], dtype="boolean")
155+
result = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean")
150156
expected = pd.array([True, False, True, None], dtype="boolean")
151157
tm.assert_extension_array_equal(result, expected)
152158

153-
result = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean")
159+
160+
def test_to_boolean_array_integer_like():
161+
# integers of 0's and 1's
162+
result = pd.array([1, 0, 1, 0], dtype="boolean")
163+
expected = pd.array([True, False, True, False], dtype="boolean")
164+
tm.assert_extension_array_equal(result, expected)
165+
166+
# with missing values
167+
result = pd.array([1, 0, 1, None], dtype="boolean")
168+
expected = pd.array([True, False, True, None], dtype="boolean")
154169
tm.assert_extension_array_equal(result, expected)
155170

156171

0 commit comments

Comments
 (0)