Skip to content

Commit 76c39d5

Browse files
authored
PERF: BooleanArray._from_sequence_of_strings (pandas-dev#50613)
1 parent db4d3b0 commit 76c39d5

File tree

3 files changed

+6
-11
lines changed

3 files changed

+6
-11
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -779,6 +779,7 @@ Performance improvements
779779
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.__setitem__` when key is a null slice (:issue:`50248`)
780780
- Performance improvement in :class:`~arrays.ArrowExtensionArray` comparison methods when array contains NA (:issue:`50524`)
781781
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`49973`)
782+
- Performance improvement when parsing strings to :class:`BooleanDtype` (:issue:`50613`)
782783
- Performance improvement in :meth:`DataFrame.join` when joining on a subset of a :class:`MultiIndex` (:issue:`48611`)
783784
- Performance improvement for :meth:`MultiIndex.intersection` (:issue:`48604`)
784785
- Performance improvement in ``var`` and ``std`` for nullable dtypes (:issue:`48379`).

pandas/core/arrays/boolean.py

+5-5
Original file line number | Diff line number | Diff line change
@@ -321,17 +321,17 @@ def _from_sequence_of_strings(
321321
true_values_union = cls._TRUE_VALUES.union(true_values or [])
322322
false_values_union = cls._FALSE_VALUES.union(false_values or [])
323323

324-
def map_string(s):
325-
if isna(s):
326-
return s
327-
elif s in true_values_union:
324+
def map_string(s) -> bool:
325+
if s in true_values_union:
328326
return True
329327
elif s in false_values_union:
330328
return False
331329
else:
332330
raise ValueError(f"{s} cannot be cast to bool")
333331

334-
scalars = [map_string(x) for x in strings]
332+
scalars = np.array(strings, dtype=object)
333+
mask = isna(scalars)
334+
scalars[~mask] = list(map(map_string, scalars[~mask]))
335335
return cls._from_sequence(scalars, dtype=dtype, copy=copy)
336336

337337
_HANDLED_TYPES = (np.ndarray, numbers.Number, bool, np.bool_)

pandas/tests/extension/test_arrow.py

-6
Original file line number | Diff line number | Diff line change
@@ -294,12 +294,6 @@ def test_from_sequence_of_strings_pa_array(self, data, request):
294294
reason=f"pyarrow doesn't support parsing {pa_dtype}",
295295
)
296296
)
297-
elif pa.types.is_boolean(pa_dtype):
298-
request.node.add_marker(
299-
pytest.mark.xfail(
300-
reason="Iterating over ChunkedArray[bool] returns PyArrow scalars.",
301-
)
302-
)
303297
elif pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None:
304298
if pa_version_under7p0:
305299
request.node.add_marker(

0 commit comments

Comments
 (0)