diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 9d44c770f58ef..430f2ca0fe837 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -778,6 +778,7 @@ Performance improvements - Performance improvement in :meth:`~arrays.ArrowExtensionArray.__setitem__` when key is a null slice (:issue:`50248`) - Performance improvement in :class:`~arrays.ArrowExtensionArray` comparison methods when array contains NA (:issue:`50524`) - Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`49973`) +- Performance improvement when parsing strings to :class:`BooleanDtype` (:issue:`50613`) - Performance improvement in :meth:`DataFrame.join` when joining on a subset of a :class:`MultiIndex` (:issue:`48611`) - Performance improvement for :meth:`MultiIndex.intersection` (:issue:`48604`) - Performance improvement in ``var`` and ``std`` for nullable dtypes (:issue:`48379`). diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 8ac665b1b2e11..5f8169829ab6b 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -321,17 +321,17 @@ def _from_sequence_of_strings( true_values_union = cls._TRUE_VALUES.union(true_values or []) false_values_union = cls._FALSE_VALUES.union(false_values or []) - def map_string(s): - if isna(s): - return s - elif s in true_values_union: + def map_string(s) -> bool: + if s in true_values_union: return True elif s in false_values_union: return False else: raise ValueError(f"{s} cannot be cast to bool") - scalars = [map_string(x) for x in strings] + scalars = np.array(strings, dtype=object) + mask = isna(scalars) + scalars[~mask] = list(map(map_string, scalars[~mask])) return cls._from_sequence(scalars, dtype=dtype, copy=copy) _HANDLED_TYPES = (np.ndarray, numbers.Number, bool, np.bool_) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 8bb82bf644680..c02fa0aecdacc 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -294,12 +294,6 @@ def test_from_sequence_of_strings_pa_array(self, data, request): reason=f"pyarrow doesn't support parsing {pa_dtype}", ) ) - elif pa.types.is_boolean(pa_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Iterating over ChunkedArray[bool] returns PyArrow scalars.", - ) - ) elif pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None: if pa_version_under7p0: request.node.add_marker(