Skip to content

Commit 7d7f885

Browse files
jorisvandenbossche authored and jreback committed
ENH: add BooleanArray extension array (#29555)
1 parent 11cb423 commit 7d7f885

File tree

15 files changed

+1668
-1
lines changed

15 files changed

+1668
-1
lines changed

doc/source/getting_started/basics.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1950,6 +1950,7 @@ sparse :class:`SparseDtype` (none) :class:`arrays.
19501950
intervals :class:`IntervalDtype` :class:`Interval` :class:`arrays.IntervalArray` :ref:`advanced.intervalindex`
19511951
nullable integer :class:`Int64Dtype`, ... (none) :class:`arrays.IntegerArray` :ref:`integer_na`
19521952
Strings :class:`StringDtype` :class:`str` :class:`arrays.StringArray` :ref:`text`
1953+
Boolean (with NA) :class:`BooleanDtype` :class:`bool` :class:`arrays.BooleanArray` :ref:`api.arrays.bool`
19531954
=================== ========================= ================== ============================= =============================
19541955

19551956
Pandas has two ways to store strings.

doc/source/reference/arrays.rst

+23
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ Nullable Integer :class:`Int64Dtype`, ... (none) :ref:`api.array
2525
Categorical :class:`CategoricalDtype` (none) :ref:`api.arrays.categorical`
2626
Sparse :class:`SparseDtype` (none) :ref:`api.arrays.sparse`
2727
Strings :class:`StringDtype` :class:`str` :ref:`api.arrays.string`
28+
Boolean (with NA) :class:`BooleanDtype` :class:`bool` :ref:`api.arrays.bool`
2829
=================== ========================= ================== =============================
2930

3031
Pandas and third-party libraries can extend NumPy's type system (see :ref:`extending.extension-types`).
@@ -485,6 +486,28 @@ The ``Series.str`` accessor is available for ``Series`` backed by a :class:`arra
485486
See :ref:`api.series.str` for more.
486487

487488

489+
.. _api.arrays.bool:
490+
491+
Boolean data with missing values
492+
--------------------------------
493+
494+
The boolean dtype (with the alias ``"boolean"``) provides support for storing
495+
boolean data (True, False values) with missing values, which is not possible
496+
with a bool :class:`numpy.ndarray`.
497+
498+
.. autosummary::
499+
:toctree: api/
500+
:template: autosummary/class_without_autosummary.rst
501+
502+
arrays.BooleanArray
503+
504+
.. autosummary::
505+
:toctree: api/
506+
:template: autosummary/class_without_autosummary.rst
507+
508+
BooleanDtype
509+
510+
488511
.. Dtype attributes which are manually listed in their docstrings: including
489512
.. it here to make sure a docstring page is built for them
490513

doc/source/whatsnew/v1.0.0.rst

+24
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,30 @@ String accessor methods returning integers will return a value with :class:`Int6
102102
We recommend explicitly using the ``string`` data type when working with strings.
103103
See :ref:`text.types` for more.
104104

105+
.. _whatsnew_100.boolean:
106+
107+
Boolean data type with missing values support
108+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
109+
110+
We've added :class:`BooleanDtype` / :class:`~arrays.BooleanArray`, an extension
111+
type dedicated to boolean data that can hold missing values. With the default
112+
``'bool'`` data type based on a numpy bool array, the column can only hold
113+
True or False values and not missing values. This new :class:`BooleanDtype`
114+
can store missing values as well by keeping track of this in a separate mask.
115+
(:issue:`29555`)
116+
117+
.. ipython:: python
118+
119+
pd.Series([True, False, None], dtype=pd.BooleanDtype())
120+
121+
You can use the alias ``"boolean"`` as well.
122+
123+
.. ipython:: python
124+
125+
s = pd.Series([True, False, None], dtype="boolean")
126+
s
127+
128+
105129
.. _whatsnew_1000.enhancements.other:
106130

107131
Other enhancements

pandas/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
IntervalDtype,
6868
DatetimeTZDtype,
6969
StringDtype,
70+
BooleanDtype,
7071
# missing
7172
isna,
7273
isnull,

pandas/arrays/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
See :ref:`extending.extension-types` for more.
55
"""
66
from pandas.core.arrays import (
7+
BooleanArray,
78
Categorical,
89
DatetimeArray,
910
IntegerArray,
@@ -16,6 +17,7 @@
1617
)
1718

1819
__all__ = [
20+
"BooleanArray",
1921
"Categorical",
2022
"DatetimeArray",
2123
"IntegerArray",

pandas/conftest.py

+14
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,20 @@ def compare_operators_no_eq_ne(request):
293293
return request.param
294294

295295

296+
@pytest.fixture(
297+
params=["__and__", "__rand__", "__or__", "__ror__", "__xor__", "__rxor__"]
298+
)
299+
def all_logical_operators(request):
300+
"""
301+
Fixture for dunder names for common logical operations
302+
303+
* |
304+
* &
305+
* ^
306+
"""
307+
return request.param
308+
309+
296310
@pytest.fixture(params=[None, "gzip", "bz2", "zip", "xz"])
297311
def compression(request):
298312
"""

pandas/core/api.py

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
from pandas.core.algorithms import factorize, unique, value_counts
1414
from pandas.core.arrays import Categorical
15+
from pandas.core.arrays.boolean import BooleanDtype
1516
from pandas.core.arrays.integer import (
1617
Int8Dtype,
1718
Int16Dtype,

pandas/core/arrays/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
ExtensionScalarOpsMixin,
55
try_cast_to_ea,
66
)
7+
from .boolean import BooleanArray # noqa: F401
78
from .categorical import Categorical # noqa: F401
89
from .datetimes import DatetimeArray # noqa: F401
910
from .integer import IntegerArray, integer_array # noqa: F401

pandas/core/arrays/base.py

+9
Original file line numberDiff line numberDiff line change
@@ -1088,6 +1088,15 @@ def _add_comparison_ops(cls):
10881088
cls.__le__ = cls._create_comparison_method(operator.le)
10891089
cls.__ge__ = cls._create_comparison_method(operator.ge)
10901090

1091+
@classmethod
1092+
def _add_logical_ops(cls):
1093+
cls.__and__ = cls._create_logical_method(operator.and_)
1094+
cls.__rand__ = cls._create_logical_method(ops.rand_)
1095+
cls.__or__ = cls._create_logical_method(operator.or_)
1096+
cls.__ror__ = cls._create_logical_method(ops.ror_)
1097+
cls.__xor__ = cls._create_logical_method(operator.xor)
1098+
cls.__rxor__ = cls._create_logical_method(ops.rxor)
1099+
10911100

10921101
class ExtensionScalarOpsMixin(ExtensionOpsMixin):
10931102
"""

0 commit comments

Comments
 (0)