Skip to content

Commit 1549306

Browse files
TomAugspurgerproost
authored and committed
ENH: Implement Kleene logic for BooleanArray (pandas-dev#29842)
1 parent eed8113 commit 1549306

File tree

9 files changed

+520
-43
lines changed

9 files changed

+520
-43
lines changed

asv_bench/benchmarks/boolean.py

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import numpy as np
2+
3+
import pandas as pd
4+
5+
6+
class TimeLogicalOps:
7+
def setup(self):
8+
N = 10_000
9+
left, right, lmask, rmask = np.random.randint(0, 2, size=(4, N)).astype("bool")
10+
self.left = pd.arrays.BooleanArray(left, lmask)
11+
self.right = pd.arrays.BooleanArray(right, rmask)
12+
13+
def time_or_scalar(self):
14+
self.left | True
15+
self.left | False
16+
17+
def time_or_array(self):
18+
self.left | self.right
19+
20+
def time_and_scalar(self):
21+
self.left & True
22+
self.left & False
23+
24+
def time_and_array(self):
25+
self.left & self.right
26+
27+
def time_xor_scalar(self):
28+
self.left ^ True
29+
self.left ^ False
30+
31+
def time_xor_array(self):
32+
self.left ^ self.right

doc/source/index.rst.template

+1
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ See the :ref:`overview` for more detail about what's in the library.
7373
* :doc:`user_guide/missing_data`
7474
* :doc:`user_guide/categorical`
7575
* :doc:`user_guide/integer_na`
76+
* :doc:`user_guide/boolean`
7677
* :doc:`user_guide/visualization`
7778
* :doc:`user_guide/computation`
7879
* :doc:`user_guide/groupby`

doc/source/user_guide/boolean.rst

+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
.. currentmodule:: pandas
2+
3+
.. ipython:: python
4+
:suppress:
5+
6+
import pandas as pd
7+
import numpy as np
8+
9+
.. _boolean:
10+
11+
**************************
12+
Nullable Boolean Data Type
13+
**************************
14+
15+
.. versionadded:: 1.0.0
16+
17+
.. _boolean.kleene:
18+
19+
Kleene Logical Operations
20+
-------------------------
21+
22+
:class:`arrays.BooleanArray` implements `Kleene Logic`_ (sometimes called three-valued logic) for
23+
logical operations like ``&`` (and), ``|`` (or) and ``^`` (exclusive-or).
24+
25+
This table demonstrates the results for every combination. These operations are symmetrical,
26+
so flipping the left- and right-hand side makes no difference in the result.
27+
28+
================= =========
29+
Expression Result
30+
================= =========
31+
``True & True`` ``True``
32+
``True & False`` ``False``
33+
``True & NA`` ``NA``
34+
``False & False`` ``False``
35+
``False & NA`` ``False``
36+
``NA & NA`` ``NA``
37+
``True | True`` ``True``
38+
``True | False`` ``True``
39+
``True | NA`` ``True``
40+
``False | False`` ``False``
41+
``False | NA`` ``NA``
42+
``NA | NA`` ``NA``
43+
``True ^ True`` ``False``
44+
``True ^ False`` ``True``
45+
``True ^ NA`` ``NA``
46+
``False ^ False`` ``False``
47+
``False ^ NA`` ``NA``
48+
``NA ^ NA`` ``NA``
49+
================= =========
50+
51+
When an ``NA`` is present in an operation, the output value is ``NA`` only if
52+
the result cannot be determined solely based on the other input. For example,
53+
``True | NA`` is ``True``, because both ``True | True`` and ``True | False``
54+
are ``True``. In that case, we don't actually need to consider the value
55+
of the ``NA``.
56+
57+
On the other hand, ``True & NA`` is ``NA``. The result depends on whether
58+
the ``NA`` really is ``True`` or ``False``, since ``True & True`` is ``True``,
59+
but ``True & False`` is ``False``, so we can't determine the output.
60+
61+
62+
This differs from how ``np.nan`` behaves in logical operations. Pandas treated
63+
``np.nan`` as *always false in the output*.
64+
65+
In ``or``
66+
67+
.. ipython:: python
68+
69+
pd.Series([True, False, np.nan], dtype="object") | True
70+
pd.Series([True, False, np.nan], dtype="boolean") | True
71+
72+
In ``and``
73+
74+
.. ipython:: python
75+
76+
pd.Series([True, False, np.nan], dtype="object") & True
77+
pd.Series([True, False, np.nan], dtype="boolean") & True
78+
79+
.. _Kleene Logic: https://en.wikipedia.org/wiki/Three-valued_logic#Kleene_and_Priest_logics

doc/source/user_guide/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ Further information on any specific method can be obtained in the
3030
missing_data
3131
categorical
3232
integer_na
33+
boolean
3334
visualization
3435
computation
3536
groupby

pandas/core/arrays/boolean.py

+25-15
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,9 @@ class BooleanArray(ExtensionArray, ExtensionOpsMixin):
184184
represented by 2 numpy arrays: a boolean array with the data and
185185
a boolean array with the mask (True indicating missing).
186186
187+
BooleanArray implements Kleene logic (sometimes called three-valued
188+
logic) for logical operations. See :ref:`boolean.kleene` for more.
189+
187190
To construct a BooleanArray from generic array-like input, use
188191
:func:`pandas.array` specifying ``dtype="boolean"`` (see examples
189192
below).
@@ -283,7 +286,7 @@ def __getitem__(self, item):
283286

284287
def _coerce_to_ndarray(self, dtype=None, na_value: "Scalar" = libmissing.NA):
285288
"""
286-
Coerce to an ndarary of object dtype or bool dtype (if force_bool=True).
289+
Coerce to an ndarray of object dtype or bool dtype (if force_bool=True).
287290
288291
Parameters
289292
----------
@@ -565,33 +568,40 @@ def logical_method(self, other):
565568
# Rely on pandas to unbox and dispatch to us.
566569
return NotImplemented
567570

571+
assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"}
568572
other = lib.item_from_zerodim(other)
573+
other_is_booleanarray = isinstance(other, BooleanArray)
574+
other_is_scalar = lib.is_scalar(other)
569575
mask = None
570576

571-
if isinstance(other, BooleanArray):
577+
if other_is_booleanarray:
572578
other, mask = other._data, other._mask
573579
elif is_list_like(other):
574580
other = np.asarray(other, dtype="bool")
575581
if other.ndim > 1:
576582
raise NotImplementedError(
577583
"can only perform ops with 1-d structures"
578584
)
579-
if len(self) != len(other):
580-
raise ValueError("Lengths must match to compare")
581585
other, mask = coerce_to_array(other, copy=False)
586+
elif isinstance(other, np.bool_):
587+
other = other.item()
588+
589+
if other_is_scalar and not (other is libmissing.NA or lib.is_bool(other)):
590+
raise TypeError(
591+
"'other' should be pandas.NA or a bool. Got {} instead.".format(
592+
type(other).__name__
593+
)
594+
)
582595

583-
# numpy will show a DeprecationWarning on invalid elementwise
584-
# comparisons, this will raise in the future
585-
with warnings.catch_warnings():
586-
warnings.filterwarnings("ignore", "elementwise", FutureWarning)
587-
with np.errstate(all="ignore"):
588-
result = op(self._data, other)
596+
if not other_is_scalar and len(self) != len(other):
597+
raise ValueError("Lengths must match to compare")
589598

590-
# nans propagate
591-
if mask is None:
592-
mask = self._mask
593-
else:
594-
mask = self._mask | mask
599+
if op.__name__ in {"or_", "ror_"}:
600+
result, mask = ops.kleene_or(self._data, other, self._mask, mask)
601+
elif op.__name__ in {"and_", "rand_"}:
602+
result, mask = ops.kleene_and(self._data, other, self._mask, mask)
603+
elif op.__name__ in {"xor", "rxor"}:
604+
result, mask = ops.kleene_xor(self._data, other, self._mask, mask)
595605

596606
return BooleanArray(result, mask)
597607

pandas/core/ops/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
_op_descriptions,
4040
)
4141
from pandas.core.ops.invalid import invalid_comparison # noqa:F401
42+
from pandas.core.ops.mask_ops import kleene_and, kleene_or, kleene_xor # noqa: F401
4243
from pandas.core.ops.methods import ( # noqa:F401
4344
add_flex_arithmetic_methods,
4445
add_special_arithmetic_methods,

pandas/core/ops/dispatch.py

+6
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,9 @@ def maybe_dispatch_ufunc_to_dunder_op(
189189
"ge",
190190
"remainder",
191191
"matmul",
192+
"or",
193+
"xor",
194+
"and",
192195
}
193196
aliases = {
194197
"subtract": "sub",
@@ -204,6 +207,9 @@ def maybe_dispatch_ufunc_to_dunder_op(
204207
"less_equal": "le",
205208
"greater": "gt",
206209
"greater_equal": "ge",
210+
"bitwise_or": "or",
211+
"bitwise_and": "and",
212+
"bitwise_xor": "xor",
207213
}
208214

209215
# For op(., Array) -> Array.__r{op}__

0 commit comments

Comments
 (0)