Skip to content

Commit 62571fe

Browse files
author
MomIsBestFriend
committed
Merge remote-tracking branch 'upstream/master' into STY-repr-batch-3
2 parents a0d5529 + 83ec9fb commit 62571fe

27 files changed

+807
-426
lines changed

asv_bench/benchmarks/boolean.py

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import numpy as np
2+
3+
import pandas as pd
4+
5+
6+
class TimeLogicalOps:
7+
def setup(self):
8+
N = 10_000
9+
left, right, lmask, rmask = np.random.randint(0, 2, size=(4, N)).astype("bool")
10+
self.left = pd.arrays.BooleanArray(left, lmask)
11+
self.right = pd.arrays.BooleanArray(right, rmask)
12+
13+
def time_or_scalar(self):
14+
self.left | True
15+
self.left | False
16+
17+
def time_or_array(self):
18+
self.left | self.right
19+
20+
def time_and_scalar(self):
21+
self.left & True
22+
self.left & False
23+
24+
def time_and_array(self):
25+
self.left & self.right
26+
27+
def time_xor_scalar(self):
28+
self.left ^ True
29+
self.left ^ False
30+
31+
def time_xor_array(self):
32+
self.left ^ self.right

doc/source/index.rst.template

+1
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ See the :ref:`overview` for more detail about what's in the library.
7373
* :doc:`user_guide/missing_data`
7474
* :doc:`user_guide/categorical`
7575
* :doc:`user_guide/integer_na`
76+
* :doc:`user_guide/boolean`
7677
* :doc:`user_guide/visualization`
7778
* :doc:`user_guide/computation`
7879
* :doc:`user_guide/groupby`

doc/source/user_guide/boolean.rst

+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
.. currentmodule:: pandas
2+
3+
.. ipython:: python
4+
:suppress:
5+
6+
import pandas as pd
7+
import numpy as np
8+
9+
.. _boolean:
10+
11+
**************************
12+
Nullable Boolean Data Type
13+
**************************
14+
15+
.. versionadded:: 1.0.0
16+
17+
.. _boolean.kleene:
18+
19+
Kleene Logical Operations
20+
-------------------------
21+
22+
:class:`arrays.BooleanArray` implements `Kleene Logic`_ (sometimes called three-valued logic) for
23+
logical operations like ``&`` (and), ``|`` (or) and ``^`` (exclusive-or).
24+
25+
This table demonstrates the results for every combination. These operations are symmetrical,
26+
so flipping the left- and right-hand side makes no difference in the result.
27+
28+
================= =========
29+
Expression Result
30+
================= =========
31+
``True & True`` ``True``
32+
``True & False`` ``False``
33+
``True & NA`` ``NA``
34+
``False & False`` ``False``
35+
``False & NA`` ``False``
36+
``NA & NA`` ``NA``
37+
``True | True`` ``True``
38+
``True | False`` ``True``
39+
``True | NA`` ``True``
40+
``False | False`` ``False``
41+
``False | NA`` ``NA``
42+
``NA | NA`` ``NA``
43+
``True ^ True`` ``False``
44+
``True ^ False`` ``True``
45+
``True ^ NA`` ``NA``
46+
``False ^ False`` ``False``
47+
``False ^ NA`` ``NA``
48+
``NA ^ NA`` ``NA``
49+
================= =========
50+
51+
When an ``NA`` is present in an operation, the output value is ``NA`` only if
52+
the result cannot be determined solely based on the other input. For example,
53+
``True | NA`` is ``True``, because both ``True | True`` and ``True | False``
54+
are ``True``. In that case, we don't actually need to consider the value
55+
of the ``NA``.
56+
57+
On the other hand, ``True & NA`` is ``NA``. The result depends on whether
58+
the ``NA`` really is ``True`` or ``False``, since ``True & True`` is ``True``,
59+
but ``True & False`` is ``False``, so we can't determine the output.
60+
61+
62+
This differs from how ``np.nan`` behaves in logical operations. Pandas treats
63+
``np.nan`` as *always false in the output*.
64+
65+
In ``or``
66+
67+
.. ipython:: python
68+
69+
pd.Series([True, False, np.nan], dtype="object") | True
70+
pd.Series([True, False, np.nan], dtype="boolean") | True
71+
72+
In ``and``
73+
74+
.. ipython:: python
75+
76+
pd.Series([True, False, np.nan], dtype="object") & True
77+
pd.Series([True, False, np.nan], dtype="boolean") & True
78+
79+
.. _Kleene Logic: https://en.wikipedia.org/wiki/Three-valued_logic#Kleene_and_Priest_logics

doc/source/user_guide/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ Further information on any specific method can be obtained in the
3030
missing_data
3131
categorical
3232
integer_na
33+
boolean
3334
visualization
3435
computation
3536
groupby

doc/source/whatsnew/v1.0.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
600600
- In :func:`concat` the default value for ``sort`` has been changed from ``None`` to ``False`` (:issue:`20613`)
601601
- Removed previously deprecated "raise_conflict" argument from :meth:`DataFrame.update`, use "errors" instead (:issue:`23585`)
602602
- Removed previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, :meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, use "periods" instead (:issue:`22458`)
603+
- Removed previously deprecated keywords ``how``, ``fill_method``, and ``limit`` from :meth:`DataFrame.resample` (:issue:`30139`)
603604
- Passing an integer to :meth:`Series.fillna` or :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype now raises ``TypeError`` (:issue:`24694`)
604605
- Passing multiple axes to :meth:`DataFrame.dropna` is no longer supported (:issue:`20995`)
605606
- Removed previously deprecated :meth:`Series.nonzero`, use ``to_numpy().nonzero()`` instead (:issue:`24048`)
@@ -621,6 +622,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
621622
- Changed :meth:`Timedelta.resolution` to match the behavior of the standard library ``datetime.timedelta.resolution``, for the old behavior, use :meth:`Timedelta.resolution_string` (:issue:`26839`)
622623
- Removed previously deprecated :attr:`Timestamp.weekday_name`, :attr:`DatetimeIndex.weekday_name`, and :attr:`Series.dt.weekday_name` (:issue:`18164`)
623624
- Removed previously deprecated ``errors`` argument in :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` (:issue:`22644`)
625+
- Changed the default value for ``ordered`` in :class:`CategoricalDtype` from ``None`` to ``False`` (:issue:`26336`)
624626
- :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` now require "labels" as the first argument and "axis" as an optional named parameter (:issue:`30089`)
625627
-
626628

pandas/core/arrays/boolean.py

+25-15
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,9 @@ class BooleanArray(ExtensionArray, ExtensionOpsMixin):
184184
represented by 2 numpy arrays: a boolean array with the data and
185185
a boolean array with the mask (True indicating missing).
186186
187+
BooleanArray implements Kleene logic (sometimes called three-valued
188+
logic) for logical operations. See :ref:`boolean.kleene` for more.
189+
187190
To construct a BooleanArray from generic array-like input, use
188191
:func:`pandas.array` specifying ``dtype="boolean"`` (see examples
189192
below).
@@ -283,7 +286,7 @@ def __getitem__(self, item):
283286

284287
def _coerce_to_ndarray(self, dtype=None, na_value: "Scalar" = libmissing.NA):
285288
"""
286-
Coerce to an ndarary of object dtype or bool dtype (if force_bool=True).
289+
Coerce to an ndarray of object dtype or bool dtype (if force_bool=True).
287290
288291
Parameters
289292
----------
@@ -565,33 +568,40 @@ def logical_method(self, other):
565568
# Rely on pandas to unbox and dispatch to us.
566569
return NotImplemented
567570

571+
assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"}
568572
other = lib.item_from_zerodim(other)
573+
other_is_booleanarray = isinstance(other, BooleanArray)
574+
other_is_scalar = lib.is_scalar(other)
569575
mask = None
570576

571-
if isinstance(other, BooleanArray):
577+
if other_is_booleanarray:
572578
other, mask = other._data, other._mask
573579
elif is_list_like(other):
574580
other = np.asarray(other, dtype="bool")
575581
if other.ndim > 1:
576582
raise NotImplementedError(
577583
"can only perform ops with 1-d structures"
578584
)
579-
if len(self) != len(other):
580-
raise ValueError("Lengths must match to compare")
581585
other, mask = coerce_to_array(other, copy=False)
586+
elif isinstance(other, np.bool_):
587+
other = other.item()
588+
589+
if other_is_scalar and not (other is libmissing.NA or lib.is_bool(other)):
590+
raise TypeError(
591+
"'other' should be pandas.NA or a bool. Got {} instead.".format(
592+
type(other).__name__
593+
)
594+
)
582595

583-
# numpy will show a DeprecationWarning on invalid elementwise
584-
# comparisons, this will raise in the future
585-
with warnings.catch_warnings():
586-
warnings.filterwarnings("ignore", "elementwise", FutureWarning)
587-
with np.errstate(all="ignore"):
588-
result = op(self._data, other)
596+
if not other_is_scalar and len(self) != len(other):
597+
raise ValueError("Lengths must match to compare")
589598

590-
# nans propagate
591-
if mask is None:
592-
mask = self._mask
593-
else:
594-
mask = self._mask | mask
599+
if op.__name__ in {"or_", "ror_"}:
600+
result, mask = ops.kleene_or(self._data, other, self._mask, mask)
601+
elif op.__name__ in {"and_", "rand_"}:
602+
result, mask = ops.kleene_and(self._data, other, self._mask, mask)
603+
elif op.__name__ in {"xor", "rxor"}:
604+
result, mask = ops.kleene_xor(self._data, other, self._mask, mask)
595605

596606
return BooleanArray(result, mask)
597607

pandas/core/arrays/categorical.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ def __init__(
328328
# sanitize input
329329
if is_categorical_dtype(values):
330330
if dtype.categories is None:
331-
dtype = CategoricalDtype(values.categories, dtype._ordered)
331+
dtype = CategoricalDtype(values.categories, dtype.ordered)
332332
elif not isinstance(values, (ABCIndexClass, ABCSeries)):
333333
# sanitize_array coerces np.nan to a string under certain versions
334334
# of numpy
@@ -351,7 +351,7 @@ def __init__(
351351
codes, categories = factorize(values, sort=True)
352352
except TypeError:
353353
codes, categories = factorize(values, sort=False)
354-
if dtype._ordered:
354+
if dtype.ordered:
355355
# raise, as we don't have a sortable data structure and so
356356
# the user should give us one by specifying categories
357357
raise TypeError(
@@ -367,7 +367,7 @@ def __init__(
367367
)
368368

369369
# we're inferring from values
370-
dtype = CategoricalDtype(categories, dtype._ordered)
370+
dtype = CategoricalDtype(categories, dtype.ordered)
371371

372372
elif is_categorical_dtype(values):
373373
old_codes = (
@@ -437,7 +437,7 @@ def ordered(self) -> Ordered:
437437
"""
438438
Whether the categories have an ordered relationship.
439439
"""
440-
return self.dtype._ordered
440+
return self.dtype.ordered
441441

442442
@property
443443
def dtype(self) -> CategoricalDtype:
@@ -833,7 +833,7 @@ def set_categories(self, new_categories, ordered=None, rename=False, inplace=Fal
833833
"""
834834
inplace = validate_bool_kwarg(inplace, "inplace")
835835
if ordered is None:
836-
ordered = self.dtype._ordered
836+
ordered = self.dtype.ordered
837837
new_dtype = CategoricalDtype(new_categories, ordered=ordered)
838838

839839
cat = self if inplace else self.copy()

pandas/core/construction.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -558,7 +558,7 @@ def _try_cast(
558558
# that Categorical is the only array type for 'category'.
559559
dtype = cast(CategoricalDtype, dtype)
560560
subarr = dtype.construct_array_type()(
561-
arr, dtype.categories, ordered=dtype._ordered
561+
arr, dtype.categories, ordered=dtype.ordered
562562
)
563563
elif is_extension_array_dtype(dtype):
564564
# create an extension array from its dtype

0 commit comments

Comments
 (0)