Skip to content

Commit 026833c

Browse files
lukemanley authored and meeseeksmachine committed
Backport PR pandas-dev#51688: BUG: ArrowExtensionArray logical ops raising KeyError
1 parent 7ceca1a commit 026833c

File tree

3 files changed

+155
-5
lines changed

3 files changed

+155
-5
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -1368,6 +1368,7 @@ ExtensionArray
13681368
- Bug in :class:`Series` constructor unnecessarily overflowing for nullable unsigned integer dtypes (:issue:`38798`, :issue:`25880`)
13691369
- Bug in setting non-string value into ``StringArray`` raising ``ValueError`` instead of ``TypeError`` (:issue:`49632`)
13701370
- Bug in :meth:`DataFrame.reindex` not honoring the default ``copy=True`` keyword in case of columns with ExtensionDtype (and as a result also selecting multiple columns with getitem (``[]``) didn't correctly result in a copy) (:issue:`51197`)
1371+
- Bug in :class:`~arrays.ArrowExtensionArray` logical operations ``&`` and ``|`` raising ``KeyError`` (:issue:`51688`)
13711372

13721373
Styler
13731374
^^^^^^

pandas/core/arrays/arrow/array.py

+10-5
Original file line number | Diff line number | Diff line change
@@ -81,10 +81,10 @@
8181
}
8282

8383
ARROW_LOGICAL_FUNCS = {
84-
"and": pc.and_kleene,
85-
"rand": lambda x, y: pc.and_kleene(y, x),
86-
"or": pc.or_kleene,
87-
"ror": lambda x, y: pc.or_kleene(y, x),
84+
"and_": pc.and_kleene,
85+
"rand_": lambda x, y: pc.and_kleene(y, x),
86+
"or_": pc.or_kleene,
87+
"ror_": lambda x, y: pc.or_kleene(y, x),
8888
"xor": pc.xor,
8989
"rxor": lambda x, y: pc.xor(y, x),
9090
}
@@ -491,7 +491,12 @@ def _evaluate_op_method(self, other, op, arrow_funcs):
491491
elif isinstance(other, (np.ndarray, list)):
492492
result = pc_func(self._data, pa.array(other, from_pandas=True))
493493
elif is_scalar(other):
494-
result = pc_func(self._data, pa.scalar(other))
494+
if isna(other) and op.__name__ in ARROW_LOGICAL_FUNCS:
495+
# pyarrow kleene ops require null to be typed
496+
pa_scalar = pa.scalar(None, type=self._data.type)
497+
else:
498+
pa_scalar = pa.scalar(other)
499+
result = pc_func(self._data, pa_scalar)
495500
else:
496501
raise NotImplementedError(
497502
f"{op.__name__} not implemented for {type(other)}"

pandas/tests/extension/test_arrow.py

+144
Original file line number | Diff line number | Diff line change
@@ -1270,6 +1270,150 @@ def test_invalid_other_comp(self, data, comparison_op):
12701270
comparison_op(data, object())
12711271

12721272

1273+
class TestLogicalOps:
1274+
"""Various Series and DataFrame logical ops methods."""
1275+
1276+
def test_kleene_or(self):
1277+
a = pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]")
1278+
b = pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]")
1279+
result = a | b
1280+
expected = pd.Series(
1281+
[True, True, True, True, False, None, True, None, None],
1282+
dtype="boolean[pyarrow]",
1283+
)
1284+
tm.assert_series_equal(result, expected)
1285+
1286+
result = b | a
1287+
tm.assert_series_equal(result, expected)
1288+
1289+
# ensure we haven't mutated anything inplace
1290+
tm.assert_series_equal(
1291+
a,
1292+
pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]"),
1293+
)
1294+
tm.assert_series_equal(
1295+
b, pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]")
1296+
)
1297+
1298+
@pytest.mark.parametrize(
1299+
"other, expected",
1300+
[
1301+
(None, [True, None, None]),
1302+
(pd.NA, [True, None, None]),
1303+
(True, [True, True, True]),
1304+
(np.bool_(True), [True, True, True]),
1305+
(False, [True, False, None]),
1306+
(np.bool_(False), [True, False, None]),
1307+
],
1308+
)
1309+
def test_kleene_or_scalar(self, other, expected):
1310+
a = pd.Series([True, False, None], dtype="boolean[pyarrow]")
1311+
result = a | other
1312+
expected = pd.Series(expected, dtype="boolean[pyarrow]")
1313+
tm.assert_series_equal(result, expected)
1314+
1315+
result = other | a
1316+
tm.assert_series_equal(result, expected)
1317+
1318+
# ensure we haven't mutated anything inplace
1319+
tm.assert_series_equal(
1320+
a, pd.Series([True, False, None], dtype="boolean[pyarrow]")
1321+
)
1322+
1323+
def test_kleene_and(self):
1324+
a = pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]")
1325+
b = pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]")
1326+
result = a & b
1327+
expected = pd.Series(
1328+
[True, False, None, False, False, False, None, False, None],
1329+
dtype="boolean[pyarrow]",
1330+
)
1331+
tm.assert_series_equal(result, expected)
1332+
1333+
result = b & a
1334+
tm.assert_series_equal(result, expected)
1335+
1336+
# ensure we haven't mutated anything inplace
1337+
tm.assert_series_equal(
1338+
a,
1339+
pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]"),
1340+
)
1341+
tm.assert_series_equal(
1342+
b, pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]")
1343+
)
1344+
1345+
@pytest.mark.parametrize(
1346+
"other, expected",
1347+
[
1348+
(None, [None, False, None]),
1349+
(pd.NA, [None, False, None]),
1350+
(True, [True, False, None]),
1351+
(False, [False, False, False]),
1352+
(np.bool_(True), [True, False, None]),
1353+
(np.bool_(False), [False, False, False]),
1354+
],
1355+
)
1356+
def test_kleene_and_scalar(self, other, expected):
1357+
a = pd.Series([True, False, None], dtype="boolean[pyarrow]")
1358+
result = a & other
1359+
expected = pd.Series(expected, dtype="boolean[pyarrow]")
1360+
tm.assert_series_equal(result, expected)
1361+
1362+
result = other & a
1363+
tm.assert_series_equal(result, expected)
1364+
1365+
# ensure we haven't mutated anything inplace
1366+
tm.assert_series_equal(
1367+
a, pd.Series([True, False, None], dtype="boolean[pyarrow]")
1368+
)
1369+
1370+
def test_kleene_xor(self):
1371+
a = pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]")
1372+
b = pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]")
1373+
result = a ^ b
1374+
expected = pd.Series(
1375+
[False, True, None, True, False, None, None, None, None],
1376+
dtype="boolean[pyarrow]",
1377+
)
1378+
tm.assert_series_equal(result, expected)
1379+
1380+
result = b ^ a
1381+
tm.assert_series_equal(result, expected)
1382+
1383+
# ensure we haven't mutated anything inplace
1384+
tm.assert_series_equal(
1385+
a,
1386+
pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]"),
1387+
)
1388+
tm.assert_series_equal(
1389+
b, pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]")
1390+
)
1391+
1392+
@pytest.mark.parametrize(
1393+
"other, expected",
1394+
[
1395+
(None, [None, None, None]),
1396+
(pd.NA, [None, None, None]),
1397+
(True, [False, True, None]),
1398+
(np.bool_(True), [False, True, None]),
1399+
(np.bool_(False), [True, False, None]),
1400+
],
1401+
)
1402+
def test_kleene_xor_scalar(self, other, expected):
1403+
a = pd.Series([True, False, None], dtype="boolean[pyarrow]")
1404+
result = a ^ other
1405+
expected = pd.Series(expected, dtype="boolean[pyarrow]")
1406+
tm.assert_series_equal(result, expected)
1407+
1408+
result = other ^ a
1409+
tm.assert_series_equal(result, expected)
1410+
1411+
# ensure we haven't mutated anything inplace
1412+
tm.assert_series_equal(
1413+
a, pd.Series([True, False, None], dtype="boolean[pyarrow]")
1414+
)
1415+
1416+
12731417
def test_arrowdtype_construct_from_string_type_with_unsupported_parameters():
12741418
with pytest.raises(NotImplementedError, match="Passing pyarrow type"):
12751419
ArrowDtype.construct_from_string("not_a_real_dype[s, tz=UTC][pyarrow]")

0 commit comments

Comments
 (0)