Skip to content

Commit 178e504

Browse files
authored
BUG: ArrowExtensionArray logical ops raising KeyError (#51688)
1 parent 056edfa commit 178e504

File tree

3 files changed

+155
-5
lines changed

3 files changed

+155
-5
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -1371,6 +1371,7 @@ ExtensionArray
13711371
- Bug in :class:`Series` constructor unnecessarily overflowing for nullable unsigned integer dtypes (:issue:`38798`, :issue:`25880`)
13721372
- Bug in setting non-string value into ``StringArray`` raising ``ValueError`` instead of ``TypeError`` (:issue:`49632`)
13731373
- Bug in :meth:`DataFrame.reindex` not honoring the default ``copy=True`` keyword in case of columns with ExtensionDtype (and as a result also selecting multiple columns with getitem (``[]``) didn't correctly result in a copy) (:issue:`51197`)
1374+
- Bug in :class:`~arrays.ArrowExtensionArray` logical operations ``&`` and ``|`` raising ``KeyError`` (:issue:`51688`)
13741375

13751376
Styler
13761377
^^^^^^

pandas/core/arrays/arrow/array.py

+10-5
Original file line number | Diff line number | Diff line change
@@ -81,10 +81,10 @@
8181
}
8282

8383
ARROW_LOGICAL_FUNCS = {
84-
"and": pc.and_kleene,
85-
"rand": lambda x, y: pc.and_kleene(y, x),
86-
"or": pc.or_kleene,
87-
"ror": lambda x, y: pc.or_kleene(y, x),
84+
"and_": pc.and_kleene,
85+
"rand_": lambda x, y: pc.and_kleene(y, x),
86+
"or_": pc.or_kleene,
87+
"ror_": lambda x, y: pc.or_kleene(y, x),
8888
"xor": pc.xor,
8989
"rxor": lambda x, y: pc.xor(y, x),
9090
}
@@ -501,7 +501,12 @@ def _evaluate_op_method(self, other, op, arrow_funcs):
501501
elif isinstance(other, (np.ndarray, list)):
502502
result = pc_func(self._data, pa.array(other, from_pandas=True))
503503
elif is_scalar(other):
504-
result = pc_func(self._data, pa.scalar(other))
504+
if isna(other) and op.__name__ in ARROW_LOGICAL_FUNCS:
505+
# pyarrow kleene ops require null to be typed
506+
pa_scalar = pa.scalar(None, type=self._data.type)
507+
else:
508+
pa_scalar = pa.scalar(other)
509+
result = pc_func(self._data, pa_scalar)
505510
else:
506511
raise NotImplementedError(
507512
f"{op.__name__} not implemented for {type(other)}"

pandas/tests/extension/test_arrow.py

+144
Original file line number | Diff line number | Diff line change
@@ -1261,6 +1261,150 @@ def test_invalid_other_comp(self, data, comparison_op):
12611261
comparison_op(data, object())
12621262

12631263

1264+
class TestLogicalOps:
1265+
"""Various Series and DataFrame logical ops methods."""
1266+
1267+
def test_kleene_or(self):
1268+
a = pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]")
1269+
b = pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]")
1270+
result = a | b
1271+
expected = pd.Series(
1272+
[True, True, True, True, False, None, True, None, None],
1273+
dtype="boolean[pyarrow]",
1274+
)
1275+
tm.assert_series_equal(result, expected)
1276+
1277+
result = b | a
1278+
tm.assert_series_equal(result, expected)
1279+
1280+
# ensure we haven't mutated anything inplace
1281+
tm.assert_series_equal(
1282+
a,
1283+
pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]"),
1284+
)
1285+
tm.assert_series_equal(
1286+
b, pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]")
1287+
)
1288+
1289+
@pytest.mark.parametrize(
1290+
"other, expected",
1291+
[
1292+
(None, [True, None, None]),
1293+
(pd.NA, [True, None, None]),
1294+
(True, [True, True, True]),
1295+
(np.bool_(True), [True, True, True]),
1296+
(False, [True, False, None]),
1297+
(np.bool_(False), [True, False, None]),
1298+
],
1299+
)
1300+
def test_kleene_or_scalar(self, other, expected):
1301+
a = pd.Series([True, False, None], dtype="boolean[pyarrow]")
1302+
result = a | other
1303+
expected = pd.Series(expected, dtype="boolean[pyarrow]")
1304+
tm.assert_series_equal(result, expected)
1305+
1306+
result = other | a
1307+
tm.assert_series_equal(result, expected)
1308+
1309+
# ensure we haven't mutated anything inplace
1310+
tm.assert_series_equal(
1311+
a, pd.Series([True, False, None], dtype="boolean[pyarrow]")
1312+
)
1313+
1314+
def test_kleene_and(self):
1315+
a = pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]")
1316+
b = pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]")
1317+
result = a & b
1318+
expected = pd.Series(
1319+
[True, False, None, False, False, False, None, False, None],
1320+
dtype="boolean[pyarrow]",
1321+
)
1322+
tm.assert_series_equal(result, expected)
1323+
1324+
result = b & a
1325+
tm.assert_series_equal(result, expected)
1326+
1327+
# ensure we haven't mutated anything inplace
1328+
tm.assert_series_equal(
1329+
a,
1330+
pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]"),
1331+
)
1332+
tm.assert_series_equal(
1333+
b, pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]")
1334+
)
1335+
1336+
@pytest.mark.parametrize(
1337+
"other, expected",
1338+
[
1339+
(None, [None, False, None]),
1340+
(pd.NA, [None, False, None]),
1341+
(True, [True, False, None]),
1342+
(False, [False, False, False]),
1343+
(np.bool_(True), [True, False, None]),
1344+
(np.bool_(False), [False, False, False]),
1345+
],
1346+
)
1347+
def test_kleene_and_scalar(self, other, expected):
1348+
a = pd.Series([True, False, None], dtype="boolean[pyarrow]")
1349+
result = a & other
1350+
expected = pd.Series(expected, dtype="boolean[pyarrow]")
1351+
tm.assert_series_equal(result, expected)
1352+
1353+
result = other & a
1354+
tm.assert_series_equal(result, expected)
1355+
1356+
# ensure we haven't mutated anything inplace
1357+
tm.assert_series_equal(
1358+
a, pd.Series([True, False, None], dtype="boolean[pyarrow]")
1359+
)
1360+
1361+
def test_kleene_xor(self):
1362+
a = pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]")
1363+
b = pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]")
1364+
result = a ^ b
1365+
expected = pd.Series(
1366+
[False, True, None, True, False, None, None, None, None],
1367+
dtype="boolean[pyarrow]",
1368+
)
1369+
tm.assert_series_equal(result, expected)
1370+
1371+
result = b ^ a
1372+
tm.assert_series_equal(result, expected)
1373+
1374+
# ensure we haven't mutated anything inplace
1375+
tm.assert_series_equal(
1376+
a,
1377+
pd.Series([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean[pyarrow]"),
1378+
)
1379+
tm.assert_series_equal(
1380+
b, pd.Series([True, False, None] * 3, dtype="boolean[pyarrow]")
1381+
)
1382+
1383+
@pytest.mark.parametrize(
1384+
"other, expected",
1385+
[
1386+
(None, [None, None, None]),
1387+
(pd.NA, [None, None, None]),
1388+
(True, [False, True, None]),
1389+
(np.bool_(True), [False, True, None]),
1390+
(np.bool_(False), [True, False, None]),
1391+
],
1392+
)
1393+
def test_kleene_xor_scalar(self, other, expected):
1394+
a = pd.Series([True, False, None], dtype="boolean[pyarrow]")
1395+
result = a ^ other
1396+
expected = pd.Series(expected, dtype="boolean[pyarrow]")
1397+
tm.assert_series_equal(result, expected)
1398+
1399+
result = other ^ a
1400+
tm.assert_series_equal(result, expected)
1401+
1402+
# ensure we haven't mutated anything inplace
1403+
tm.assert_series_equal(
1404+
a, pd.Series([True, False, None], dtype="boolean[pyarrow]")
1405+
)
1406+
1407+
12641408
def test_arrowdtype_construct_from_string_type_with_unsupported_parameters():
12651409
with pytest.raises(NotImplementedError, match="Passing pyarrow type"):
12661410
ArrowDtype.construct_from_string("not_a_real_dype[s, tz=UTC][pyarrow]")

0 commit comments

Comments
 (0)