Skip to content

Commit 67bec1f

Browse files
String dtype: avoid surfacing pyarrow exception in binary operations (#59610)
1 parent 1b7bfed commit 67bec1f

File tree

16 files changed

+127
-272
lines changed

16 files changed

+127
-272
lines changed

pandas/core/arrays/arrow/array.py

+32-6
Original file line number | Diff line number | Diff line change
@@ -681,7 +681,12 @@ def __invert__(self) -> Self:
681681
return type(self)(pc.invert(self._pa_array))
682682

683683
def __neg__(self) -> Self:
684-
return type(self)(pc.negate_checked(self._pa_array))
684+
try:
685+
return type(self)(pc.negate_checked(self._pa_array))
686+
except pa.ArrowNotImplementedError as err:
687+
raise TypeError(
688+
f"unary '-' not supported for dtype '{self.dtype}'"
689+
) from err
685690

686691
def __pos__(self) -> Self:
687692
return type(self)(self._pa_array)
@@ -736,8 +741,19 @@ def _cmp_method(self, other, op) -> ArrowExtensionArray:
736741
)
737742
return ArrowExtensionArray(result)
738743

744+
def _op_method_error_message(self, other, op) -> str:
745+
if hasattr(other, "dtype"):
746+
other_type = f"dtype '{other.dtype}'"
747+
else:
748+
other_type = f"object of type {type(other)}"
749+
return (
750+
f"operation '{op.__name__}' not supported for "
751+
f"dtype '{self.dtype}' with {other_type}"
752+
)
753+
739754
def _evaluate_op_method(self, other, op, arrow_funcs) -> Self:
740755
pa_type = self._pa_array.type
756+
other_original = other
741757
other = self._box_pa(other)
742758

743759
if (
@@ -747,10 +763,15 @@ def _evaluate_op_method(self, other, op, arrow_funcs) -> Self:
747763
):
748764
if op in [operator.add, roperator.radd]:
749765
sep = pa.scalar("", type=pa_type)
750-
if op is operator.add:
751-
result = pc.binary_join_element_wise(self._pa_array, other, sep)
752-
elif op is roperator.radd:
753-
result = pc.binary_join_element_wise(other, self._pa_array, sep)
766+
try:
767+
if op is operator.add:
768+
result = pc.binary_join_element_wise(self._pa_array, other, sep)
769+
elif op is roperator.radd:
770+
result = pc.binary_join_element_wise(other, self._pa_array, sep)
771+
except pa.ArrowNotImplementedError as err:
772+
raise TypeError(
773+
self._op_method_error_message(other_original, op)
774+
) from err
754775
return type(self)(result)
755776
elif op in [operator.mul, roperator.rmul]:
756777
binary = self._pa_array
@@ -782,9 +803,14 @@ def _evaluate_op_method(self, other, op, arrow_funcs) -> Self:
782803

783804
pc_func = arrow_funcs[op.__name__]
784805
if pc_func is NotImplemented:
806+
if pa.types.is_string(pa_type) or pa.types.is_large_string(pa_type):
807+
raise TypeError(self._op_method_error_message(other_original, op))
785808
raise NotImplementedError(f"{op.__name__} not implemented.")
786809

787-
result = pc_func(self._pa_array, other)
810+
try:
811+
result = pc_func(self._pa_array, other)
812+
except pa.ArrowNotImplementedError as err:
813+
raise TypeError(self._op_method_error_message(other_original, op)) from err
788814
return type(self)(result)
789815

790816
def _logical_method(self, other, op) -> Self:

pandas/core/arrays/string_.py

+4-1
Original file line number | Diff line number | Diff line change
@@ -825,8 +825,11 @@ def _cmp_method(self, other, op):
825825
f"Lengths of operands do not match: {len(self)} != {len(other)}"
826826
)
827827

828-
other = np.asarray(other)
828+
# for array-likes, first filter out NAs before converting to numpy
829+
if not is_array_like(other):
830+
other = np.asarray(other)
829831
other = other[valid]
832+
other = np.asarray(other)
830833

831834
if op.__name__ in ops.ARITHMETIC_BINOPS:
832835
result = np.empty_like(self._ndarray, dtype="object")

pandas/tests/arithmetic/test_object.py

+6-19
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,6 @@
88
import numpy as np
99
import pytest
1010

11-
from pandas._config import using_string_dtype
12-
13-
from pandas.compat import HAS_PYARROW
1411
import pandas.util._test_decorators as td
1512

1613
import pandas as pd
@@ -318,27 +315,17 @@ def test_add(self):
318315
expected = pd.Index(["1a", "1b", "1c"])
319316
tm.assert_index_equal("1" + index, expected)
320317

321-
@pytest.mark.xfail(
322-
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
323-
)
324-
def test_sub_fail(self, using_infer_string):
318+
def test_sub_fail(self):
325319
index = pd.Index([str(i) for i in range(10)])
326320

327-
if using_infer_string:
328-
import pyarrow as pa
329-
330-
err = pa.lib.ArrowNotImplementedError
331-
msg = "has no kernel"
332-
else:
333-
err = TypeError
334-
msg = "unsupported operand type|Cannot broadcast"
335-
with pytest.raises(err, match=msg):
321+
msg = "unsupported operand type|Cannot broadcast|sub' not supported"
322+
with pytest.raises(TypeError, match=msg):
336323
index - "a"
337-
with pytest.raises(err, match=msg):
324+
with pytest.raises(TypeError, match=msg):
338325
index - index
339-
with pytest.raises(err, match=msg):
326+
with pytest.raises(TypeError, match=msg):
340327
index - index.tolist()
341-
with pytest.raises(err, match=msg):
328+
with pytest.raises(TypeError, match=msg):
342329
index.tolist() - index
343330

344331
def test_sub_object(self):

pandas/tests/arrays/boolean/test_arithmetic.py

+7-19
Original file line number | Diff line number | Diff line change
@@ -3,10 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
8-
from pandas.compat import HAS_PYARROW
9-
106
import pandas as pd
117
import pandas._testing as tm
128

@@ -94,19 +90,8 @@ def test_op_int8(left_array, right_array, opname):
9490
# -----------------------------------------------------------------------------
9591

9692

97-
@pytest.mark.xfail(
98-
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
99-
)
100-
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
93+
def test_error_invalid_values(data, all_arithmetic_operators):
10194
# invalid ops
102-
103-
if using_infer_string:
104-
import pyarrow as pa
105-
106-
err = (TypeError, pa.lib.ArrowNotImplementedError, NotImplementedError)
107-
else:
108-
err = TypeError
109-
11095
op = all_arithmetic_operators
11196
s = pd.Series(data)
11297
ops = getattr(s, op)
@@ -116,7 +101,8 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string
116101
"did not contain a loop with signature matching types|"
117102
"BooleanArray cannot perform the operation|"
118103
"not supported for the input types, and the inputs could not be safely coerced "
119-
"to any supported types according to the casting rule ''safe''"
104+
"to any supported types according to the casting rule ''safe''|"
105+
"not supported for dtype"
120106
)
121107
with pytest.raises(TypeError, match=msg):
122108
ops("foo")
@@ -125,9 +111,10 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string
125111
r"unsupported operand type\(s\) for",
126112
"Concatenation operation is not implemented for NumPy arrays",
127113
"has no kernel",
114+
"not supported for dtype",
128115
]
129116
)
130-
with pytest.raises(err, match=msg):
117+
with pytest.raises(TypeError, match=msg):
131118
ops(pd.Timestamp("20180101"))
132119

133120
# invalid array-likes
@@ -140,7 +127,8 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string
140127
"not all arguments converted during string formatting",
141128
"has no kernel",
142129
"not implemented",
130+
"not supported for dtype",
143131
]
144132
)
145-
with pytest.raises(err, match=msg):
133+
with pytest.raises(TypeError, match=msg):
146134
ops(pd.Series("foo", index=s.index))

pandas/tests/arrays/floating/test_arithmetic.py

+8-15
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
import pandas as pd
97
import pandas._testing as tm
108
from pandas.core.arrays import FloatingArray
@@ -124,19 +122,11 @@ def test_arith_zero_dim_ndarray(other):
124122
# -----------------------------------------------------------------------------
125123

126124

127-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
128-
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
125+
def test_error_invalid_values(data, all_arithmetic_operators):
129126
op = all_arithmetic_operators
130127
s = pd.Series(data)
131128
ops = getattr(s, op)
132129

133-
if using_infer_string:
134-
import pyarrow as pa
135-
136-
errs = (TypeError, pa.lib.ArrowNotImplementedError, NotImplementedError)
137-
else:
138-
errs = TypeError
139-
140130
# invalid scalars
141131
msg = "|".join(
142132
[
@@ -152,15 +142,17 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string
152142
"Concatenation operation is not implemented for NumPy arrays",
153143
"has no kernel",
154144
"not implemented",
145+
"not supported for dtype",
146+
"Can only string multiply by an integer",
155147
]
156148
)
157-
with pytest.raises(errs, match=msg):
149+
with pytest.raises(TypeError, match=msg):
158150
ops("foo")
159-
with pytest.raises(errs, match=msg):
151+
with pytest.raises(TypeError, match=msg):
160152
ops(pd.Timestamp("20180101"))
161153

162154
# invalid array-likes
163-
with pytest.raises(errs, match=msg):
155+
with pytest.raises(TypeError, match=msg):
164156
ops(pd.Series("foo", index=s.index))
165157

166158
msg = "|".join(
@@ -181,9 +173,10 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string
181173
"cannot subtract DatetimeArray from ndarray",
182174
"has no kernel",
183175
"not implemented",
176+
"not supported for dtype",
184177
]
185178
)
186-
with pytest.raises(errs, match=msg):
179+
with pytest.raises(TypeError, match=msg):
187180
ops(pd.Series(pd.date_range("20180101", periods=len(s))))
188181

189182

pandas/tests/arrays/integer/test_arithmetic.py

+11-23
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
import pandas as pd
97
import pandas._testing as tm
108
from pandas.core import ops
@@ -174,19 +172,11 @@ def test_numpy_zero_dim_ndarray(other):
174172
# -----------------------------------------------------------------------------
175173

176174

177-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
178-
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
175+
def test_error_invalid_values(data, all_arithmetic_operators):
179176
op = all_arithmetic_operators
180177
s = pd.Series(data)
181178
ops = getattr(s, op)
182179

183-
if using_infer_string:
184-
import pyarrow as pa
185-
186-
errs = (TypeError, pa.lib.ArrowNotImplementedError, NotImplementedError)
187-
else:
188-
errs = TypeError
189-
190180
# invalid scalars
191181
msg = "|".join(
192182
[
@@ -201,24 +191,21 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string
201191
"has no kernel",
202192
"not implemented",
203193
"The 'out' kwarg is necessary. Use numpy.strings.multiply without it.",
194+
"not supported for dtype",
204195
]
205196
)
206-
with pytest.raises(errs, match=msg):
197+
with pytest.raises(TypeError, match=msg):
207198
ops("foo")
208-
with pytest.raises(errs, match=msg):
199+
with pytest.raises(TypeError, match=msg):
209200
ops(pd.Timestamp("20180101"))
210201

211202
# invalid array-likes
212203
str_ser = pd.Series("foo", index=s.index)
213204
# with pytest.raises(TypeError, match=msg):
214-
if (
215-
all_arithmetic_operators
216-
in [
217-
"__mul__",
218-
"__rmul__",
219-
]
220-
and not using_infer_string
221-
): # (data[~data.isna()] >= 0).all():
205+
if all_arithmetic_operators in [
206+
"__mul__",
207+
"__rmul__",
208+
]: # (data[~data.isna()] >= 0).all():
222209
res = ops(str_ser)
223210
expected = pd.Series(["foo" * x for x in data], index=s.index)
224211
expected = expected.fillna(np.nan)
@@ -227,7 +214,7 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string
227214
# more-correct than np.nan here.
228215
tm.assert_series_equal(res, expected)
229216
else:
230-
with pytest.raises(errs, match=msg):
217+
with pytest.raises(TypeError, match=msg):
231218
ops(str_ser)
232219

233220
msg = "|".join(
@@ -242,9 +229,10 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string
242229
"cannot subtract DatetimeArray from ndarray",
243230
"has no kernel",
244231
"not implemented",
232+
"not supported for dtype",
245233
]
246234
)
247-
with pytest.raises(errs, match=msg):
235+
with pytest.raises(TypeError, match=msg):
248236
ops(pd.Series(pd.date_range("20180101", periods=len(s))))
249237

250238

pandas/tests/extension/base/ops.py

+1-9
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ class BaseOpsUtil:
2424

2525
def _get_expected_exception(
2626
self, op_name: str, obj, other
27-
) -> type[Exception] | None:
27+
) -> type[Exception] | tuple[type[Exception], ...] | None:
2828
# Find the Exception, if any we expect to raise calling
2929
# obj.__op_name__(other)
3030

@@ -39,14 +39,6 @@ def _get_expected_exception(
3939
else:
4040
result = self.frame_scalar_exc
4141

42-
if using_string_dtype() and result is not None:
43-
import pyarrow as pa
44-
45-
result = ( # type: ignore[assignment]
46-
result,
47-
pa.lib.ArrowNotImplementedError,
48-
NotImplementedError,
49-
)
5042
return result
5143

5244
def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result):

pandas/tests/extension/decimal/test_decimal.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ def data_for_grouping():
6868
class TestDecimalArray(base.ExtensionTests):
6969
def _get_expected_exception(
7070
self, op_name: str, obj, other
71-
) -> type[Exception] | None:
71+
) -> type[Exception] | tuple[type[Exception], ...] | None:
7272
return None
7373

7474
def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:

0 commit comments

Comments
 (0)