Skip to content

Commit 8db4edc

Browse files
committed
BUG (string dtype): comparison of string column to mixed object column fails pandas-dev#60228
1 parent 900f3b1 commit 8db4edc

File tree

3 files changed

+13
-8
lines changed

3 files changed

+13
-8
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -768,6 +768,7 @@ Styler
768768
Other
769769
^^^^^
770770
- Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
771+
- Bug in comparison operations between a ``string`` dtype array and an ``object`` dtype array containing mixed types, which would raise a ``TypeError`` when PyArrow-backed strings are enabled (:issue:`60228`)
771772
- Bug in :func:`eval` on :class:`ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`)
772773
- Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`)
773774
- Bug in :func:`eval` with ``engine="numexpr"`` returning unexpected result for float division. (:issue:`59736`)

pandas/core/ops/array_ops.py

+9-7
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,9 @@
3838
is_bool_dtype,
3939
is_list_like,
4040
is_numeric_v_string_like,
41-
is_string_dtype,
4241
is_object_dtype,
4342
is_scalar,
43+
is_string_dtype,
4444
)
4545
from pandas.core.dtypes.generic import (
4646
ABCExtensionArray,
@@ -54,7 +54,10 @@
5454

5555
from pandas.core import roperator
5656
from pandas.core.computation import expressions
57-
from pandas.core.construction import ensure_wrapped_if_datetimelike, array
57+
from pandas.core.construction import (
58+
array as pd_array,
59+
ensure_wrapped_if_datetimelike,
60+
)
5861
from pandas.core.ops import missing
5962
from pandas.core.ops.dispatch import should_extension_dispatch
6063
from pandas.core.ops.invalid import invalid_comparison
@@ -322,16 +325,15 @@ def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike:
322325
"Lengths must match to compare", lvalues.shape, rvalues.shape
323326
)
324327

325-
if (
326-
(is_string_dtype(lvalues) and is_object_dtype(rvalues)) or
327-
(is_object_dtype(lvalues) and is_string_dtype(rvalues))
328+
if (is_string_dtype(lvalues) and is_object_dtype(rvalues)) or (
329+
is_object_dtype(lvalues) and is_string_dtype(rvalues)
328330
):
329331
if lvalues.dtype.name == "string" and rvalues.dtype == object:
330332
lvalues = lvalues.astype("string")
331-
rvalues = array(rvalues, dtype="string")
333+
rvalues = pd_array(rvalues, dtype="string")
332334
elif rvalues.dtype.name == "string" and lvalues.dtype == object:
333335
rvalues = rvalues.astype("string")
334-
lvalues = array(lvalues, dtype="string")
336+
lvalues = pd_array(lvalues, dtype="string")
335337

336338
if should_extension_dispatch(lvalues, rvalues) or (
337339
(isinstance(rvalues, (Timedelta, BaseOffset, Timestamp)) or right is NaT)

pandas/tests/series/methods/test_compare.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,9 @@ def test_compare_datetime64_and_string():
139139
tm.assert_series_equal(result_eq2, expected_eq)
140140
tm.assert_series_equal(result_neq, expected_neq)
141141

142+
142143
def test_comparison_string_mixed_object():
144+
# Issue https://github.com/pandas-dev/pandas/issues/60228
143145
pd.options.future.infer_string = True
144146

145147
ser_string = pd.Series(["a", "b"], dtype="string")
@@ -149,4 +151,4 @@ def test_comparison_string_mixed_object():
149151
expected = pd.Series([False, True], dtype="boolean")
150152
tm.assert_series_equal(result, expected)
151153

152-
pd.options.future.infer_string = False
154+
pd.options.future.infer_string = False

0 commit comments

Comments
 (0)