Skip to content

Commit 658f757

Browse files
committed
BUG (string dtype): comparison of string column to mixed object column fails pandas-dev#60228
1 parent 82625a3 commit 658f757

File tree

2 files changed

+8
-16
lines changed

2 files changed

+8
-16
lines changed

pandas/core/arrays/arrow/array.py

+7 -1
Original file line number | Diff line number | Diff line change
@@ -726,7 +726,13 @@ def _cmp_method(self, other, op) -> ArrowExtensionArray:
726726
other, (ArrowExtensionArray, np.ndarray, list, BaseMaskedArray)
727727
) or isinstance(getattr(other, "dtype", None), CategoricalDtype):
728728
try:
729-
result = pc_func(self._pa_array, self._box_pa(other))
729+
if pa.types.is_string(self._pa_array.type):
730+
other_array = self._box_pa(other)
731+
if pa.types.is_string(other_array.type):
732+
other_array = other_array.cast(pa.large_string())
733+
result = pc_func(self._pa_array, other_array)
734+
else:
735+
result = pc_func(self._pa_array, self._box_pa(other))
730736
except pa.ArrowNotImplementedError:
731737
# TODO: could this be wrong if other is object dtype?
732738
# in which case we need to operate pointwise?

pandas/core/ops/array_ops.py

+1 -15
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,6 @@
4040
is_numeric_v_string_like,
4141
is_object_dtype,
4242
is_scalar,
43-
is_string_dtype,
4443
)
4544
from pandas.core.dtypes.generic import (
4645
ABCExtensionArray,
@@ -54,10 +53,7 @@
5453

5554
from pandas.core import roperator
5655
from pandas.core.computation import expressions
57-
from pandas.core.construction import (
58-
array as pd_array,
59-
ensure_wrapped_if_datetimelike,
60-
)
56+
from pandas.core.construction import ensure_wrapped_if_datetimelike
6157
from pandas.core.ops import missing
6258
from pandas.core.ops.dispatch import should_extension_dispatch
6359
from pandas.core.ops.invalid import invalid_comparison
@@ -325,16 +321,6 @@ def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike:
325321
"Lengths must match to compare", lvalues.shape, rvalues.shape
326322
)
327323

328-
if (is_string_dtype(lvalues) and is_object_dtype(rvalues)) or (
329-
is_object_dtype(lvalues) and is_string_dtype(rvalues)
330-
):
331-
if lvalues.dtype.name == "string" and rvalues.dtype == object:
332-
lvalues = lvalues.astype("string")
333-
rvalues = pd_array(rvalues, dtype="string")
334-
elif rvalues.dtype.name == "string" and lvalues.dtype == object:
335-
rvalues = rvalues.astype("string")
336-
lvalues = pd_array(lvalues, dtype="string")
337-
338324
if should_extension_dispatch(lvalues, rvalues) or (
339325
(isinstance(rvalues, (Timedelta, BaseOffset, Timestamp)) or right is NaT)
340326
and lvalues.dtype != object

0 commit comments

Comments
 (0)