Skip to content

Commit aacdf61

Browse files
authored
Backport PR pandas-dev#56123 on branch 2.1.x (BUG: ne comparison returns False for NA and other value) (pandas-dev#56382)
1 parent 7006d99 commit aacdf61

File tree

4 files changed, with 44 additions and 18 deletions.

doc/source/whatsnew/v2.1.4.rst

+1
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@ Bug fixes
3030
- Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
3131
- Fixed bug in :meth:`DataFrame.to_hdf` raising when columns have ``StringDtype`` (:issue:`55088`)
3232
- Fixed bug in :meth:`Index.insert` casting object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
33+
- Fixed bug in :meth:`Series.__ne__` returning ``False`` when comparing ``NA`` with a string value for ``dtype="string[pyarrow_numpy]"`` (:issue:`56122`)
3334
- Fixed bug in :meth:`Series.str.split` and :meth:`Series.str.rsplit` when ``pat=None`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`56271`)
3435
- Fixed bug in :meth:`Series.str.translate` losing object dtype when string option is set (:issue:`56152`)
3536
-

pandas/core/arrays/string_arrow.py

+5 -1
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from functools import partial
4+
import operator
45
import re
56
from typing import (
67
TYPE_CHECKING,
@@ -600,7 +601,10 @@ def _str_find(self, sub: str, start: int = 0, end: int | None = None):
600601

601602
def _cmp_method(self, other, op):
602603
result = super()._cmp_method(other, op)
603-
return result.to_numpy(np.bool_, na_value=False)
604+
if op == operator.ne:
605+
return result.to_numpy(np.bool_, na_value=True)
606+
else:
607+
return result.to_numpy(np.bool_, na_value=False)
604608

605609
def value_counts(self, dropna: bool = True):
606610
from pandas import Series

pandas/tests/arithmetic/test_object.py

+18 -11
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,13 @@
88
import numpy as np
99
import pytest
1010

11+
import pandas.util._test_decorators as td
12+
1113
import pandas as pd
1214
from pandas import (
1315
Series,
1416
Timestamp,
17+
option_context,
1518
)
1619
import pandas._testing as tm
1720
from pandas.core import ops
@@ -31,20 +34,24 @@ def test_comparison_object_numeric_nas(self, comparison_op):
3134
expected = func(ser.astype(float), shifted.astype(float))
3235
tm.assert_series_equal(result, expected)
3336

34-
def test_object_comparisons(self):
35-
ser = Series(["a", "b", np.nan, "c", "a"])
37+
@pytest.mark.parametrize(
38+
"infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
39+
)
40+
def test_object_comparisons(self, infer_string):
41+
with option_context("future.infer_string", infer_string):
42+
ser = Series(["a", "b", np.nan, "c", "a"])
3643

37-
result = ser == "a"
38-
expected = Series([True, False, False, False, True])
39-
tm.assert_series_equal(result, expected)
44+
result = ser == "a"
45+
expected = Series([True, False, False, False, True])
46+
tm.assert_series_equal(result, expected)
4047

41-
result = ser < "a"
42-
expected = Series([False, False, False, False, False])
43-
tm.assert_series_equal(result, expected)
48+
result = ser < "a"
49+
expected = Series([False, False, False, False, False])
50+
tm.assert_series_equal(result, expected)
4451

45-
result = ser != "a"
46-
expected = -(ser == "a")
47-
tm.assert_series_equal(result, expected)
52+
result = ser != "a"
53+
expected = -(ser == "a")
54+
tm.assert_series_equal(result, expected)
4855

4956
@pytest.mark.parametrize("dtype", [None, object])
5057
def test_more_na_comparisons(self, dtype):

pandas/tests/arrays/string_/test_string.py

+20 -6
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,8 @@
22
This module tests the functionality of StringArray and ArrowStringArray.
33
Tests for the str accessors are in pandas/tests/strings/test_string_array.py
44
"""
5+
import operator
6+
57
import numpy as np
68
import pytest
79

@@ -221,7 +223,10 @@ def test_comparison_methods_scalar(comparison_op, dtype):
221223
result = getattr(a, op_name)(other)
222224
if dtype.storage == "pyarrow_numpy":
223225
expected = np.array([getattr(item, op_name)(other) for item in a])
224-
expected[1] = False
226+
if comparison_op == operator.ne:
227+
expected[1] = True
228+
else:
229+
expected[1] = False
225230
tm.assert_numpy_array_equal(result, expected.astype(np.bool_))
226231
else:
227232
expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
@@ -236,7 +241,10 @@ def test_comparison_methods_scalar_pd_na(comparison_op, dtype):
236241
result = getattr(a, op_name)(pd.NA)
237242

238243
if dtype.storage == "pyarrow_numpy":
239-
expected = np.array([False, False, False])
244+
if operator.ne == comparison_op:
245+
expected = np.array([True, True, True])
246+
else:
247+
expected = np.array([False, False, False])
240248
tm.assert_numpy_array_equal(result, expected)
241249
else:
242250
expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
@@ -262,7 +270,7 @@ def test_comparison_methods_scalar_not_string(comparison_op, dtype):
262270
if dtype.storage == "pyarrow_numpy":
263271
expected_data = {
264272
"__eq__": [False, False, False],
265-
"__ne__": [True, False, True],
273+
"__ne__": [True, True, True],
266274
}[op_name]
267275
expected = np.array(expected_data)
268276
tm.assert_numpy_array_equal(result, expected)
@@ -282,12 +290,18 @@ def test_comparison_methods_array(comparison_op, dtype):
282290
other = [None, None, "c"]
283291
result = getattr(a, op_name)(other)
284292
if dtype.storage == "pyarrow_numpy":
285-
expected = np.array([False, False, False])
286-
expected[-1] = getattr(other[-1], op_name)(a[-1])
293+
if operator.ne == comparison_op:
294+
expected = np.array([True, True, False])
295+
else:
296+
expected = np.array([False, False, False])
297+
expected[-1] = getattr(other[-1], op_name)(a[-1])
287298
tm.assert_numpy_array_equal(result, expected)
288299

289300
result = getattr(a, op_name)(pd.NA)
290-
expected = np.array([False, False, False])
301+
if operator.ne == comparison_op:
302+
expected = np.array([True, True, True])
303+
else:
304+
expected = np.array([False, False, False])
291305
tm.assert_numpy_array_equal(result, expected)
292306

293307
else:

0 commit comments

Comments
 (0)