From acac5eb89691d88ca8e6c4dd97bd5f7adc9c13ba Mon Sep 17 00:00:00 2001 From: Philip Kahn Date: Mon, 12 Aug 2024 14:10:31 -0700 Subject: [PATCH 01/10] Fix issue #47101 Bisecting two years ago ( https://github.com/pandas-dev/pandas/issues/47101#issuecomment-1139606268 ) shows this regression was introduced in b2d54d9 in 2021. Somehow this hasn't been patched since then. PR #48313 was supposed to address this, but the PR was closed and never merged and the bug has persisted. --- pandas/core/missing.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 039d868bccd16..305d54778db80 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -124,11 +124,15 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]: new_mask = np.zeros(arr.shape, dtype=np.bool_) new_mask[arr_mask] = arr[arr_mask] == x else: - new_mask = arr == x + # GH#47101 + # Fix where type 'bool' has no attribute 'to_numpy()' by first attempting to broadcast + # with np.equal for some cases, and then an explicit type check when checking the mask + # for any straggling cases + new_mask = np.equal(arr, x) if not isinstance(new_mask, np.ndarray): # usually BooleanArray - new_mask = new_mask.to_numpy(dtype=bool, na_value=False) + new_mask = np.array([new_mask]) if isinstance(new_mask, bool) else new_mask.to_numpy(dtype=bool, na_value=False) mask |= new_mask if na_mask.any(): From 33c7ca5eb004d2dbe8e96c357696f275af87a0b0 Mon Sep 17 00:00:00 2001 From: Philip Kahn Date: Mon, 12 Aug 2024 14:27:24 -0700 Subject: [PATCH 02/10] Add a test as per PR guidelines --- pandas/tests/frame/methods/test_replace.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 6b872bf48d550..e80b5c6a16c29 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1518,3 +1518,10 @@ 
def test_replace_object_splitting(self, using_infer_string): assert len(df._mgr.blocks) == 2 else: assert len(df._mgr.blocks) == 1 + + def test_replace_bool_to_numpy_attributeerror(self): + # GH#47101 + pass_pre_patch = pd.DataFrame({"d":[None]}) + tm.assert_frame_equal(pass_pre_patch, pass_pre_patch.replace('', pd.NA)) + fail_pre_patch = pd.DataFrame({"d":[pd.NA]}) + tm.assert_frame_equal(fail_pre_match, fail_pre_patch.replace('', pd.NA)) From 285d466c1e504521ff2eeaa03e481a7223766a20 Mon Sep 17 00:00:00 2001 From: Philip Kahn Date: Mon, 12 Aug 2024 14:32:46 -0700 Subject: [PATCH 03/10] Fix typo --- pandas/tests/frame/methods/test_replace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index e80b5c6a16c29..0c576b58cf6fe 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1524,4 +1524,4 @@ def test_replace_bool_to_numpy_attributeerror(self): pass_pre_patch = pd.DataFrame({"d":[None]}) tm.assert_frame_equal(pass_pre_patch, pass_pre_patch.replace('', pd.NA)) fail_pre_patch = pd.DataFrame({"d":[pd.NA]}) - tm.assert_frame_equal(fail_pre_match, fail_pre_patch.replace('', pd.NA)) + tm.assert_frame_equal(fail_pre_patch, fail_pre_patch.replace('', pd.NA)) From c01304a3a303fcf940be3abea39553fa2c878dc8 Mon Sep 17 00:00:00 2001 From: Philip Kahn Date: Mon, 12 Aug 2024 14:38:51 -0700 Subject: [PATCH 04/10] Resolve inconsistent namespace as per PR test https://results.pre-commit.ci/run/github/858127/1723498369.6V12SWx7T-WpLZDAXXkz0Q This web UI commit will still fail, as the E501 line-too-long check will fail until the next commit --- pandas/tests/frame/methods/test_replace.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 0c576b58cf6fe..fa02c854e684b 100644 --- 
a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1521,7 +1521,7 @@ def test_replace_object_splitting(self, using_infer_string): def test_replace_bool_to_numpy_attributeerror(self): # GH#47101 - pass_pre_patch = pd.DataFrame({"d":[None]}) + pass_pre_patch = DataFrame({"d":[None]}) tm.assert_frame_equal(pass_pre_patch, pass_pre_patch.replace('', pd.NA)) - fail_pre_patch = pd.DataFrame({"d":[pd.NA]}) + fail_pre_patch = DataFrame({"d":[pd.NA]}) tm.assert_frame_equal(fail_pre_patch, fail_pre_patch.replace('', pd.NA)) From a0289ad76a1aac043842197dbc1bf5e06237e07b Mon Sep 17 00:00:00 2001 From: Philip Kahn Date: Mon, 12 Aug 2024 14:41:35 -0700 Subject: [PATCH 05/10] Resolve E501 linting errors https://results.pre-commit.ci/run/github/858127/1723498369.6V12SWx7T-WpLZDAXXkz0Q --- pandas/core/missing.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 305d54778db80..1aab12b6104ae 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -125,14 +125,18 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]: new_mask[arr_mask] = arr[arr_mask] == x else: # GH#47101 - # Fix where type 'bool' has no attribute 'to_numpy()' by first attempting to broadcast - # with np.equal for some cases, and then an explicit type check when checking the mask - # for any straggling cases + # Fix where type 'bool' has no attribute 'to_numpy()' by first + # attempting to broadcast with np.equal for some cases, and then + # an explicit type check when checking the mask for any straggling + # cases new_mask = np.equal(arr, x) if not isinstance(new_mask, np.ndarray): # usually BooleanArray - new_mask = np.array([new_mask]) if isinstance(new_mask, bool) else new_mask.to_numpy(dtype=bool, na_value=False) + if isinstance(new_mask, bool): + new_mask = np.array([new_mask]) + else: + new_mask = new_mask.to_numpy(dtype=bool, na_value=False) mask 
|= new_mask if na_mask.any(): From 3774ceb8d40e986e1d06ddbe1adb0fd4db9a022f Mon Sep 17 00:00:00 2001 From: Philip Kahn Date: Mon, 12 Aug 2024 15:21:13 -0700 Subject: [PATCH 06/10] Fix test TypeErrors np.equal([1,2,3], "") fails --- pandas/core/missing.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 1aab12b6104ae..b425d1a3f7701 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -129,7 +129,11 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]: # attempting to broadcast with np.equal for some cases, and then # an explicit type check when checking the mask for any straggling # cases - new_mask = np.equal(arr, x) + try: + new_mask = np.equal(arr, x) + except TypeError: + # Old behaviour for uncastable types + new_mask = arr == x if not isinstance(new_mask, np.ndarray): # usually BooleanArray From 8c596ea707d72b078f19ff7cdf21799a41bc1a10 Mon Sep 17 00:00:00 2001 From: Philip Kahn Date: Mon, 12 Aug 2024 15:21:49 -0700 Subject: [PATCH 07/10] Quote style for Ruff --- pandas/tests/frame/methods/test_replace.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index fa02c854e684b..c24868ee7a641 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1522,6 +1522,6 @@ def test_replace_object_splitting(self, using_infer_string): def test_replace_bool_to_numpy_attributeerror(self): # GH#47101 pass_pre_patch = DataFrame({"d":[None]}) - tm.assert_frame_equal(pass_pre_patch, pass_pre_patch.replace('', pd.NA)) + tm.assert_frame_equal(pass_pre_patch, pass_pre_patch.replace("", pd.NA)) fail_pre_patch = DataFrame({"d":[pd.NA]}) - tm.assert_frame_equal(fail_pre_patch, fail_pre_patch.replace('', pd.NA)) + tm.assert_frame_equal(fail_pre_patch, fail_pre_patch.replace("", pd.NA)) From 
2899bcab8de79516c0d84e897869e6fa4c92e407 Mon Sep 17 00:00:00 2001 From: Philip Kahn Date: Mon, 12 Aug 2024 16:18:32 -0700 Subject: [PATCH 08/10] typing and remove code backtick possibly incorrectly triggering ruff formatter --- pandas/core/missing.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index b425d1a3f7701..c49b23e221c9d 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -125,10 +125,11 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]: new_mask[arr_mask] = arr[arr_mask] == x else: # GH#47101 - # Fix where type 'bool' has no attribute 'to_numpy()' by first + # Fix where type bool has no attribute to_numpy() by first # attempting to broadcast with np.equal for some cases, and then # an explicit type check when checking the mask for any straggling - # cases + # cases. Where a literal comparison would fail np.equal we fall back + # to the original equality check. try: new_mask = np.equal(arr, x) except TypeError: @@ -138,7 +139,7 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]: if not isinstance(new_mask, np.ndarray): # usually BooleanArray if isinstance(new_mask, bool): - new_mask = np.array([new_mask]) + new_mask = np.array([new_mask], dtype= bool) else: new_mask = new_mask.to_numpy(dtype=bool, na_value=False) mask |= new_mask From f06ecffbc4f49977448e907732f2c3d225d4b382 Mon Sep 17 00:00:00 2001 From: Philip Kahn Date: Tue, 13 Aug 2024 11:59:13 -0700 Subject: [PATCH 09/10] mypy suppression for caught error --- pandas/core/missing.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index c49b23e221c9d..8aff6b879fa21 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -124,14 +124,15 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]: new_mask = np.zeros(arr.shape, dtype=np.bool_) 
new_mask[arr_mask] = arr[arr_mask] == x else: - # GH#47101 - # Fix where type bool has no attribute to_numpy() by first - # attempting to broadcast with np.equal for some cases, and then - # an explicit type check when checking the mask for any straggling + # GH#47101 + # Fix where type bool has no attribute to_numpy() by first + # attempting to broadcast with np.equal for some cases, and then + # an explicit type check when checking the mask for any straggling # cases. Where a literal comparison would fail np.equal we fall back # to the original equality check. try: - new_mask = np.equal(arr, x) + # In case of an uncastable type, this will emit TypeError + new_mask = np.equal(arr, x) # type: ignore[arg-type] except TypeError: # Old behaviour for uncastable types new_mask = arr == x @@ -139,7 +140,7 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]: if not isinstance(new_mask, np.ndarray): # usually BooleanArray if isinstance(new_mask, bool): - new_mask = np.array([new_mask], dtype= bool) + new_mask = np.array([new_mask], dtype= bool) else: new_mask = new_mask.to_numpy(dtype=bool, na_value=False) mask |= new_mask From 85c0da9ea224cd3e5389af88d2ef0ccf0b371092 Mon Sep 17 00:00:00 2001 From: Philip Kahn Date: Tue, 13 Aug 2024 12:20:34 -0700 Subject: [PATCH 10/10] trailing space --- pandas/core/missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 8aff6b879fa21..6a4a3183757ce 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -141,7 +141,7 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]: # usually BooleanArray if isinstance(new_mask, bool): new_mask = np.array([new_mask], dtype= bool) - else: + else: new_mask = new_mask.to_numpy(dtype=bool, na_value=False) mask |= new_mask