From 5dda80e5ff6ee49af19d1b3904cf6274edf402e6 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 6 Aug 2022 23:39:00 +0200 Subject: [PATCH] BUG: compare returning all nan columns when comparing ea and np dtypes --- doc/source/whatsnew/v1.5.0.rst | 2 ++ pandas/core/internals/blocks.py | 2 ++ pandas/tests/frame/indexing/test_where.py | 10 ++++++++++ pandas/tests/frame/methods/test_compare.py | 16 ++++++++++++++++ 4 files changed, 30 insertions(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index bdf811f6a8f6a..790aa5cefb29f 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -908,6 +908,8 @@ Indexing - Bug in :meth:`DataFrame.mask` with ``inplace=True`` and ``ExtensionDtype`` columns incorrectly raising (:issue:`45577`) - Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`) - Bug in :meth:`DataFrame.__getattribute__` raising ``AttributeError`` if columns have ``"string"`` dtype (:issue:`46185`) +- Bug in :meth:`DataFrame.compare` returning an all-``NaN`` column when comparing extension array dtype and numpy dtype (:issue:`44014`) +- Bug in :meth:`DataFrame.where` setting wrong values with ``"boolean"`` mask for numpy dtype (:issue:`44014`) - Bug in indexing on a :class:`DatetimeIndex` with a ``np.str_`` key incorrectly raising (:issue:`45580`) - Bug in :meth:`CategoricalIndex.get_indexer` when index contains ``NaN`` values, resulting in elements that are in target but not present in the index to be mapped to the index of the NaN element, instead of -1 (:issue:`45361`) - Bug in setting large integer values into :class:`Series` with ``float32`` or ``float16`` dtype incorrectly altering these values instead of coercing to ``float64`` dtype (:issue:`45844`) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 
a70f9b7b20d5a..9dd3c780aa59b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1062,6 +1062,8 @@ def where(self, other, cond, _downcast="infer") -> list[Block]: transpose = self.ndim == 2 + cond = extract_bool_array(cond) + # EABlocks override where values = cast(np.ndarray, self.values) orig_other = other diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index aa55a7c91d0e6..fba8978d2128c 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -1046,3 +1046,13 @@ def test_where_mask_deprecated(frame_or_series): with tm.assert_produces_warning(FutureWarning): obj.mask(mask, -1, errors="raise") + + +def test_where_producing_ea_cond_for_np_dtype(): + # GH#44014 + df = DataFrame({"a": Series([1, pd.NA, 2], dtype="Int64"), "b": [1, 2, 3]}) + result = df.where(lambda x: x.apply(lambda y: y > 1, axis=1)) + expected = DataFrame( + {"a": Series([pd.NA, pd.NA, 2], dtype="Int64"), "b": [np.nan, 2, 3]} + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index 609242db453ba..55e5db9603fe5 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -236,3 +236,19 @@ def test_invalid_input_result_names(result_names): ), ): df1.compare(df2, result_names=result_names) + + +def test_compare_ea_and_np_dtype(): + # GH#44014 + df1 = pd.DataFrame({"a": [4.0, 4], "b": [1.0, 2]}) + df2 = pd.DataFrame({"a": pd.Series([1, pd.NA], dtype="Int64"), "b": [1.0, 2]}) + result = df1.compare(df2, keep_shape=True) + expected = pd.DataFrame( + { + ("a", "self"): [4.0, np.nan], + ("a", "other"): pd.Series([1, pd.NA], dtype="Int64"), + ("b", "self"): np.nan, + ("b", "other"): np.nan, + } + ) + tm.assert_frame_equal(result, expected)