From 3319dadaacb28e3fb5da1959d8fd9179c75d9dd7 Mon Sep 17 00:00:00 2001 From: Erfan Nariman Date: Tue, 9 Nov 2021 08:05:33 +0100 Subject: [PATCH 1/2] first working version --- pandas/core/frame.py | 2 ++ pandas/core/generic.py | 5 +++-- pandas/tests/frame/methods/test_compare.py | 17 +++++++++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1f26b6d9ae6ae..fa6ef8a849606 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7246,12 +7246,14 @@ def compare( align_axis: Axis = 1, keep_shape: bool = False, keep_equal: bool = False, + suffixes: Suffixes = ("self", "other"), ) -> DataFrame: return super().compare( other=other, align_axis=align_axis, keep_shape=keep_shape, keep_equal=keep_equal, + suffixes=suffixes, ) def combine( diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 93bf70c27f8ff..5f23ec1dcbae8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -53,6 +53,7 @@ RandomState, Renamer, StorageOptions, + Suffixes, T, TimedeltaConvertibleTypes, TimestampConvertibleTypes, @@ -8501,6 +8502,7 @@ def compare( align_axis: Axis = 1, keep_shape: bool_t = False, keep_equal: bool_t = False, + suffixes: Suffixes = ("self", "other"), ): from pandas.core.reshape.concat import concat @@ -8511,7 +8513,6 @@ def compare( ) mask = ~((self == other) | (self.isna() & other.isna())) - keys = ["self", "other"] if not keep_equal: self = self.where(mask) @@ -8532,7 +8533,7 @@ def compare( else: axis = self._get_axis_number(align_axis) - diff = concat([self, other], axis=axis, keys=keys) + diff = concat([self, other], axis=axis, keys=suffixes) if axis >= self.ndim: # No need to reorganize data if stacking on new axis diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index 468811eba0d39..ad0c924753e62 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -180,3 +180,20 @@ 
def test_compare_unaligned_objects(): df1 = pd.DataFrame(np.ones((3, 3))) df2 = pd.DataFrame(np.zeros((2, 1))) df1.compare(df2) + + +def test_compare_suffixes(): + # GH + df1 = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, + columns=["col1", "col2", "col3"], + ) + df2 = df1.copy() + df2.loc[0, "col1"] = "c" + df2.loc[2, "col3"] = 4.0 + + suffixes = ["left", "right"] + comp = df1.compare(df2, suffixes=suffixes) + + result_suffixes = comp.columns.get_level_values(1).unique() + assert result_suffixes.isin(suffixes).all(), "suffixes not equal" From 4418e64b9fc96285a93520078dfce7e0a520acbd Mon Sep 17 00:00:00 2001 From: Erfan Nariman Date: Mon, 15 Nov 2021 17:44:11 +0100 Subject: [PATCH 2/2] add whatsnew --- doc/source/whatsnew/v1.4.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 7828c479e800f..8bb5b22d274a6 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -211,6 +211,7 @@ Other enhancements - :meth:`.GroupBy.mean` now supports `Numba `_ execution with the ``engine`` keyword (:issue:`43731`) - :meth:`Timestamp.isoformat`, now handles the ``timespec`` argument from the base :class:``datetime`` class (:issue:`26131`) - :meth:`NaT.to_numpy` ``dtype`` argument is now respected, so ``np.timedelta64`` can be returned (:issue:`44460`) +- :meth:`DataFrame.compare` now accepts a ``suffixes`` argument that lets the user specify the suffixes used for the left and right DataFrames being compared. The default is ``("self", "other")`` (:issue:`44354`) - .. ---------------------------------------------------------------------------