diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 9ead1e4a75d01..461b9b7d3b93c 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -213,6 +213,7 @@ Other enhancements - :meth:`.GroupBy.mean` now supports `Numba `_ execution with the ``engine`` keyword (:issue:`43731`) - :meth:`Timestamp.isoformat`, now handles the ``timespec`` argument from the base :class:``datetime`` class (:issue:`26131`) - :meth:`NaT.to_numpy` ``dtype`` argument is now respected, so ``np.timedelta64`` can be returned (:issue:`44460`) +- :meth:`DataFrame.compare` now accepts a ``suffixes`` argument that lets the user specify the suffixes for the left and right DataFrames being compared. The defaults are ``self`` and ``other`` (:issue:`44354`) - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 33df7a9f0ac1f..2967bea982a08 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7297,12 +7297,14 @@ def compare( align_axis: Axis = 1, keep_shape: bool = False, keep_equal: bool = False, + suffixes: Suffixes = ("self", "other"), ) -> DataFrame: return super().compare( other=other, align_axis=align_axis, keep_shape=keep_shape, keep_equal=keep_equal, + suffixes=suffixes, ) def combine( diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2db8be19b4399..22ebe91aad6da 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -53,6 +53,7 @@ RandomState, Renamer, StorageOptions, + Suffixes, T, TimedeltaConvertibleTypes, TimestampConvertibleTypes, @@ -8538,6 +8539,7 @@ def compare( align_axis: Axis = 1, keep_shape: bool_t = False, keep_equal: bool_t = False, + suffixes: Suffixes = ("self", "other"), ): from pandas.core.reshape.concat import concat @@ -8548,7 +8550,6 @@ def compare( ) mask = ~((self == other) | (self.isna() & other.isna())) - keys = ["self", "other"] if not keep_equal: self = self.where(mask) @@ -8569,7 +8570,7 @@ def compare( else: axis = self._get_axis_number(align_axis) - diff 
= concat([self, other], axis=axis, keys=keys) + diff = concat([self, other], axis=axis, keys=suffixes) if axis >= self.ndim: # No need to reorganize data if stacking on new axis diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index 468811eba0d39..ad0c924753e62 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -180,3 +180,20 @@ def test_compare_unaligned_objects(): df1 = pd.DataFrame(np.ones((3, 3))) df2 = pd.DataFrame(np.zeros((2, 1))) df1.compare(df2) + + +def test_compare_suffixes(): + # GH 44354 + df1 = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, + columns=["col1", "col2", "col3"], + ) + df2 = df1.copy() + df2.loc[0, "col1"] = "c" + df2.loc[2, "col3"] = 4.0 + + suffixes = ["left", "right"] + comp = df1.compare(df2, suffixes=suffixes) + + result_suffixes = comp.columns.get_level_values(1).unique() + assert result_suffixes.isin(suffixes).all(), "suffixes not equal"