Skip to content

Commit a326359

Browse files
committed
ENH: add suffixes argument to DataFrame.compare pandas-dev#44354
1 parent edc9ff4 commit a326359

File tree

5 files changed

+52
-3
lines changed

5 files changed

+52
-3
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ Other enhancements
277277
- Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files)
278278
- :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`)
279279
- :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`)
280+
- :meth:`DataFrame.compare` now accepts a ``suffixes`` argument that lets the user specify the suffixes used for the left and right DataFrame being compared. The default is ``("self", "other")`` (:issue:`44354`)
280281
-
281282

282283
.. ---------------------------------------------------------------------------

pandas/core/frame.py

+10
Original file line numberDiff line numberDiff line change
@@ -7776,6 +7776,14 @@ def __rdivmod__(self, other) -> tuple[DataFrame, DataFrame]:
77767776
0 a c NaN NaN
77777777
2 NaN NaN 3.0 4.0
77787778
7779+
Assign suffixes
7780+
7781+
>>> df.compare(df2, suffixes=("left", "right"))
7782+
col1 col3
7783+
left right left right
7784+
0 a c NaN NaN
7785+
2 NaN NaN 3.0 4.0
7786+
77797787
Stack the differences on rows
77807788
77817789
>>> df.compare(df2, align_axis=0)
@@ -7823,12 +7831,14 @@ def compare(
78237831
align_axis: Axis = 1,
78247832
keep_shape: bool = False,
78257833
keep_equal: bool = False,
7834+
suffixes: Suffixes = ("self", "other"),
78267835
) -> DataFrame:
78277836
return super().compare(
78287837
other=other,
78297838
align_axis=align_axis,
78307839
keep_shape=keep_shape,
78317840
keep_equal=keep_equal,
7841+
suffixes=suffixes,
78327842
)
78337843

78347844
def combine(

pandas/core/generic.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
Renamer,
5959
SortKind,
6060
StorageOptions,
61+
Suffixes,
6162
T,
6263
TimedeltaConvertibleTypes,
6364
TimestampConvertibleTypes,
@@ -8965,6 +8966,7 @@ def compare(
89658966
align_axis: Axis = 1,
89668967
keep_shape: bool_t = False,
89678968
keep_equal: bool_t = False,
8969+
suffixes: Suffixes = ("self", "other"),
89688970
):
89698971
from pandas.core.reshape.concat import concat
89708972

@@ -8975,7 +8977,6 @@ def compare(
89758977
)
89768978

89778979
mask = ~((self == other) | (self.isna() & other.isna()))
8978-
keys = ["self", "other"]
89798980

89808981
if not keep_equal:
89818982
self = self.where(mask)
@@ -8990,13 +8991,21 @@ def compare(
89908991
else:
89918992
self = self[mask]
89928993
other = other[mask]
8994+
if not isinstance(suffixes, tuple):
8995+
warnings.warn(
8996+
f"Passing 'suffixes' as a {type(suffixes)}, is not supported and may give "
8997+
"unexpected results. Provide 'suffixes' as a tuple instead. In the "
8998+
"future a 'TypeError' will be raised.",
8999+
FutureWarning,
9000+
stacklevel=find_stack_level(),
9001+
)
89939002

89949003
if align_axis in (1, "columns"): # This is needed for Series
89959004
axis = 1
89969005
else:
89979006
axis = self._get_axis_number(align_axis)
89989007

8999-
diff = concat([self, other], axis=axis, keys=keys)
9008+
diff = concat([self, other], axis=axis, keys=suffixes)
90009009

90019010
if axis >= self.ndim:
90029011
# No need to reorganize data if stacking on new axis

pandas/core/shared_docs.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
] = """
5454
Compare to another {klass} and show the differences.
5555
56-
.. versionadded:: 1.1.0
56+
.. versionadded:: 1.5.0
5757
5858
Parameters
5959
----------
@@ -75,6 +75,10 @@
7575
keep_equal : bool, default False
7676
If true, the result keeps values that are equal.
7777
Otherwise, equal values are shown as NaNs.
78+
79+
suffixes : tuple, default ('self', 'other')
80+
Set the dataframes' names in the comparison.
81+
7882
"""
7983

8084
_shared_docs[

pandas/tests/frame/methods/test_compare.py

+25
Original file line numberDiff line numberDiff line change
@@ -180,3 +180,28 @@ def test_compare_unaligned_objects():
180180
df1 = pd.DataFrame(np.ones((3, 3)))
181181
df2 = pd.DataFrame(np.zeros((2, 1)))
182182
df1.compare(df2)
183+
184+
185+
def test_compare_suffixes():
    """Check that ``suffixes`` replaces the default "self"/"other" labels."""
    # GH#44354
    df1 = pd.DataFrame(
        {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]},
        columns=["col1", "col2", "col3"],
    )
    df2 = df1.copy()
    df2.loc[0, "col1"] = "c"
    df2.loc[2, "col3"] = 4.0

    # Pass a tuple: compare() emits a FutureWarning for any other container.
    suffixes = ("left", "right")
    comp = df1.compare(df2, suffixes=suffixes)
    expected = pd.DataFrame(
        {
            ("col1", "left"): {0: "a", 2: np.nan},
            ("col1", "right"): {0: "c", 2: np.nan},
            ("col3", "left"): {0: np.nan, 2: 3.0},
            # Row 2 of df2["col3"] was set to 4.0 above, so the differing
            # value must show up on the "right" side.
            ("col3", "right"): {0: np.nan, 2: 4.0},
        }
    )
    tm.assert_frame_equal(comp, expected)

    # The second column level must consist solely of the requested suffixes.
    result_suffixes = comp.columns.get_level_values(1).unique()
    assert result_suffixes.isin(suffixes).all(), "suffixes not equal"

0 commit comments

Comments
 (0)