Skip to content

BUG #43767 GroupBy resampler fix #47671

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ Other enhancements
- Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files)
- :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`)
- :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`)
- :meth:`DataFrame.compare` now accepts a ``suffixes`` argument that lets the user specify the labels used for the left and right DataFrame being compared. The default is ``('self', 'other')`` (:issue:`44354`)
-

.. ---------------------------------------------------------------------------
Expand Down
10 changes: 10 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7776,6 +7776,14 @@ def __rdivmod__(self, other) -> tuple[DataFrame, DataFrame]:
0 a c NaN NaN
2 NaN NaN 3.0 4.0

Assign suffixes

>>> df.compare(df2, suffixes=("left", "right"))
col1 col3
left right left right
0 a c NaN NaN
2 NaN NaN 3.0 4.0

Stack the differences on rows

>>> df.compare(df2, align_axis=0)
Expand Down Expand Up @@ -7823,12 +7831,14 @@ def compare(
align_axis: Axis = 1,
keep_shape: bool = False,
keep_equal: bool = False,
suffixes: Suffixes = ("self", "other"),
) -> DataFrame:
return super().compare(
other=other,
align_axis=align_axis,
keep_shape=keep_shape,
keep_equal=keep_equal,
suffixes=suffixes,
)

def combine(
Expand Down
10 changes: 8 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
Renamer,
SortKind,
StorageOptions,
Suffixes,
T,
TimedeltaConvertibleTypes,
TimestampConvertibleTypes,
Expand Down Expand Up @@ -8965,6 +8966,7 @@ def compare(
align_axis: Axis = 1,
keep_shape: bool_t = False,
keep_equal: bool_t = False,
suffixes: Suffixes = ("self", "other"),
):
from pandas.core.reshape.concat import concat

Expand All @@ -8975,7 +8977,6 @@ def compare(
)

mask = ~((self == other) | (self.isna() & other.isna()))
keys = ["self", "other"]

if not keep_equal:
self = self.where(mask)
Expand All @@ -8990,13 +8991,18 @@ def compare(
else:
self = self[mask]
other = other[mask]
if not isinstance(suffixes, tuple):
raise TypeError(
f"Passing 'suffixes' as a {type(suffixes)}, is not "
"supported Provide 'suffixes' as a tuple instead."
)

if align_axis in (1, "columns"): # This is needed for Series
axis = 1
else:
axis = self._get_axis_number(align_axis)

diff = concat([self, other], axis=axis, keys=keys)
diff = concat([self, other], axis=axis, keys=suffixes)

if axis >= self.ndim:
# No need to reorganize data if stacking on new axis
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@
from pandas._typing import (
NumpySorter,
NumpyValueArrayLike,
Suffixes,
)

from pandas.core.frame import DataFrame
Expand Down Expand Up @@ -3236,12 +3237,14 @@ def compare(
align_axis: Axis = 1,
keep_shape: bool = False,
keep_equal: bool = False,
suffixes: Suffixes = ("self", "other"),
) -> DataFrame | Series:
return super().compare(
other=other,
align_axis=align_axis,
keep_shape=keep_shape,
keep_equal=keep_equal,
suffixes=suffixes,
)

def combine(self, other, func, fill_value=None) -> Series:
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/shared_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@
keep_equal : bool, default False
If true, the result keeps values that are equal.
Otherwise, equal values are shown as NaNs.

suffixes : tuple, default ('self', 'other')
Names to use for the two objects in the comparison.

.. versionadded:: 1.5.0
"""

_shared_docs[
Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/frame/methods/test_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,27 @@ def test_compare_unaligned_objects():
df1 = pd.DataFrame(np.ones((3, 3)))
df2 = pd.DataFrame(np.zeros((2, 1)))
df1.compare(df2)


def test_compare_suffixes():
    """Check that ``compare`` labels its output with the given ``suffixes``.

    The two frames differ in ``col1`` (row 0) and ``col3`` (row 2); the
    expected result uses "left"/"right" as the second element of each
    column key instead of the default "self"/"other".
    """
    # GH 44354
    left_data = {
        "col1": ["a", "b", "c"],
        "col2": [1.0, 2.0, np.nan],
        "col3": [1.0, 2.0, 3.0],
    }
    right_data = {
        "col1": ["c", "b", "c"],
        "col2": [1.0, 2.0, np.nan],
        "col3": [1.0, 2.0, np.nan],
    }
    left_frame = pd.DataFrame(left_data)
    right_frame = pd.DataFrame(right_data)

    # Keys are (original column, suffix); values map row label -> value.
    expected_columns = {
        ("col1", "left"): {0: "a", 2: np.nan},
        ("col1", "right"): {0: "c", 2: np.nan},
        ("col3", "left"): {0: np.nan, 2: 3.0},
        ("col3", "right"): {0: np.nan, 2: np.nan},
    }
    expected = pd.DataFrame(expected_columns)

    result = left_frame.compare(right_frame, suffixes=("left", "right"))
    tm.assert_frame_equal(result, expected)