Skip to content

Commit 522f855

Browse files
authored
BUG: GroupBy.apply() throws erroneous ValueError with duplicate axes (#35441)
1 parent d82e540 commit 522f855

File tree

4 files changed

+25
-15
lines changed

4 files changed

+25
-15
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ Plotting
133133
Groupby/resample/rolling
134134
^^^^^^^^^^^^^^^^^^^^^^^^
135135

136+
- Bug in :meth:`DataFrameGroupBy.apply` that would sometimes raise an erroneous ``ValueError`` if the grouping axis had duplicate entries (:issue:`16646`)
136137
-
137138
-
138139

pandas/core/groupby/ops.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
211211
# group might be modified
212212
group_axes = group.axes
213213
res = f(group)
214-
if not _is_indexed_like(res, group_axes):
214+
if not _is_indexed_like(res, group_axes, axis):
215215
mutated = True
216216
result_values.append(res)
217217

@@ -897,13 +897,13 @@ def agg_series(
897897
return grouper.get_result()
898898

899899

900-
def _is_indexed_like(obj, axes) -> bool:
900+
def _is_indexed_like(obj, axes, axis: int) -> bool:
901901
if isinstance(obj, Series):
902902
if len(axes) > 1:
903903
return False
904-
return obj.index.equals(axes[0])
904+
return obj.axes[axis].equals(axes[axis])
905905
elif isinstance(obj, DataFrame):
906-
return obj.index.equals(axes[0])
906+
return obj.axes[axis].equals(axes[axis])
907907

908908
return False
909909

pandas/tests/groupby/test_apply.py

+20-7
Original file line numberDiff line numberDiff line change
@@ -63,15 +63,8 @@ def test_apply_trivial():
6363
tm.assert_frame_equal(result, expected)
6464

6565

66-
@pytest.mark.xfail(
67-
reason="GH#20066; function passed into apply "
68-
"returns a DataFrame with the same index "
69-
"as the one to create GroupBy object."
70-
)
7166
def test_apply_trivial_fail():
7267
# GH 20066
73-
# trivial apply fails if the constant dataframe has the same index
74-
# with the one used to create GroupBy object.
7568
df = pd.DataFrame(
7669
{"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]},
7770
columns=["key", "data"],
@@ -1044,3 +1037,23 @@ def test_apply_with_date_in_multiindex_does_not_convert_to_timestamp():
10441037
tm.assert_frame_equal(result, expected)
10451038
for val in result.index.levels[1]:
10461039
assert type(val) is date
1040+
1041+
1042+
def test_apply_by_cols_equals_apply_by_rows_transposed():
1043+
# GH 16646
1044+
# Operating on the columns, or transposing and operating on the rows
1045+
# should give the same result. There was previously a bug where the
1046+
# by_rows operation would work fine, but by_cols would throw a ValueError
1047+
1048+
df = pd.DataFrame(
1049+
np.random.random([6, 4]),
1050+
columns=pd.MultiIndex.from_product([["A", "B"], [1, 2]]),
1051+
)
1052+
1053+
by_rows = df.T.groupby(axis=0, level=0).apply(
1054+
lambda x: x.droplevel(axis=0, level=0)
1055+
)
1056+
by_cols = df.groupby(axis=1, level=0).apply(lambda x: x.droplevel(axis=1, level=0))
1057+
1058+
tm.assert_frame_equal(by_cols, by_rows.T)
1059+
tm.assert_frame_equal(by_cols, df)

pandas/tests/groupby/test_function.py

-4
Original file line numberDiff line numberDiff line change
@@ -940,10 +940,6 @@ def test_frame_describe_multikey(tsframe):
940940
groupedT = tsframe.groupby({"A": 0, "B": 0, "C": 1, "D": 1}, axis=1)
941941
result = groupedT.describe()
942942
expected = tsframe.describe().T
943-
expected.index = pd.MultiIndex(
944-
levels=[[0, 1], expected.index],
945-
codes=[[0, 0, 1, 1], range(len(expected.index))],
946-
)
947943
tm.assert_frame_equal(result, expected)
948944

949945

0 commit comments

Comments
 (0)