diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index 441f4b380656e..21081ee23a773 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -161,6 +161,9 @@ def time_pivot_table_categorical_observed(self): observed=True, ) + def time_pivot_table_margins_only_column(self): + self.df.pivot_table(columns=["key2", "key3"], margins=True) + class Crosstab: def setup(self): diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 1cd325dad9f07..42c5c9959af65 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -142,6 +142,7 @@ Reshaping - - Bug in :meth:`DataFrame.pivot_table` when only MultiIndexed columns is set (:issue:`17038`) +- Bug in :meth:`DataFrame.pivot_table` when ``margins`` is ``True`` and only ``columns`` is defined (:issue:`31016`) - Fix incorrect error message in :meth:`DataFrame.pivot` when ``columns`` is set to ``None``. (:issue:`30924`) - Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. 
(:issue:`18321`) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 930ff5f454a7b..e250a072766e3 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -226,15 +226,7 @@ def _add_margins( elif values: marginal_result_set = _generate_marginal_results( - table, - data, - values, - rows, - cols, - aggfunc, - observed, - grand_margin, - margins_name, + table, data, values, rows, cols, aggfunc, observed, margins_name, ) if not isinstance(marginal_result_set, tuple): return marginal_result_set @@ -303,15 +295,7 @@ def _compute_grand_margin(data, values, aggfunc, margins_name: str = "All"): def _generate_marginal_results( - table, - data, - values, - rows, - cols, - aggfunc, - observed, - grand_margin, - margins_name: str = "All", + table, data, values, rows, cols, aggfunc, observed, margins_name: str = "All", ): if len(cols) > 0: # need to "interleave" the margins @@ -345,12 +329,22 @@ def _all_key(key): table_pieces.append(piece) margin_keys.append(all_key) else: - margin = grand_margin + from pandas import DataFrame + cat_axis = 0 for key, piece in table.groupby(level=0, axis=cat_axis, observed=observed): - all_key = _all_key(key) + if len(cols) > 1: + all_key = _all_key(key) + else: + all_key = margins_name table_pieces.append(piece) - table_pieces.append(Series(margin[key], index=[all_key])) + # GH31016: compute the margin for each group by applying aggfunc + # to the piece, and use the corresponding key as its index + transformed_piece = DataFrame(piece.apply(aggfunc)).T + transformed_piece.index = Index([all_key], name=piece.index.name) + + # append the margin piece to table_pieces + table_pieces.append(transformed_piece) margin_keys.append(all_key) result = concat(table_pieces, axis=cat_axis) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 44073f56abfa1..6850c52ca05ea 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -910,6 +910,64 @@ def _check_output( 
totals = table.loc[("All", ""), item] assert totals == self.data[item].mean() + @pytest.mark.parametrize( + "columns, aggfunc, values, expected_columns", + [ + ( + "A", + np.mean, + [[5.5, 5.5, 2.2, 2.2], [8.0, 8.0, 4.4, 4.4]], + Index(["bar", "All", "foo", "All"], name="A"), + ), + ( + ["A", "B"], + "sum", + [[9, 13, 22, 5, 6, 11], [14, 18, 32, 11, 11, 22]], + MultiIndex.from_tuples( + [ + ("bar", "one"), + ("bar", "two"), + ("bar", "All"), + ("foo", "one"), + ("foo", "two"), + ("foo", "All"), + ], + names=["A", "B"], + ), + ), + ], + ) + def test_margin_with_only_columns_defined( + self, columns, aggfunc, values, expected_columns + ): + # GH 31016 + df = pd.DataFrame( + { + "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"], + "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"], + "C": [ + "small", + "large", + "large", + "small", + "small", + "large", + "small", + "small", + "large", + ], + "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], + "E": [2, 4, 5, 5, 6, 6, 8, 9, 9], + } + ) + + result = df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc) + expected = pd.DataFrame( + values, index=Index(["D", "E"]), columns=expected_columns + ) + + tm.assert_frame_equal(result, expected) + def test_margins_dtype(self): # GH 17013