diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 721bcb0758992..6dfee1a0542aa 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -141,6 +141,8 @@ Reshaping - - Bug in :meth:`DataFrame.pivot_table` when only MultiIndexed columns is set (:issue:`17038`) +- Bug in :func:`crosstab` where the output kept a dummy :class:`MultiIndex` as its columns when both inputs were Series with tuple names (:issue:`18321`) + Sparse ^^^^^^ diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 7109f23761188..13df39cc0011b 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -581,6 +581,8 @@ def crosstab( from pandas import DataFrame df = DataFrame(data, index=common_idx) + original_df_cols = df.columns + if values is None: df["__dummy__"] = 0 kwargs = {"aggfunc": len, "fill_value": 0} @@ -589,7 +591,7 @@ def crosstab( kwargs = {"aggfunc": aggfunc} table = df.pivot_table( - "__dummy__", + ["__dummy__"], index=rownames, columns=colnames, margins=margins, @@ -598,6 +600,12 @@ def crosstab( **kwargs, ) + # GH18321, after pivoting, an extra top level of column index of `__dummy__` is + # created, and this extra level should not be included in the further steps + if not table.empty: + cols_diff = df.columns.difference(original_df_cols)[0] + table = table[cols_diff] + # Post-process if normalize is not False: table = _normalize( diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index e3a57da450334..a2e6a19996668 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2557,6 +2557,19 @@ def test_crosstab_tuple_name(self, names): result = pd.crosstab(s1, s2) tm.assert_frame_equal(result, expected) + def test_crosstab_both_tuple_names(self): + # GH 18321 + s1 = pd.Series(range(3), name=("a", "b")) + s2 = pd.Series(range(3), name=("c", "d")) + + expected = pd.DataFrame( + np.eye(3, dtype="int64"), 
index=pd.Index(range(3), name=("a", "b")), + columns=pd.Index(range(3), name=("c", "d")), + ) + result = crosstab(s1, s2) + tm.assert_frame_equal(result, expected) + def test_crosstab_unsorted_order(self): df = pd.DataFrame({"b": [3, 1, 2], "a": [5, 4, 6]}, index=["C", "A", "B"]) result = pd.crosstab(df.index, [df.b, df.a])