Skip to content

Commit a890239

Browse files
charlesdong1991 authored and WillAyd committed
BUG: pd.crosstab(s1, s2) handles column index incorrectly when both series have tuple names (#30978)
1 parent 641346c commit a890239

File tree

3 files changed

+24
-1
lines changed

3 files changed

+24
-1
lines changed

doc/source/whatsnew/v1.1.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,8 @@ Reshaping
142142

143143
-
144144
- Bug in :meth:`DataFrame.pivot_table` when only MultiIndexed columns are set (:issue:`17038`)
145+
- Bug in :func:`crosstab` where the output kept a dummy MultiIndex as columns when both inputs are Series with tuple names (:issue:`18321`)
146+
145147

146148
Sparse
147149
^^^^^^

pandas/core/reshape/pivot.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,8 @@ def crosstab(
581581
from pandas import DataFrame
582582

583583
df = DataFrame(data, index=common_idx)
584+
original_df_cols = df.columns
585+
584586
if values is None:
585587
df["__dummy__"] = 0
586588
kwargs = {"aggfunc": len, "fill_value": 0}
@@ -589,7 +591,7 @@ def crosstab(
589591
kwargs = {"aggfunc": aggfunc}
590592

591593
table = df.pivot_table(
592-
"__dummy__",
594+
["__dummy__"],
593595
index=rownames,
594596
columns=colnames,
595597
margins=margins,
@@ -598,6 +600,12 @@ def crosstab(
598600
**kwargs,
599601
)
600602

603+
# GH18321: after pivoting, the columns gain an extra top level labelled
604+
# `__dummy__`; this extra level should not be included in the further steps
605+
if not table.empty:
606+
cols_diff = df.columns.difference(original_df_cols)[0]
607+
table = table[cols_diff]
608+
601609
# Post-process
602610
if normalize is not False:
603611
table = _normalize(

pandas/tests/reshape/test_pivot.py

+13
Original file line numberDiff line numberDiff line change
@@ -2557,6 +2557,19 @@ def test_crosstab_tuple_name(self, names):
25572557
result = pd.crosstab(s1, s2)
25582558
tm.assert_frame_equal(result, expected)
25592559

2560+
def test_crosstab_both_tuple_names(self):
2561+
# GH 18321
2562+
s1 = pd.Series(range(3), name=("a", "b"))
2563+
s2 = pd.Series(range(3), name=("c", "d"))
2564+
2565+
expected = pd.DataFrame(
2566+
np.eye(3, dtype="int64"),
2567+
index=pd.Index(range(3), name=("a", "b")),
2568+
columns=pd.Index(range(3), name=("c", "d")),
2569+
)
2570+
result = crosstab(s1, s2)
2571+
tm.assert_frame_equal(result, expected)
2572+
25602573
def test_crosstab_unsorted_order(self):
25612574
df = pd.DataFrame({"b": [3, 1, 2], "a": [5, 4, 6]}, index=["C", "A", "B"])
25622575
result = pd.crosstab(df.index, [df.b, df.a])

0 commit comments

Comments
 (0)