Skip to content

Commit a1ac051

Browse files
authored
BUG: pd.crosstab fails when passed multiple columns, margins True and normalize True (#35150)
1 parent bdc7bb1 commit a1ac051

File tree

3 files changed

+49
-4
lines changed

3 files changed

+49
-4
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ Reshaping
258258

259259
- Bug in :meth:`DataFrame.pivot_table` with ``aggfunc='count'`` or ``aggfunc='sum'`` returning ``NaN`` for missing categories when pivoted on a ``Categorical``. Now returning ``0`` (:issue:`31422`)
260260
- Bug in :func:`union_indexes` where input index names are not preserved in some cases. Affects :func:`concat` and :class:`DataFrame` constructor (:issue:`13475`)
261+
- Bug in :func:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`)
261262
-
262263

263264
Sparse

pandas/core/reshape/pivot.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -670,12 +670,11 @@ def _normalize(table, normalize, margins: bool, margins_name="All"):
670670
# keep index and column of pivoted table
671671
table_index = table.index
672672
table_columns = table.columns
673+
last_ind_or_col = table.iloc[-1, :].name
673674

674-
# check if margin name is in (for MI cases) or equal to last
675+
# raise if the margin name is neither contained in (for MI cases) nor equal to
675676
# the name of the table's last row; otherwise save the column and index margins
676-
if (margins_name not in table.iloc[-1, :].name) | (
677-
margins_name != table.iloc[:, -1].name
678-
):
677+
if (margins_name not in last_ind_or_col) & (margins_name != last_ind_or_col):
679678
raise ValueError(f"{margins_name} not in pivoted DataFrame")
680679
column_margin = table.iloc[:-1, -1]
681680
index_margin = table.iloc[-1, :-1]

pandas/tests/reshape/test_crosstab.py

+45
Original file line numberDiff line numberDiff line change
@@ -698,3 +698,48 @@ def test_margin_normalize(self):
698698
names=["A", "B"],
699699
)
700700
tm.assert_frame_equal(result, expected)
701+
702+
def test_margin_normalize_multiple_columns(self):
703+
# GH 35144
704+
# use multiple columns with margins and normalization
705+
df = DataFrame(
706+
{
707+
"A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
708+
"B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
709+
"C": [
710+
"small",
711+
"large",
712+
"large",
713+
"small",
714+
"small",
715+
"large",
716+
"small",
717+
"small",
718+
"large",
719+
],
720+
"D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
721+
"E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
722+
}
723+
)
724+
result = crosstab(
725+
index=df.C,
726+
columns=[df.A, df.B],
727+
margins=True,
728+
margins_name="margin",
729+
normalize=True,
730+
)
731+
expected = DataFrame(
732+
[
733+
[0.111111, 0.111111, 0.222222, 0.000000, 0.444444],
734+
[0.111111, 0.111111, 0.111111, 0.222222, 0.555556],
735+
[0.222222, 0.222222, 0.333333, 0.222222, 1.0],
736+
],
737+
index=["large", "small", "margin"],
738+
)
739+
expected.columns = MultiIndex(
740+
levels=[["bar", "foo", "margin"], ["", "one", "two"]],
741+
codes=[[0, 0, 1, 1, 2], [1, 2, 1, 2, 0]],
742+
names=["A", "B"],
743+
)
744+
expected.index.name = "C"
745+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)