From 31db27e89a340e74e1dd19f8d32a390ab19f83b7 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 3 May 2016 18:52:00 +0100 Subject: [PATCH] BUG: Respect filtered Categorical in crosstab Closes gh-12298. [ci skip] --- doc/source/whatsnew/v0.18.2.txt | 1 + pandas/tools/pivot.py | 11 +++++++++-- pandas/tools/tests/test_pivot.py | 23 +++++++++++++++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index 85209c0dfa03d..60db139d250b1 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -108,6 +108,7 @@ Performance Improvements Bug Fixes ~~~~~~~~~ - Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`) +- ``pd.crosstab`` now respects filtered ``Categorical`` objects (:issue:`12298`) diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index de79e54e22270..578d71c202772 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -439,8 +439,15 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, crosstab : DataFrame """ - index = com._maybe_make_list(index) - columns = com._maybe_make_list(columns) + def _make_list(arr): + # gh-12298: cast categoricals to object, keeping any index/name + if com.is_categorical(arr): + arr = arr.astype(object) + + return com._maybe_make_list(arr) + + index = _make_list(index) + columns = _make_list(columns) rownames = _get_names(index, rownames, prefix='row') colnames = _get_names(columns, colnames, prefix='col') diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 5ebd2e4f693cf..e084d836bf092 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -1139,6 +1139,29 @@ def test_crosstab_with_empties(self): normalize=False) tm.assert_frame_equal(nans, calculated) + def test_crosstab_filtered_categorical(self): + # see gh-12298 + df = pd.DataFrame({'col0': list('abcabc'), + 'col1': [1, 1, 2, 1, 2, 3], + 'col2': [1, 1, 0, 
1, 1, 0]}) + data = [[2, 0], [1, 1]] + columns = pd.Index([1, 2], name='col1') + index = pd.Index(['a', 'b'], name='col0') + expected = pd.DataFrame(data, columns=columns, index=index) + + # sanity check + filtered = df[df.col2 == 1] + result = pd.crosstab(filtered.col0, filtered.col1) + tm.assert_frame_equal(result, expected) + + # casting columns to Categorical shouldn't change anything + for col in df.columns: + df[col] = df[col].astype('category') + + filtered = df[df.col2 == 1] + result = pd.crosstab(filtered.col0, filtered.col1) + tm.assert_frame_equal(result, expected) + def test_crosstab_errors(self): # Issue 12578