diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index 21765b3f621ce..9ed2c42610b69 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -445,6 +445,16 @@ If ``crosstab`` receives only two Series, it will provide a frequency table. pd.crosstab(df.A, df.B) +Any input passed containing ``Categorical`` data will have **all** of its +categories included in the cross-tabulation, even if the actual data does +not contain any instances of a particular category. + +.. ipython:: python + + foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']) + bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f']) + pd.crosstab(foo, bar) + Normalization ~~~~~~~~~~~~~ diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index de79e54e22270..a4e6cc404a457 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -410,7 +410,11 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, Notes ----- Any Series passed will have their name attributes used unless row or column - names for the cross-tabulation are specified + names for the cross-tabulation are specified. + + Any input passed containing Categorical data will have **all** of its + categories included in the cross-tabulation, even if the actual data does + not contain any instances of a particular category. In the event that there aren't overlapping indexes an empty DataFrame will be returned. @@ -434,6 +438,16 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, bar 1 2 1 0 foo 2 2 1 2 + >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']) + >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f']) + >>> crosstab(foo, bar) # 'c' and 'f' are not represented in the data, + ... # but they are still included in the output + col_0 d e f + row_0 + a 1 0 0 + b 0 1 0 + c 0 0 0 + Returns ------- crosstab : DataFrame