+pd.DataFrame.crosstab

ResidentMario · ResidentMario · commit 20771d10b87c · 2017-02-26T11:46:06.000-05:00
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -108,9 +108,9 @@
             Name or list of names which refer to the axis items.""",
     versionadded_to_excel='',
     versionadded_melt='\n.. versionadded:: 0.20.0\n',
-    other_melt='melt')
-
-import pdb; pdb.set_trace()
+    other_melt='melt',
+    versionadded_crosstab='\n.. versionadded:: 0.20.0\n',
+    other_crosstab='crosstab')
 
 _numeric_only_doc = """numeric_only : boolean, default None
     Include only float, int, boolean data. If None, will attempt to use
@@ -4138,6 +4138,101 @@ def melt(self, id_vars=None, value_vars=None, var_name=None,
                     var_name=var_name, value_name=value_name,
                     col_level=col_level)
 
+    _shared_docs['crosstab'] = """
+    Compute a simple cross-tabulation of two (or more) factors. By default
+    computes a frequency table of the factors unless an array of values and an
+    aggregation function are passed.
+
+    %(versionadded_crosstab)s
+
+    Parameters
+    ----------
+    index : array-like, Series, or list of arrays/Series
+        Values to group by in the rows
+    columns : array-like, Series, or list of arrays/Series
+        Values to group by in the columns
+    values : array-like, optional
+        Array of values to aggregate according to the factors.
+        Requires `aggfunc` be specified.
+    aggfunc : function, optional
+        If specified, requires `values` be specified as well
+    rownames : sequence, default None
+        If passed, must match number of row arrays passed
+    colnames : sequence, default None
+        If passed, must match number of column arrays passed
+    margins : boolean, default False
+        Add row/column margins (subtotals)
+    dropna : boolean, default True
+        Do not include columns whose entries are all NaN
+    normalize : boolean, {'all', 'index', 'columns'}, or {0,1}, default False
+        Normalize by dividing all values by the sum of values.
+
+        - If passed 'all' or `True`, will normalize over all values.
+        - If passed 'index' will normalize over each row.
+        - If passed 'columns' will normalize over each column.
+        - If margins is `True`, will also normalize margin values.
+
+        .. versionadded:: 0.18.1
+
+
+    Notes
+    -----
+    Any Series passed will have their name attributes used unless row or column
+    names for the cross-tabulation are specified.
+
+    Any input passed containing Categorical data will have **all** of its
+    categories included in the cross-tabulation, even if the actual data does
+    not contain any instances of a particular category.
+
+    In the event that there aren't overlapping indexes an empty DataFrame will
+    be returned.
+
+    See also
+    --------
+    %(other_crosstab)s
+
+    Examples
+    --------
+    >>> a
+    array([foo, foo, foo, foo, bar, bar,
+           bar, bar, foo, foo, foo], dtype=object)
+    >>> b
+    array([one, one, one, two, one, one,
+           one, two, two, two, one], dtype=object)
+    >>> c
+    array([dull, dull, shiny, dull, dull, shiny,
+           shiny, dull, shiny, shiny, shiny], dtype=object)
+
+    >>> crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c'])
+    b    one          two
+    c    dull  shiny  dull  shiny
+    a
+    bar  1     2      1     0
+    foo  2     2      1     2
+
+    >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])
+    >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f'])
+    >>> crosstab(foo, bar)  # 'c' and 'f' are not represented in the data,
+                            # but they still will be counted in the output
+    col_0  d  e  f
+    row_0
+    a      1  0  0
+    b      0  1  0
+    c      0  0  0
+
+    Returns
+    -------
+    crosstab : DataFrame
+    """
+
+    @Appender(_shared_docs['crosstab'] % _shared_doc_kwargs)
+    def crosstab(self, columns, values=None, rownames=None, colnames=None,
+                 aggfunc=None, margins=False, dropna=True, normalize=False):
+        # Forwarded in pivot.crosstab's argument order; self is its `index`.
+        from pandas.tools.pivot import crosstab as _crosstab
+        return _crosstab(self, columns, values, rownames, colnames, aggfunc,
+                         margins, dropna, normalize)
+
     # ----------------------------------------------------------------------
     # Time series-related
 
diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py
@@ -28,8 +28,8 @@
 
 from pandas.core.frame import _shared_docs
 from pandas.util.decorators import Appender
-_shared_docs_kwargs = dict(
-    versionadded_melt="", other_melt='DataFrame.melt')
+_shared_docs_kwargs = {'versionadded_melt': "",
+                       'other_melt': 'DataFrame.melt'}
 
 from pandas.core.index import MultiIndex, _get_na_value
 
diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py
@@ -10,6 +10,11 @@
 import pandas.core.common as com
 import numpy as np
 
+from pandas.core.frame import _shared_docs
+from pandas.util.decorators import Appender
+_shared_docs_kwargs = {'versionadded_crosstab': "",
+                       'other_crosstab': "DataFrame.crosstab"}
+
 
 def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
                 fill_value=None, margins=False, dropna=True,
@@ -381,89 +386,9 @@ def _convert_by(by):
     return by
 
 
+@Appender(_shared_docs['crosstab'] % _shared_docs_kwargs)
 def crosstab(index, columns, values=None, rownames=None, colnames=None,
              aggfunc=None, margins=False, dropna=True, normalize=False):
-    """
-    Compute a simple cross-tabulation of two (or more) factors. By default
-    computes a frequency table of the factors unless an array of values and an
-    aggregation function are passed
-
-    Parameters
-    ----------
-    index : array-like, Series, or list of arrays/Series
-        Values to group by in the rows
-    columns : array-like, Series, or list of arrays/Series
-        Values to group by in the columns
-    values : array-like, optional
-        Array of values to aggregate according to the factors.
-        Requires `aggfunc` be specified.
-    aggfunc : function, optional
-        If specified, requires `values` be specified as well
-    rownames : sequence, default None
-        If passed, must match number of row arrays passed
-    colnames : sequence, default None
-        If passed, must match number of column arrays passed
-    margins : boolean, default False
-        Add row/column margins (subtotals)
-    dropna : boolean, default True
-        Do not include columns whose entries are all NaN
-    normalize : boolean, {'all', 'index', 'columns'}, or {0,1}, default False
-        Normalize by dividing all values by the sum of values.
-
-        - If passed 'all' or `True`, will normalize over all values.
-        - If passed 'index' will normalize over each row.
-        - If passed 'columns' will normalize over each column.
-        - If margins is `True`, will also normalize margin values.
-
-        .. versionadded:: 0.18.1
-
-
-    Notes
-    -----
-    Any Series passed will have their name attributes used unless row or column
-    names for the cross-tabulation are specified.
-
-    Any input passed containing Categorical data will have **all** of its
-    categories included in the cross-tabulation, even if the actual data does
-    not contain any instances of a particular category.
-
-    In the event that there aren't overlapping indexes an empty DataFrame will
-    be returned.
-
-    Examples
-    --------
-    >>> a
-    array([foo, foo, foo, foo, bar, bar,
-           bar, bar, foo, foo, foo], dtype=object)
-    >>> b
-    array([one, one, one, two, one, one,
-           one, two, two, two, one], dtype=object)
-    >>> c
-    array([dull, dull, shiny, dull, dull, shiny,
-           shiny, dull, shiny, shiny, shiny], dtype=object)
-
-    >>> crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c'])
-    b    one          two
-    c    dull  shiny  dull  shiny
-    a
-    bar  1     2      1     0
-    foo  2     2      1     2
-
-    >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])
-    >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f'])
-    >>> crosstab(foo, bar)  # 'c' and 'f' are not represented in the data,
-                            # but they still will be counted in the output
-    col_0  d  e  f
-    row_0
-    a      1  0  0
-    b      0  1  0
-    c      0  0  0
-
-    Returns
-    -------
-    crosstab : DataFrame
-    """
-
     index = com._maybe_make_list(index)
     columns = com._maybe_make_list(columns)