move pivot_table doc-string to DataFrame (pandas-dev#17174)

jbrockmendel · jowens · commit a7311d25b600 · 2017-09-20T14:48:27.000-07:00
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4154,6 +4154,92 @@ def pivot(self, index=None, columns=None, values=None):
         from pandas.core.reshape.reshape import pivot
         return pivot(self, index=index, columns=columns, values=values)
 
+    _shared_docs['pivot_table'] = """
+        Create a spreadsheet-style pivot table as a DataFrame. The levels in
+        the pivot table will be stored in MultiIndex objects (hierarchical
+        indexes) on the index and columns of the result DataFrame
+
+        Parameters
+        ----------%s
+        values : column to aggregate, optional
+        index : column, Grouper, array, or list of the previous
+            If an array is passed, it must be the same length as the data. The
+            list can contain any of the other types (except list).
+            Keys to group by on the pivot table index.  If an array is passed,
+            it is being used as the same manner as column values.
+        columns : column, Grouper, array, or list of the previous
+            If an array is passed, it must be the same length as the data. The
+            list can contain any of the other types (except list).
+            Keys to group by on the pivot table column.  If an array is passed,
+            it is being used as the same manner as column values.
+        aggfunc : function or list of functions, default numpy.mean
+            If list of functions passed, the resulting pivot table will have
+            hierarchical columns whose top level are the function names
+            (inferred from the function objects themselves)
+        fill_value : scalar, default None
+            Value to replace missing values with
+        margins : boolean, default False
+            Add all row / columns (e.g. for subtotal / grand totals)
+        dropna : boolean, default True
+            Do not include columns whose entries are all NaN
+        margins_name : string, default 'All'
+            Name of the row / column that will contain the totals
+            when margins is True.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo",
+        ...                          "bar", "bar", "bar", "bar"],
+        ...                    "B": ["one", "one", "one", "two", "two",
+        ...                          "one", "one", "two", "two"],
+        ...                    "C": ["small", "large", "large", "small",
+        ...                          "small", "large", "small", "small",
+        ...                          "large"],
+        ...                    "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]})
+        >>> df
+             A    B      C  D
+        0  foo  one  small  1
+        1  foo  one  large  2
+        2  foo  one  large  2
+        3  foo  two  small  3
+        4  foo  two  small  3
+        5  bar  one  large  4
+        6  bar  one  small  5
+        7  bar  two  small  6
+        8  bar  two  large  7
+
+        >>> table = pivot_table(df, values='D', index=['A', 'B'],
+        ...                     columns=['C'], aggfunc=np.sum)
+        >>> table
+        ... # doctest: +NORMALIZE_WHITESPACE
+        C        large  small
+        A   B
+        bar one    4.0    5.0
+            two    7.0    6.0
+        foo one    4.0    1.0
+            two    NaN    6.0
+
+        Returns
+        -------
+        table : DataFrame
+
+        See also
+        --------
+        DataFrame.pivot : pivot without aggregation that can handle
+            non-numeric data
+        """
+
+    @Substitution('')
+    @Appender(_shared_docs['pivot_table'])
+    def pivot_table(self, values=None, index=None, columns=None,
+                    aggfunc='mean', fill_value=None, margins=False,
+                    dropna=True, margins_name='All'):
+        from pandas.core.reshape.pivot import pivot_table
+        return pivot_table(self, values=values, index=index, columns=columns,
+                           aggfunc=aggfunc, fill_value=fill_value,
+                           margins=margins, dropna=dropna,
+                           margins_name=margins_name)
+
     def stack(self, level=-1, dropna=True):
         """
         Pivot a level of the (possibly hierarchical) column labels, returning a
diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
@@ -2,95 +2,30 @@
 
 
 from pandas.core.dtypes.common import is_list_like, is_scalar
+from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
+
 from pandas.core.reshape.concat import concat
-from pandas import Series, DataFrame, MultiIndex, Index
+from pandas.core.series import Series
 from pandas.core.groupby import Grouper
 from pandas.core.reshape.util import cartesian_product
-from pandas.core.index import _get_combined_index
+from pandas.core.index import Index, _get_combined_index
 from pandas.compat import range, lrange, zip
 from pandas import compat
 import pandas.core.common as com
+from pandas.util._decorators import Appender, Substitution
+
+from pandas.core.frame import _shared_docs
+# Note: We need to make sure `frame` is imported before `pivot`, otherwise
+# _shared_docs['pivot_table'] will not yet exist.  TODO: Fix this dependency
+
 import numpy as np
 
 
+@Substitution('\ndata : DataFrame')
+@Appender(_shared_docs['pivot_table'], indents=1)
 def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
                 fill_value=None, margins=False, dropna=True,
                 margins_name='All'):
-    """
-    Create a spreadsheet-style pivot table as a DataFrame. The levels in the
-    pivot table will be stored in MultiIndex objects (hierarchical indexes) on
-    the index and columns of the result DataFrame
-
-    Parameters
-    ----------
-    data : DataFrame
-    values : column to aggregate, optional
-    index : column, Grouper, array, or list of the previous
-        If an array is passed, it must be the same length as the data. The list
-        can contain any of the other types (except list).
-        Keys to group by on the pivot table index.  If an array is passed, it
-        is being used as the same manner as column values.
-    columns : column, Grouper, array, or list of the previous
-        If an array is passed, it must be the same length as the data. The list
-        can contain any of the other types (except list).
-        Keys to group by on the pivot table column.  If an array is passed, it
-        is being used as the same manner as column values.
-    aggfunc : function or list of functions, default numpy.mean
-        If list of functions passed, the resulting pivot table will have
-        hierarchical columns whose top level are the function names (inferred
-        from the function objects themselves)
-    fill_value : scalar, default None
-        Value to replace missing values with
-    margins : boolean, default False
-        Add all row / columns (e.g. for subtotal / grand totals)
-    dropna : boolean, default True
-        Do not include columns whose entries are all NaN
-    margins_name : string, default 'All'
-        Name of the row / column that will contain the totals
-        when margins is True.
-
-    Examples
-    --------
-    >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo",
-    ...                          "bar", "bar", "bar", "bar"],
-    ...                    "B": ["one", "one", "one", "two", "two",
-    ...                          "one", "one", "two", "two"],
-    ...                    "C": ["small", "large", "large", "small",
-    ...                          "small", "large", "small", "small",
-    ...                          "large"],
-    ...                    "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]})
-    >>> df
-         A    B      C  D
-    0  foo  one  small  1
-    1  foo  one  large  2
-    2  foo  one  large  2
-    3  foo  two  small  3
-    4  foo  two  small  3
-    5  bar  one  large  4
-    6  bar  one  small  5
-    7  bar  two  small  6
-    8  bar  two  large  7
-
-    >>> table = pivot_table(df, values='D', index=['A', 'B'],
-    ...                     columns=['C'], aggfunc=np.sum)
-    >>> table
-    ... # doctest: +NORMALIZE_WHITESPACE
-    C        large  small
-    A   B
-    bar one    4.0    5.0
-        two    7.0    6.0
-    foo one    4.0    1.0
-        two    NaN    6.0
-
-    Returns
-    -------
-    table : DataFrame
-
-    See also
-    --------
-    DataFrame.pivot : pivot without aggregation that can handle
-        non-numeric data
-    """
     index = _convert_by(index)
     columns = _convert_by(columns)
 
@@ -162,6 +97,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
         table = agged.unstack(to_unstack)
 
     if not dropna:
+        from pandas import MultiIndex
         try:
             m = MultiIndex.from_arrays(cartesian_product(table.index.levels),
                                        names=table.index.names)
@@ -176,7 +112,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
         except AttributeError:
             pass  # it's a single level or a series
 
-    if isinstance(table, DataFrame):
+    if isinstance(table, ABCDataFrame):
         table = table.sort_index(axis=1)
 
     if fill_value is not None:
@@ -197,16 +133,13 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
     if len(index) == 0 and len(columns) > 0:
         table = table.T
 
-    # GH 15193 Makse sure empty columns are removed if dropna=True
-    if isinstance(table, DataFrame) and dropna:
+    # GH 15193 Make sure empty columns are removed if dropna=True
+    if isinstance(table, ABCDataFrame) and dropna:
         table = table.dropna(how='all', axis=1)
 
     return table
 
 
-DataFrame.pivot_table = pivot_table
-
-
 def _add_margins(table, data, values, rows, cols, aggfunc,
                  margins_name='All', fill_value=None):
     if not isinstance(margins_name, compat.string_types):
@@ -230,7 +163,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
     else:
         key = margins_name
 
-    if not values and isinstance(table, Series):
+    if not values and isinstance(table, ABCSeries):
         # If there are no values and the table is a series, then there is only
         # one column in the data. Compute grand margin and return it.
         return table.append(Series({key: grand_margin[margins_name]}))
@@ -257,6 +190,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
         else:
             row_margin[k] = grand_margin[k[0]]
 
+    from pandas import DataFrame
     margin_dummy = DataFrame(row_margin, columns=[key]).T
 
     row_names = result.index.names
@@ -402,7 +336,7 @@ def _convert_by(by):
     if by is None:
         by = []
     elif (is_scalar(by) or
-          isinstance(by, (np.ndarray, Index, Series, Grouper)) or
+          isinstance(by, (np.ndarray, Index, ABCSeries, Grouper)) or
           hasattr(by, '__call__')):
         by = [by]
     else:
@@ -523,6 +457,7 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
     if values is not None and aggfunc is None:
         raise ValueError("values cannot be used without an aggfunc.")
 
+    from pandas import DataFrame
     df = DataFrame(data, index=common_idx)
     if values is None:
         df['__dummy__'] = 0
@@ -620,7 +555,7 @@ def _get_names(arrs, names, prefix='row'):
     if names is None:
         names = []
         for i, arr in enumerate(arrs):
-            if isinstance(arr, Series) and arr.name is not None:
+            if isinstance(arr, ABCSeries) and arr.name is not None:
                 names.append(arr.name)
             else:
                 names.append('%s_%d' % (prefix, i))