DOC: fix PR07 for pandas.pivot_table (#58896)

tuhinsharma121 · mroeschke · web-flow · commit da5408750750 · 2024-06-04T11:06:56.000-07:00
* DOC: fix PR07 for pandas.pivot_table

* DOC: remove redundant comments

Co-authored-by: mroeschke <mroeschke@users.noreply.github.com>

---------

Co-authored-by: mroeschke <mroeschke@users.noreply.github.com>
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -470,7 +470,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.merge_ordered PR07" \
         -i "pandas.period_range RT03,SA01" \
         -i "pandas.pivot PR07" \
-        -i "pandas.pivot_table PR07" \
         -i "pandas.plotting.andrews_curves RT03,SA01" \
         -i "pandas.plotting.lag_plot RT03,SA01" \
         -i "pandas.plotting.scatter_matrix PR07,SA01" \
diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
@@ -54,10 +54,6 @@
     from pandas import DataFrame
 
 
-# Note: We need to make sure `frame` is imported before `pivot`, otherwise
-# _shared_docs['pivot_table'] will not yet exist.  TODO: Fix this dependency
-@Substitution("\ndata : DataFrame")
-@Appender(_shared_docs["pivot_table"], indents=1)
 def pivot_table(
     data: DataFrame,
     values=None,
@@ -71,6 +67,171 @@ def pivot_table(
     observed: bool = True,
     sort: bool = True,
 ) -> DataFrame:
+    """
+    Create a spreadsheet-style pivot table as a DataFrame.
+
+    The levels in the pivot table will be stored in MultiIndex objects
+    (hierarchical indexes) on the index and columns of the result DataFrame.
+
+    Parameters
+    ----------
+    data : DataFrame
+        Input pandas DataFrame object.
+    values : list-like or scalar, optional
+        Column or columns to aggregate.
+    index : column, Grouper, array, or list of the previous
+        Keys to group by on the pivot table index. If a list is passed,
+        it can contain any of the other types (except list). If an array is
+        passed, it must be the same length as the data and will be used in
+        the same manner as column values.
+    columns : column, Grouper, array, or list of the previous
+        Keys to group by on the pivot table column. If a list is passed,
+        it can contain any of the other types (except list). If an array is
+        passed, it must be the same length as the data and will be used in
+        the same manner as column values.
+    aggfunc : function, list of functions, dict, default "mean"
+        If a list of functions is passed, the resulting pivot table will have
+        hierarchical columns whose top level are the function names
+        (inferred from the function objects themselves).
+        If a dict is passed, the key is the column to aggregate and the value
+        is a function or list of functions. If ``margins=True``, aggfunc will
+        be used to calculate the partial aggregates.
+    fill_value : scalar, default None
+        Value to replace missing values with (in the resulting pivot table,
+        after aggregation).
+    margins : bool, default False
+        If ``margins=True``, special ``All`` columns and rows
+        will be added with partial group aggregates across the categories
+        on the rows and columns.
+    dropna : bool, default True
+        Do not include columns whose entries are all NaN. If True,
+        rows with a NaN value in any column will be omitted before
+        computing margins.
+    margins_name : str, default 'All'
+        Name of the row / column that will contain the totals
+        when margins is True.
+    observed : bool, default True
+        This only applies if any of the groupers are Categoricals.
+        If True: only show observed values for categorical groupers.
+        If False: show all values for categorical groupers.
+
+        .. versionchanged:: 3.0.0
+
+            The default value is now ``True``.
+
+    sort : bool, default True
+        Specifies if the result should be sorted.
+
+        .. versionadded:: 1.3.0
+
+    Returns
+    -------
+    DataFrame
+        An Excel style pivot table.
+
+    See Also
+    --------
+    DataFrame.pivot : Pivot without aggregation that can handle
+        non-numeric data.
+    DataFrame.melt : Unpivot a DataFrame from wide to long format,
+        optionally leaving identifiers set.
+    wide_to_long : Wide panel to long format. Less flexible but more
+        user-friendly than melt.
+
+    Notes
+    -----
+    Reference :ref:`the user guide <reshaping.pivot>` for more examples.
+
+    Examples
+    --------
+    >>> df = pd.DataFrame(
+    ...     {
+    ...         "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
+    ...         "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
+    ...         "C": [
+    ...             "small",
+    ...             "large",
+    ...             "large",
+    ...             "small",
+    ...             "small",
+    ...             "large",
+    ...             "small",
+    ...             "small",
+    ...             "large",
+    ...         ],
+    ...         "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
+    ...         "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
+    ...     }
+    ... )
+    >>> df
+         A    B      C  D  E
+    0  foo  one  small  1  2
+    1  foo  one  large  2  4
+    2  foo  one  large  2  5
+    3  foo  two  small  3  5
+    4  foo  two  small  3  6
+    5  bar  one  large  4  6
+    6  bar  one  small  5  8
+    7  bar  two  small  6  9
+    8  bar  two  large  7  9
+
+    This first example aggregates values by taking the sum.
+
+    >>> table = pd.pivot_table(
+    ...     df, values="D", index=["A", "B"], columns=["C"], aggfunc="sum"
+    ... )
+    >>> table
+    C        large  small
+    A   B
+    bar one    4.0    5.0
+        two    7.0    6.0
+    foo one    4.0    1.0
+        two    NaN    6.0
+
+    We can also fill missing values using the `fill_value` parameter.
+
+    >>> table = pd.pivot_table(
+    ...     df, values="D", index=["A", "B"], columns=["C"], aggfunc="sum", fill_value=0
+    ... )
+    >>> table
+    C        large  small
+    A   B
+    bar one      4      5
+        two      7      6
+    foo one      4      1
+        two      0      6
+
+    The next example aggregates by taking the mean across multiple columns.
+
+    >>> table = pd.pivot_table(
+    ...     df, values=["D", "E"], index=["A", "C"], aggfunc={"D": "mean", "E": "mean"}
+    ... )
+    >>> table
+                    D         E
+    A   C
+    bar large  5.500000  7.500000
+        small  5.500000  8.500000
+    foo large  2.000000  4.500000
+        small  2.333333  4.333333
+
+    We can also calculate multiple types of aggregations for any given
+    value column.
+
+    >>> table = pd.pivot_table(
+    ...     df,
+    ...     values=["D", "E"],
+    ...     index=["A", "C"],
+    ...     aggfunc={"D": "mean", "E": ["min", "max", "mean"]},
+    ... )
+    >>> table
+                      D   E
+                   mean max      mean  min
+    A   C
+    bar large  5.500000   9  7.500000    6
+        small  5.500000   9  8.500000    8
+    foo large  2.000000   5  4.500000    4
+        small  2.333333   6  4.333333    2
+    """
     index = _convert_by(index)
     columns = _convert_by(columns)