Skip to content

Commit da54087

Browse files
DOC: fix PR07 for pandas.pivot_table (#58896)
* DOC: fix PR07 for pandas.pivot_table * DOC: remove redundant comments Co-authored-by: mroeschke <[email protected]> --------- Co-authored-by: mroeschke <[email protected]>
1 parent f55bec1 commit da54087

File tree

2 files changed

+165
-5
lines changed

2 files changed

+165
-5
lines changed

ci/code_checks.sh

-1
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
470470
-i "pandas.merge_ordered PR07" \
471471
-i "pandas.period_range RT03,SA01" \
472472
-i "pandas.pivot PR07" \
473-
-i "pandas.pivot_table PR07" \
474473
-i "pandas.plotting.andrews_curves RT03,SA01" \
475474
-i "pandas.plotting.lag_plot RT03,SA01" \
476475
-i "pandas.plotting.scatter_matrix PR07,SA01" \

pandas/core/reshape/pivot.py

+165-4
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,6 @@
5454
from pandas import DataFrame
5555

5656

57-
# Note: We need to make sure `frame` is imported before `pivot`, otherwise
58-
# _shared_docs['pivot_table'] will not yet exist. TODO: Fix this dependency
59-
@Substitution("\ndata : DataFrame")
60-
@Appender(_shared_docs["pivot_table"], indents=1)
6157
def pivot_table(
6258
data: DataFrame,
6359
values=None,
@@ -71,6 +67,171 @@ def pivot_table(
7167
observed: bool = True,
7268
sort: bool = True,
7369
) -> DataFrame:
70+
"""
71+
Create a spreadsheet-style pivot table as a DataFrame.
72+
73+
The levels in the pivot table will be stored in MultiIndex objects
74+
(hierarchical indexes) on the index and columns of the result DataFrame.
75+
76+
Parameters
77+
----------
78+
data : DataFrame
79+
Input pandas DataFrame object.
80+
values : list-like or scalar, optional
81+
Column or columns to aggregate.
82+
index : column, Grouper, array, or list of the previous
83+
Keys to group by on the pivot table index. If a list is passed,
84+
it can contain any of the other types (except list). If an array is
85+
passed, it must be the same length as the data and will be used in
86+
the same manner as column values.
87+
columns : column, Grouper, array, or list of the previous
88+
Keys to group by on the pivot table column. If a list is passed,
89+
it can contain any of the other types (except list). If an array is
90+
passed, it must be the same length as the data and will be used in
91+
the same manner as column values.
92+
aggfunc : function, list of functions, dict, default "mean"
93+
If a list of functions is passed, the resulting pivot table will have
94+
hierarchical columns whose top level are the function names
95+
(inferred from the function objects themselves).
96+
If a dict is passed, the key is the column to aggregate and the value is a
97+
function or list of functions. If ``margins=True``, aggfunc will be
98+
used to calculate the partial aggregates.
99+
fill_value : scalar, default None
100+
Value to replace missing values with (in the resulting pivot table,
101+
after aggregation).
102+
margins : bool, default False
103+
If ``margins=True``, special ``All`` columns and rows
104+
will be added with partial group aggregates across the categories
105+
on the rows and columns.
106+
dropna : bool, default True
107+
Do not include columns whose entries are all NaN. If True,
108+
rows with a NaN value in any column will be omitted before
109+
computing margins.
110+
margins_name : str, default 'All'
111+
Name of the row / column that will contain the totals
112+
when margins is True.
113+
observed : bool, default True
114+
This only applies if any of the groupers are Categoricals.
115+
If True: only show observed values for categorical groupers.
116+
If False: show all values for categorical groupers.
117+
118+
.. versionchanged:: 3.0.0
119+
120+
The default value is now ``True``.
121+
122+
sort : bool, default True
123+
Specifies if the result should be sorted.
124+
125+
.. versionadded:: 1.3.0
126+
127+
Returns
128+
-------
129+
DataFrame
130+
An Excel style pivot table.
131+
132+
See Also
133+
--------
134+
DataFrame.pivot : Pivot without aggregation that can handle
135+
non-numeric data.
136+
DataFrame.melt : Unpivot a DataFrame from wide to long format,
137+
optionally leaving identifiers set.
138+
wide_to_long : Wide panel to long format. Less flexible but more
139+
user-friendly than melt.
140+
141+
Notes
142+
-----
143+
Reference :ref:`the user guide <reshaping.pivot>` for more examples.
144+
145+
Examples
146+
--------
147+
>>> df = pd.DataFrame(
148+
... {
149+
... "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
150+
... "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
151+
... "C": [
152+
... "small",
153+
... "large",
154+
... "large",
155+
... "small",
156+
... "small",
157+
... "large",
158+
... "small",
159+
... "small",
160+
... "large",
161+
... ],
162+
... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
163+
... "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
164+
... }
165+
... )
166+
>>> df
167+
A B C D E
168+
0 foo one small 1 2
169+
1 foo one large 2 4
170+
2 foo one large 2 5
171+
3 foo two small 3 5
172+
4 foo two small 3 6
173+
5 bar one large 4 6
174+
6 bar one small 5 8
175+
7 bar two small 6 9
176+
8 bar two large 7 9
177+
178+
This first example aggregates values by taking the sum.
179+
180+
>>> table = pd.pivot_table(
181+
... df, values="D", index=["A", "B"], columns=["C"], aggfunc="sum"
182+
... )
183+
>>> table
184+
C large small
185+
A B
186+
bar one 4.0 5.0
187+
two 7.0 6.0
188+
foo one 4.0 1.0
189+
two NaN 6.0
190+
191+
We can also fill missing values using the `fill_value` parameter.
192+
193+
>>> table = pd.pivot_table(
194+
... df, values="D", index=["A", "B"], columns=["C"], aggfunc="sum", fill_value=0
195+
... )
196+
>>> table
197+
C large small
198+
A B
199+
bar one 4 5
200+
two 7 6
201+
foo one 4 1
202+
two 0 6
203+
204+
The next example aggregates by taking the mean across multiple columns.
205+
206+
>>> table = pd.pivot_table(
207+
... df, values=["D", "E"], index=["A", "C"], aggfunc={"D": "mean", "E": "mean"}
208+
... )
209+
>>> table
210+
D E
211+
A C
212+
bar large 5.500000 7.500000
213+
small 5.500000 8.500000
214+
foo large 2.000000 4.500000
215+
small 2.333333 4.333333
216+
217+
We can also calculate multiple types of aggregations for any given
218+
value column.
219+
220+
>>> table = pd.pivot_table(
221+
... df,
222+
... values=["D", "E"],
223+
... index=["A", "C"],
224+
... aggfunc={"D": "mean", "E": ["min", "max", "mean"]},
225+
... )
226+
>>> table
227+
D E
228+
mean max mean min
229+
A C
230+
bar large 5.500000 9 7.500000 6
231+
small 5.500000 9 8.500000 8
232+
foo large 2.000000 5 4.500000 4
233+
small 2.333333 6 4.333333 2
234+
"""
74235
index = _convert_by(index)
75236
columns = _convert_by(columns)
76237

0 commit comments

Comments
 (0)