54
54
from pandas import DataFrame
55
55
56
56
57
- # Note: We need to make sure `frame` is imported before `pivot`, otherwise
58
- # _shared_docs['pivot_table'] will not yet exist. TODO: Fix this dependency
59
- @Substitution ("\n data : DataFrame" )
60
- @Appender (_shared_docs ["pivot_table" ], indents = 1 )
61
57
def pivot_table (
62
58
data : DataFrame ,
63
59
values = None ,
@@ -71,6 +67,171 @@ def pivot_table(
71
67
observed : bool = True ,
72
68
sort : bool = True ,
73
69
) -> DataFrame :
70
+ """
71
+ Create a spreadsheet-style pivot table as a DataFrame.
72
+
73
+ The levels in the pivot table will be stored in MultiIndex objects
74
+ (hierarchical indexes) on the index and columns of the result DataFrame.
75
+
76
+ Parameters
77
+ ----------
78
+ data : DataFrame
79
+ Input pandas DataFrame object.
80
+ values : list-like or scalar, optional
81
+ Column or columns to aggregate.
82
+ index : column, Grouper, array, or list of the previous
83
+ Keys to group by on the pivot table index. If a list is passed,
84
+ it can contain any of the other types (except list). If an array is
85
+ passed, it must be the same length as the data and will be used in
86
+ the same manner as column values.
87
+ columns : column, Grouper, array, or list of the previous
88
+ Keys to group by on the pivot table column. If a list is passed,
89
+ it can contain any of the other types (except list). If an array is
90
+ passed, it must be the same length as the data and will be used in
91
+ the same manner as column values.
92
+ aggfunc : function, list of functions, dict, default "mean"
93
+ If a list of functions is passed, the resulting pivot table will have
94
+ hierarchical columns whose top level are the function names
95
+ (inferred from the function objects themselves).
96
+ If a dict is passed, the key is the column to aggregate and the value is a
97
+ function or list of functions. If ``margins=True``, aggfunc will be
98
+ used to calculate the partial aggregates.
99
+ fill_value : scalar, default None
100
+ Value to replace missing values with (in the resulting pivot table,
101
+ after aggregation).
102
+ margins : bool, default False
103
+ If ``margins=True``, special ``All`` columns and rows
104
+ will be added with partial group aggregates across the categories
105
+ on the rows and columns.
106
+ dropna : bool, default True
107
+ Do not include columns whose entries are all NaN. If True,
108
+ rows with a NaN value in any column will be omitted before
109
+ computing margins.
110
+ margins_name : str, default 'All'
111
+ Name of the row / column that will contain the totals
112
+ when margins is True.
113
+ observed : bool, default True
114
+ This only applies if any of the groupers are Categoricals.
115
+ If True: only show observed values for categorical groupers.
116
+ If False: show all values for categorical groupers.
117
+
118
+ .. versionchanged:: 3.0.0
119
+
120
+ The default value is now ``True``.
121
+
122
+ sort : bool, default True
123
+ Specifies if the result should be sorted.
124
+
125
+ .. versionadded:: 1.3.0
126
+
127
+ Returns
128
+ -------
129
+ DataFrame
130
+ An Excel style pivot table.
131
+
132
+ See Also
133
+ --------
134
+ DataFrame.pivot : Pivot without aggregation that can handle
135
+ non-numeric data.
136
+ DataFrame.melt : Unpivot a DataFrame from wide to long format,
137
+ optionally leaving identifiers set.
138
+ wide_to_long : Wide panel to long format. Less flexible but more
139
+ user-friendly than melt.
140
+
141
+ Notes
142
+ -----
143
+ Reference :ref:`the user guide <reshaping.pivot>` for more examples.
144
+
145
+ Examples
146
+ --------
147
+ >>> df = pd.DataFrame(
148
+ ... {
149
+ ... "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
150
+ ... "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
151
+ ... "C": [
152
+ ... "small",
153
+ ... "large",
154
+ ... "large",
155
+ ... "small",
156
+ ... "small",
157
+ ... "large",
158
+ ... "small",
159
+ ... "small",
160
+ ... "large",
161
+ ... ],
162
+ ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
163
+ ... "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
164
+ ... }
165
+ ... )
166
+ >>> df
167
+ A B C D E
168
+ 0 foo one small 1 2
169
+ 1 foo one large 2 4
170
+ 2 foo one large 2 5
171
+ 3 foo two small 3 5
172
+ 4 foo two small 3 6
173
+ 5 bar one large 4 6
174
+ 6 bar one small 5 8
175
+ 7 bar two small 6 9
176
+ 8 bar two large 7 9
177
+
178
+ This first example aggregates values by taking the sum.
179
+
180
+ >>> table = pd.pivot_table(
181
+ ... df, values="D", index=["A", "B"], columns=["C"], aggfunc="sum"
182
+ ... )
183
+ >>> table
184
+ C large small
185
+ A B
186
+ bar one 4.0 5.0
187
+ two 7.0 6.0
188
+ foo one 4.0 1.0
189
+ two NaN 6.0
190
+
191
+ We can also fill missing values using the `fill_value` parameter.
192
+
193
+ >>> table = pd.pivot_table(
194
+ ... df, values="D", index=["A", "B"], columns=["C"], aggfunc="sum", fill_value=0
195
+ ... )
196
+ >>> table
197
+ C large small
198
+ A B
199
+ bar one 4 5
200
+ two 7 6
201
+ foo one 4 1
202
+ two 0 6
203
+
204
+ The next example aggregates by taking the mean across multiple columns.
205
+
206
+ >>> table = pd.pivot_table(
207
+ ... df, values=["D", "E"], index=["A", "C"], aggfunc={"D": "mean", "E": "mean"}
208
+ ... )
209
+ >>> table
210
+ D E
211
+ A C
212
+ bar large 5.500000 7.500000
213
+ small 5.500000 8.500000
214
+ foo large 2.000000 4.500000
215
+ small 2.333333 4.333333
216
+
217
+ We can also calculate multiple types of aggregations for any given
218
+ value column.
219
+
220
+ >>> table = pd.pivot_table(
221
+ ... df,
222
+ ... values=["D", "E"],
223
+ ... index=["A", "C"],
224
+ ... aggfunc={"D": "mean", "E": ["min", "max", "mean"]},
225
+ ... )
226
+ >>> table
227
+ D E
228
+ mean max mean min
229
+ A C
230
+ bar large 5.500000 9 7.500000 6
231
+ small 5.500000 9 8.500000 8
232
+ foo large 2.000000 5 4.500000 4
233
+ small 2.333333 6 4.333333 2
234
+ """
74
235
index = _convert_by (index )
75
236
columns = _convert_by (columns )
76
237
0 commit comments