Skip to content

Commit 06850a1

Browse files
jbrockmendel authored and jreback committed
move pivot_table doc-string to DataFrame (#17174)
1 parent d11fae6 commit 06850a1

File tree

2 files changed

+107
-86
lines changed

2 files changed

+107
-86
lines changed

pandas/core/frame.py

+86
Original file line number | Diff line number | Diff line change
@@ -4154,6 +4154,92 @@ def pivot(self, index=None, columns=None, values=None):
41544154
from pandas.core.reshape.reshape import pivot
41554155
return pivot(self, index=index, columns=columns, values=values)
41564156

4157+
_shared_docs['pivot_table'] = """
4158+
Create a spreadsheet-style pivot table as a DataFrame. The levels in
4159+
the pivot table will be stored in MultiIndex objects (hierarchical
4160+
indexes) on the index and columns of the result DataFrame.
4161+
4162+
Parameters
4163+
----------%s
4164+
values : column to aggregate, optional
4165+
index : column, Grouper, array, or list of the previous
4166+
If an array is passed, it must be the same length as the data. The
4167+
list can contain any of the other types (except list).
4168+
Keys to group by on the pivot table index. If an array is passed,
4169+
it is used in the same manner as column values.
4170+
columns : column, Grouper, array, or list of the previous
4171+
If an array is passed, it must be the same length as the data. The
4172+
list can contain any of the other types (except list).
4173+
Keys to group by on the pivot table column. If an array is passed,
4174+
it is used in the same manner as column values.
4175+
aggfunc : function or list of functions, default numpy.mean
4176+
If list of functions passed, the resulting pivot table will have
4177+
hierarchical columns whose top level are the function names
4178+
(inferred from the function objects themselves)
4179+
fill_value : scalar, default None
4180+
Value to replace missing values with
4181+
margins : boolean, default False
4182+
Add all rows / columns (e.g. for subtotals / grand totals)
4183+
dropna : boolean, default True
4184+
Do not include columns whose entries are all NaN
4185+
margins_name : string, default 'All'
4186+
Name of the row / column that will contain the totals
4187+
when margins is True.
4188+
4189+
Examples
4190+
--------
4191+
>>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo",
4192+
... "bar", "bar", "bar", "bar"],
4193+
... "B": ["one", "one", "one", "two", "two",
4194+
... "one", "one", "two", "two"],
4195+
... "C": ["small", "large", "large", "small",
4196+
... "small", "large", "small", "small",
4197+
... "large"],
4198+
... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]})
4199+
>>> df
4200+
A B C D
4201+
0 foo one small 1
4202+
1 foo one large 2
4203+
2 foo one large 2
4204+
3 foo two small 3
4205+
4 foo two small 3
4206+
5 bar one large 4
4207+
6 bar one small 5
4208+
7 bar two small 6
4209+
8 bar two large 7
4210+
4211+
>>> table = pivot_table(df, values='D', index=['A', 'B'],
4212+
... columns=['C'], aggfunc=np.sum)
4213+
>>> table
4214+
... # doctest: +NORMALIZE_WHITESPACE
4215+
C large small
4216+
A B
4217+
bar one 4.0 5.0
4218+
two 7.0 6.0
4219+
foo one 4.0 1.0
4220+
two NaN 6.0
4221+
4222+
Returns
4223+
-------
4224+
table : DataFrame
4225+
4226+
See also
4227+
--------
4228+
DataFrame.pivot : pivot without aggregation that can handle
4229+
non-numeric data
4230+
"""
4231+
4232+
@Substitution('')
4233+
@Appender(_shared_docs['pivot_table'])
4234+
def pivot_table(self, values=None, index=None, columns=None,
4235+
aggfunc='mean', fill_value=None, margins=False,
4236+
dropna=True, margins_name='All'):
4237+
from pandas.core.reshape.pivot import pivot_table
4238+
return pivot_table(self, values=values, index=index, columns=columns,
4239+
aggfunc=aggfunc, fill_value=fill_value,
4240+
margins=margins, dropna=dropna,
4241+
margins_name=margins_name)
4242+
41574243
def stack(self, level=-1, dropna=True):
41584244
"""
41594245
Pivot a level of the (possibly hierarchical) column labels, returning a

pandas/core/reshape/pivot.py

+21-86
Original file line number | Diff line number | Diff line change
@@ -2,95 +2,30 @@
22

33

44
from pandas.core.dtypes.common import is_list_like, is_scalar
5+
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
6+
57
from pandas.core.reshape.concat import concat
6-
from pandas import Series, DataFrame, MultiIndex, Index
8+
from pandas.core.series import Series
79
from pandas.core.groupby import Grouper
810
from pandas.core.reshape.util import cartesian_product
9-
from pandas.core.index import _get_combined_index
11+
from pandas.core.index import Index, _get_combined_index
1012
from pandas.compat import range, lrange, zip
1113
from pandas import compat
1214
import pandas.core.common as com
15+
from pandas.util._decorators import Appender, Substitution
16+
17+
from pandas.core.frame import _shared_docs
18+
# Note: We need to make sure `frame` is imported before `pivot`, otherwise
19+
# _shared_docs['pivot_table'] will not yet exist. TODO: Fix this dependency
20+
1321
import numpy as np
1422

1523

24+
@Substitution('\ndata : DataFrame')
25+
@Appender(_shared_docs['pivot_table'], indents=1)
1626
def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
1727
fill_value=None, margins=False, dropna=True,
1828
margins_name='All'):
19-
"""
20-
Create a spreadsheet-style pivot table as a DataFrame. The levels in the
21-
pivot table will be stored in MultiIndex objects (hierarchical indexes) on
22-
the index and columns of the result DataFrame
23-
24-
Parameters
25-
----------
26-
data : DataFrame
27-
values : column to aggregate, optional
28-
index : column, Grouper, array, or list of the previous
29-
If an array is passed, it must be the same length as the data. The list
30-
can contain any of the other types (except list).
31-
Keys to group by on the pivot table index. If an array is passed, it
32-
is being used as the same manner as column values.
33-
columns : column, Grouper, array, or list of the previous
34-
If an array is passed, it must be the same length as the data. The list
35-
can contain any of the other types (except list).
36-
Keys to group by on the pivot table column. If an array is passed, it
37-
is being used as the same manner as column values.
38-
aggfunc : function or list of functions, default numpy.mean
39-
If list of functions passed, the resulting pivot table will have
40-
hierarchical columns whose top level are the function names (inferred
41-
from the function objects themselves)
42-
fill_value : scalar, default None
43-
Value to replace missing values with
44-
margins : boolean, default False
45-
Add all row / columns (e.g. for subtotal / grand totals)
46-
dropna : boolean, default True
47-
Do not include columns whose entries are all NaN
48-
margins_name : string, default 'All'
49-
Name of the row / column that will contain the totals
50-
when margins is True.
51-
52-
Examples
53-
--------
54-
>>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo",
55-
... "bar", "bar", "bar", "bar"],
56-
... "B": ["one", "one", "one", "two", "two",
57-
... "one", "one", "two", "two"],
58-
... "C": ["small", "large", "large", "small",
59-
... "small", "large", "small", "small",
60-
... "large"],
61-
... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]})
62-
>>> df
63-
A B C D
64-
0 foo one small 1
65-
1 foo one large 2
66-
2 foo one large 2
67-
3 foo two small 3
68-
4 foo two small 3
69-
5 bar one large 4
70-
6 bar one small 5
71-
7 bar two small 6
72-
8 bar two large 7
73-
74-
>>> table = pivot_table(df, values='D', index=['A', 'B'],
75-
... columns=['C'], aggfunc=np.sum)
76-
>>> table
77-
... # doctest: +NORMALIZE_WHITESPACE
78-
C large small
79-
A B
80-
bar one 4.0 5.0
81-
two 7.0 6.0
82-
foo one 4.0 1.0
83-
two NaN 6.0
84-
85-
Returns
86-
-------
87-
table : DataFrame
88-
89-
See also
90-
--------
91-
DataFrame.pivot : pivot without aggregation that can handle
92-
non-numeric data
93-
"""
9429
index = _convert_by(index)
9530
columns = _convert_by(columns)
9631

@@ -162,6 +97,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
16297
table = agged.unstack(to_unstack)
16398

16499
if not dropna:
100+
from pandas import MultiIndex
165101
try:
166102
m = MultiIndex.from_arrays(cartesian_product(table.index.levels),
167103
names=table.index.names)
@@ -176,7 +112,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
176112
except AttributeError:
177113
pass # it's a single level or a series
178114

179-
if isinstance(table, DataFrame):
115+
if isinstance(table, ABCDataFrame):
180116
table = table.sort_index(axis=1)
181117

182118
if fill_value is not None:
@@ -197,16 +133,13 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
197133
if len(index) == 0 and len(columns) > 0:
198134
table = table.T
199135

200-
# GH 15193 Makse sure empty columns are removed if dropna=True
201-
if isinstance(table, DataFrame) and dropna:
136+
# GH 15193 Make sure empty columns are removed if dropna=True
137+
if isinstance(table, ABCDataFrame) and dropna:
202138
table = table.dropna(how='all', axis=1)
203139

204140
return table
205141

206142

207-
DataFrame.pivot_table = pivot_table
208-
209-
210143
def _add_margins(table, data, values, rows, cols, aggfunc,
211144
margins_name='All', fill_value=None):
212145
if not isinstance(margins_name, compat.string_types):
@@ -230,7 +163,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
230163
else:
231164
key = margins_name
232165

233-
if not values and isinstance(table, Series):
166+
if not values and isinstance(table, ABCSeries):
234167
# If there are no values and the table is a series, then there is only
235168
# one column in the data. Compute grand margin and return it.
236169
return table.append(Series({key: grand_margin[margins_name]}))
@@ -257,6 +190,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
257190
else:
258191
row_margin[k] = grand_margin[k[0]]
259192

193+
from pandas import DataFrame
260194
margin_dummy = DataFrame(row_margin, columns=[key]).T
261195

262196
row_names = result.index.names
@@ -402,7 +336,7 @@ def _convert_by(by):
402336
if by is None:
403337
by = []
404338
elif (is_scalar(by) or
405-
isinstance(by, (np.ndarray, Index, Series, Grouper)) or
339+
isinstance(by, (np.ndarray, Index, ABCSeries, Grouper)) or
406340
hasattr(by, '__call__')):
407341
by = [by]
408342
else:
@@ -523,6 +457,7 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
523457
if values is not None and aggfunc is None:
524458
raise ValueError("values cannot be used without an aggfunc.")
525459

460+
from pandas import DataFrame
526461
df = DataFrame(data, index=common_idx)
527462
if values is None:
528463
df['__dummy__'] = 0
@@ -620,7 +555,7 @@ def _get_names(arrs, names, prefix='row'):
620555
if names is None:
621556
names = []
622557
for i, arr in enumerate(arrs):
623-
if isinstance(arr, Series) and arr.name is not None:
558+
if isinstance(arr, ABCSeries) and arr.name is not None:
624559
names.append(arr.name)
625560
else:
626561
names.append('%s_%d' % (prefix, i))

0 commit comments

Comments
 (0)