2
2
3
3
4
4
from pandas .core .dtypes .common import is_list_like , is_scalar
5
+ from pandas .core .dtypes .generic import ABCDataFrame , ABCSeries
6
+
5
7
from pandas .core .reshape .concat import concat
6
- from pandas import Series , DataFrame , MultiIndex , Index
8
+ from pandas . core . series import Series
7
9
from pandas .core .groupby import Grouper
8
10
from pandas .core .reshape .util import cartesian_product
9
- from pandas .core .index import _get_combined_index
11
+ from pandas .core .index import Index , _get_combined_index
10
12
from pandas .compat import range , lrange , zip
11
13
from pandas import compat
12
14
import pandas .core .common as com
15
+ from pandas .util ._decorators import Appender , Substitution
16
+
17
+ from pandas .core .frame import _shared_docs
18
+ # Note: We need to make sure `frame` is imported before `pivot`, otherwise
19
+ # _shared_docs['pivot_table'] will not yet exist. TODO: Fix this dependency
20
+
13
21
import numpy as np
14
22
15
23
24
+ @Substitution ('\n data : DataFrame' )
25
+ @Appender (_shared_docs ['pivot_table' ], indents = 1 )
16
26
def pivot_table (data , values = None , index = None , columns = None , aggfunc = 'mean' ,
17
27
fill_value = None , margins = False , dropna = True ,
18
28
margins_name = 'All' ):
19
- """
20
- Create a spreadsheet-style pivot table as a DataFrame. The levels in the
21
- pivot table will be stored in MultiIndex objects (hierarchical indexes) on
22
- the index and columns of the result DataFrame
23
-
24
- Parameters
25
- ----------
26
- data : DataFrame
27
- values : column to aggregate, optional
28
- index : column, Grouper, array, or list of the previous
29
- If an array is passed, it must be the same length as the data. The list
30
- can contain any of the other types (except list).
31
- Keys to group by on the pivot table index. If an array is passed, it
32
- is used in the same manner as column values.
33
- columns : column, Grouper, array, or list of the previous
34
- If an array is passed, it must be the same length as the data. The list
35
- can contain any of the other types (except list).
36
- Keys to group by on the pivot table column. If an array is passed, it
37
- is used in the same manner as column values.
38
- aggfunc : function or list of functions, default numpy.mean
39
- If list of functions passed, the resulting pivot table will have
40
- hierarchical columns whose top level are the function names (inferred
41
- from the function objects themselves)
42
- fill_value : scalar, default None
43
- Value to replace missing values with
44
- margins : boolean, default False
45
- Add all row / columns (e.g. for subtotal / grand totals)
46
- dropna : boolean, default True
47
- Do not include columns whose entries are all NaN
48
- margins_name : string, default 'All'
49
- Name of the row / column that will contain the totals
50
- when margins is True.
51
-
52
- Examples
53
- --------
54
- >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo",
55
- ... "bar", "bar", "bar", "bar"],
56
- ... "B": ["one", "one", "one", "two", "two",
57
- ... "one", "one", "two", "two"],
58
- ... "C": ["small", "large", "large", "small",
59
- ... "small", "large", "small", "small",
60
- ... "large"],
61
- ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]})
62
- >>> df
63
- A B C D
64
- 0 foo one small 1
65
- 1 foo one large 2
66
- 2 foo one large 2
67
- 3 foo two small 3
68
- 4 foo two small 3
69
- 5 bar one large 4
70
- 6 bar one small 5
71
- 7 bar two small 6
72
- 8 bar two large 7
73
-
74
- >>> table = pivot_table(df, values='D', index=['A', 'B'],
75
- ... columns=['C'], aggfunc=np.sum)
76
- >>> table
77
- ... # doctest: +NORMALIZE_WHITESPACE
78
- C large small
79
- A B
80
- bar one 4.0 5.0
81
- two 7.0 6.0
82
- foo one 4.0 1.0
83
- two NaN 6.0
84
-
85
- Returns
86
- -------
87
- table : DataFrame
88
-
89
- See Also
90
- --------
91
- DataFrame.pivot : pivot without aggregation that can handle
92
- non-numeric data
93
- """
94
29
index = _convert_by (index )
95
30
columns = _convert_by (columns )
96
31
@@ -162,6 +97,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
162
97
table = agged .unstack (to_unstack )
163
98
164
99
if not dropna :
100
+ from pandas import MultiIndex
165
101
try :
166
102
m = MultiIndex .from_arrays (cartesian_product (table .index .levels ),
167
103
names = table .index .names )
@@ -176,7 +112,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
176
112
except AttributeError :
177
113
pass # it's a single level or a series
178
114
179
- if isinstance (table , DataFrame ):
115
+ if isinstance (table , ABCDataFrame ):
180
116
table = table .sort_index (axis = 1 )
181
117
182
118
if fill_value is not None :
@@ -197,16 +133,13 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
197
133
if len (index ) == 0 and len (columns ) > 0 :
198
134
table = table .T
199
135
200
- # GH 15193 Makse sure empty columns are removed if dropna=True
201
- if isinstance (table , DataFrame ) and dropna :
136
+ # GH 15193 Make sure empty columns are removed if dropna=True
137
+ if isinstance (table , ABCDataFrame ) and dropna :
202
138
table = table .dropna (how = 'all' , axis = 1 )
203
139
204
140
return table
205
141
206
142
207
- DataFrame .pivot_table = pivot_table
208
-
209
-
210
143
def _add_margins (table , data , values , rows , cols , aggfunc ,
211
144
margins_name = 'All' , fill_value = None ):
212
145
if not isinstance (margins_name , compat .string_types ):
@@ -230,7 +163,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
230
163
else :
231
164
key = margins_name
232
165
233
- if not values and isinstance (table , Series ):
166
+ if not values and isinstance (table , ABCSeries ):
234
167
# If there are no values and the table is a series, then there is only
235
168
# one column in the data. Compute grand margin and return it.
236
169
return table .append (Series ({key : grand_margin [margins_name ]}))
@@ -257,6 +190,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
257
190
else :
258
191
row_margin [k ] = grand_margin [k [0 ]]
259
192
193
+ from pandas import DataFrame
260
194
margin_dummy = DataFrame (row_margin , columns = [key ]).T
261
195
262
196
row_names = result .index .names
@@ -402,7 +336,7 @@ def _convert_by(by):
402
336
if by is None :
403
337
by = []
404
338
elif (is_scalar (by ) or
405
- isinstance (by , (np .ndarray , Index , Series , Grouper )) or
339
+ isinstance (by , (np .ndarray , Index , ABCSeries , Grouper )) or
406
340
hasattr (by , '__call__' )):
407
341
by = [by ]
408
342
else :
@@ -523,6 +457,7 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
523
457
if values is not None and aggfunc is None :
524
458
raise ValueError ("values cannot be used without an aggfunc." )
525
459
460
+ from pandas import DataFrame
526
461
df = DataFrame (data , index = common_idx )
527
462
if values is None :
528
463
df ['__dummy__' ] = 0
@@ -620,7 +555,7 @@ def _get_names(arrs, names, prefix='row'):
620
555
if names is None :
621
556
names = []
622
557
for i , arr in enumerate (arrs ):
623
- if isinstance (arr , Series ) and arr .name is not None :
558
+ if isinstance (arr , ABCSeries ) and arr .name is not None :
624
559
names .append (arr .name )
625
560
else :
626
561
names .append ('%s_%d' % (prefix , i ))
0 commit comments