|
7 | 7 | from pandas import DataFrame, Series, Index, MultiIndex, Grouper
|
8 | 8 | from pandas.tools.merge import concat
|
9 | 9 | from pandas.tools.pivot import pivot_table, crosstab
|
| 10 | +from pandas.tools.pivot import DEFAULT_MARGIN_COLUMN_NAME |
10 | 11 | from pandas.compat import range, u, product
|
11 | 12 | import pandas.util.testing as tm
|
12 | 13 |
|
@@ -224,82 +225,106 @@ def test_pivot_with_tz(self):
|
224 | 225 | tm.assert_frame_equal(pv, expected)
|
225 | 226 |
|
226 | 227 | def test_margins(self):
|
227 |
| - def _check_output(res, col, index=['A', 'B'], columns=['C']): |
228 |
| - cmarg = res['All'][:-1] |
229 |
| - exp = self.data.groupby(index)[col].mean() |
230 |
| - tm.assert_series_equal(cmarg, exp, check_names=False) |
231 |
| - self.assertEqual(cmarg.name, 'All') |
232 |
| - |
233 |
| - res = res.sortlevel() |
234 |
| - rmarg = res.xs(('All', ''))[:-1] |
235 |
| - exp = self.data.groupby(columns)[col].mean() |
236 |
| - tm.assert_series_equal(rmarg, exp, check_names=False) |
237 |
| - self.assertEqual(rmarg.name, ('All', '')) |
238 |
| - |
239 |
| - gmarg = res['All']['All', ''] |
240 |
| - exp = self.data[col].mean() |
241 |
| - self.assertEqual(gmarg, exp) |
| 228 | + def _check_output(result, values_col, index=['A', 'B'], |
| 229 | + columns=['C'], |
| 230 | + margins_col=DEFAULT_MARGIN_COLUMN_NAME): |
| 231 | + col_margins = result.ix[:-1, margins_col] |
| 232 | + expected_col_margins = self.data.groupby(index)[values_col].mean() |
| 233 | + tm.assert_series_equal(col_margins, expected_col_margins, |
| 234 | + check_names=False) |
| 235 | + self.assertEqual(col_margins.name, margins_col) |
| 236 | + |
| 237 | + result = result.sortlevel() |
| 238 | + index_margins = result.ix[(margins_col, '')].iloc[:-1] |
| 239 | + expected_ix_margins = self.data.groupby(columns)[values_col].mean() |
| 240 | + tm.assert_series_equal(index_margins, expected_ix_margins, |
| 241 | + check_names=False) |
| 242 | + self.assertEqual(index_margins.name, (margins_col, '')) |
| 243 | + |
| 244 | + grand_total_margins = result.loc[(margins_col, ''), margins_col] |
| 245 | + expected_total_margins = self.data[values_col].mean() |
| 246 | + self.assertEqual(grand_total_margins, expected_total_margins) |
242 | 247 |
|
243 | 248 | # column specified
|
244 |
| - table = self.data.pivot_table('D', index=['A', 'B'], columns='C', |
245 |
| - margins=True, aggfunc=np.mean) |
246 |
| - _check_output(table, 'D') |
| 249 | + result = self.data.pivot_table(values='D', index=['A', 'B'], |
| 250 | + columns='C', |
| 251 | + margins=True, aggfunc=np.mean) |
| 252 | + _check_output(result, 'D') |
| 253 | + |
| 254 | + # Set a different margins_column (not 'All') |
| 255 | + result = self.data.pivot_table(values='D', index=['A', 'B'], |
| 256 | + columns='C', |
| 257 | + margins=True, aggfunc=np.mean, |
| 258 | + margins_column='Totals') |
| 259 | + _check_output(result, 'D', margins_col='Totals') |
247 | 260 |
|
248 | 261 | # no column specified
|
249 | 262 | table = self.data.pivot_table(index=['A', 'B'], columns='C',
|
250 | 263 | margins=True, aggfunc=np.mean)
|
251 |
| - for valcol in table.columns.levels[0]: |
252 |
| - _check_output(table[valcol], valcol) |
| 264 | + for value_col in table.columns.levels[0]: |
| 265 | + _check_output(table[value_col], value_col) |
253 | 266 |
|
254 | 267 | # no col
|
255 | 268 |
|
256 | 269 | # to help with a buglet
|
257 | 270 | self.data.columns = [k * 2 for k in self.data.columns]
|
258 | 271 | table = self.data.pivot_table(index=['AA', 'BB'], margins=True,
|
259 | 272 | aggfunc=np.mean)
|
260 |
| - for valcol in table.columns: |
261 |
| - gmarg = table[valcol]['All', ''] |
262 |
| - self.assertEqual(gmarg, self.data[valcol].mean()) |
263 |
| - |
264 |
| - # this is OK |
265 |
| - table = self.data.pivot_table(index=['AA', 'BB'], margins=True, |
266 |
| - aggfunc='mean') |
| 273 | + for value_col in table.columns: |
| 274 | + totals = table.loc[(DEFAULT_MARGIN_COLUMN_NAME, ''), value_col] |
| 275 | + self.assertEqual(totals, self.data[value_col].mean()) |
267 | 276 |
|
268 | 277 | # no rows
|
269 | 278 | rtable = self.data.pivot_table(columns=['AA', 'BB'], margins=True,
|
270 | 279 | aggfunc=np.mean)
|
271 | 280 | tm.assert_isinstance(rtable, Series)
|
| 281 | + |
| 282 | + table = self.data.pivot_table(index=['AA', 'BB'], margins=True, |
| 283 | + aggfunc='mean') |
272 | 284 | for item in ['DD', 'EE', 'FF']:
|
273 |
| - gmarg = table[item]['All', ''] |
274 |
| - self.assertEqual(gmarg, self.data[item].mean()) |
| 285 | + totals = table.loc[(DEFAULT_MARGIN_COLUMN_NAME, ''), item] |
| 286 | + self.assertEqual(totals, self.data[item].mean()) |
275 | 287 |
|
276 | 288 | # issue number #8349: pivot_table with margins and dictionary aggfunc
|
| 289 | + data = [ |
| 290 | + {'JOB': 'Worker', 'NAME': 'Bob', 'YEAR': 2013, |
| 291 | + 'MONTH': 12, 'DAYS': 3, 'SALARY': 17}, |
| 292 | + {'JOB': 'Employ', 'NAME': |
| 293 | + 'Mary', 'YEAR': 2013, 'MONTH': 12, 'DAYS': 5, 'SALARY': 23}, |
| 294 | + {'JOB': 'Worker', 'NAME': 'Bob', 'YEAR': 2014, |
| 295 | + 'MONTH': 1, 'DAYS': 10, 'SALARY': 100}, |
| 296 | + {'JOB': 'Worker', 'NAME': 'Bob', 'YEAR': 2014, |
| 297 | + 'MONTH': 1, 'DAYS': 11, 'SALARY': 110}, |
| 298 | + {'JOB': 'Employ', 'NAME': 'Mary', 'YEAR': 2014, |
| 299 | + 'MONTH': 1, 'DAYS': 15, 'SALARY': 200}, |
| 300 | + {'JOB': 'Worker', 'NAME': 'Bob', 'YEAR': 2014, |
| 301 | + 'MONTH': 2, 'DAYS': 8, 'SALARY': 80}, |
| 302 | + {'JOB': 'Employ', 'NAME': 'Mary', 'YEAR': 2014, |
| 303 | + 'MONTH': 2, 'DAYS': 5, 'SALARY': 190}, |
| 304 | + ] |
277 | 305 |
|
278 |
| - df=DataFrame([ {'JOB':'Worker','NAME':'Bob' ,'YEAR':2013,'MONTH':12,'DAYS': 3,'SALARY': 17}, |
279 |
| - {'JOB':'Employ','NAME':'Mary','YEAR':2013,'MONTH':12,'DAYS': 5,'SALARY': 23}, |
280 |
| - {'JOB':'Worker','NAME':'Bob' ,'YEAR':2014,'MONTH': 1,'DAYS':10,'SALARY':100}, |
281 |
| - {'JOB':'Worker','NAME':'Bob' ,'YEAR':2014,'MONTH': 1,'DAYS':11,'SALARY':110}, |
282 |
| - {'JOB':'Employ','NAME':'Mary','YEAR':2014,'MONTH': 1,'DAYS':15,'SALARY':200}, |
283 |
| - {'JOB':'Worker','NAME':'Bob' ,'YEAR':2014,'MONTH': 2,'DAYS': 8,'SALARY': 80}, |
284 |
| - {'JOB':'Employ','NAME':'Mary','YEAR':2014,'MONTH': 2,'DAYS': 5,'SALARY':190} ]) |
285 |
| - |
286 |
| - df=df.set_index(['JOB','NAME','YEAR','MONTH'],drop=False,append=False) |
287 |
| - |
288 |
| - rs=df.pivot_table( index=['JOB','NAME'], |
289 |
| - columns=['YEAR','MONTH'], |
290 |
| - values=['DAYS','SALARY'], |
291 |
| - aggfunc={'DAYS':'mean','SALARY':'sum'}, |
292 |
| - margins=True) |
| 306 | + df = DataFrame(data) |
293 | 307 |
|
294 |
| - ex=df.pivot_table(index=['JOB','NAME'],columns=['YEAR','MONTH'],values=['DAYS'],aggfunc='mean',margins=True) |
| 308 | + df = df.set_index(['JOB', 'NAME', 'YEAR', 'MONTH'], drop=False, |
| 309 | + append=False) |
295 | 310 |
|
296 |
| - tm.assert_frame_equal(rs['DAYS'], ex['DAYS']) |
| 311 | + result = df.pivot_table(index=['JOB', 'NAME'], |
| 312 | + columns=['YEAR', 'MONTH'], |
| 313 | + values=['DAYS', 'SALARY'], |
| 314 | + aggfunc={'DAYS': 'mean', 'SALARY': 'sum'}, |
| 315 | + margins=True) |
297 | 316 |
|
298 |
| - ex=df.pivot_table(index=['JOB','NAME'],columns=['YEAR','MONTH'],values=['SALARY'],aggfunc='sum',margins=True) |
| 317 | + expected = df.pivot_table(index=['JOB', 'NAME'], |
| 318 | + columns=['YEAR', 'MONTH'], values=['DAYS'], |
| 319 | + aggfunc='mean', margins=True) |
299 | 320 |
|
300 |
| - tm.assert_frame_equal(rs['SALARY'], ex['SALARY']) |
| 321 | + tm.assert_frame_equal(result['DAYS'], expected['DAYS']) |
301 | 322 |
|
| 323 | + expected = df.pivot_table(index=['JOB', 'NAME'], |
| 324 | + columns=['YEAR', 'MONTH'], values=['SALARY'], |
| 325 | + aggfunc='sum', margins=True) |
302 | 326 |
|
| 327 | + tm.assert_frame_equal(result['SALARY'], expected['SALARY']) |
303 | 328 |
|
304 | 329 | def test_pivot_integer_columns(self):
|
305 | 330 | # caused by upstream bug in unstack
|
@@ -402,6 +427,24 @@ def test_margins_no_values_two_row_two_cols(self):
|
402 | 427 | result = self.data[['A', 'B', 'C', 'D']].pivot_table(index=['A', 'B'], columns=['C', 'D'], aggfunc=len, margins=True)
|
403 | 428 | self.assertEqual(result.All.tolist(), [3.0, 1.0, 4.0, 3.0, 11.0])
|
404 | 429 |
|
| 430 | + def test_pivot_table_with_margins_set_margin_column(self): |
| 431 | + for margin_column in ['foo', 'one']: |
| 432 | + with self.assertRaises(ValueError): |
| 433 | + # multi-index index |
| 434 | + pivot_table(self.data, values='D', index=['A', 'B'], |
| 435 | + columns=['C'], margins=True, |
| 436 | + margins_column=margin_column) |
| 437 | + with self.assertRaises(ValueError): |
| 438 | + # multi-index column |
| 439 | + pivot_table(self.data, values='D', index=['C'], |
| 440 | + columns=['A', 'B'], margins=True, |
| 441 | + margins_column=margin_column) |
| 442 | + with self.assertRaises(ValueError): |
| 443 | + # non-multi-index index/column |
| 444 | + pivot_table(self.data, values='D', index=['A'], |
| 445 | + columns=['B'], margins=True, |
| 446 | + margins_column=margin_column) |
| 447 | + |
405 | 448 | def test_pivot_timegrouper(self):
|
406 | 449 | df = DataFrame({
|
407 | 450 | 'Branch' : 'A A A A A A A B'.split(),
|
@@ -678,17 +721,17 @@ def test_crosstab_margins(self):
|
678 | 721 | self.assertEqual(result.index.names, ('a',))
|
679 | 722 | self.assertEqual(result.columns.names, ['b', 'c'])
|
680 | 723 |
|
681 |
| - all_cols = result['All', ''] |
| 724 | + all_cols = result[DEFAULT_MARGIN_COLUMN_NAME, ''] |
682 | 725 | exp_cols = df.groupby(['a']).size().astype('i8')
|
683 |
| - exp_cols = exp_cols.append(Series([len(df)], index=['All'])) |
684 |
| - exp_cols.name = ('All', '') |
| 726 | + exp_cols = exp_cols.append(Series([len(df)], index=[DEFAULT_MARGIN_COLUMN_NAME])) |
| 727 | + exp_cols.name = (DEFAULT_MARGIN_COLUMN_NAME, '') |
685 | 728 |
|
686 | 729 | tm.assert_series_equal(all_cols, exp_cols)
|
687 | 730 |
|
688 |
| - all_rows = result.ix['All'] |
| 731 | + all_rows = result.ix[DEFAULT_MARGIN_COLUMN_NAME] |
689 | 732 | exp_rows = df.groupby(['b', 'c']).size().astype('i8')
|
690 |
| - exp_rows = exp_rows.append(Series([len(df)], index=[('All', '')])) |
691 |
| - exp_rows.name = 'All' |
| 733 | + exp_rows = exp_rows.append(Series([len(df)], index=[(DEFAULT_MARGIN_COLUMN_NAME, '')])) |
| 734 | + exp_rows.name = DEFAULT_MARGIN_COLUMN_NAME |
692 | 735 |
|
693 | 736 | exp_rows = exp_rows.reindex(all_rows.index)
|
694 | 737 | exp_rows = exp_rows.fillna(0).astype(np.int64)
|
|
0 commit comments