Skip to content

Commit 66ef8df

Browse files
author
Christoph Möhl
committed
pandas-dev#15150: compute crosstab margins conditionally, based on the normalization type, and correct the expected margin values in test_margin_dropna
1 parent eeb7416 commit 66ef8df

File tree

2 files changed

+59
-49
lines changed

2 files changed

+59
-49
lines changed

pandas/core/reshape/pivot.py

+48 -40
Original file line number | Diff line number | Diff line change
@@ -200,22 +200,11 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
200200

201201
def _add_margins(table, data, values, rows, cols, aggfunc,
202202
margins_name='All'):
203-
if not isinstance(margins_name, compat.string_types):
204-
raise ValueError('margins_name argument must be a string')
205203

206-
exception_msg = 'Conflicting name "{0}" in margins'.format(margins_name)
207-
for level in table.index.names:
208-
if margins_name in table.index.get_level_values(level):
209-
raise ValueError(exception_msg)
204+
_check_margins_name(margins_name, table)
210205

211206
grand_margin = _compute_grand_margin(data, values, aggfunc, margins_name)
212207

213-
# could be passed a Series object with no 'columns'
214-
if hasattr(table, 'columns'):
215-
for level in table.columns.names[1:]:
216-
if margins_name in table.columns.get_level_values(level):
217-
raise ValueError(exception_msg)
218-
219208
if len(rows) > 1:
220209
key = (margins_name,) + ('',) * (len(rows) - 1)
221210
else:
@@ -264,6 +253,21 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
264253
return result
265254

266255

256+
def _check_margins_name(margins_name, table):
257+
if not isinstance(margins_name, compat.string_types):
258+
raise ValueError('margins_name argument must be a string')
259+
260+
exception_msg = 'Conflicting name "{0}" in margins'.format(margins_name)
261+
for level in table.index.names:
262+
if margins_name in table.index.get_level_values(level):
263+
raise ValueError(exception_msg)
264+
# could be passed a Series object with no 'columns'
265+
if hasattr(table, 'columns'):
266+
for level in table.columns.names[1:]:
267+
if margins_name in table.columns.get_level_values(level):
268+
raise ValueError(exception_msg)
269+
270+
267271
def _compute_grand_margin(data, values, aggfunc,
268272
margins_name='All'):
269273

@@ -521,13 +525,29 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
521525
kwargs = {'aggfunc': aggfunc}
522526

523527
table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
524-
margins=margins, margins_name=margins_name,
528+
margins=False, margins_name=margins_name,
525529
dropna=dropna, **kwargs)
526530

527531
# GH 17013:
528532
if values is None and margins:
529533
table = table.fillna(0).astype(np.int64)
530534

535+
536+
if margins:
537+
_check_margins_name(margins_name, table)
538+
539+
if normalize != 'index':
540+
# add margin column
541+
table[margins_name] = table.sum(axis=1)
542+
543+
if normalize != 'columns':
544+
# add margin row
545+
if type(table.index) is MultiIndex:
546+
table = table.transpose()
547+
table[margins_name] = table.sum(axis=1)
548+
table = table.transpose()
549+
else:
550+
table.loc[margins_name] = table.sum(axis=0)
531551
# Post-process
532552
if normalize is not False:
533553
table = _normalize(table, normalize=normalize, margins=margins,
@@ -544,49 +564,37 @@ def _normalize(table, normalize, margins, margins_name='All'):
544564
try:
545565
normalize = axis_subs[normalize]
546566
except KeyError:
547-
raise ValueError("Not a valid normalize argument")
567+
raise ValueError(
568+
"Not a valid normalize argument: {!r}".format(normalize))
569+
570+
# Actual Normalizations
571+
normalizers = {
572+
'columns': lambda x: x / x.sum(),
573+
'index': lambda x: x.div(x.sum(axis=1), axis=0)
574+
}
548575

549576
if margins is False:
550-
# Actual Normalizations
551-
normalizers = {
552-
'all': lambda x: x / x.sum(axis=1).sum(axis=0),
553-
'columns': lambda x: x / x.sum(),
554-
'index': lambda x: x.div(x.sum(axis=1), axis=0)
555-
}
577+
normalizers['all'] = lambda x: x / x.sum(axis=1).sum(axis=0)
556578

557579
elif margins is True:
558-
# skip margin rows and/or cols for normalization
559-
normalizers = {
560-
'all': lambda x: x / x.iloc[:-1, :-1].sum(axis=1).sum(axis=0),
561-
'columns': lambda x: x.div(x.iloc[:-1, :].sum()).iloc[:-1, :],
562-
'index': lambda x: (x.div(x.iloc[:, :-1].sum(axis=1),
563-
axis=0)).iloc[:, :-1]
564-
}
580+
# skip margin rows and cols for normalization
581+
normalizers['all'] = lambda x: x / x.iloc[:-1, :-1].sum(axis=1)\
582+
.sum(axis=0)
565583

566584
else:
567-
raise ValueError("Not a valid margins argument")
585+
raise ValueError("Not a valid margins argument: {!r}".format(margins))
568586

569587
normalizers[True] = normalizers['all']
570588

571589
try:
572590
f = normalizers[normalize]
573591
except KeyError:
574-
raise ValueError("Not a valid normalize argument")
592+
raise ValueError(
593+
"Not a valid normalize argument: {!r}".format(normalize))
575594

576595
table = f(table)
577596
table = table.fillna(0)
578597

579-
if margins is True:
580-
# reset index to ensure default index dtype
581-
if normalize == 'index':
582-
colnames = table.columns.names
583-
table.columns = Index(table.columns.tolist())
584-
table.columns.names = colnames
585-
if normalize == 'columns':
586-
rownames = table.index.names
587-
table.index = Index(table.index.tolist())
588-
table.index.names = rownames
589-
590598
return table
591599

592600

pandas/tests/reshape/test_pivot.py

+11 -9
Original file line number | Diff line number | Diff line change
@@ -1218,15 +1218,15 @@ def test_margin_dropna(self):
12181218
df = pd.DataFrame({'a': [1, 2, 2, 2, 2, np.nan],
12191219
'b': [3, 3, 4, 4, 4, 4]})
12201220
actual = pd.crosstab(df.a, df.b, margins=True, dropna=False)
1221-
expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [2, 4, 6]])
1221+
expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [2, 3, 5]])
12221222
expected.index = Index([1.0, 2.0, 'All'], name='a')
12231223
expected.columns = Index([3, 4, 'All'], name='b')
12241224
tm.assert_frame_equal(actual, expected)
12251225

12261226
df = DataFrame({'a': [1, np.nan, np.nan, np.nan, 2, np.nan],
12271227
'b': [3, np.nan, 4, 4, 4, 4]})
12281228
actual = pd.crosstab(df.a, df.b, margins=True, dropna=False)
1229-
expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 4, 6]])
1229+
expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]])
12301230
expected.index = Index([1.0, 2.0, 'All'], name='a')
12311231
expected.columns = Index([3.0, 4.0, 'All'], name='b')
12321232
tm.assert_frame_equal(actual, expected)
@@ -1243,8 +1243,8 @@ def test_margin_dropna(self):
12431243
m = MultiIndex.from_arrays([['one', 'one', 'two', 'two', 'All'],
12441244
['dull', 'shiny', 'dull', 'shiny', '']],
12451245
names=['b', 'c'])
1246-
expected = DataFrame([[1, 0, 1, 0, 2], [2, 0, 1, 1, 5],
1247-
[3, 0, 2, 1, 7]], columns=m)
1246+
expected = DataFrame([[1, 0, 1, 0, 2], [2, 0, 1, 1, 4],
1247+
[3, 0, 2, 1, 6]], columns=m)
12481248
expected.index = Index(['bar', 'foo', 'All'], name='a')
12491249
tm.assert_frame_equal(actual, expected)
12501250

@@ -1254,7 +1254,7 @@ def test_margin_dropna(self):
12541254
['one', 'two', 'one', 'two', '']],
12551255
names=['a', 'b'])
12561256
expected = DataFrame([[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2],
1257-
[5, 2, 7]], index=m)
1257+
[5, 1, 6]], index=m)
12581258
expected.columns = Index(['dull', 'shiny', 'All'], name='c')
12591259
tm.assert_frame_equal(actual, expected)
12601260

@@ -1455,22 +1455,23 @@ def test_crosstab_errors(self):
14551455
df = pd.DataFrame({'a': [1, 2, 2, 2, 2], 'b': [3, 3, 4, 4, 4],
14561456
'c': [1, 1, np.nan, 1, 1]})
14571457

1458-
error = 'values cannot be used without an aggfunc.'
1458+
error = "values cannot be used without an aggfunc."
14591459
with tm.assert_raises_regex(ValueError, error):
14601460
pd.crosstab(df.a, df.b, values=df.c)
14611461

1462-
error = 'aggfunc cannot be used without values'
1462+
error = "aggfunc cannot be used without values"
14631463
with tm.assert_raises_regex(ValueError, error):
14641464
pd.crosstab(df.a, df.b, aggfunc=np.mean)
14651465

1466-
error = 'Not a valid normalize argument'
1466+
error = "Not a valid normalize argument: '42'"
14671467
with tm.assert_raises_regex(ValueError, error):
14681468
pd.crosstab(df.a, df.b, normalize='42')
14691469

1470+
error = "Not a valid normalize argument: 42"
14701471
with tm.assert_raises_regex(ValueError, error):
14711472
pd.crosstab(df.a, df.b, normalize=42)
14721473

1473-
error = 'Not a valid margins argument'
1474+
error = "Not a valid margins argument: 42"
14741475
with tm.assert_raises_regex(ValueError, error):
14751476
pd.crosstab(df.a, df.b, normalize='all', margins=42)
14761477

@@ -1529,6 +1530,7 @@ def test_crosstab_with_numpy_size(self):
15291530
expected = pd.DataFrame(expected_data,
15301531
index=expected_index,
15311532
columns=expected_column)
1533+
15321534
tm.assert_frame_equal(result, expected)
15331535

15341536

0 commit comments

Comments
 (0)