Skip to content

Commit b0d9ee0

Browse files
cmohl2013 authored and TomAugspurger committed
ENH: added margins_name parameter for crosstab (pandas-dev#16489)
* ENH pandas-dev#15972 added margins_name parameter for crosstab
* ENH 15972 minor changes as suggested by reviewers
* ENH 15972 correction in whatsnew
* ENH 15972 style changes in whatsnew
1 parent 75c8698 commit b0d9ee0

File tree

4 files changed

+57
-10
lines changed

4 files changed

+57
-10
lines changed

doc/source/whatsnew/v0.20.0.txt

-1
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,6 @@ Other Enhancements
515515
- Options added to allow one to turn on/off using ``bottleneck`` and ``numexpr``, see :ref:`here <basics.accelerate>` (:issue:`16157`)
516516
- ``DataFrame.style.bar()`` now accepts two more options to further customize the bar chart. Bar alignment is set with ``align='left'|'mid'|'zero'``, the default is "left", which is backward compatible; You can now pass a list of ``color=[color_negative, color_positive]``. (:issue:`14757`)
517517

518-
519518
.. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations
520519

521520

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ Other Enhancements
3737
- :func:`api.types.infer_dtype` now infers decimals. (:issue:`15690`)
3838
- :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`)
3939
- :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument. (:issue:`15388`)
40+
- :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when ``margins=True``. (:issue:`15972`)
4041

4142
.. _whatsnew_0210.api_breaking:
4243

pandas/core/reshape/pivot.py

+19-9
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,8 @@ def _convert_by(by):
388388

389389

390390
def crosstab(index, columns, values=None, rownames=None, colnames=None,
391-
aggfunc=None, margins=False, dropna=True, normalize=False):
391+
aggfunc=None, margins=False, margins_name='All', dropna=True,
392+
normalize=False):
392393
"""
393394
Compute a simple cross-tabulation of two (or more) factors. By default
394395
computes a frequency table of the factors unless an array of values and an
@@ -411,6 +412,12 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
411412
If passed, must match number of column arrays passed
412413
margins : boolean, default False
413414
Add row/column margins (subtotals)
415+
margins_name : string, default 'All'
416+
Name of the row / column that will contain the totals
417+
when margins is True.
418+
419+
.. versionadded:: 0.21.0
420+
414421
dropna : boolean, default True
415422
Do not include columns whose entries are all NaN
416423
normalize : boolean, {'all', 'index', 'columns'}, or {0,1}, default False
@@ -490,23 +497,26 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
490497
df = DataFrame(data)
491498
df['__dummy__'] = 0
492499
table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
493-
aggfunc=len, margins=margins, dropna=dropna)
500+
aggfunc=len, margins=margins,
501+
margins_name=margins_name, dropna=dropna)
494502
table = table.fillna(0).astype(np.int64)
495503

496504
else:
497505
data['__dummy__'] = values
498506
df = DataFrame(data)
499507
table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
500-
aggfunc=aggfunc, margins=margins, dropna=dropna)
508+
aggfunc=aggfunc, margins=margins,
509+
margins_name=margins_name, dropna=dropna)
501510

502511
# Post-process
503512
if normalize is not False:
504-
table = _normalize(table, normalize=normalize, margins=margins)
513+
table = _normalize(table, normalize=normalize, margins=margins,
514+
margins_name=margins_name)
505515

506516
return table
507517

508518

509-
def _normalize(table, normalize, margins):
519+
def _normalize(table, normalize, margins, margins_name='All'):
510520

511521
if not isinstance(normalize, bool) and not isinstance(normalize,
512522
compat.string_types):
@@ -537,9 +547,9 @@ def _normalize(table, normalize, margins):
537547

538548
elif margins is True:
539549

540-
column_margin = table.loc[:, 'All'].drop('All')
541-
index_margin = table.loc['All', :].drop('All')
542-
table = table.drop('All', axis=1).drop('All')
550+
column_margin = table.loc[:, margins_name].drop(margins_name)
551+
index_margin = table.loc[margins_name, :].drop(margins_name)
552+
table = table.drop(margins_name, axis=1).drop(margins_name)
543553
# to keep index and columns names
544554
table_index_names = table.index.names
545555
table_columns_names = table.columns.names
@@ -561,7 +571,7 @@ def _normalize(table, normalize, margins):
561571
elif normalize == "all" or normalize is True:
562572
column_margin = column_margin / column_margin.sum()
563573
index_margin = index_margin / index_margin.sum()
564-
index_margin.loc['All'] = 1
574+
index_margin.loc[margins_name] = 1
565575
table = concat([table, column_margin], axis=1)
566576
table = table.append(index_margin)
567577

pandas/tests/reshape/test_pivot.py

+37
Original file line numberDiff line numberDiff line change
@@ -1071,6 +1071,43 @@ def test_crosstab_margins(self):
10711071
exp_rows = exp_rows.fillna(0).astype(np.int64)
10721072
tm.assert_series_equal(all_rows, exp_rows)
10731073

1074+
def test_crosstab_margins_set_margin_name(self):
1075+
# GH 15972
1076+
a = np.random.randint(0, 7, size=100)
1077+
b = np.random.randint(0, 3, size=100)
1078+
c = np.random.randint(0, 5, size=100)
1079+
1080+
df = DataFrame({'a': a, 'b': b, 'c': c})
1081+
1082+
result = crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c'),
1083+
margins=True, margins_name='TOTAL')
1084+
1085+
assert result.index.names == ('a',)
1086+
assert result.columns.names == ['b', 'c']
1087+
1088+
all_cols = result['TOTAL', '']
1089+
exp_cols = df.groupby(['a']).size().astype('i8')
1090+
# to keep index.name
1091+
exp_margin = Series([len(df)], index=Index(['TOTAL'], name='a'))
1092+
exp_cols = exp_cols.append(exp_margin)
1093+
exp_cols.name = ('TOTAL', '')
1094+
1095+
tm.assert_series_equal(all_cols, exp_cols)
1096+
1097+
all_rows = result.loc['TOTAL']
1098+
exp_rows = df.groupby(['b', 'c']).size().astype('i8')
1099+
exp_rows = exp_rows.append(Series([len(df)], index=[('TOTAL', '')]))
1100+
exp_rows.name = 'TOTAL'
1101+
1102+
exp_rows = exp_rows.reindex(all_rows.index)
1103+
exp_rows = exp_rows.fillna(0).astype(np.int64)
1104+
tm.assert_series_equal(all_rows, exp_rows)
1105+
1106+
for margins_name in [666, None, ['a', 'b']]:
1107+
with pytest.raises(ValueError):
1108+
crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c'),
1109+
margins=True, margins_name=margins_name)
1110+
10741111
def test_crosstab_pass_values(self):
10751112
a = np.random.randint(0, 7, size=100)
10761113
b = np.random.randint(0, 3, size=100)

0 commit comments

Comments
 (0)