Skip to content

Commit 20771d1

Browse files
committed
+pd.DataFrame.crosstab
1 parent 0abbf14 commit 20771d1

File tree

3 files changed

+106
-86
lines changed

3 files changed

+106
-86
lines changed

pandas/core/frame.py

+98-3
Original file line numberDiff line numberDiff line change
@@ -108,9 +108,9 @@
108108
Name or list of names which refer to the axis items.""",
109109
versionadded_to_excel='',
110110
versionadded_melt='\n.. versionadded:: 0.20.0\n',
111-
other_melt='melt')
112-
113-
import pdb; pdb.set_trace()
111+
other_melt='melt',
112+
versionadded_crosstab = '\n.. versionadded:: 0.20.0\n',
113+
other_crosstab = 'crosstab')
114114

115115
_numeric_only_doc = """numeric_only : boolean, default None
116116
Include only float, int, boolean data. If None, will attempt to use
@@ -4138,6 +4138,101 @@ def melt(self, id_vars=None, value_vars=None, var_name=None,
41384138
var_name=var_name, value_name=value_name,
41394139
col_level=col_level)
41404140

4141+
_shared_docs['crosstab'] = """
4142+
Compute a simple cross-tabulation of two (or more) factors. By default
4143+
computes a frequency table of the factors unless an array of values and an
4144+
aggregation function are passed.
4145+
4146+
%(versionadded_crosstab)s
4147+
4148+
Parameters
4149+
----------
4150+
index : array-like, Series, or list of arrays/Series
4151+
Values to group by in the rows
4152+
columns : array-like, Series, or list of arrays/Series
4153+
Values to group by in the columns
4154+
values : array-like, optional
4155+
Array of values to aggregate according to the factors.
4156+
Requires `aggfunc` be specified.
4157+
aggfunc : function, optional
4158+
If specified, requires `values` be specified as well.
4159+
rownames : sequence, default None
4160+
If passed, must match number of row arrays passed
4161+
colnames : sequence, default None
4162+
If passed, must match number of column arrays passed
4163+
margins : boolean, default False
4164+
Add row/column margins (subtotals)
4165+
dropna : boolean, default True
4166+
Do not include columns whose entries are all NaN
4167+
normalize : boolean, {'all', 'index', 'columns'}, or {0,1}, default False
4168+
Normalize by dividing all values by the sum of values.
4169+
4170+
- If passed 'all' or `True`, will normalize over all values.
4171+
- If passed 'index' will normalize over each row.
4172+
- If passed 'columns' will normalize over each column.
4173+
- If margins is `True`, will also normalize margin values.
4174+
4175+
.. versionadded:: 0.18.1
4176+
4177+
4178+
Notes
4179+
-----
4180+
Any Series passed will have their name attributes used unless row or column
4181+
names for the cross-tabulation are specified.
4182+
4183+
Any input passed containing Categorical data will have **all** of its
4184+
categories included in the cross-tabulation, even if the actual data does
4185+
not contain any instances of a particular category.
4186+
4187+
In the event that there aren't overlapping indexes an empty DataFrame will
4188+
be returned.
4189+
4190+
See Also
4191+
--------
4192+
%(other_crosstab)s
4193+
4194+
Examples
4195+
--------
4196+
>>> a
4197+
array([foo, foo, foo, foo, bar, bar,
4198+
bar, bar, foo, foo, foo], dtype=object)
4199+
>>> b
4200+
array([one, one, one, two, one, one,
4201+
one, two, two, two, one], dtype=object)
4202+
>>> c
4203+
array([dull, dull, shiny, dull, dull, shiny,
4204+
shiny, dull, shiny, shiny, shiny], dtype=object)
4205+
4206+
>>> crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c'])
4207+
b one two
4208+
c dull shiny dull shiny
4209+
a
4210+
bar 1 2 1 0
4211+
foo 2 2 1 2
4212+
4213+
>>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])
4214+
>>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f'])
4215+
>>> crosstab(foo, bar) # 'c' and 'f' are not represented in the data,
4216+
# but they still will be counted in the output
4217+
col_0 d e f
4218+
row_0
4219+
a 1 0 0
4220+
b 0 1 0
4221+
c 0 0 0
4222+
4223+
Returns
4224+
-------
4225+
crosstab : DataFrame
4226+
"""
4227+
4228+
@Appender(_shared_docs['crosstab'] % _shared_doc_kwargs)
4229+
def crosstab(self, columns, values=None, rownames=None, colnames=None,
4230+
aggfunc=None, margins=False, dropna=True, normalize=False):
4231+
from pandas.tools.pivot import crosstab
4232+
return crosstab(self, columns, values=values, rownames=rownames,
4233+
colnames=colnames, aggfunc=aggfunc, margins=margins,
4234+
dropna=dropna, normalize=normalize)
4235+
41414236
# ----------------------------------------------------------------------
41424237
# Time series-related
41434238

pandas/core/reshape.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@
2828

2929
from pandas.core.frame import _shared_docs
3030
from pandas.util.decorators import Appender
31-
_shared_docs_kwargs = dict(
32-
versionadded_melt="", other_melt='DataFrame.melt')
31+
_shared_docs_kwargs = dict(versionadded_melt="",
32+
other_melt='DataFrame.melt')
3333

3434
from pandas.core.index import MultiIndex, _get_na_value
3535

pandas/tools/pivot.py

+6-81
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@
1010
import pandas.core.common as com
1111
import numpy as np
1212

13+
from pandas.core.frame import _shared_docs
14+
from pandas.util.decorators import Appender
15+
_shared_docs_kwargs = dict(versionadded_crosstab="",
16+
other_crosstab="DataFrame.crosstab")
17+
1318

1419
def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
1520
fill_value=None, margins=False, dropna=True,
@@ -381,89 +386,9 @@ def _convert_by(by):
381386
return by
382387

383388

389+
@Appender(_shared_docs['crosstab'] % _shared_docs_kwargs)
384390
def crosstab(index, columns, values=None, rownames=None, colnames=None,
385391
aggfunc=None, margins=False, dropna=True, normalize=False):
386-
"""
387-
Compute a simple cross-tabulation of two (or more) factors. By default
388-
computes a frequency table of the factors unless an array of values and an
389-
aggregation function are passed
390-
391-
Parameters
392-
----------
393-
index : array-like, Series, or list of arrays/Series
394-
Values to group by in the rows
395-
columns : array-like, Series, or list of arrays/Series
396-
Values to group by in the columns
397-
values : array-like, optional
398-
Array of values to aggregate according to the factors.
399-
Requires `aggfunc` be specified.
400-
aggfunc : function, optional
401-
If specified, requires `values` be specified as well
402-
rownames : sequence, default None
403-
If passed, must match number of row arrays passed
404-
colnames : sequence, default None
405-
If passed, must match number of column arrays passed
406-
margins : boolean, default False
407-
Add row/column margins (subtotals)
408-
dropna : boolean, default True
409-
Do not include columns whose entries are all NaN
410-
normalize : boolean, {'all', 'index', 'columns'}, or {0,1}, default False
411-
Normalize by dividing all values by the sum of values.
412-
413-
- If passed 'all' or `True`, will normalize over all values.
414-
- If passed 'index' will normalize over each row.
415-
- If passed 'columns' will normalize over each column.
416-
- If margins is `True`, will also normalize margin values.
417-
418-
.. versionadded:: 0.18.1
419-
420-
421-
Notes
422-
-----
423-
Any Series passed will have their name attributes used unless row or column
424-
names for the cross-tabulation are specified.
425-
426-
Any input passed containing Categorical data will have **all** of its
427-
categories included in the cross-tabulation, even if the actual data does
428-
not contain any instances of a particular category.
429-
430-
In the event that there aren't overlapping indexes an empty DataFrame will
431-
be returned.
432-
433-
Examples
434-
--------
435-
>>> a
436-
array([foo, foo, foo, foo, bar, bar,
437-
bar, bar, foo, foo, foo], dtype=object)
438-
>>> b
439-
array([one, one, one, two, one, one,
440-
one, two, two, two, one], dtype=object)
441-
>>> c
442-
array([dull, dull, shiny, dull, dull, shiny,
443-
shiny, dull, shiny, shiny, shiny], dtype=object)
444-
445-
>>> crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c'])
446-
b one two
447-
c dull shiny dull shiny
448-
a
449-
bar 1 2 1 0
450-
foo 2 2 1 2
451-
452-
>>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])
453-
>>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f'])
454-
>>> crosstab(foo, bar) # 'c' and 'f' are not represented in the data,
455-
# but they still will be counted in the output
456-
col_0 d e f
457-
row_0
458-
a 1 0 0
459-
b 0 1 0
460-
c 0 0 0
461-
462-
Returns
463-
-------
464-
crosstab : DataFrame
465-
"""
466-
467392
index = com._maybe_make_list(index)
468393
columns = com._maybe_make_list(columns)
469394

0 commit comments

Comments
 (0)