|
108 | 108 | Name or list of names which refer to the axis items.""",
|
109 | 109 | versionadded_to_excel='',
|
110 | 110 | versionadded_melt='\n.. versionadded:: 0.20.0\n',
|
111 |
| - other_melt='melt') |
112 |
| - |
113 |
| -import pdb; pdb.set_trace() |
| 111 | + other_melt='melt', |
| 112 | + versionadded_crosstab = '\n.. versionadded:: 0.20.0\n', |
| 113 | + other_crosstab = 'crosstab') |
114 | 114 |
|
115 | 115 | _numeric_only_doc = """numeric_only : boolean, default None
|
116 | 116 | Include only float, int, boolean data. If None, will attempt to use
|
@@ -4138,6 +4138,101 @@ def melt(self, id_vars=None, value_vars=None, var_name=None,
|
4138 | 4138 | var_name=var_name, value_name=value_name,
|
4139 | 4139 | col_level=col_level)
|
4140 | 4140 |
|
| 4141 | + _shared_docs['crosstab'] = """ |
| 4142 | + Compute a simple cross-tabulation of two (or more) factors. By default |
| 4143 | + computes a frequency table of the factors unless an array of values and an |
| 4144 | + aggregation function are passed. |
| 4145 | +
|
| 4146 | + %(versionadded_crosstab)s |
| 4147 | +
|
| 4148 | + Parameters |
| 4149 | + ---------- |
| 4150 | + index : array-like, Series, or list of arrays/Series |
| 4151 | + Values to group by in the rows |
| 4152 | + columns : array-like, Series, or list of arrays/Series |
| 4153 | + Values to group by in the columns |
| 4154 | + values : array-like, optional |
| 4155 | + Array of values to aggregate according to the factors. |
| 4156 | + Requires `aggfunc` to be specified. |
| 4157 | + aggfunc : function, optional |
| 4158 | + If specified, requires `values` to be specified as well. |
| 4159 | + rownames : sequence, default None |
| 4160 | + If passed, must match number of row arrays passed |
| 4161 | + colnames : sequence, default None |
| 4162 | + If passed, must match number of column arrays passed |
| 4163 | + margins : boolean, default False |
| 4164 | + Add row/column margins (subtotals) |
| 4165 | + dropna : boolean, default True |
| 4166 | + Do not include columns whose entries are all NaN |
| 4167 | + normalize : boolean, {'all', 'index', 'columns'}, or {0,1}, default False |
| 4168 | + Normalize by dividing all values by the sum of values. |
| 4169 | +
|
| 4170 | + - If passed 'all' or `True`, will normalize over all values. |
| 4171 | + - If passed 'index' will normalize over each row. |
| 4172 | + - If passed 'columns' will normalize over each column. |
| 4173 | + - If margins is `True`, will also normalize margin values. |
| 4174 | +
|
| 4175 | + .. versionadded:: 0.18.1 |
| 4176 | +
|
| 4177 | +
|
| 4178 | + Notes |
| 4179 | + ----- |
| 4180 | + Any Series passed will have their name attributes used unless row or column |
| 4181 | + names for the cross-tabulation are specified. |
| 4182 | +
|
| 4183 | + Any input passed containing Categorical data will have **all** of its |
| 4184 | + categories included in the cross-tabulation, even if the actual data does |
| 4185 | + not contain any instances of a particular category. |
| 4186 | +
|
| 4187 | + If the inputs have no overlapping indexes, an empty DataFrame will |
| 4188 | + be returned. |
| 4189 | +
|
| 4190 | + See Also |
| 4191 | + -------- |
| 4192 | + %(other_crosstab)s |
| 4193 | +
|
| 4194 | + Examples |
| 4195 | + -------- |
| 4196 | + >>> a |
| 4197 | + array([foo, foo, foo, foo, bar, bar, |
| 4198 | + bar, bar, foo, foo, foo], dtype=object) |
| 4199 | + >>> b |
| 4200 | + array([one, one, one, two, one, one, |
| 4201 | + one, two, two, two, one], dtype=object) |
| 4202 | + >>> c |
| 4203 | + array([dull, dull, shiny, dull, dull, shiny, |
| 4204 | + shiny, dull, shiny, shiny, shiny], dtype=object) |
| 4205 | +
|
| 4206 | + >>> crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c']) |
| 4207 | + b one two |
| 4208 | + c dull shiny dull shiny |
| 4209 | + a |
| 4210 | + bar 1 2 1 0 |
| 4211 | + foo 2 2 1 2 |
| 4212 | +
|
| 4213 | + >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']) |
| 4214 | + >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f']) |
| 4215 | + >>> crosstab(foo, bar) # 'c' and 'f' are not represented in the data, |
| 4216 | + # but they still will be counted in the output |
| 4217 | + col_0 d e f |
| 4218 | + row_0 |
| 4219 | + a 1 0 0 |
| 4220 | + b 0 1 0 |
| 4221 | + c 0 0 0 |
| 4222 | +
|
| 4223 | + Returns |
| 4224 | + ------- |
| 4225 | + crosstab : DataFrame |
| 4226 | + """ |
| 4227 | + |
| 4228 | + @Appender(_shared_docs['crosstab'] % _shared_doc_kwargs) |
| 4229 | + def crosstab(self, columns, values=None, rownames=None, colnames=None, |
| 4230 | + aggfunc=None, margins=False, dropna=True, normalize=False): |
| 4231 | + from pandas.tools.pivot import crosstab |
| 4232 | + return crosstab(self, columns, values=values, rownames=rownames, |
| 4233 | + colnames=colnames, aggfunc=aggfunc, margins=margins, |
| 4234 | + dropna=dropna, normalize=normalize) |
| 4235 | + |
4141 | 4236 | # ----------------------------------------------------------------------
|
4142 | 4237 | # Time series-related
|
4143 | 4238 |
|
|
0 commit comments