Merge pull request pandas-dev#5505 from jsexauer/fix5505

jorisvandenbossche · jorisvandenbossche · commit 42161787c148 · 2014-03-14T23:41:31.000+01:00
ENH: Have pivot and pivot_table take similar arguments
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -125,6 +125,11 @@ API Changes
     DataFrame returned by ``GroupBy.apply`` (:issue:`6124`).  This facilitates
     ``DataFrame.stack`` operations where the name of the column index is used as
     the name of the inserted column containing the pivoted data.
+	
+- The :func:`pivot_table`/:meth:`DataFrame.pivot_table` and :func:`crosstab` functions 
+  now take arguments ``index`` and ``columns`` instead of ``rows`` and ``cols``.  A 
+  ``FutureWarning`` is raised  to alert that the old ``rows`` and ``cols`` arguments
+  will not be supported in a future release (:issue:`5505`)
 
 Experimental Features
 ~~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt
@@ -167,6 +167,11 @@ These are out-of-bounds selections
     # New output, 4-level MultiIndex
     df_multi.set_index([df_multi.index, df_multi.index])
 
+- The :func:`pivot_table`/:meth:`DataFrame.pivot_table` and :func:`crosstab` functions 
+  now take arguments ``index`` and ``columns`` instead of ``rows`` and ``cols``.  A 
+  ``FutureWarning`` is raised  to alert that the old ``rows`` and ``cols`` arguments
+  will not be supported in a future release (:issue:`5505`)
+
 
 MultiIndexing Using Slicers
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py
@@ -1,5 +1,7 @@
 # pylint: disable=E1103
 
+import warnings
+
 from pandas import Series, DataFrame
 from pandas.core.index import MultiIndex
 from pandas.tools.merge import concat
@@ -10,8 +12,8 @@
 import numpy as np
 
 
-def pivot_table(data, values=None, rows=None, cols=None, aggfunc='mean',
-                fill_value=None, margins=False, dropna=True):
+def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
+                fill_value=None, margins=False, dropna=True, **kwarg):
     """
     Create a spreadsheet-style pivot table as a DataFrame. The levels in the
     pivot table will be stored in MultiIndex objects (hierarchical indexes) on
@@ -21,9 +23,9 @@ def pivot_table(data, values=None, rows=None, cols=None, aggfunc='mean',
     ----------
     data : DataFrame
     values : column to aggregate, optional
-    rows : list of column names or arrays to group on
+    index : list of column names or arrays to group on
         Keys to group on the x-axis of the pivot table
-    cols : list of column names or arrays to group on
+    columns : list of column names or arrays to group on
         Keys to group on the y-axis of the pivot table
     aggfunc : function, default numpy.mean, or list of functions
         If list of functions passed, the resulting pivot table will have
@@ -35,6 +37,8 @@ def pivot_table(data, values=None, rows=None, cols=None, aggfunc='mean',
         Add all row / columns (e.g. for subtotal / grand totals)
     dropna : boolean, default True
         Do not include columns whose entries are all NaN
+    rows : kwarg only alias of index [deprecated]
+    cols : kwarg only alias of columns [deprecated]
 
     Examples
     --------
@@ -50,8 +54,8 @@ def pivot_table(data, values=None, rows=None, cols=None, aggfunc='mean',
     7  bar two small  6
     8  bar two large  7
 
-    >>> table = pivot_table(df, values='D', rows=['A', 'B'],
-    ...                     cols=['C'], aggfunc=np.sum)
+    >>> table = pivot_table(df, values='D', index=['A', 'B'],
+    ...                     columns=['C'], aggfunc=np.sum)
     >>> table
               small  large
     foo  one  1      4
@@ -63,21 +67,43 @@ def pivot_table(data, values=None, rows=None, cols=None, aggfunc='mean',
     -------
     table : DataFrame
     """
-    rows = _convert_by(rows)
-    cols = _convert_by(cols)
+    # Parse old-style keyword arguments
+    rows = kwarg.pop('rows', None)
+    if rows is not None:
+        warnings.warn("rows is deprecated, use index", FutureWarning)
+        if index is None:
+            index = rows
+        else:
+            msg = "Can only specify either 'rows' or 'index'"
+            raise TypeError(msg)
+
+    cols = kwarg.pop('cols', None)
+    if cols is not None:
+        warnings.warn("cols is deprecated, use columns", FutureWarning)
+        if columns is None:
+            columns = cols
+        else:
+            msg = "Can only specify either 'cols' or 'columns'"
+            raise TypeError(msg)
+    
+    if kwarg:
+        raise TypeError("Unexpected argument(s): %s" % kwarg.keys())
+    
+    index = _convert_by(index)
+    columns = _convert_by(columns)
 
     if isinstance(aggfunc, list):
         pieces = []
         keys = []
         for func in aggfunc:
-            table = pivot_table(data, values=values, rows=rows, cols=cols,
+            table = pivot_table(data, values=values, index=index, columns=columns,
                                 fill_value=fill_value, aggfunc=func,
                                 margins=margins)
             pieces.append(table)
             keys.append(func.__name__)
         return concat(pieces, keys=keys, axis=1)
 
-    keys = rows + cols
+    keys = index + columns
 
     values_passed = values is not None
     if values_passed:
@@ -106,7 +132,7 @@ def pivot_table(data, values=None, rows=None, cols=None, aggfunc='mean',
     table = agged
     if table.index.nlevels > 1:
         to_unstack = [agged.index.names[i]
-                      for i in range(len(rows), len(keys))]
+                      for i in range(len(index), len(keys))]
         table = agged.unstack(to_unstack)
 
     if not dropna:
@@ -132,14 +158,14 @@ def pivot_table(data, values=None, rows=None, cols=None, aggfunc='mean',
         table = table.fillna(value=fill_value, downcast='infer')
 
     if margins:
-        table = _add_margins(table, data, values, rows=rows,
-                             cols=cols, aggfunc=aggfunc)
+        table = _add_margins(table, data, values, rows=index,
+                             cols=columns, aggfunc=aggfunc)
 
     # discard the top level
     if values_passed and not values_multi:
         table = table[values[0]]
 
-    if len(rows) == 0 and len(cols) > 0:
+    if len(index) == 0 and len(columns) > 0:
         table = table.T
 
     return table
@@ -299,18 +325,18 @@ def _convert_by(by):
     return by
 
 
-def crosstab(rows, cols, values=None, rownames=None, colnames=None,
-             aggfunc=None, margins=False, dropna=True):
+def crosstab(index, columns, values=None, rownames=None, colnames=None,
+             aggfunc=None, margins=False, dropna=True, **kwarg):
     """
     Compute a simple cross-tabulation of two (or more) factors. By default
     computes a frequency table of the factors unless an array of values and an
     aggregation function are passed
 
     Parameters
     ----------
-    rows : array-like, Series, or list of arrays/Series
+    index : array-like, Series, or list of arrays/Series
         Values to group by in the rows
-    cols : array-like, Series, or list of arrays/Series
+    columns : array-like, Series, or list of arrays/Series
         Values to group by in the columns
     values : array-like, optional
         Array of values to aggregate according to the factors
@@ -324,6 +350,8 @@ def crosstab(rows, cols, values=None, rownames=None, colnames=None,
         Add row/column margins (subtotals)
     dropna : boolean, default True
         Do not include columns whose entries are all NaN
+    rows : kwarg only alias of index [deprecated]
+    cols : kwarg only alias of columns [deprecated]
 
     Notes
     -----
@@ -353,26 +381,48 @@ def crosstab(rows, cols, values=None, rownames=None, colnames=None,
     -------
     crosstab : DataFrame
     """
-    rows = com._maybe_make_list(rows)
-    cols = com._maybe_make_list(cols)
+    # Parse old-style keyword arguments
+    rows = kwarg.pop('rows', None)
+    if rows is not None:
+        warnings.warn("rows is deprecated, use index", FutureWarning)
+        if index is None:
+            index = rows
+        else:
+            msg = "Can only specify either 'rows' or 'index'"
+            raise TypeError(msg)
+
+    cols = kwarg.pop('cols', None)
+    if cols is not None:
+        warnings.warn("cols is deprecated, use columns", FutureWarning)
+        if columns is None:
+            columns = cols
+        else:
+            msg = "Can only specify either 'cols' or 'columns'"
+            raise TypeError(msg)
+
+    if kwarg:
+        raise TypeError("Unexpected argument(s): %s" % kwarg.keys())
+
+    index = com._maybe_make_list(index)
+    columns = com._maybe_make_list(columns)
 
-    rownames = _get_names(rows, rownames, prefix='row')
-    colnames = _get_names(cols, colnames, prefix='col')
+    rownames = _get_names(index, rownames, prefix='row')
+    colnames = _get_names(columns, colnames, prefix='col')
 
     data = {}
-    data.update(zip(rownames, rows))
-    data.update(zip(colnames, cols))
+    data.update(zip(rownames, index))
+    data.update(zip(colnames, columns))
 
     if values is None:
         df = DataFrame(data)
         df['__dummy__'] = 0
-        table = df.pivot_table('__dummy__', rows=rownames, cols=colnames,
+        table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
                                aggfunc=len, margins=margins, dropna=dropna)
         return table.fillna(0).astype(np.int64)
     else:
         data['__dummy__'] = values
         df = DataFrame(data)
-        table = df.pivot_table('__dummy__', rows=rownames, cols=colnames,
+        table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
                                aggfunc=aggfunc, margins=margins, dropna=dropna)
         return table
 
diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py