API/CLN: Have toplevel pd.pivot mirror pivot instead of pivot_simple (pandas-dev#22209)

mroeschke · victor · commit 861b147926a3 · 2018-10-01T01:54:42.000+02:00
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -5322,8 +5322,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
     # ----------------------------------------------------------------------
     # Data reshaping
 
-    def pivot(self, index=None, columns=None, values=None):
-        """
+    _shared_docs['pivot'] = """
         Return reshaped DataFrame organized by given index / column values.
 
         Reshape data (produce a "pivot" table) based on column values. Uses
@@ -5333,7 +5332,7 @@ def pivot(self, index=None, columns=None, values=None):
         columns. See the :ref:`User Guide <reshaping>` for more on reshaping.
 
         Parameters
-        ----------
+        ----------%s
         index : string or object, optional
             Column to use to make new frame's index. If None, uses
             existing index.
@@ -5425,7 +5424,11 @@ def pivot(self, index=None, columns=None, values=None):
            ...
         ValueError: Index contains duplicate entries, cannot reshape
         """
-        from pandas.core.reshape.reshape import pivot
+
+    @Substitution('')  # method form: nothing extra goes in the doc's %s slot
+    @Appender(_shared_docs['pivot'])
+    def pivot(self, index=None, columns=None, values=None):
+        from pandas.core.reshape.pivot import pivot  # imported at call time
         return pivot(self, index=index, columns=columns, values=values)
 
     _shared_docs['pivot_table'] = """
diff --git a/pandas/core/reshape/api.py b/pandas/core/reshape/api.py
@@ -2,7 +2,7 @@
 
 from pandas.core.reshape.concat import concat
 from pandas.core.reshape.melt import melt, lreshape, wide_to_long
-from pandas.core.reshape.reshape import pivot_simple as pivot, get_dummies
+from pandas.core.reshape.reshape import get_dummies
 from pandas.core.reshape.merge import merge, merge_ordered, merge_asof
-from pandas.core.reshape.pivot import pivot_table, crosstab
+from pandas.core.reshape.pivot import pivot_table, pivot, crosstab
 from pandas.core.reshape.tile import cut, qcut
diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
@@ -10,7 +10,7 @@
 from pandas.core.series import Series
 from pandas.core.groupby import Grouper
 from pandas.core.reshape.util import cartesian_product
-from pandas.core.index import Index, _get_objs_combined_axis
+from pandas.core.index import Index, MultiIndex, _get_objs_combined_axis
 from pandas.compat import range, lrange, zip
 from pandas import compat
 import pandas.core.common as com
@@ -369,6 +369,30 @@ def _convert_by(by):
     return by
 
 
+@Substitution('\ndata : DataFrame')  # presumably fills the doc's %s slot
+@Appender(_shared_docs['pivot'], indents=1)
+def pivot(data, index=None, columns=None, values=None):
+    if values is None:  # no values: pivot all columns not used as labels
+        cols = [columns] if index is None else [index, columns]
+        append = index is None  # no index column: keep the current index
+        indexed = data.set_index(cols, append=append)
+    else:  # values given: build the (index, columns) MultiIndex by hand
+        if index is None:
+            index = data.index  # default to the frame's existing index
+        else:
+            index = data[index]
+        index = MultiIndex.from_arrays([index, data[columns]])
+
+        if is_list_like(values) and not isinstance(values, tuple):
+            # A tuple is one column label, not a list of labels
+            indexed = data._constructor(data[values].values, index=index,
+                                        columns=values)
+        else:  # a single column label (including a tuple)
+            indexed = data._constructor_sliced(data[values].values,
+                                               index=index)
+    return indexed.unstack(columns)  # pivot the columns level out
+
+
 def crosstab(index, columns, values=None, rownames=None, colnames=None,
              aggfunc=None, margins=False, margins_name='All', dropna=True,
              normalize=False):
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
@@ -383,97 +383,6 @@ def _unstack_multiple(data, clocs, fill_value=None):
     return unstacked
 
 
-def pivot(self, index=None, columns=None, values=None):
-    """
-    See DataFrame.pivot
-    """
-    if values is None:
-        cols = [columns] if index is None else [index, columns]
-        append = index is None
-        indexed = self.set_index(cols, append=append)
-    else:
-        if index is None:
-            index = self.index
-        else:
-            index = self[index]
-        index = MultiIndex.from_arrays([index, self[columns]])
-
-        if is_list_like(values) and not isinstance(values, tuple):
-            # Exclude tuple because it is seen as a single column name
-            indexed = self._constructor(self[values].values, index=index,
-                                        columns=values)
-        else:
-            indexed = self._constructor_sliced(self[values].values,
-                                               index=index)
-    return indexed.unstack(columns)
-
-
-def pivot_simple(index, columns, values):
-    """
-    Produce 'pivot' table based on 3 columns of this DataFrame.
-    Uses unique values from index / columns and fills with values.
-
-    Parameters
-    ----------
-    index : ndarray
-        Labels to use to make new frame's index
-    columns : ndarray
-        Labels to use to make new frame's columns
-    values : ndarray
-        Values to use for populating new frame's values
-
-    Notes
-    -----
-    Obviously, all 3 of the input arguments must have the same length
-
-    Returns
-    -------
-    DataFrame
-
-    See also
-    --------
-    DataFrame.pivot_table : generalization of pivot that can handle
-        duplicate values for one index/column pair
-    """
-    if (len(index) != len(columns)) or (len(columns) != len(values)):
-        raise AssertionError('Length of index, columns, and values must be the'
-                             ' same')
-
-    if len(index) == 0:
-        return DataFrame(index=[])
-
-    hindex = MultiIndex.from_arrays([index, columns])
-    series = Series(values.ravel(), index=hindex)
-    series = series.sort_index(level=0)
-    return series.unstack()
-
-
-def _slow_pivot(index, columns, values):
-    """
-    Produce 'pivot' table based on 3 columns of this DataFrame.
-    Uses unique values from index / columns and fills with values.
-
-    Parameters
-    ----------
-    index : string or object
-        Column name to use to make new frame's index
-    columns : string or object
-        Column name to use to make new frame's columns
-    values : string or object
-        Column name to use for populating new frame's values
-
-    Could benefit from some Cython here.
-    """
-    tree = {}
-    for i, (idx, col) in enumerate(zip(index, columns)):
-        if col not in tree:
-            tree[col] = {}
-        branch = tree[col]
-        branch[idx] = values[i]
-
-    return DataFrame(tree)
-
-
 def unstack(obj, level, fill_value=None):
     if isinstance(level, (tuple, list)):
         if len(level) != 1:
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
@@ -301,13 +301,17 @@ def test_pivot_multi_functions(self):
         expected = concat([means, stds], keys=['mean', 'std'], axis=1)
         tm.assert_frame_equal(result, expected)
 
-    def test_pivot_index_with_nan(self):
+    @pytest.mark.parametrize('method', [True, False])
+    def test_pivot_index_with_nan(self, method):
         # GH 3588
         nan = np.nan
         df = DataFrame({'a': ['R1', 'R2', nan, 'R4'],
                         'b': ['C1', 'C2', 'C3', 'C4'],
                         'c': [10, 15, 17, 20]})
-        result = df.pivot('a', 'b', 'c')
+        if method:
+            result = df.pivot('a', 'b', 'c')
+        else:
+            result = pd.pivot(df, 'a', 'b', 'c')
         expected = DataFrame([[nan, nan, 17, nan], [10, nan, nan, nan],
                               [nan, 15, nan, nan], [nan, nan, nan, 20]],
                              index=Index([nan, 'R1', 'R2', 'R4'], name='a'),
@@ -322,15 +326,23 @@ def test_pivot_index_with_nan(self):
         df.loc[1, 'a'] = df.loc[3, 'a'] = nan
         df.loc[1, 'b'] = df.loc[4, 'b'] = nan
 
-        pv = df.pivot('a', 'b', 'c')
+        if method:
+            pv = df.pivot('a', 'b', 'c')
+        else:
+            pv = pd.pivot(df, 'a', 'b', 'c')
         assert pv.notna().values.sum() == len(df)
 
         for _, row in df.iterrows():
             assert pv.loc[row['a'], row['b']] == row['c']
 
-        tm.assert_frame_equal(df.pivot('b', 'a', 'c'), pv.T)
+        if method:
+            result = df.pivot('b', 'a', 'c')
+        else:
+            result = pd.pivot(df, 'b', 'a', 'c')
+        tm.assert_frame_equal(result, pv.T)
 
-    def test_pivot_with_tz(self):
+    @pytest.mark.parametrize('method', [True, False])
+    def test_pivot_with_tz(self, method):
         # GH 5878
         df = DataFrame({'dt1': [datetime(2013, 1, 1, 9, 0),
                                 datetime(2013, 1, 2, 9, 0),
@@ -358,7 +370,10 @@ def test_pivot_with_tz(self):
                                                     tz='US/Pacific'),
                              columns=exp_col)
 
-        pv = df.pivot(index='dt1', columns='dt2')
+        if method:
+            pv = df.pivot(index='dt1', columns='dt2')
+        else:
+            pv = pd.pivot(df, index='dt1', columns='dt2')
         tm.assert_frame_equal(pv, expected)
 
         expected = DataFrame([[0, 2], [1, 3]],
@@ -371,10 +386,14 @@ def test_pivot_with_tz(self):
                                                       name='dt2',
                                                       tz='Asia/Tokyo'))
 
-        pv = df.pivot(index='dt1', columns='dt2', values='data1')
+        if method:
+            pv = df.pivot(index='dt1', columns='dt2', values='data1')
+        else:
+            pv = pd.pivot(df, index='dt1', columns='dt2', values='data1')
         tm.assert_frame_equal(pv, expected)
 
-    def test_pivot_periods(self):
+    @pytest.mark.parametrize('method', [True, False])
+    def test_pivot_periods(self, method):
         df = DataFrame({'p1': [pd.Period('2013-01-01', 'D'),
                                pd.Period('2013-01-02', 'D'),
                                pd.Period('2013-01-01', 'D'),
@@ -394,31 +413,39 @@ def test_pivot_periods(self):
                              index=pd.PeriodIndex(['2013-01-01', '2013-01-02'],
                                                   name='p1', freq='D'),
                              columns=exp_col)
-
-        pv = df.pivot(index='p1', columns='p2')
+        if method:
+            pv = df.pivot(index='p1', columns='p2')
+        else:
+            pv = pd.pivot(df, index='p1', columns='p2')
         tm.assert_frame_equal(pv, expected)
 
         expected = DataFrame([[0, 2], [1, 3]],
                              index=pd.PeriodIndex(['2013-01-01', '2013-01-02'],
                                                   name='p1', freq='D'),
                              columns=pd.PeriodIndex(['2013-01', '2013-02'],
                                                     name='p2', freq='M'))
-
-        pv = df.pivot(index='p1', columns='p2', values='data1')
+        if method:
+            pv = df.pivot(index='p1', columns='p2', values='data1')
+        else:
+            pv = pd.pivot(df, index='p1', columns='p2', values='data1')
         tm.assert_frame_equal(pv, expected)
 
     @pytest.mark.parametrize('values', [
         ['baz', 'zoo'], np.array(['baz', 'zoo']),
         pd.Series(['baz', 'zoo']), pd.Index(['baz', 'zoo'])
     ])
-    def test_pivot_with_list_like_values(self, values):
+    @pytest.mark.parametrize('method', [True, False])
+    def test_pivot_with_list_like_values(self, values, method):
         # issue #17160
         df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
                            'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
                            'baz': [1, 2, 3, 4, 5, 6],
                            'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
 
-        result = df.pivot(index='foo', columns='bar', values=values)
+        if method:
+            result = df.pivot(index='foo', columns='bar', values=values)
+        else:
+            result = pd.pivot(df, index='foo', columns='bar', values=values)
 
         data = [[1, 2, 3, 'x', 'y', 'z'],
                 [4, 5, 6, 'q', 'w', 't']]
@@ -434,14 +461,18 @@ def test_pivot_with_list_like_values(self, values):
         ['bar', 'baz'], np.array(['bar', 'baz']),
         pd.Series(['bar', 'baz']), pd.Index(['bar', 'baz'])
     ])
-    def test_pivot_with_list_like_values_nans(self, values):
+    @pytest.mark.parametrize('method', [True, False])
+    def test_pivot_with_list_like_values_nans(self, values, method):
         # issue #17160
         df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
                            'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
                            'baz': [1, 2, 3, 4, 5, 6],
                            'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
 
-        result = df.pivot(index='zoo', columns='foo', values=values)
+        if method:
+            result = df.pivot(index='zoo', columns='foo', values=values)
+        else:
+            result = pd.pivot(df, index='zoo', columns='foo', values=values)
 
         data = [[np.nan, 'A', np.nan, 4],
                 [np.nan, 'C', np.nan, 6],
@@ -460,7 +491,8 @@ def test_pivot_with_list_like_values_nans(self, values):
     @pytest.mark.xfail(reason='MultiIndexed unstack with tuple names fails'
                               'with KeyError GH#19966',
                        strict=True)
-    def test_pivot_with_multiindex(self):
+    @pytest.mark.parametrize('method', [True, False])
+    def test_pivot_with_multiindex(self, method):
         # issue #17160
         index = Index(data=[0, 1, 2, 3, 4, 5])
         data = [['one', 'A', 1, 'x'],
@@ -472,24 +504,35 @@ def test_pivot_with_multiindex(self):
         columns = MultiIndex(levels=[['bar', 'baz'], ['first', 'second']],
                              labels=[[0, 0, 1, 1], [0, 1, 0, 1]])
         df = DataFrame(data=data, index=index, columns=columns, dtype='object')
-        result = df.pivot(index=('bar', 'first'), columns=('bar', 'second'),
-                          values=('baz', 'first'))
+        if method:
+            result = df.pivot(index=('bar', 'first'),
+                              columns=('bar', 'second'),
+                              values=('baz', 'first'))
+        else:
+            result = pd.pivot(df,
+                              index=('bar', 'first'),
+                              columns=('bar', 'second'),
+                              values=('baz', 'first'))
 
         data = {'A': Series([1, 4], index=['one', 'two']),
                 'B': Series([2, 5], index=['one', 'two']),
                 'C': Series([3, 6], index=['one', 'two'])}
         expected = DataFrame(data)
         tm.assert_frame_equal(result, expected)
 
-    def test_pivot_with_tuple_of_values(self):
+    @pytest.mark.parametrize('method', [True, False])
+    def test_pivot_with_tuple_of_values(self, method):
         # issue #17160
         df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
                            'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
                            'baz': [1, 2, 3, 4, 5, 6],
                            'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
         with pytest.raises(KeyError):
             # tuple is seen as a single column name
-            df.pivot(index='zoo', columns='foo', values=('bar', 'baz'))
+            if method:
+                df.pivot(index='zoo', columns='foo', values=('bar', 'baz'))
+            else:
+                pd.pivot(df, index='zoo', columns='foo', values=('bar', 'baz'))
 
     def test_margins(self):
         def _check_output(result, values_col, index=['A', 'B'],
diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py