diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cb251d4648925..7ae2107626973 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5346,8 +5346,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None, # ---------------------------------------------------------------------- # Data reshaping - def pivot(self, index=None, columns=None, values=None): - """ + _shared_docs['pivot'] = """ Return reshaped DataFrame organized by given index / column values. Reshape data (produce a "pivot" table) based on column values. Uses @@ -5357,7 +5356,7 @@ def pivot(self, index=None, columns=None, values=None): columns. See the :ref:`User Guide ` for more on reshaping. Parameters - ---------- + ----------%s index : string or object, optional Column to use to make new frame's index. If None, uses existing index. @@ -5449,7 +5448,11 @@ def pivot(self, index=None, columns=None, values=None): ... ValueError: Index contains duplicate entries, cannot reshape """ - from pandas.core.reshape.reshape import pivot + + @Substitution('') + @Appender(_shared_docs['pivot']) + def pivot(self, index=None, columns=None, values=None): + from pandas.core.reshape.pivot import pivot return pivot(self, index=index, columns=columns, values=values) _shared_docs['pivot_table'] = """ diff --git a/pandas/core/reshape/api.py b/pandas/core/reshape/api.py index 11d69359f5c65..7ac1c0cb52fe3 100644 --- a/pandas/core/reshape/api.py +++ b/pandas/core/reshape/api.py @@ -2,7 +2,7 @@ from pandas.core.reshape.concat import concat from pandas.core.reshape.melt import melt, lreshape, wide_to_long -from pandas.core.reshape.reshape import pivot_simple as pivot, get_dummies +from pandas.core.reshape.reshape import get_dummies from pandas.core.reshape.merge import merge, merge_ordered, merge_asof -from pandas.core.reshape.pivot import pivot_table, crosstab +from pandas.core.reshape.pivot import pivot_table, pivot, crosstab from pandas.core.reshape.tile import cut, qcut diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 0d1caa3d57d73..b525dddeb1ba5 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -10,7 +10,7 @@ from pandas.core.series import Series from pandas.core.groupby import Grouper from pandas.core.reshape.util import cartesian_product -from pandas.core.index import Index, _get_objs_combined_axis +from pandas.core.index import Index, MultiIndex, _get_objs_combined_axis from pandas.compat import range, lrange, zip from pandas import compat import pandas.core.common as com @@ -369,6 +369,30 @@ def _convert_by(by): return by +@Substitution('\ndata : DataFrame') +@Appender(_shared_docs['pivot'], indents=1) +def pivot(data, index=None, columns=None, values=None): + if values is None: + cols = [columns] if index is None else [index, columns] + append = index is None + indexed = data.set_index(cols, append=append) + else: + if index is None: + index = data.index + else: + index = data[index] + index = MultiIndex.from_arrays([index, data[columns]]) + + if is_list_like(values) and not isinstance(values, tuple): + # Exclude tuple because it is seen as a single column name + indexed = data._constructor(data[values].values, index=index, + columns=values) + else: + indexed = data._constructor_sliced(data[values].values, + index=index) + return indexed.unstack(columns) + + def crosstab(index, columns, values=None, rownames=None, colnames=None, aggfunc=None, margins=False, margins_name='All', dropna=True, normalize=False): diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index bd5ce4897e9da..50f6e310705d7 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -383,97 +383,6 @@ def _unstack_multiple(data, clocs, fill_value=None): return unstacked -def pivot(self, index=None, columns=None, values=None): - """ - See DataFrame.pivot - """ - if values is None: - cols = [columns] if index is None else [index, columns] - append = index is None - indexed = self.set_index(cols, append=append) - else: - if index is None: - index = self.index - else: - index = self[index] - index = MultiIndex.from_arrays([index, self[columns]]) - - if is_list_like(values) and not isinstance(values, tuple): - # Exclude tuple because it is seen as a single column name - indexed = self._constructor(self[values].values, index=index, - columns=values) - else: - indexed = self._constructor_sliced(self[values].values, - index=index) - return indexed.unstack(columns) - - -def pivot_simple(index, columns, values): - """ - Produce 'pivot' table based on 3 columns of this DataFrame. - Uses unique values from index / columns and fills with values. - - Parameters - ---------- - index : ndarray - Labels to use to make new frame's index - columns : ndarray - Labels to use to make new frame's columns - values : ndarray - Values to use for populating new frame's values - - Notes - ----- - Obviously, all 3 of the input arguments must have the same length - - Returns - ------- - DataFrame - - See also - -------- - DataFrame.pivot_table : generalization of pivot that can handle - duplicate values for one index/column pair - """ - if (len(index) != len(columns)) or (len(columns) != len(values)): - raise AssertionError('Length of index, columns, and values must be the' - ' same') - - if len(index) == 0: - return DataFrame(index=[]) - - hindex = MultiIndex.from_arrays([index, columns]) - series = Series(values.ravel(), index=hindex) - series = series.sort_index(level=0) - return series.unstack() - - -def _slow_pivot(index, columns, values): - """ - Produce 'pivot' table based on 3 columns of this DataFrame. - Uses unique values from index / columns and fills with values. - - Parameters - ---------- - index : string or object - Column name to use to make new frame's index - columns : string or object - Column name to use to make new frame's columns - values : string or object - Column name to use for populating new frame's values - - Could benefit from some Cython here. - """ - tree = {} - for i, (idx, col) in enumerate(zip(index, columns)): - if col not in tree: - tree[col] = {} - branch = tree[col] - branch[idx] = values[i] - - return DataFrame(tree) - - def unstack(obj, level, fill_value=None): if isinstance(level, (tuple, list)): if len(level) != 1: diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index e3d5880eebd48..e66758f58b1d4 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -301,13 +301,17 @@ def test_pivot_multi_functions(self): expected = concat([means, stds], keys=['mean', 'std'], axis=1) tm.assert_frame_equal(result, expected) - def test_pivot_index_with_nan(self): + @pytest.mark.parametrize('method', [True, False]) + def test_pivot_index_with_nan(self, method): # GH 3588 nan = np.nan df = DataFrame({'a': ['R1', 'R2', nan, 'R4'], 'b': ['C1', 'C2', 'C3', 'C4'], 'c': [10, 15, 17, 20]}) - result = df.pivot('a', 'b', 'c') + if method: + result = df.pivot('a', 'b', 'c') + else: + result = pd.pivot(df, 'a', 'b', 'c') expected = DataFrame([[nan, nan, 17, nan], [10, nan, nan, nan], [nan, 15, nan, nan], [nan, nan, nan, 20]], index=Index([nan, 'R1', 'R2', 'R4'], name='a'), @@ -322,15 +326,23 @@ def test_pivot_index_with_nan(self): df.loc[1, 'a'] = df.loc[3, 'a'] = nan df.loc[1, 'b'] = df.loc[4, 'b'] = nan - pv = df.pivot('a', 'b', 'c') + if method: + pv = df.pivot('a', 'b', 'c') + else: + pv = pd.pivot(df, 'a', 'b', 'c') assert pv.notna().values.sum() == len(df) for _, row in df.iterrows(): assert pv.loc[row['a'], row['b']] == row['c'] - tm.assert_frame_equal(df.pivot('b', 'a', 'c'), pv.T) + if method: + result = df.pivot('b', 'a', 'c') + else: + result = pd.pivot(df, 'b', 'a', 'c') + tm.assert_frame_equal(result, pv.T) - def test_pivot_with_tz(self): + @pytest.mark.parametrize('method', [True, False]) + def test_pivot_with_tz(self, method): # GH 5878 df = DataFrame({'dt1': [datetime(2013, 1, 1, 9, 0), datetime(2013, 1, 2, 9, 0), @@ -358,7 +370,10 @@ def test_pivot_with_tz(self): tz='US/Pacific'), columns=exp_col) - pv = df.pivot(index='dt1', columns='dt2') + if method: + pv = df.pivot(index='dt1', columns='dt2') + else: + pv = pd.pivot(df, index='dt1', columns='dt2') tm.assert_frame_equal(pv, expected) expected = DataFrame([[0, 2], [1, 3]], @@ -371,10 +386,14 @@ def test_pivot_with_tz(self): name='dt2', tz='Asia/Tokyo')) - pv = df.pivot(index='dt1', columns='dt2', values='data1') + if method: + pv = df.pivot(index='dt1', columns='dt2', values='data1') + else: + pv = pd.pivot(df, index='dt1', columns='dt2', values='data1') tm.assert_frame_equal(pv, expected) - def test_pivot_periods(self): + @pytest.mark.parametrize('method', [True, False]) + def test_pivot_periods(self, method): df = DataFrame({'p1': [pd.Period('2013-01-01', 'D'), pd.Period('2013-01-02', 'D'), pd.Period('2013-01-01', 'D'), @@ -394,8 +413,10 @@ def test_pivot_periods(self): index=pd.PeriodIndex(['2013-01-01', '2013-01-02'], name='p1', freq='D'), columns=exp_col) - - pv = df.pivot(index='p1', columns='p2') + if method: + pv = df.pivot(index='p1', columns='p2') + else: + pv = pd.pivot(df, index='p1', columns='p2') tm.assert_frame_equal(pv, expected) expected = DataFrame([[0, 2], [1, 3]], @@ -403,22 +424,28 @@ def test_pivot_periods(self): name='p1', freq='D'), columns=pd.PeriodIndex(['2013-01', '2013-02'], name='p2', freq='M')) - - pv = df.pivot(index='p1', columns='p2', values='data1') + if method: + pv = df.pivot(index='p1', columns='p2', values='data1') + else: + pv = pd.pivot(df, index='p1', columns='p2', values='data1') tm.assert_frame_equal(pv, expected) @pytest.mark.parametrize('values', [ ['baz', 'zoo'], np.array(['baz', 'zoo']), pd.Series(['baz', 'zoo']), pd.Index(['baz', 'zoo']) ]) - def test_pivot_with_list_like_values(self, values): + @pytest.mark.parametrize('method', [True, False]) + def test_pivot_with_list_like_values(self, values, method): # issue #17160 df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'], 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], 'baz': [1, 2, 3, 4, 5, 6], 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) - result = df.pivot(index='foo', columns='bar', values=values) + if method: + result = df.pivot(index='foo', columns='bar', values=values) + else: + result = pd.pivot(df, index='foo', columns='bar', values=values) data = [[1, 2, 3, 'x', 'y', 'z'], [4, 5, 6, 'q', 'w', 't']] @@ -434,14 +461,18 @@ def test_pivot_with_list_like_values(self, values): ['bar', 'baz'], np.array(['bar', 'baz']), pd.Series(['bar', 'baz']), pd.Index(['bar', 'baz']) ]) - def test_pivot_with_list_like_values_nans(self, values): + @pytest.mark.parametrize('method', [True, False]) + def test_pivot_with_list_like_values_nans(self, values, method): # issue #17160 df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'], 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], 'baz': [1, 2, 3, 4, 5, 6], 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) - result = df.pivot(index='zoo', columns='foo', values=values) + if method: + result = df.pivot(index='zoo', columns='foo', values=values) + else: + result = pd.pivot(df, index='zoo', columns='foo', values=values) data = [[np.nan, 'A', np.nan, 4], [np.nan, 'C', np.nan, 6], @@ -460,7 +491,8 @@ def test_pivot_with_list_like_values_nans(self, values): @pytest.mark.xfail(reason='MultiIndexed unstack with tuple names fails' 'with KeyError GH#19966', strict=True) - def test_pivot_with_multiindex(self): + @pytest.mark.parametrize('method', [True, False]) + def test_pivot_with_multiindex(self, method): # issue #17160 index = Index(data=[0, 1, 2, 3, 4, 5]) data = [['one', 'A', 1, 'x'], @@ -472,8 +504,15 @@ def test_pivot_with_multiindex(self): columns = MultiIndex(levels=[['bar', 'baz'], ['first', 'second']], labels=[[0, 0, 1, 1], [0, 1, 0, 1]]) df = DataFrame(data=data, index=index, columns=columns, dtype='object') - result = df.pivot(index=('bar', 'first'), columns=('bar', 'second'), - values=('baz', 'first')) + if method: + result = df.pivot(index=('bar', 'first'), + columns=('bar', 'second'), + values=('baz', 'first')) + else: + result = pd.pivot(df, + index=('bar', 'first'), + columns=('bar', 'second'), + values=('baz', 'first')) data = {'A': Series([1, 4], index=['one', 'two']), 'B': Series([2, 5], index=['one', 'two']), @@ -481,7 +520,8 @@ def test_pivot_with_multiindex(self): expected = DataFrame(data) tm.assert_frame_equal(result, expected) - def test_pivot_with_tuple_of_values(self): + @pytest.mark.parametrize('method', [True, False]) + def test_pivot_with_tuple_of_values(self, method): # issue #17160 df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'], 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], @@ -489,7 +529,10 @@ def test_pivot_with_tuple_of_values(self): 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) with pytest.raises(KeyError): # tuple is seen as a single column name - df.pivot(index='zoo', columns='foo', values=('bar', 'baz')) + if method: + df.pivot(index='zoo', columns='foo', values=('bar', 'baz')) + else: + pd.pivot(df, index='zoo', columns='foo', values=('bar', 'baz')) def test_margins(self): def _check_output(result, values_col, index=['A', 'B'], diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 2f8bc228cf86e..b968c52ce3dfd 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -10,7 +10,7 @@ from pandas.core.dtypes.common import is_float_dtype from pandas import (Series, DataFrame, Index, date_range, isna, notna, - pivot, MultiIndex) + MultiIndex) from pandas.core.nanops import nanall, nanany from pandas.core.panel import Panel @@ -2676,30 +2676,6 @@ def test_join(self): pytest.raises(Exception, lp1.join, self.panel.filter(['ItemB', 'ItemC'])) - def test_pivot(self): - with catch_warnings(record=True): - from pandas.core.reshape.reshape import _slow_pivot - - one, two, three = (np.array([1, 2, 3, 4, 5]), - np.array(['a', 'b', 'c', 'd', 'e']), - np.array([1, 2, 3, 5, 4.])) - df = pivot(one, two, three) - assert df['a'][1] == 1 - assert df['b'][2] == 2 - assert df['c'][3] == 3 - assert df['d'][4] == 5 - assert df['e'][5] == 4 - assert_frame_equal(df, _slow_pivot(one, two, three)) - - # weird overlap, TODO: test? - a, b, c = (np.array([1, 2, 3, 4, 4]), - np.array(['a', 'a', 'a', 'a', 'a']), - np.array([1., 2., 3., 4., 5.])) - pytest.raises(Exception, pivot, a, b, c) - - # corner case, empty - df = pivot(np.array([]), np.array([]), np.array([])) - def test_panel_index(): index = panelm.panel_index([1, 2, 3, 4], [1, 2, 3])