From b29e1d989763fdb55c7f103516de62c0cd34b802 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 5 Aug 2018 10:52:45 -0700 Subject: [PATCH 1/8] CLN: Have pd.pivot mirror pivot instead of pivot_simple --- pandas/core/frame.py | 9 ++++++--- pandas/core/reshape/api.py | 4 ++-- pandas/core/reshape/pivot.py | 25 ++++++++++++++++++++++++- pandas/core/reshape/reshape.py | 25 ------------------------- 4 files changed, 32 insertions(+), 31 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cb251d4648925..68cf9ab2b0b22 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5346,8 +5346,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None, # ---------------------------------------------------------------------- # Data reshaping - def pivot(self, index=None, columns=None, values=None): - """ + _shared_docs['pivot'] = """ Return reshaped DataFrame organized by given index / column values. Reshape data (produce a "pivot" table) based on column values. Uses @@ -5449,7 +5448,11 @@ def pivot(self, index=None, columns=None, values=None): ... 
ValueError: Index contains duplicate entries, cannot reshape """ - from pandas.core.reshape.reshape import pivot + + @Substitution('') + @Appender(_shared_docs['pivot']) + def pivot(self, index=None, columns=None, values=None): + from pandas.core.reshape.pivot import pivot return pivot(self, index=index, columns=columns, values=values) _shared_docs['pivot_table'] = """ diff --git a/pandas/core/reshape/api.py b/pandas/core/reshape/api.py index 11d69359f5c65..7ac1c0cb52fe3 100644 --- a/pandas/core/reshape/api.py +++ b/pandas/core/reshape/api.py @@ -2,7 +2,7 @@ from pandas.core.reshape.concat import concat from pandas.core.reshape.melt import melt, lreshape, wide_to_long -from pandas.core.reshape.reshape import pivot_simple as pivot, get_dummies +from pandas.core.reshape.reshape import get_dummies from pandas.core.reshape.merge import merge, merge_ordered, merge_asof -from pandas.core.reshape.pivot import pivot_table, crosstab +from pandas.core.reshape.pivot import pivot_table, pivot, crosstab from pandas.core.reshape.tile import cut, qcut diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 0d1caa3d57d73..374ada165c062 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -10,7 +10,7 @@ from pandas.core.series import Series from pandas.core.groupby import Grouper from pandas.core.reshape.util import cartesian_product -from pandas.core.index import Index, _get_objs_combined_axis +from pandas.core.index import Index, MultiIndex, _get_objs_combined_axis from pandas.compat import range, lrange, zip from pandas import compat import pandas.core.common as com @@ -368,6 +368,29 @@ def _convert_by(by): by = list(by) return by +@Substitution('\ndata : DataFrame') +@Appender(_shared_docs['pivot'], indents=1) +def pivot(data, index=None, columns=None, values=None): + if values is None: + cols = [columns] if index is None else [index, columns] + append = index is None + indexed = data.set_index(cols, append=append) + else: + if 
index is None: + index = data.index + else: + index = data[index] + index = MultiIndex.from_arrays([index, data[columns]]) + + if is_list_like(values) and not isinstance(values, tuple): + # Exclude tuple because it is seen as a single column name + indexed = data._constructor(data[values].values, index=index, + columns=values) + else: + indexed = data._constructor_sliced(data[values].values, + index=index) + return indexed.unstack(columns) + def crosstab(index, columns, values=None, rownames=None, colnames=None, aggfunc=None, margins=False, margins_name='All', dropna=True, diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index bd5ce4897e9da..567bb1c9300e4 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -383,31 +383,6 @@ def _unstack_multiple(data, clocs, fill_value=None): return unstacked -def pivot(self, index=None, columns=None, values=None): - """ - See DataFrame.pivot - """ - if values is None: - cols = [columns] if index is None else [index, columns] - append = index is None - indexed = self.set_index(cols, append=append) - else: - if index is None: - index = self.index - else: - index = self[index] - index = MultiIndex.from_arrays([index, self[columns]]) - - if is_list_like(values) and not isinstance(values, tuple): - # Exclude tuple because it is seen as a single column name - indexed = self._constructor(self[values].values, index=index, - columns=values) - else: - indexed = self._constructor_sliced(self[values].values, - index=index) - return indexed.unstack(columns) - - def pivot_simple(index, columns, values): """ Produce 'pivot' table based on 3 columns of this DataFrame. 
From 088c6c418610ad7798f26ae95ad8b55dc581f894 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 5 Aug 2018 10:53:25 -0700 Subject: [PATCH 2/8] Remove pivot_simple --- pandas/core/reshape/reshape.py | 40 ---------------------------------- 1 file changed, 40 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 567bb1c9300e4..8dbae23489a22 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -383,46 +383,6 @@ def _unstack_multiple(data, clocs, fill_value=None): return unstacked -def pivot_simple(index, columns, values): - """ - Produce 'pivot' table based on 3 columns of this DataFrame. - Uses unique values from index / columns and fills with values. - - Parameters - ---------- - index : ndarray - Labels to use to make new frame's index - columns : ndarray - Labels to use to make new frame's columns - values : ndarray - Values to use for populating new frame's values - - Notes - ----- - Obviously, all 3 of the input arguments must have the same length - - Returns - ------- - DataFrame - - See also - -------- - DataFrame.pivot_table : generalization of pivot that can handle - duplicate values for one index/column pair - """ - if (len(index) != len(columns)) or (len(columns) != len(values)): - raise AssertionError('Length of index, columns, and values must be the' - ' same') - - if len(index) == 0: - return DataFrame(index=[]) - - hindex = MultiIndex.from_arrays([index, columns]) - series = Series(values.ravel(), index=hindex) - series = series.sort_index(level=0) - return series.unstack() - - def _slow_pivot(index, columns, values): """ Produce 'pivot' table based on 3 columns of this DataFrame. 
From d9eca20730835d628fbd60341601bf59c12db6aa Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 5 Aug 2018 18:09:03 -0700 Subject: [PATCH 3/8] fix shared_doc --- pandas/core/frame.py | 2 +- pandas/tests/reshape/test_pivot.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 68cf9ab2b0b22..7ae2107626973 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5356,7 +5356,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None, columns. See the :ref:`User Guide <reshaping>` for more on reshaping. Parameters - ---------- + ----------%s index : string or object, optional Column to use to make new frame's index. If None, uses existing index. diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index e3d5880eebd48..3d770922d6cfd 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -11,7 +11,7 @@ import pandas as pd from pandas import (DataFrame, Series, Index, MultiIndex, Grouper, date_range, concat, Categorical) -from pandas.core.reshape.pivot import pivot_table, crosstab +from pandas.core.reshape.pivot import pivot_table, pivot, crosstab from pandas.compat import range, product import pandas.util.testing as tm from pandas.api.types import CategoricalDtype as CDT From 3a515397b6a8b5e7f17c618b2d003475e2667e1f Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 5 Aug 2018 19:00:28 -0700 Subject: [PATCH 4/8] Add more tests and whatsnew --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/tests/reshape/test_pivot.py | 85 ++++++++++++++++++++++-------- 2 files changed, 65 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 5c15c7b6a742f..c54739feb2a85 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -467,6 +467,7 @@ Other API Changes - :meth:`PeriodIndex.tz_convert` and :meth:`PeriodIndex.tz_localize` have been 
removed (:issue:`21781`) - :class:`Index` subtraction will attempt to operate element-wise instead of raising ``TypeError`` (:issue:`19369`) - :class:`pandas.io.formats.style.Styler` supports a ``number-format`` property when using :meth:`~pandas.io.formats.style.Styler.to_excel` (:issue:`22015`) +- Top level function :meth:`pandas.pivot` now maps directly to the instance method :meth:`DataFrame.pivot` (:issue:`22116`) .. _whatsnew_0240.deprecations: diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 3d770922d6cfd..2667570922144 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -301,13 +301,17 @@ def test_pivot_multi_functions(self): expected = concat([means, stds], keys=['mean', 'std'], axis=1) tm.assert_frame_equal(result, expected) - def test_pivot_index_with_nan(self): + @pytest.mark.parametrize('method', [True, False]) + def test_pivot_index_with_nan(self, method): # GH 3588 nan = np.nan df = DataFrame({'a': ['R1', 'R2', nan, 'R4'], 'b': ['C1', 'C2', 'C3', 'C4'], 'c': [10, 15, 17, 20]}) - result = df.pivot('a', 'b', 'c') + if method: + result = df.pivot('a', 'b', 'c') + else: + result = pd.pivot(df, 'a', 'b', 'c') expected = DataFrame([[nan, nan, 17, nan], [10, nan, nan, nan], [nan, 15, nan, nan], [nan, nan, nan, 20]], index=Index([nan, 'R1', 'R2', 'R4'], name='a'), @@ -322,15 +326,23 @@ def test_pivot_index_with_nan(self): df.loc[1, 'a'] = df.loc[3, 'a'] = nan df.loc[1, 'b'] = df.loc[4, 'b'] = nan - pv = df.pivot('a', 'b', 'c') + if method: + pv = df.pivot('a', 'b', 'c') + else: + pv = pd.pivot(df, 'a', 'b', 'c') assert pv.notna().values.sum() == len(df) for _, row in df.iterrows(): assert pv.loc[row['a'], row['b']] == row['c'] - tm.assert_frame_equal(df.pivot('b', 'a', 'c'), pv.T) + if method: + result = df.pivot('b', 'a', 'c') + else: + result = pd.pivot(df, 'b', 'a', 'c') + tm.assert_frame_equal(result, pv.T) - def test_pivot_with_tz(self): + 
@pytest.mark.parametrize('method', [True, False]) + def test_pivot_with_tz(self, method): # GH 5878 df = DataFrame({'dt1': [datetime(2013, 1, 1, 9, 0), datetime(2013, 1, 2, 9, 0), @@ -358,7 +370,10 @@ def test_pivot_with_tz(self): tz='US/Pacific'), columns=exp_col) - pv = df.pivot(index='dt1', columns='dt2') + if method: + pv = df.pivot(index='dt1', columns='dt2') + else: + pv = pd.pivot(df, index='dt1', columns='dt2') tm.assert_frame_equal(pv, expected) expected = DataFrame([[0, 2], [1, 3]], @@ -371,10 +386,14 @@ def test_pivot_with_tz(self): name='dt2', tz='Asia/Tokyo')) - pv = df.pivot(index='dt1', columns='dt2', values='data1') + if method: + pv = df.pivot(index='dt1', columns='dt2', values='data1') + else: + pv = pd.pivot(df, index='dt1', columns='dt2', values='data1') tm.assert_frame_equal(pv, expected) - def test_pivot_periods(self): + @pytest.mark.parametrize('method', [True, False]) + def test_pivot_periods(self, method): df = DataFrame({'p1': [pd.Period('2013-01-01', 'D'), pd.Period('2013-01-02', 'D'), pd.Period('2013-01-01', 'D'), @@ -394,8 +413,10 @@ def test_pivot_periods(self): index=pd.PeriodIndex(['2013-01-01', '2013-01-02'], name='p1', freq='D'), columns=exp_col) - - pv = df.pivot(index='p1', columns='p2') + if method: + pv = df.pivot(index='p1', columns='p2') + else: + pv = pd.pivot(df, index='p1', columns='p2') tm.assert_frame_equal(pv, expected) expected = DataFrame([[0, 2], [1, 3]], @@ -403,22 +424,28 @@ def test_pivot_periods(self): name='p1', freq='D'), columns=pd.PeriodIndex(['2013-01', '2013-02'], name='p2', freq='M')) - - pv = df.pivot(index='p1', columns='p2', values='data1') + if method: + pv = df.pivot(index='p1', columns='p2', values='data1') + else: + pv = pd.pivot(df, index='p1', columns='p2', values='data1') tm.assert_frame_equal(pv, expected) @pytest.mark.parametrize('values', [ ['baz', 'zoo'], np.array(['baz', 'zoo']), pd.Series(['baz', 'zoo']), pd.Index(['baz', 'zoo']) ]) - def test_pivot_with_list_like_values(self, values): + 
@pytest.mark.parametrize('method', [True, False]) + def test_pivot_with_list_like_values(self, values, method): # issue #17160 df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'], 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], 'baz': [1, 2, 3, 4, 5, 6], 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) - result = df.pivot(index='foo', columns='bar', values=values) + if method: + result = df.pivot(index='foo', columns='bar', values=values) + else: + result = pd.pivot(df, index='foo', columns='bar', values=values) data = [[1, 2, 3, 'x', 'y', 'z'], [4, 5, 6, 'q', 'w', 't']] @@ -434,14 +461,18 @@ def test_pivot_with_list_like_values(self, values): ['bar', 'baz'], np.array(['bar', 'baz']), pd.Series(['bar', 'baz']), pd.Index(['bar', 'baz']) ]) - def test_pivot_with_list_like_values_nans(self, values): + @pytest.mark.parametrize('method', [True, False]) + def test_pivot_with_list_like_values_nans(self, values, method): # issue #17160 df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'], 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], 'baz': [1, 2, 3, 4, 5, 6], 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) - result = df.pivot(index='zoo', columns='foo', values=values) + if method: + result = df.pivot(index='zoo', columns='foo', values=values) + else: + result = pd.pivot(df, index='zoo', columns='foo', values=values) data = [[np.nan, 'A', np.nan, 4], [np.nan, 'C', np.nan, 6], @@ -460,7 +491,8 @@ def test_pivot_with_list_like_values_nans(self, values): @pytest.mark.xfail(reason='MultiIndexed unstack with tuple names fails' 'with KeyError GH#19966', strict=True) - def test_pivot_with_multiindex(self): + @pytest.mark.parametrize('method', [True, False]) + def test_pivot_with_multiindex(self, method): # issue #17160 index = Index(data=[0, 1, 2, 3, 4, 5]) data = [['one', 'A', 1, 'x'], @@ -472,8 +504,15 @@ def test_pivot_with_multiindex(self): columns = MultiIndex(levels=[['bar', 'baz'], ['first', 'second']], labels=[[0, 0, 1, 1], [0, 1, 0, 1]]) df = DataFrame(data=data, 
index=index, columns=columns, dtype='object') - result = df.pivot(index=('bar', 'first'), columns=('bar', 'second'), - values=('baz', 'first')) + if method: + result = df.pivot(index=('bar', 'first'), + columns=('bar', 'second'), + values=('baz', 'first')) + else: + result = pd.pivot(df, + index=('bar', 'first'), + columns=('bar', 'second'), + values=('baz', 'first')) data = {'A': Series([1, 4], index=['one', 'two']), 'B': Series([2, 5], index=['one', 'two']), @@ -481,7 +520,8 @@ def test_pivot_with_multiindex(self): expected = DataFrame(data) tm.assert_frame_equal(result, expected) - def test_pivot_with_tuple_of_values(self): + @pytest.mark.parametrize('method', [True, False]) + def test_pivot_with_tuple_of_values(self, method): # issue #17160 df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'], 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], @@ -489,7 +529,10 @@ def test_pivot_with_tuple_of_values(self): 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) with pytest.raises(KeyError): # tuple is seen as a single column name - df.pivot(index='zoo', columns='foo', values=('bar', 'baz')) + if method: + df.pivot(index='zoo', columns='foo', values=('bar', 'baz')) + else: + pd.pivot(df, index='zoo', columns='foo', values=('bar', 'baz')) def test_margins(self): def _check_output(result, values_col, index=['A', 'B'], From e8e7066952efbf2120a6bcf6c71f573a7d5f1e3a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 5 Aug 2018 19:03:57 -0700 Subject: [PATCH 5/8] flake8 --- pandas/core/reshape/pivot.py | 1 + pandas/tests/reshape/test_pivot.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 374ada165c062..b525dddeb1ba5 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -368,6 +368,7 @@ def _convert_by(by): by = list(by) return by + @Substitution('\ndata : DataFrame') @Appender(_shared_docs['pivot'], indents=1) def pivot(data, index=None, columns=None, 
values=None): diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 2667570922144..e66758f58b1d4 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -11,7 +11,7 @@ import pandas as pd from pandas import (DataFrame, Series, Index, MultiIndex, Grouper, date_range, concat, Categorical) -from pandas.core.reshape.pivot import pivot_table, pivot, crosstab +from pandas.core.reshape.pivot import pivot_table, crosstab from pandas.compat import range, product import pandas.util.testing as tm from pandas.api.types import CategoricalDtype as CDT From 469aab8b7ce0eea12f7665f3c9b1219fe544b655 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 6 Aug 2018 12:11:09 -0700 Subject: [PATCH 6/8] Add _pivot_simple and _slow_pivot to pivot.py --- doc/source/whatsnew/v0.24.0.txt | 1 - pandas/core/reshape/pivot.py | 66 +++++++++++++++++++++++++++++++++ pandas/core/reshape/reshape.py | 26 ------------- pandas/tests/test_panel.py | 6 +-- 4 files changed, 69 insertions(+), 30 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 6708d0b170877..ea0677a0edf28 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -467,7 +467,6 @@ Other API Changes - :meth:`PeriodIndex.tz_convert` and :meth:`PeriodIndex.tz_localize` have been removed (:issue:`21781`) - :class:`Index` subtraction will attempt to operate element-wise instead of raising ``TypeError`` (:issue:`19369`) - :class:`pandas.io.formats.style.Styler` supports a ``number-format`` property when using :meth:`~pandas.io.formats.style.Styler.to_excel` (:issue:`22015`) -- Top level function :meth:`pandas.pivot` now maps directly to the instance method :meth:`DataFrame.pivot` (:issue:`22116`) .. 
_whatsnew_0240.deprecations: diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index b525dddeb1ba5..5291f15996ae4 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -8,6 +8,7 @@ from pandas.core.reshape.concat import concat from pandas.core.series import Series +from pandas.core.frame import DataFrame from pandas.core.groupby import Grouper from pandas.core.reshape.util import cartesian_product from pandas.core.index import Index, MultiIndex, _get_objs_combined_axis @@ -622,3 +623,68 @@ def _get_names(arrs, names, prefix='row'): names = list(names) return names + + +def _pivot_simple(index, columns, values): + """ + Produce 'pivot' table based on 3 columns of this DataFrame. + Uses unique values from index / columns and fills with values. + Parameters + ---------- + index : ndarray + Labels to use to make new frame's index + columns : ndarray + Labels to use to make new frame's columns + values : ndarray + Values to use for populating new frame's values + Notes + ----- + Obviously, all 3 of the input arguments must have the same length + This is ONLY used for testing in pandas/test/test_panel.py + Returns + ------- + DataFrame + See also + -------- + DataFrame.pivot_table : generalization of pivot that can handle + duplicate values for one index/column pair + """ + if (len(index) != len(columns)) or (len(columns) != len(values)): + raise AssertionError('Length of index, columns, and values must be the' + ' same') + if len(index) == 0: + return DataFrame(index=[]) + hindex = MultiIndex.from_arrays([index, columns]) + series = Series(values.ravel(), index=hindex) + series = series.sort_index(level=0) + return series.unstack() + + +def _slow_pivot(index, columns, values): + """ + Produce 'pivot' table based on 3 columns of this DataFrame. + Uses unique values from index / columns and fills with values. 
+ + Parameters + ---------- + index : string or object + Column name to use to make new frame's index + columns : string or object + Column name to use to make new frame's columns + values : string or object + Column name to use for populating new frame's values + + Could benefit from some Cython here. + + Note + ---- + This is ONLY used for testing in pandas/test/test_panel.py + """ + tree = {} + for i, (idx, col) in enumerate(zip(index, columns)): + if col not in tree: + tree[col] = {} + branch = tree[col] + branch[idx] = values[i] + + return DataFrame(tree) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 8dbae23489a22..50f6e310705d7 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -383,32 +383,6 @@ def _unstack_multiple(data, clocs, fill_value=None): return unstacked -def _slow_pivot(index, columns, values): - """ - Produce 'pivot' table based on 3 columns of this DataFrame. - Uses unique values from index / columns and fills with values. - - Parameters - ---------- - index : string or object - Column name to use to make new frame's index - columns : string or object - Column name to use to make new frame's columns - values : string or object - Column name to use for populating new frame's values - - Could benefit from some Cython here. 
- """ - tree = {} - for i, (idx, col) in enumerate(zip(index, columns)): - if col not in tree: - tree[col] = {} - branch = tree[col] - branch[idx] = values[i] - - return DataFrame(tree) - - def unstack(obj, level, fill_value=None): if isinstance(level, (tuple, list)): if len(level) != 1: diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 2f8bc228cf86e..2e492d1ed4db0 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -2678,12 +2678,12 @@ def test_join(self): def test_pivot(self): with catch_warnings(record=True): - from pandas.core.reshape.reshape import _slow_pivot + from pandas.core.reshape.pivot import _slow_pivot, _pivot_simple one, two, three = (np.array([1, 2, 3, 4, 5]), np.array(['a', 'b', 'c', 'd', 'e']), np.array([1, 2, 3, 5, 4.])) - df = pivot(one, two, three) + df = _pivot_simple(one, two, three) assert df['a'][1] == 1 assert df['b'][2] == 2 assert df['c'][3] == 3 @@ -2698,7 +2698,7 @@ def test_pivot(self): pytest.raises(Exception, pivot, a, b, c) # corner case, empty - df = pivot(np.array([]), np.array([]), np.array([])) + df = _pivot_simple(np.array([]), np.array([]), np.array([])) def test_panel_index(): From 49a8e846a91efa43aa7be9aa015e8743cbab24e8 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 6 Aug 2018 12:18:14 -0700 Subject: [PATCH 7/8] Remove unused import --- pandas/tests/test_panel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 2e492d1ed4db0..c77dc8899fa54 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -10,7 +10,7 @@ from pandas.core.dtypes.common import is_float_dtype from pandas import (Series, DataFrame, Index, date_range, isna, notna, - pivot, MultiIndex) + MultiIndex) from pandas.core.nanops import nanall, nanany from pandas.core.panel import Panel @@ -2695,7 +2695,7 @@ def test_pivot(self): a, b, c = (np.array([1, 2, 3, 4, 4]), np.array(['a', 'a', 'a', 'a', 'a']), 
np.array([1., 2., 3., 4., 5.])) - pytest.raises(Exception, pivot, a, b, c) + pytest.raises(Exception, _pivot_simple, a, b, c) # corner case, empty df = _pivot_simple(np.array([]), np.array([]), np.array([])) From fae6c5b87895d0ad04e20733e5316fa779a3dcf6 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 7 Aug 2018 09:46:30 -0700 Subject: [PATCH 8/8] remove _slow_pivot, _pivot_simple and associated test --- pandas/core/reshape/pivot.py | 66 ------------------------------------ pandas/tests/test_panel.py | 24 ------------- 2 files changed, 90 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 5291f15996ae4..b525dddeb1ba5 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -8,7 +8,6 @@ from pandas.core.reshape.concat import concat from pandas.core.series import Series -from pandas.core.frame import DataFrame from pandas.core.groupby import Grouper from pandas.core.reshape.util import cartesian_product from pandas.core.index import Index, MultiIndex, _get_objs_combined_axis @@ -623,68 +622,3 @@ def _get_names(arrs, names, prefix='row'): names = list(names) return names - - -def _pivot_simple(index, columns, values): - """ - Produce 'pivot' table based on 3 columns of this DataFrame. - Uses unique values from index / columns and fills with values. 
- Parameters - ---------- - index : ndarray - Labels to use to make new frame's index - columns : ndarray - Labels to use to make new frame's columns - values : ndarray - Values to use for populating new frame's values - Notes - ----- - Obviously, all 3 of the input arguments must have the same length - This is ONLY used for testing in pandas/test/test_panel.py - Returns - ------- - DataFrame - See also - -------- - DataFrame.pivot_table : generalization of pivot that can handle - duplicate values for one index/column pair - """ - if (len(index) != len(columns)) or (len(columns) != len(values)): - raise AssertionError('Length of index, columns, and values must be the' - ' same') - if len(index) == 0: - return DataFrame(index=[]) - hindex = MultiIndex.from_arrays([index, columns]) - series = Series(values.ravel(), index=hindex) - series = series.sort_index(level=0) - return series.unstack() - - -def _slow_pivot(index, columns, values): - """ - Produce 'pivot' table based on 3 columns of this DataFrame. - Uses unique values from index / columns and fills with values. - - Parameters - ---------- - index : string or object - Column name to use to make new frame's index - columns : string or object - Column name to use to make new frame's columns - values : string or object - Column name to use for populating new frame's values - - Could benefit from some Cython here. 
- - Note - ---- - This is ONLY used for testing in pandas/test/test_panel.py - """ - tree = {} - for i, (idx, col) in enumerate(zip(index, columns)): - if col not in tree: - tree[col] = {} - branch = tree[col] - branch[idx] = values[i] - - return DataFrame(tree) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index c77dc8899fa54..b968c52ce3dfd 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -2676,30 +2676,6 @@ def test_join(self): pytest.raises(Exception, lp1.join, self.panel.filter(['ItemB', 'ItemC'])) - def test_pivot(self): - with catch_warnings(record=True): - from pandas.core.reshape.pivot import _slow_pivot, _pivot_simple - - one, two, three = (np.array([1, 2, 3, 4, 5]), - np.array(['a', 'b', 'c', 'd', 'e']), - np.array([1, 2, 3, 5, 4.])) - df = _pivot_simple(one, two, three) - assert df['a'][1] == 1 - assert df['b'][2] == 2 - assert df['c'][3] == 3 - assert df['d'][4] == 5 - assert df['e'][5] == 4 - assert_frame_equal(df, _slow_pivot(one, two, three)) - - # weird overlap, TODO: test? - a, b, c = (np.array([1, 2, 3, 4, 4]), - np.array(['a', 'a', 'a', 'a', 'a']), - np.array([1., 2., 3., 4., 5.])) - pytest.raises(Exception, _pivot_simple, a, b, c) - - # corner case, empty - df = _pivot_simple(np.array([]), np.array([]), np.array([])) - def test_panel_index(): index = panelm.panel_index([1, 2, 3, 4], [1, 2, 3])