Skip to content

API/CLN: Have toplevel pd.pivot mirror pivot instead of pivot_simple #22209

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Aug 8, 2018
11 changes: 7 additions & 4 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5346,8 +5346,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
# ----------------------------------------------------------------------
# Data reshaping

def pivot(self, index=None, columns=None, values=None):
"""
_shared_docs['pivot'] = """
Return reshaped DataFrame organized by given index / column values.

Reshape data (produce a "pivot" table) based on column values. Uses
Expand All @@ -5357,7 +5356,7 @@ def pivot(self, index=None, columns=None, values=None):
columns. See the :ref:`User Guide <reshaping>` for more on reshaping.

Parameters
----------
----------%s
index : string or object, optional
Column to use to make new frame's index. If None, uses
existing index.
Expand Down Expand Up @@ -5449,7 +5448,11 @@ def pivot(self, index=None, columns=None, values=None):
...
ValueError: Index contains duplicate entries, cannot reshape
"""
from pandas.core.reshape.reshape import pivot

@Substitution('')
@Appender(_shared_docs['pivot'])
def pivot(self, index=None, columns=None, values=None):
from pandas.core.reshape.pivot import pivot
return pivot(self, index=index, columns=columns, values=values)

_shared_docs['pivot_table'] = """
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/reshape/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from pandas.core.reshape.concat import concat
from pandas.core.reshape.melt import melt, lreshape, wide_to_long
from pandas.core.reshape.reshape import pivot_simple as pivot, get_dummies
from pandas.core.reshape.reshape import get_dummies
from pandas.core.reshape.merge import merge, merge_ordered, merge_asof
from pandas.core.reshape.pivot import pivot_table, crosstab
from pandas.core.reshape.pivot import pivot_table, pivot, crosstab
from pandas.core.reshape.tile import cut, qcut
26 changes: 25 additions & 1 deletion pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pandas.core.series import Series
from pandas.core.groupby import Grouper
from pandas.core.reshape.util import cartesian_product
from pandas.core.index import Index, _get_objs_combined_axis
from pandas.core.index import Index, MultiIndex, _get_objs_combined_axis
from pandas.compat import range, lrange, zip
from pandas import compat
import pandas.core.common as com
Expand Down Expand Up @@ -369,6 +369,30 @@ def _convert_by(by):
return by


@Substitution('\ndata : DataFrame')
@Appender(_shared_docs['pivot'], indents=1)
def pivot(data, index=None, columns=None, values=None):
    # NOTE: no literal docstring here on purpose -- the decorators above
    # assemble ``__doc__`` from the shared 'pivot' template.

    if values is None:
        # No value column requested: move the key column(s) into the index
        # and let ``unstack`` spread every remaining column.
        if index is None:
            # Keep the current index and add ``columns`` as an extra level.
            key_cols, keep_existing = [columns], True
        else:
            key_cols, keep_existing = [index, columns], False
        reshaped = data.set_index(key_cols, append=keep_existing)
        return reshaped.unstack(columns)

    # Explicit value column(s): pair the row labels with the labels found
    # in ``columns`` to form a two-level index.
    row_labels = data.index if index is None else data[index]
    combined = MultiIndex.from_arrays([row_labels, data[columns]])

    # A tuple is treated as one (possibly MultiIndex) column name, so it
    # takes the single-column path even though it is list-like.
    many_values = is_list_like(values) and not isinstance(values, tuple)
    if many_values:
        reshaped = data._constructor(data[values].values, index=combined,
                                     columns=values)
    else:
        reshaped = data._constructor_sliced(data[values].values,
                                            index=combined)
    return reshaped.unstack(columns)


def crosstab(index, columns, values=None, rownames=None, colnames=None,
aggfunc=None, margins=False, margins_name='All', dropna=True,
normalize=False):
Expand Down
91 changes: 0 additions & 91 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,97 +383,6 @@ def _unstack_multiple(data, clocs, fill_value=None):
return unstacked


def pivot(self, index=None, columns=None, values=None):
    """
    Reshape ``self`` by index / column labels (see DataFrame.pivot).

    With ``values=None`` the key column(s) are moved into the index and
    every remaining column is unstacked. Otherwise only the requested
    value column(s) are kept before unstacking on ``columns``.
    """
    if values is None:
        if index is None:
            # Keep the current index and add ``columns`` as an extra level.
            key_cols, keep_existing = [columns], True
        else:
            key_cols, keep_existing = [index, columns], False
        return self.set_index(key_cols, append=keep_existing).unstack(columns)

    # Pair the row labels with the labels found in ``columns``.
    row_labels = self.index if index is None else self[index]
    combined = MultiIndex.from_arrays([row_labels, self[columns]])

    # A tuple names a single column, so it must not be handled as a list
    # of columns even though it is list-like.
    many_values = is_list_like(values) and not isinstance(values, tuple)
    if many_values:
        reshaped = self._constructor(self[values].values, index=combined,
                                     columns=values)
    else:
        reshaped = self._constructor_sliced(self[values].values,
                                            index=combined)
    return reshaped.unstack(columns)


def pivot_simple(index, columns, values):
    """
    Build a pivoted DataFrame from three equal-length arrays.

    Each position ``i`` contributes ``values[i]`` to the cell addressed by
    row label ``index[i]`` and column label ``columns[i]``.

    Parameters
    ----------
    index : ndarray
        Labels that become the index of the result.
    columns : ndarray
        Labels that become the columns of the result.
    values : ndarray
        Values used to populate the result.

    Returns
    -------
    DataFrame

    Raises
    ------
    AssertionError
        If the three inputs do not all have the same length.

    See Also
    --------
    DataFrame.pivot_table : generalization of pivot that can handle
        duplicate values for one index/column pair
    """
    if not (len(index) == len(columns) == len(values)):
        msg = ('Length of index, columns, and values '
               'must be the same')
        raise AssertionError(msg)

    # Nothing to reshape: hand back an empty frame.
    if not len(index):
        return DataFrame(index=[])

    row_col_index = MultiIndex.from_arrays([index, columns])
    flat = Series(values.ravel(), index=row_col_index)
    # Sort on the row level, then unstack the column level.
    return flat.sort_index(level=0).unstack()


def _slow_pivot(index, columns, values):
    """
    Build a pivoted DataFrame from parallel label / value sequences.

    Pure-Python implementation: walks the inputs once and collects a
    nested ``{column label: {index label: value}}`` mapping, which is then
    handed to the DataFrame constructor.

    Parameters
    ----------
    index : iterable
        Row label of each observation.
    columns : iterable
        Column label of each observation.
    values : sequence
        Value of each observation, looked up as ``values[i]`` where ``i``
        is the observation's position in ``index`` / ``columns``.

    Returns
    -------
    DataFrame
        One column per distinct label in ``columns``.

    Notes
    -----
    If the same (index, column) pair occurs more than once, the last
    occurrence silently overwrites the earlier ones.

    Could benefit from some Cython here.
    """
    tree = {}
    for i, (idx, col) in enumerate(zip(index, columns)):
        # setdefault creates the per-column mapping on first sight.
        tree.setdefault(col, {})[idx] = values[i]

    return DataFrame(tree)


def unstack(obj, level, fill_value=None):
if isinstance(level, (tuple, list)):
if len(level) != 1:
Expand Down
85 changes: 64 additions & 21 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,13 +301,17 @@ def test_pivot_multi_functions(self):
expected = concat([means, stds], keys=['mean', 'std'], axis=1)
tm.assert_frame_equal(result, expected)

def test_pivot_index_with_nan(self):
@pytest.mark.parametrize('method', [True, False])
def test_pivot_index_with_nan(self, method):
# GH 3588
nan = np.nan
df = DataFrame({'a': ['R1', 'R2', nan, 'R4'],
'b': ['C1', 'C2', 'C3', 'C4'],
'c': [10, 15, 17, 20]})
result = df.pivot('a', 'b', 'c')
if method:
result = df.pivot('a', 'b', 'c')
else:
result = pd.pivot(df, 'a', 'b', 'c')
expected = DataFrame([[nan, nan, 17, nan], [10, nan, nan, nan],
[nan, 15, nan, nan], [nan, nan, nan, 20]],
index=Index([nan, 'R1', 'R2', 'R4'], name='a'),
Expand All @@ -322,15 +326,23 @@ def test_pivot_index_with_nan(self):
df.loc[1, 'a'] = df.loc[3, 'a'] = nan
df.loc[1, 'b'] = df.loc[4, 'b'] = nan

pv = df.pivot('a', 'b', 'c')
if method:
pv = df.pivot('a', 'b', 'c')
else:
pv = pd.pivot(df, 'a', 'b', 'c')
assert pv.notna().values.sum() == len(df)

for _, row in df.iterrows():
assert pv.loc[row['a'], row['b']] == row['c']

tm.assert_frame_equal(df.pivot('b', 'a', 'c'), pv.T)
if method:
result = df.pivot('b', 'a', 'c')
else:
result = pd.pivot(df, 'b', 'a', 'c')
tm.assert_frame_equal(result, pv.T)

def test_pivot_with_tz(self):
@pytest.mark.parametrize('method', [True, False])
def test_pivot_with_tz(self, method):
# GH 5878
df = DataFrame({'dt1': [datetime(2013, 1, 1, 9, 0),
datetime(2013, 1, 2, 9, 0),
Expand Down Expand Up @@ -358,7 +370,10 @@ def test_pivot_with_tz(self):
tz='US/Pacific'),
columns=exp_col)

pv = df.pivot(index='dt1', columns='dt2')
if method:
pv = df.pivot(index='dt1', columns='dt2')
else:
pv = pd.pivot(df, index='dt1', columns='dt2')
tm.assert_frame_equal(pv, expected)

expected = DataFrame([[0, 2], [1, 3]],
Expand All @@ -371,10 +386,14 @@ def test_pivot_with_tz(self):
name='dt2',
tz='Asia/Tokyo'))

pv = df.pivot(index='dt1', columns='dt2', values='data1')
if method:
pv = df.pivot(index='dt1', columns='dt2', values='data1')
else:
pv = pd.pivot(df, index='dt1', columns='dt2', values='data1')
tm.assert_frame_equal(pv, expected)

def test_pivot_periods(self):
@pytest.mark.parametrize('method', [True, False])
def test_pivot_periods(self, method):
df = DataFrame({'p1': [pd.Period('2013-01-01', 'D'),
pd.Period('2013-01-02', 'D'),
pd.Period('2013-01-01', 'D'),
Expand All @@ -394,31 +413,39 @@ def test_pivot_periods(self):
index=pd.PeriodIndex(['2013-01-01', '2013-01-02'],
name='p1', freq='D'),
columns=exp_col)

pv = df.pivot(index='p1', columns='p2')
if method:
pv = df.pivot(index='p1', columns='p2')
else:
pv = pd.pivot(df, index='p1', columns='p2')
tm.assert_frame_equal(pv, expected)

expected = DataFrame([[0, 2], [1, 3]],
index=pd.PeriodIndex(['2013-01-01', '2013-01-02'],
name='p1', freq='D'),
columns=pd.PeriodIndex(['2013-01', '2013-02'],
name='p2', freq='M'))

pv = df.pivot(index='p1', columns='p2', values='data1')
if method:
pv = df.pivot(index='p1', columns='p2', values='data1')
else:
pv = pd.pivot(df, index='p1', columns='p2', values='data1')
tm.assert_frame_equal(pv, expected)

@pytest.mark.parametrize('values', [
['baz', 'zoo'], np.array(['baz', 'zoo']),
pd.Series(['baz', 'zoo']), pd.Index(['baz', 'zoo'])
])
def test_pivot_with_list_like_values(self, values):
@pytest.mark.parametrize('method', [True, False])
def test_pivot_with_list_like_values(self, values, method):
# issue #17160
df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
'baz': [1, 2, 3, 4, 5, 6],
'zoo': ['x', 'y', 'z', 'q', 'w', 't']})

result = df.pivot(index='foo', columns='bar', values=values)
if method:
result = df.pivot(index='foo', columns='bar', values=values)
else:
result = pd.pivot(df, index='foo', columns='bar', values=values)

data = [[1, 2, 3, 'x', 'y', 'z'],
[4, 5, 6, 'q', 'w', 't']]
Expand All @@ -434,14 +461,18 @@ def test_pivot_with_list_like_values(self, values):
['bar', 'baz'], np.array(['bar', 'baz']),
pd.Series(['bar', 'baz']), pd.Index(['bar', 'baz'])
])
def test_pivot_with_list_like_values_nans(self, values):
@pytest.mark.parametrize('method', [True, False])
def test_pivot_with_list_like_values_nans(self, values, method):
# issue #17160
df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
'baz': [1, 2, 3, 4, 5, 6],
'zoo': ['x', 'y', 'z', 'q', 'w', 't']})

result = df.pivot(index='zoo', columns='foo', values=values)
if method:
result = df.pivot(index='zoo', columns='foo', values=values)
else:
result = pd.pivot(df, index='zoo', columns='foo', values=values)

data = [[np.nan, 'A', np.nan, 4],
[np.nan, 'C', np.nan, 6],
Expand All @@ -460,7 +491,8 @@ def test_pivot_with_list_like_values_nans(self, values):
@pytest.mark.xfail(reason='MultiIndexed unstack with tuple names fails'
'with KeyError GH#19966',
strict=True)
def test_pivot_with_multiindex(self):
@pytest.mark.parametrize('method', [True, False])
def test_pivot_with_multiindex(self, method):
# issue #17160
index = Index(data=[0, 1, 2, 3, 4, 5])
data = [['one', 'A', 1, 'x'],
Expand All @@ -472,24 +504,35 @@ def test_pivot_with_multiindex(self):
columns = MultiIndex(levels=[['bar', 'baz'], ['first', 'second']],
labels=[[0, 0, 1, 1], [0, 1, 0, 1]])
df = DataFrame(data=data, index=index, columns=columns, dtype='object')
result = df.pivot(index=('bar', 'first'), columns=('bar', 'second'),
values=('baz', 'first'))
if method:
result = df.pivot(index=('bar', 'first'),
columns=('bar', 'second'),
values=('baz', 'first'))
else:
result = pd.pivot(df,
index=('bar', 'first'),
columns=('bar', 'second'),
values=('baz', 'first'))

data = {'A': Series([1, 4], index=['one', 'two']),
'B': Series([2, 5], index=['one', 'two']),
'C': Series([3, 6], index=['one', 'two'])}
expected = DataFrame(data)
tm.assert_frame_equal(result, expected)

def test_pivot_with_tuple_of_values(self):
@pytest.mark.parametrize('method', [True, False])
def test_pivot_with_tuple_of_values(self, method):
# issue #17160
df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
'baz': [1, 2, 3, 4, 5, 6],
'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
with pytest.raises(KeyError):
# tuple is seen as a single column name
df.pivot(index='zoo', columns='foo', values=('bar', 'baz'))
if method:
df.pivot(index='zoo', columns='foo', values=('bar', 'baz'))
else:
pd.pivot(df, index='zoo', columns='foo', values=('bar', 'baz'))

def test_margins(self):
def _check_output(result, values_col, index=['A', 'B'],
Expand Down
Loading