Skip to content

Commit 861b147

Browse files
mroeschke authored and victor committed
API/CLN: Have toplevel pd.pivot mirror pivot instead of pivot_simple (pandas-dev#22209)
1 parent 61105b9 commit 861b147

File tree

6 files changed, with 99 additions and 144 deletions.

pandas/core/frame.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -5322,8 +5322,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
53225322
# ----------------------------------------------------------------------
53235323
# Data reshaping
53245324

5325-
def pivot(self, index=None, columns=None, values=None):
5326-
"""
5325+
_shared_docs['pivot'] = """
53275326
Return reshaped DataFrame organized by given index / column values.
53285327
53295328
Reshape data (produce a "pivot" table) based on column values. Uses
@@ -5333,7 +5332,7 @@ def pivot(self, index=None, columns=None, values=None):
53335332
columns. See the :ref:`User Guide <reshaping>` for more on reshaping.
53345333
53355334
Parameters
5336-
----------
5335+
----------%s
53375336
index : string or object, optional
53385337
Column to use to make new frame's index. If None, uses
53395338
existing index.
@@ -5425,7 +5424,11 @@ def pivot(self, index=None, columns=None, values=None):
54255424
...
54265425
ValueError: Index contains duplicate entries, cannot reshape
54275426
"""
5428-
from pandas.core.reshape.reshape import pivot
5427+
5428+
@Substitution('')
5429+
@Appender(_shared_docs['pivot'])
5430+
def pivot(self, index=None, columns=None, values=None):
5431+
from pandas.core.reshape.pivot import pivot
54295432
return pivot(self, index=index, columns=columns, values=values)
54305433

54315434
_shared_docs['pivot_table'] = """

pandas/core/reshape/api.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from pandas.core.reshape.concat import concat
44
from pandas.core.reshape.melt import melt, lreshape, wide_to_long
5-
from pandas.core.reshape.reshape import pivot_simple as pivot, get_dummies
5+
from pandas.core.reshape.reshape import get_dummies
66
from pandas.core.reshape.merge import merge, merge_ordered, merge_asof
7-
from pandas.core.reshape.pivot import pivot_table, crosstab
7+
from pandas.core.reshape.pivot import pivot_table, pivot, crosstab
88
from pandas.core.reshape.tile import cut, qcut

pandas/core/reshape/pivot.py

+25-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from pandas.core.series import Series
1111
from pandas.core.groupby import Grouper
1212
from pandas.core.reshape.util import cartesian_product
13-
from pandas.core.index import Index, _get_objs_combined_axis
13+
from pandas.core.index import Index, MultiIndex, _get_objs_combined_axis
1414
from pandas.compat import range, lrange, zip
1515
from pandas import compat
1616
import pandas.core.common as com
@@ -369,6 +369,30 @@ def _convert_by(by):
369369
return by
370370

371371

372+
@Substitution('\ndata : DataFrame')
373+
@Appender(_shared_docs['pivot'], indents=1)
374+
def pivot(data, index=None, columns=None, values=None):
375+
if values is None:
376+
cols = [columns] if index is None else [index, columns]
377+
append = index is None
378+
indexed = data.set_index(cols, append=append)
379+
else:
380+
if index is None:
381+
index = data.index
382+
else:
383+
index = data[index]
384+
index = MultiIndex.from_arrays([index, data[columns]])
385+
386+
if is_list_like(values) and not isinstance(values, tuple):
387+
# Exclude tuple because it is seen as a single column name
388+
indexed = data._constructor(data[values].values, index=index,
389+
columns=values)
390+
else:
391+
indexed = data._constructor_sliced(data[values].values,
392+
index=index)
393+
return indexed.unstack(columns)
394+
395+
372396
def crosstab(index, columns, values=None, rownames=None, colnames=None,
373397
aggfunc=None, margins=False, margins_name='All', dropna=True,
374398
normalize=False):

pandas/core/reshape/reshape.py

-91
Original file line numberDiff line numberDiff line change
@@ -383,97 +383,6 @@ def _unstack_multiple(data, clocs, fill_value=None):
383383
return unstacked
384384

385385

386-
def pivot(self, index=None, columns=None, values=None):
387-
"""
388-
See DataFrame.pivot
389-
"""
390-
if values is None:
391-
cols = [columns] if index is None else [index, columns]
392-
append = index is None
393-
indexed = self.set_index(cols, append=append)
394-
else:
395-
if index is None:
396-
index = self.index
397-
else:
398-
index = self[index]
399-
index = MultiIndex.from_arrays([index, self[columns]])
400-
401-
if is_list_like(values) and not isinstance(values, tuple):
402-
# Exclude tuple because it is seen as a single column name
403-
indexed = self._constructor(self[values].values, index=index,
404-
columns=values)
405-
else:
406-
indexed = self._constructor_sliced(self[values].values,
407-
index=index)
408-
return indexed.unstack(columns)
409-
410-
411-
def pivot_simple(index, columns, values):
412-
"""
413-
Produce 'pivot' table based on 3 columns of this DataFrame.
414-
Uses unique values from index / columns and fills with values.
415-
416-
Parameters
417-
----------
418-
index : ndarray
419-
Labels to use to make new frame's index
420-
columns : ndarray
421-
Labels to use to make new frame's columns
422-
values : ndarray
423-
Values to use for populating new frame's values
424-
425-
Notes
426-
-----
427-
Obviously, all 3 of the input arguments must have the same length
428-
429-
Returns
430-
-------
431-
DataFrame
432-
433-
See also
434-
--------
435-
DataFrame.pivot_table : generalization of pivot that can handle
436-
duplicate values for one index/column pair
437-
"""
438-
if (len(index) != len(columns)) or (len(columns) != len(values)):
439-
raise AssertionError('Length of index, columns, and values must be the'
440-
' same')
441-
442-
if len(index) == 0:
443-
return DataFrame(index=[])
444-
445-
hindex = MultiIndex.from_arrays([index, columns])
446-
series = Series(values.ravel(), index=hindex)
447-
series = series.sort_index(level=0)
448-
return series.unstack()
449-
450-
451-
def _slow_pivot(index, columns, values):
452-
"""
453-
Produce 'pivot' table based on 3 columns of this DataFrame.
454-
Uses unique values from index / columns and fills with values.
455-
456-
Parameters
457-
----------
458-
index : string or object
459-
Column name to use to make new frame's index
460-
columns : string or object
461-
Column name to use to make new frame's columns
462-
values : string or object
463-
Column name to use for populating new frame's values
464-
465-
Could benefit from some Cython here.
466-
"""
467-
tree = {}
468-
for i, (idx, col) in enumerate(zip(index, columns)):
469-
if col not in tree:
470-
tree[col] = {}
471-
branch = tree[col]
472-
branch[idx] = values[i]
473-
474-
return DataFrame(tree)
475-
476-
477386
def unstack(obj, level, fill_value=None):
478387
if isinstance(level, (tuple, list)):
479388
if len(level) != 1:

pandas/tests/reshape/test_pivot.py

+64-21
Original file line numberDiff line numberDiff line change
@@ -301,13 +301,17 @@ def test_pivot_multi_functions(self):
301301
expected = concat([means, stds], keys=['mean', 'std'], axis=1)
302302
tm.assert_frame_equal(result, expected)
303303

304-
def test_pivot_index_with_nan(self):
304+
@pytest.mark.parametrize('method', [True, False])
305+
def test_pivot_index_with_nan(self, method):
305306
# GH 3588
306307
nan = np.nan
307308
df = DataFrame({'a': ['R1', 'R2', nan, 'R4'],
308309
'b': ['C1', 'C2', 'C3', 'C4'],
309310
'c': [10, 15, 17, 20]})
310-
result = df.pivot('a', 'b', 'c')
311+
if method:
312+
result = df.pivot('a', 'b', 'c')
313+
else:
314+
result = pd.pivot(df, 'a', 'b', 'c')
311315
expected = DataFrame([[nan, nan, 17, nan], [10, nan, nan, nan],
312316
[nan, 15, nan, nan], [nan, nan, nan, 20]],
313317
index=Index([nan, 'R1', 'R2', 'R4'], name='a'),
@@ -322,15 +326,23 @@ def test_pivot_index_with_nan(self):
322326
df.loc[1, 'a'] = df.loc[3, 'a'] = nan
323327
df.loc[1, 'b'] = df.loc[4, 'b'] = nan
324328

325-
pv = df.pivot('a', 'b', 'c')
329+
if method:
330+
pv = df.pivot('a', 'b', 'c')
331+
else:
332+
pv = pd.pivot(df, 'a', 'b', 'c')
326333
assert pv.notna().values.sum() == len(df)
327334

328335
for _, row in df.iterrows():
329336
assert pv.loc[row['a'], row['b']] == row['c']
330337

331-
tm.assert_frame_equal(df.pivot('b', 'a', 'c'), pv.T)
338+
if method:
339+
result = df.pivot('b', 'a', 'c')
340+
else:
341+
result = pd.pivot(df, 'b', 'a', 'c')
342+
tm.assert_frame_equal(result, pv.T)
332343

333-
def test_pivot_with_tz(self):
344+
@pytest.mark.parametrize('method', [True, False])
345+
def test_pivot_with_tz(self, method):
334346
# GH 5878
335347
df = DataFrame({'dt1': [datetime(2013, 1, 1, 9, 0),
336348
datetime(2013, 1, 2, 9, 0),
@@ -358,7 +370,10 @@ def test_pivot_with_tz(self):
358370
tz='US/Pacific'),
359371
columns=exp_col)
360372

361-
pv = df.pivot(index='dt1', columns='dt2')
373+
if method:
374+
pv = df.pivot(index='dt1', columns='dt2')
375+
else:
376+
pv = pd.pivot(df, index='dt1', columns='dt2')
362377
tm.assert_frame_equal(pv, expected)
363378

364379
expected = DataFrame([[0, 2], [1, 3]],
@@ -371,10 +386,14 @@ def test_pivot_with_tz(self):
371386
name='dt2',
372387
tz='Asia/Tokyo'))
373388

374-
pv = df.pivot(index='dt1', columns='dt2', values='data1')
389+
if method:
390+
pv = df.pivot(index='dt1', columns='dt2', values='data1')
391+
else:
392+
pv = pd.pivot(df, index='dt1', columns='dt2', values='data1')
375393
tm.assert_frame_equal(pv, expected)
376394

377-
def test_pivot_periods(self):
395+
@pytest.mark.parametrize('method', [True, False])
396+
def test_pivot_periods(self, method):
378397
df = DataFrame({'p1': [pd.Period('2013-01-01', 'D'),
379398
pd.Period('2013-01-02', 'D'),
380399
pd.Period('2013-01-01', 'D'),
@@ -394,31 +413,39 @@ def test_pivot_periods(self):
394413
index=pd.PeriodIndex(['2013-01-01', '2013-01-02'],
395414
name='p1', freq='D'),
396415
columns=exp_col)
397-
398-
pv = df.pivot(index='p1', columns='p2')
416+
if method:
417+
pv = df.pivot(index='p1', columns='p2')
418+
else:
419+
pv = pd.pivot(df, index='p1', columns='p2')
399420
tm.assert_frame_equal(pv, expected)
400421

401422
expected = DataFrame([[0, 2], [1, 3]],
402423
index=pd.PeriodIndex(['2013-01-01', '2013-01-02'],
403424
name='p1', freq='D'),
404425
columns=pd.PeriodIndex(['2013-01', '2013-02'],
405426
name='p2', freq='M'))
406-
407-
pv = df.pivot(index='p1', columns='p2', values='data1')
427+
if method:
428+
pv = df.pivot(index='p1', columns='p2', values='data1')
429+
else:
430+
pv = pd.pivot(df, index='p1', columns='p2', values='data1')
408431
tm.assert_frame_equal(pv, expected)
409432

410433
@pytest.mark.parametrize('values', [
411434
['baz', 'zoo'], np.array(['baz', 'zoo']),
412435
pd.Series(['baz', 'zoo']), pd.Index(['baz', 'zoo'])
413436
])
414-
def test_pivot_with_list_like_values(self, values):
437+
@pytest.mark.parametrize('method', [True, False])
438+
def test_pivot_with_list_like_values(self, values, method):
415439
# issue #17160
416440
df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
417441
'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
418442
'baz': [1, 2, 3, 4, 5, 6],
419443
'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
420444

421-
result = df.pivot(index='foo', columns='bar', values=values)
445+
if method:
446+
result = df.pivot(index='foo', columns='bar', values=values)
447+
else:
448+
result = pd.pivot(df, index='foo', columns='bar', values=values)
422449

423450
data = [[1, 2, 3, 'x', 'y', 'z'],
424451
[4, 5, 6, 'q', 'w', 't']]
@@ -434,14 +461,18 @@ def test_pivot_with_list_like_values(self, values):
434461
['bar', 'baz'], np.array(['bar', 'baz']),
435462
pd.Series(['bar', 'baz']), pd.Index(['bar', 'baz'])
436463
])
437-
def test_pivot_with_list_like_values_nans(self, values):
464+
@pytest.mark.parametrize('method', [True, False])
465+
def test_pivot_with_list_like_values_nans(self, values, method):
438466
# issue #17160
439467
df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
440468
'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
441469
'baz': [1, 2, 3, 4, 5, 6],
442470
'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
443471

444-
result = df.pivot(index='zoo', columns='foo', values=values)
472+
if method:
473+
result = df.pivot(index='zoo', columns='foo', values=values)
474+
else:
475+
result = pd.pivot(df, index='zoo', columns='foo', values=values)
445476

446477
data = [[np.nan, 'A', np.nan, 4],
447478
[np.nan, 'C', np.nan, 6],
@@ -460,7 +491,8 @@ def test_pivot_with_list_like_values_nans(self, values):
460491
@pytest.mark.xfail(reason='MultiIndexed unstack with tuple names fails'
461492
'with KeyError GH#19966',
462493
strict=True)
463-
def test_pivot_with_multiindex(self):
494+
@pytest.mark.parametrize('method', [True, False])
495+
def test_pivot_with_multiindex(self, method):
464496
# issue #17160
465497
index = Index(data=[0, 1, 2, 3, 4, 5])
466498
data = [['one', 'A', 1, 'x'],
@@ -472,24 +504,35 @@ def test_pivot_with_multiindex(self):
472504
columns = MultiIndex(levels=[['bar', 'baz'], ['first', 'second']],
473505
labels=[[0, 0, 1, 1], [0, 1, 0, 1]])
474506
df = DataFrame(data=data, index=index, columns=columns, dtype='object')
475-
result = df.pivot(index=('bar', 'first'), columns=('bar', 'second'),
476-
values=('baz', 'first'))
507+
if method:
508+
result = df.pivot(index=('bar', 'first'),
509+
columns=('bar', 'second'),
510+
values=('baz', 'first'))
511+
else:
512+
result = pd.pivot(df,
513+
index=('bar', 'first'),
514+
columns=('bar', 'second'),
515+
values=('baz', 'first'))
477516

478517
data = {'A': Series([1, 4], index=['one', 'two']),
479518
'B': Series([2, 5], index=['one', 'two']),
480519
'C': Series([3, 6], index=['one', 'two'])}
481520
expected = DataFrame(data)
482521
tm.assert_frame_equal(result, expected)
483522

484-
def test_pivot_with_tuple_of_values(self):
523+
@pytest.mark.parametrize('method', [True, False])
524+
def test_pivot_with_tuple_of_values(self, method):
485525
# issue #17160
486526
df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
487527
'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
488528
'baz': [1, 2, 3, 4, 5, 6],
489529
'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
490530
with pytest.raises(KeyError):
491531
# tuple is seen as a single column name
492-
df.pivot(index='zoo', columns='foo', values=('bar', 'baz'))
532+
if method:
533+
df.pivot(index='zoo', columns='foo', values=('bar', 'baz'))
534+
else:
535+
pd.pivot(df, index='zoo', columns='foo', values=('bar', 'baz'))
493536

494537
def test_margins(self):
495538
def _check_output(result, values_col, index=['A', 'B'],

0 commit comments

Comments
 (0)