Skip to content

Untangle reshape imports --> Define pivot_table in DataFrame #17215

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@
from pandas.io.formats.printing import pprint_thing
import pandas.plotting._core as gfx

from pandas.core.reshape import pivot as _pivot

from pandas._libs import lib, algos as libalgos

from pandas.core.config import get_option
Expand Down Expand Up @@ -4154,6 +4156,16 @@ def pivot(self, index=None, columns=None, values=None):
from pandas.core.reshape.reshape import pivot
return pivot(self, index=index, columns=columns, values=values)

@Substitution('')
@Appender(_shared_docs['pivot_table'])
def pivot_table(self, values=None, index=None, columns=None,
                aggfunc='mean', fill_value=None, margins=False,
                dropna=True, margins_name='All'):
    # Deliberately no literal docstring: the decorators above work on
    # __doc__ (presumably filling it from the shared 'pivot_table'
    # template), so adding one here would alter the rendered docs.
    #
    # Thin method wrapper: gather every option by keyword and hand it to
    # the module-level implementation in pandas.core.reshape.pivot, with
    # this object passed as the data argument.
    options = dict(values=values, index=index, columns=columns,
                   aggfunc=aggfunc, fill_value=fill_value,
                   margins=margins, dropna=dropna,
                   margins_name=margins_name)
    return _pivot.pivot_table(self, **options)

def stack(self, level=-1, dropna=True):
"""
Pivot a level of the (possibly hierarchical) column labels, returning a
Expand Down
25 changes: 16 additions & 9 deletions pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
"""

import numpy as np
from pandas import compat, DataFrame, Series, Index, MultiIndex
from pandas import compat
from pandas.core.dtypes.generic import ABCMultiIndex, ABCDataFrame, ABCSeries

from pandas.core.index import (_get_combined_index,
_ensure_index, _get_consensus_names,
_all_indexes_same)
Expand Down Expand Up @@ -253,6 +255,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
clean_objs.append(v)
objs = clean_objs
name = getattr(keys, 'name', None)
from pandas import Index
keys = Index(clean_keys, name=name)

if len(objs) == 0:
Expand Down Expand Up @@ -283,7 +286,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
# filter out the empties if we do not have multi-index possibilities
# note: keep empty Series, as they affect the result columns / name
non_empties = [obj for obj in objs
if sum(obj.shape) > 0 or isinstance(obj, Series)]
if sum(obj.shape) > 0 or isinstance(obj, ABCSeries)]

if (len(non_empties) and (keys is None and names is None and
levels is None and
Expand All @@ -297,17 +300,19 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
self.objs = objs

# Standardize axis parameter to int
if isinstance(sample, Series):
axis = DataFrame()._get_axis_number(axis)
if isinstance(sample, ABCSeries):
frame_cls = sample._constructor_expanddim
# access DataFrame class without direct import
axis = frame_cls()._get_axis_number(axis)
else:
axis = sample._get_axis_number(axis)

# Need to flip BlockManager axis in the DataFrame special case
self._is_frame = isinstance(sample, DataFrame)
self._is_frame = isinstance(sample, ABCDataFrame)
if self._is_frame:
axis = 1 if axis == 0 else 0

self._is_series = isinstance(sample, Series)
self._is_series = isinstance(sample, ABCSeries)
if not 0 <= axis <= sample.ndim:
raise AssertionError("axis must be between 0 and {0}, "
"input was {1}".format(sample.ndim, axis))
Expand Down Expand Up @@ -471,7 +476,7 @@ def _get_concat_axis(self):
num = 0
has_names = False
for i, x in enumerate(self.objs):
if not isinstance(x, Series):
if not isinstance(x, ABCSeries):
raise TypeError("Cannot concatenate type 'Series' "
"with object of type "
"%r" % type(x).__name__)
Expand All @@ -482,6 +487,7 @@ def _get_concat_axis(self):
names[i] = num
num += 1
if has_names:
from pandas import Index
return Index(names)
else:
return com._default_index(len(self.objs))
Expand Down Expand Up @@ -517,6 +523,7 @@ def _concat_indexes(indexes):


def _make_concat_multiindex(indexes, keys, levels=None, names=None):
from pandas import MultiIndex

if ((levels is None and isinstance(keys[0], tuple)) or
(levels is not None and len(levels) > 1)):
Expand Down Expand Up @@ -559,7 +566,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None):
concat_index = _concat_indexes(indexes)

# these go at the end
if isinstance(concat_index, MultiIndex):
if isinstance(concat_index, ABCMultiIndex):
levels.extend(concat_index.levels)
label_list.extend(concat_index.labels)
else:
Expand Down Expand Up @@ -605,7 +612,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None):

new_labels.append(np.repeat(mapped, n))

if isinstance(new_index, MultiIndex):
if isinstance(new_index, ABCMultiIndex):
new_levels.extend(new_index.levels)
new_labels.extend([np.tile(lab, kpieces) for lab in new_index.labels])
else:
Expand Down
46 changes: 30 additions & 16 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,29 @@


from pandas.core.dtypes.common import is_list_like, is_scalar
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries, ABCIndexClass

from pandas.core.reshape.concat import concat
from pandas import Series, DataFrame, MultiIndex, Index
from pandas.core.groupby import Grouper
from pandas.core.reshape.util import cartesian_product
from pandas.core.index import _get_combined_index
from pandas.compat import range, lrange, zip
from pandas import compat
import pandas.core.common as com

from pandas.util._decorators import Appender, Substitution

from pandas.core.generic import _shared_docs

import numpy as np


def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
fill_value=None, margins=False, dropna=True,
margins_name='All'):
"""
_shared_docs['pivot_table'] = """
Create a spreadsheet-style pivot table as a DataFrame. The levels in the
pivot table will be stored in MultiIndex objects (hierarchical indexes) on
the index and columns of the result DataFrame

Parameters
----------
data : DataFrame
----------%s
values : column to aggregate, optional
index : column, Grouper, array, or list of the previous
If an array is passed, it must be the same length as the data. The list
Expand Down Expand Up @@ -91,6 +92,13 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
DataFrame.pivot : pivot without aggregation that can handle
non-numeric data
"""


@Substitution('\ndata : DataFrame')
@Appender(_shared_docs['pivot_table'])
def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
fill_value=None, margins=False, dropna=True,
margins_name='All'):
index = _convert_by(index)
columns = _convert_by(columns)

Expand Down Expand Up @@ -123,6 +131,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
raise KeyError(i)

to_filter = []
from pandas import Grouper
for x in keys + values:
if isinstance(x, Grouper):
x = x.key
Expand Down Expand Up @@ -162,6 +171,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
table = agged.unstack(to_unstack)

if not dropna:
from pandas import MultiIndex
try:
m = MultiIndex.from_arrays(cartesian_product(table.index.levels),
names=table.index.names)
Expand All @@ -176,7 +186,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
except AttributeError:
pass # it's a single level or a series

if isinstance(table, DataFrame):
if isinstance(table, ABCDataFrame):
table = table.sort_index(axis=1)

if fill_value is not None:
Expand All @@ -198,15 +208,12 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
table = table.T

# GH 15193 Make sure empty columns are removed if dropna=True
if isinstance(table, DataFrame) and dropna:
if isinstance(table, ABCDataFrame) and dropna:
table = table.dropna(how='all', axis=1)

return table


DataFrame.pivot_table = pivot_table


def _add_margins(table, data, values, rows, cols, aggfunc,
margins_name='All', fill_value=None):
if not isinstance(margins_name, compat.string_types):
Expand All @@ -230,9 +237,10 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
else:
key = margins_name

if not values and isinstance(table, Series):
if not values and isinstance(table, ABCSeries):
# If there are no values and the table is a series, then there is only
# one column in the data. Compute grand margin and return it.
from pandas import Series
return table.append(Series({key: grand_margin[margins_name]}))

if values:
Expand All @@ -257,6 +265,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
else:
row_margin[k] = grand_margin[k[0]]

from pandas import DataFrame
margin_dummy = DataFrame(row_margin, columns=[key]).T

row_names = result.index.names
Expand Down Expand Up @@ -333,6 +342,7 @@ def _all_key(key):
else:
margin = grand_margin
cat_axis = 0
from pandas import Series
for key, piece in table.groupby(level=0, axis=cat_axis):
all_key = _all_key(key)
table_pieces.append(piece)
Expand All @@ -355,6 +365,7 @@ def _all_key(key):
new_order = [len(cols)] + lrange(len(cols))
row_margin.index = row_margin.index.reorder_levels(new_order)
else:
from pandas import Series
row_margin = Series(np.nan, index=result.columns)

return result, margin_keys, row_margin
Expand Down Expand Up @@ -393,16 +404,18 @@ def _all_key():
if len(cols):
row_margin = data[cols].groupby(cols).apply(aggfunc)
else:
from pandas import Series
row_margin = Series(np.nan, index=result.columns)

return result, margin_keys, row_margin


def _convert_by(by):
from pandas import Grouper
if by is None:
by = []
elif (is_scalar(by) or
isinstance(by, (np.ndarray, Index, Series, Grouper)) or
isinstance(by, (np.ndarray, ABCIndexClass, ABCSeries, Grouper)) or
hasattr(by, '__call__')):
by = [by]
else:
Expand Down Expand Up @@ -523,6 +536,7 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
if values is not None and aggfunc is None:
raise ValueError("values cannot be used without an aggfunc.")

from pandas import DataFrame
df = DataFrame(data, index=common_idx)
if values is None:
df['__dummy__'] = 0
Expand Down Expand Up @@ -620,7 +634,7 @@ def _get_names(arrs, names, prefix='row'):
if names is None:
names = []
for i, arr in enumerate(arrs):
if isinstance(arr, Series) and arr.name is not None:
if isinstance(arr, ABCSeries) and arr.name is not None:
names.append(arr.name)
else:
names.append('%s_%d' % (prefix, i))
Expand Down