Skip to content

BUG/CLN: numpy compat with pandas numeric functions and cln of same (GH4435) #5034

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 29, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,9 @@ API Changes
``SparsePanel``, etc.), now support the entire set of arithmetic operators
and arithmetic flex methods (add, sub, mul, etc.). ``SparsePanel`` does not
support ``pow`` or ``mod`` with non-scalars. (:issue:`3765`)
- Provide compatibility with numpy 1.7 for calling conventions like ``np.prod(pandas_object)``, where numpy
calls the object's method with additional keyword args (:issue:`4435`)


Internal Refactoring
~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -345,6 +348,10 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
etc.) into a separate, cleaned up wrapper class. (:issue:`4613`)
- Complex compat for ``Series`` with ``ndarray``. (:issue:`4819`)
- Removed unnecessary ``rwproperty`` from codebase in favor of builtin property. (:issue:`4843`)
- Refactor object level numeric methods (mean/sum/min/max...) from object level modules to
``core/generic.py`` (:issue:`4435`).
- Refactor the cumulative (``cum*``) methods to ``core/generic.py`` (:issue:`4435`). Note that these now
have a more numpy-like function signature.

.. _release.bug_fixes-0.13.0:

Expand Down
189 changes: 7 additions & 182 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,28 +63,6 @@
# Docstring templates


# Shared docstring template for the axis-wise statistical reductions defined
# further down in this module. It is filled through %-style named
# placeholders:
#   name      -- description of the statistic used in the summary line
#   na_action -- sentence describing how NA/null values are handled
#   extras    -- additional parameter documentation (may be empty)
#   shortname -- label used for the entry in the Returns section
_stat_doc = """
Return %(name)s over requested axis.
%(na_action)s

Parameters
----------
axis : {0, 1}
0 for row-wise, 1 for column-wise
skipna : boolean, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA
level : int, default None
If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a DataFrame
%(extras)s
Returns
-------
%(shortname)s : Series (or DataFrame if level specified)
"""

# Default ``na_action`` sentence for reductions that skip NA/null values.
_doc_exclude_na = "NA/null values are excluded"

_numeric_only_doc = """numeric_only : boolean, default None
Include only float, int, boolean data. If None, will attempt to use
everything, then use only numeric data
Expand Down Expand Up @@ -3869,7 +3847,7 @@ def _count_level(self, level, axis=0, numeric_only=False):
else:
return result

def any(self, axis=0, bool_only=None, skipna=True, level=None):
def any(self, axis=None, bool_only=None, skipna=True, level=None, **kwargs):
"""
Return whether any element is True over requested axis.
%(na_action)s
Expand All @@ -3891,13 +3869,15 @@ def any(self, axis=0, bool_only=None, skipna=True, level=None):
-------
any : Series (or DataFrame if level specified)
"""
if axis is None:
axis = self._stat_axis_number
if level is not None:
return self._agg_by_level('any', axis=axis, level=level,
skipna=skipna)
return self._reduce(nanops.nanany, axis=axis, skipna=skipna,
numeric_only=bool_only, filter_type='bool')

def all(self, axis=0, bool_only=None, skipna=True, level=None):
def all(self, axis=None, bool_only=None, skipna=True, level=None, **kwargs):
"""
Return whether all elements are True over requested axis.
%(na_action)s
Expand All @@ -3919,169 +3899,14 @@ def all(self, axis=0, bool_only=None, skipna=True, level=None):
-------
all : Series (or DataFrame if level specified)
"""
if axis is None:
axis = self._stat_axis_number
if level is not None:
return self._agg_by_level('all', axis=axis, level=level,
skipna=skipna)
return self._reduce(nanops.nanall, axis=axis, skipna=skipna,
numeric_only=bool_only, filter_type='bool')

@Substitution(
    name='sum',
    shortname='sum',
    na_action=_doc_exclude_na,
    extras=_numeric_only_doc,
)
@Appender(_stat_doc)
def sum(self, axis=0, numeric_only=None, skipna=True, level=None):
    # NOTE(review): user-facing documentation is supplied by the decorators
    # above, so explanatory text is kept in comments rather than a docstring.

    # Without a level, reduce directly along ``axis`` with the NaN-aware sum.
    if level is None:
        return self._reduce(
            nanops.nansum,
            axis=axis,
            skipna=skipna,
            numeric_only=numeric_only,
        )

    # With a level, hand off to the level-wise aggregation helper.
    # ``numeric_only`` is not forwarded on this path.
    return self._agg_by_level('sum', axis=axis, level=level, skipna=skipna)

@Substitution(
    name='mean',
    shortname='mean',
    na_action=_doc_exclude_na,
    extras='',
)
@Appender(_stat_doc)
def mean(self, axis=0, skipna=True, level=None):
    # NOTE(review): user-facing documentation is supplied by the decorators
    # above, so explanatory text is kept in comments rather than a docstring.

    # Without a level, reduce directly along ``axis`` with the NaN-aware mean.
    if level is None:
        return self._reduce(
            nanops.nanmean,
            axis=axis,
            skipna=skipna,
            numeric_only=None,
        )

    # With a level, hand off to the level-wise aggregation helper.
    return self._agg_by_level('mean', axis=axis, level=level, skipna=skipna)

@Substitution(
    name='minimum',
    shortname='min',
    na_action=_doc_exclude_na,
    extras='',
)
@Appender(_stat_doc)
def min(self, axis=0, skipna=True, level=None):
    """
    Notes
    -----
    This method returns the minimum of the values in the DataFrame. If you
    want the *index* of the minimum, use ``DataFrame.idxmin``. This is the
    equivalent of the ``numpy.ndarray`` method ``argmin``.

    See Also
    --------
    DataFrame.idxmin
    Series.idxmin
    """
    # Without a level, reduce directly along ``axis`` with the NaN-aware min.
    if level is None:
        return self._reduce(
            nanops.nanmin,
            axis=axis,
            skipna=skipna,
            numeric_only=None,
        )

    # With a level, hand off to the level-wise aggregation helper.
    return self._agg_by_level('min', axis=axis, level=level, skipna=skipna)

@Substitution(
    name='maximum',
    shortname='max',
    na_action=_doc_exclude_na,
    extras='',
)
@Appender(_stat_doc)
def max(self, axis=0, skipna=True, level=None):
    """
    Notes
    -----
    This method returns the maximum of the values in the DataFrame. If you
    want the *index* of the maximum, use ``DataFrame.idxmax``. This is the
    equivalent of the ``numpy.ndarray`` method ``argmax``.

    See Also
    --------
    DataFrame.idxmax
    Series.idxmax
    """
    # Without a level, reduce directly along ``axis`` with the NaN-aware max.
    if level is None:
        return self._reduce(
            nanops.nanmax,
            axis=axis,
            skipna=skipna,
            numeric_only=None,
        )

    # With a level, hand off to the level-wise aggregation helper.
    return self._agg_by_level('max', axis=axis, level=level, skipna=skipna)

@Substitution(
    name='product',
    shortname='product',
    na_action='NA/null values are treated as 1',
    extras='',
)
@Appender(_stat_doc)
def prod(self, axis=0, skipna=True, level=None):
    # NOTE(review): user-facing documentation is supplied by the decorators
    # above, so explanatory text is kept in comments rather than a docstring.

    # Without a level, reduce directly along ``axis`` with the NaN-aware
    # product.
    if level is None:
        return self._reduce(
            nanops.nanprod,
            axis=axis,
            skipna=skipna,
            numeric_only=None,
        )

    # With a level, hand off to the level-wise aggregation helper.
    return self._agg_by_level('prod', axis=axis, level=level, skipna=skipna)

# ``product`` is an alias bound to the same function object as ``prod``.
product = prod

@Substitution(
    name='median',
    shortname='median',
    na_action=_doc_exclude_na,
    extras='',
)
@Appender(_stat_doc)
def median(self, axis=0, skipna=True, level=None):
    # NOTE(review): user-facing documentation is supplied by the decorators
    # above, so explanatory text is kept in comments rather than a docstring.

    # Without a level, reduce directly along ``axis`` with the NaN-aware
    # median.
    if level is None:
        return self._reduce(
            nanops.nanmedian,
            axis=axis,
            skipna=skipna,
            numeric_only=None,
        )

    # With a level, hand off to the level-wise aggregation helper.
    return self._agg_by_level('median', axis=axis, level=level, skipna=skipna)

@Substitution(
    name='mean absolute deviation',
    shortname='mad',
    na_action=_doc_exclude_na,
    extras='',
)
@Appender(_stat_doc)
def mad(self, axis=0, skipna=True, level=None):
    # NOTE(review): user-facing documentation is supplied by the decorators
    # above, so explanatory text is kept in comments rather than a docstring.

    # With a level, hand off to the level-wise aggregation helper.
    if level is not None:
        return self._agg_by_level('mad', axis=axis, level=level,
                                  skipna=skipna)

    # Only the numeric subset of the frame takes part in the computation.
    numeric = self._get_numeric_data()

    # Normalise ``axis`` to its integer form before branching on it.
    axis_number = self._get_axis_number(axis)
    if axis_number == 0:
        # Subtract each column's mean from that column.
        centre = numeric.mean(axis=0)
        deviations = numeric - centre
    else:
        # Subtract each row's mean from that row (aligned on the index).
        centre = numeric.mean(axis=1)
        deviations = numeric.sub(centre, axis=0)

    # Mean of the absolute deviations along the requested axis.
    absolute = np.abs(deviations)
    return absolute.mean(axis=axis_number, skipna=skipna)

@Substitution(
    name='variance',
    shortname='var',
    na_action=_doc_exclude_na,
    extras='',
)
@Appender(_stat_doc +
          """
Normalized by N-1 (unbiased estimator).
""")
def var(self, axis=0, skipna=True, level=None, ddof=1):
    # NOTE(review): user-facing documentation is supplied by the decorators
    # above, so explanatory text is kept in comments rather than a docstring.

    # Without a level, reduce directly along ``axis`` with the NaN-aware
    # variance, forwarding ``ddof`` to it.
    if level is None:
        return self._reduce(
            nanops.nanvar,
            axis=axis,
            skipna=skipna,
            numeric_only=None,
            ddof=ddof,
        )

    # With a level, hand off to the level-wise aggregation helper; ``ddof``
    # travels along as an extra keyword.
    return self._agg_by_level('var', axis=axis, level=level,
                              skipna=skipna, ddof=ddof)

@Substitution(
    name='standard deviation',
    shortname='std',
    na_action=_doc_exclude_na,
    extras='',
)
@Appender(_stat_doc +
          """
Normalized by N-1 (unbiased estimator).
""")
def std(self, axis=0, skipna=True, level=None, ddof=1):
    # NOTE(review): user-facing documentation is supplied by the decorators
    # above, so explanatory text is kept in comments rather than a docstring.

    # Without a level, the standard deviation is the square root of the
    # variance computed with the same axis/skipna/ddof settings.
    if level is None:
        variance = self.var(axis=axis, skipna=skipna, ddof=ddof)
        return np.sqrt(variance)

    # With a level, hand off to the level-wise aggregation helper; ``ddof``
    # travels along as an extra keyword.
    return self._agg_by_level('std', axis=axis, level=level,
                              skipna=skipna, ddof=ddof)

@Substitution(
    name='unbiased skewness',
    shortname='skew',
    na_action=_doc_exclude_na,
    extras='',
)
@Appender(_stat_doc)
def skew(self, axis=0, skipna=True, level=None):
    # NOTE(review): user-facing documentation is supplied by the decorators
    # above, so explanatory text is kept in comments rather than a docstring.

    # Without a level, reduce directly along ``axis`` with the NaN-aware
    # skewness.
    if level is None:
        return self._reduce(
            nanops.nanskew,
            axis=axis,
            skipna=skipna,
            numeric_only=None,
        )

    # With a level, hand off to the level-wise aggregation helper.
    return self._agg_by_level('skew', axis=axis, level=level, skipna=skipna)

@Substitution(
    name='unbiased kurtosis',
    shortname='kurt',
    na_action=_doc_exclude_na,
    extras='',
)
@Appender(_stat_doc)
def kurt(self, axis=0, skipna=True, level=None):
    # NOTE(review): user-facing documentation is supplied by the decorators
    # above, so explanatory text is kept in comments rather than a docstring.

    # Without a level, reduce directly along ``axis`` with the NaN-aware
    # kurtosis.
    if level is None:
        return self._reduce(
            nanops.nankurt,
            axis=axis,
            skipna=skipna,
            numeric_only=None,
        )

    # With a level, hand off to the level-wise aggregation helper.
    return self._agg_by_level('kurt', axis=axis, level=level, skipna=skipna)

def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwds):
    """
    Aggregate the statistic called ``name`` within each group of ``level``.

    Parameters
    ----------
    name : str
        Name of the method to apply; looked up first on the groupby object
        and otherwise on ``type(self)``.
    axis : axis name or number, default 0
        Axis whose level is grouped on.
    level : default 0
        Level passed through to ``self.groupby``.
    skipna : boolean, default True
        When true and the groupby object has an attribute ``name``, that
        attribute is called directly with ``**kwds``.
    **kwds
        Extra keyword arguments forwarded to the chosen method.

    Returns
    -------
    Whatever the groupby method or ``grouped.aggregate`` returns.
    """
    by_level = self.groupby(level=level, axis=axis)

    # Prefer the groupby object's own method when it has one and NA values
    # are being skipped; note that ``axis``/``skipna`` are not passed here.
    if hasattr(by_level, name) and skipna:
        group_method = getattr(by_level, name)
        return group_method(**kwds)

    # Otherwise apply this class's method to each group, using the integer
    # form of the axis.
    axis = self._get_axis_number(axis)
    method = getattr(type(self), name)
    per_group = lambda x: method(x, axis=axis, skipna=skipna, **kwds)
    return by_level.aggregate(per_group)

def _reduce(self, op, axis=0, skipna=True, numeric_only=None,
filter_type=None, **kwds):
axis = self._get_axis_number(axis)
Expand Down Expand Up @@ -4440,7 +4265,7 @@ def combineMult(self, other):

# Declare DataFrame's axis layout: axes named 'index' and 'columns', with
# info_axis=1, stat_axis=0 and the axes flagged as reversed.
DataFrame._setup_axes(
['index', 'columns'], info_axis=1, stat_axis=0, axes_are_reversed=True)

# NOTE(review): presumably installs the shared numeric reduction methods
# (sum/mean/min/max/...) on DataFrame -- confirm against core/generic.py.
DataFrame._add_numeric_operations()

# Module-level Series built from an empty list.
_EMPTY_SERIES = Series([])

Expand Down
Loading