Skip to content

Commit b3fee7c

Browse files
committed
Merge pull request #5034 from jreback/numpy_compat
BUG/CLN: numpy compat with pandas numeric functions and cln of same (GH4435)
2 parents 166d857 + cf57d64 commit b3fee7c

File tree

9 files changed

+281
-725
lines changed

9 files changed

+281
-725
lines changed

doc/source/release.rst

+7
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,9 @@ API Changes
269269
``SparsePanel``, etc.), now support the entire set of arithmetic operators
270270
and arithmetic flex methods (add, sub, mul, etc.). ``SparsePanel`` does not
271271
support ``pow`` or ``mod`` with non-scalars. (:issue:`3765`)
272+
- Provide numpy 1.7 compatibility for calling conventions like ``np.prod(pandas_object)``, since numpy
273+
passes additional keyword args to the object's method in such calls (:issue:`4435`)
274+
272275

273276
Internal Refactoring
274277
~~~~~~~~~~~~~~~~~~~~
@@ -347,6 +350,10 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
347350
etc.) into a separate, cleaned up wrapper class. (:issue:`4613`)
348351
- Complex compat for ``Series`` with ``ndarray``. (:issue:`4819`)
349352
- Removed unnecessary ``rwproperty`` from codebase in favor of builtin property. (:issue:`4843`)
353+
- Refactor object level numeric methods (mean/sum/min/max...) from object level modules to
354+
``core/generic.py`` (:issue:`4435`).
355+
- Refactor the cumulative (``cum*``) methods to ``core/generic.py`` (:issue:`4435`); note that these now have a more numpy-like
356+
function signature.
350357
351358
.. _release.bug_fixes-0.13.0:
352359

pandas/core/frame.py

+7-182
Original file line numberDiff line numberDiff line change
@@ -63,28 +63,6 @@
6363
# Docstring templates
6464

6565

66-
_stat_doc = """
67-
Return %(name)s over requested axis.
68-
%(na_action)s
69-
70-
Parameters
71-
----------
72-
axis : {0, 1}
73-
0 for row-wise, 1 for column-wise
74-
skipna : boolean, default True
75-
Exclude NA/null values. If an entire row/column is NA, the result
76-
will be NA
77-
level : int, default None
78-
If the axis is a MultiIndex (hierarchical), count along a
79-
particular level, collapsing into a DataFrame
80-
%(extras)s
81-
Returns
82-
-------
83-
%(shortname)s : Series (or DataFrame if level specified)
84-
"""
85-
86-
_doc_exclude_na = "NA/null values are excluded"
87-
8866
_numeric_only_doc = """numeric_only : boolean, default None
8967
Include only float, int, boolean data. If None, will attempt to use
9068
everything, then use only numeric data
@@ -3869,7 +3847,7 @@ def _count_level(self, level, axis=0, numeric_only=False):
38693847
else:
38703848
return result
38713849

3872-
def any(self, axis=0, bool_only=None, skipna=True, level=None):
3850+
def any(self, axis=None, bool_only=None, skipna=True, level=None, **kwargs):
38733851
"""
38743852
Return whether any element is True over requested axis.
38753853
%(na_action)s
@@ -3891,13 +3869,15 @@ def any(self, axis=0, bool_only=None, skipna=True, level=None):
38913869
-------
38923870
any : Series (or DataFrame if level specified)
38933871
"""
3872+
if axis is None:
3873+
axis = self._stat_axis_number
38943874
if level is not None:
38953875
return self._agg_by_level('any', axis=axis, level=level,
38963876
skipna=skipna)
38973877
return self._reduce(nanops.nanany, axis=axis, skipna=skipna,
38983878
numeric_only=bool_only, filter_type='bool')
38993879

3900-
def all(self, axis=0, bool_only=None, skipna=True, level=None):
3880+
def all(self, axis=None, bool_only=None, skipna=True, level=None, **kwargs):
39013881
"""
39023882
Return whether all elements are True over requested axis.
39033883
%(na_action)s
@@ -3919,169 +3899,14 @@ def all(self, axis=0, bool_only=None, skipna=True, level=None):
39193899
-------
39203900
all : Series (or DataFrame if level specified)
39213901
"""
3902+
if axis is None:
3903+
axis = self._stat_axis_number
39223904
if level is not None:
39233905
return self._agg_by_level('all', axis=axis, level=level,
39243906
skipna=skipna)
39253907
return self._reduce(nanops.nanall, axis=axis, skipna=skipna,
39263908
numeric_only=bool_only, filter_type='bool')
39273909

3928-
@Substitution(name='sum', shortname='sum', na_action=_doc_exclude_na,
3929-
extras=_numeric_only_doc)
3930-
@Appender(_stat_doc)
3931-
def sum(self, axis=0, numeric_only=None, skipna=True, level=None):
3932-
if level is not None:
3933-
return self._agg_by_level('sum', axis=axis, level=level,
3934-
skipna=skipna)
3935-
return self._reduce(nanops.nansum, axis=axis, skipna=skipna,
3936-
numeric_only=numeric_only)
3937-
3938-
@Substitution(name='mean', shortname='mean', na_action=_doc_exclude_na,
3939-
extras='')
3940-
@Appender(_stat_doc)
3941-
def mean(self, axis=0, skipna=True, level=None):
3942-
if level is not None:
3943-
return self._agg_by_level('mean', axis=axis, level=level,
3944-
skipna=skipna)
3945-
return self._reduce(nanops.nanmean, axis=axis, skipna=skipna,
3946-
numeric_only=None)
3947-
3948-
@Substitution(name='minimum', shortname='min', na_action=_doc_exclude_na,
3949-
extras='')
3950-
@Appender(_stat_doc)
3951-
def min(self, axis=0, skipna=True, level=None):
3952-
"""
3953-
Notes
3954-
-----
3955-
This method returns the minimum of the values in the DataFrame. If you
3956-
want the *index* of the minimum, use ``DataFrame.idxmin``. This is the
3957-
equivalent of the ``numpy.ndarray`` method ``argmin``.
3958-
3959-
See Also
3960-
--------
3961-
DataFrame.idxmin
3962-
Series.idxmin
3963-
"""
3964-
if level is not None:
3965-
return self._agg_by_level('min', axis=axis, level=level,
3966-
skipna=skipna)
3967-
return self._reduce(nanops.nanmin, axis=axis, skipna=skipna,
3968-
numeric_only=None)
3969-
3970-
@Substitution(name='maximum', shortname='max', na_action=_doc_exclude_na,
3971-
extras='')
3972-
@Appender(_stat_doc)
3973-
def max(self, axis=0, skipna=True, level=None):
3974-
"""
3975-
Notes
3976-
-----
3977-
This method returns the maximum of the values in the DataFrame. If you
3978-
want the *index* of the maximum, use ``DataFrame.idxmax``. This is the
3979-
equivalent of the ``numpy.ndarray`` method ``argmax``.
3980-
3981-
See Also
3982-
--------
3983-
DataFrame.idxmax
3984-
Series.idxmax
3985-
"""
3986-
if level is not None:
3987-
return self._agg_by_level('max', axis=axis, level=level,
3988-
skipna=skipna)
3989-
return self._reduce(nanops.nanmax, axis=axis, skipna=skipna,
3990-
numeric_only=None)
3991-
3992-
@Substitution(name='product', shortname='product',
3993-
na_action='NA/null values are treated as 1', extras='')
3994-
@Appender(_stat_doc)
3995-
def prod(self, axis=0, skipna=True, level=None):
3996-
if level is not None:
3997-
return self._agg_by_level('prod', axis=axis, level=level,
3998-
skipna=skipna)
3999-
return self._reduce(nanops.nanprod, axis=axis, skipna=skipna,
4000-
numeric_only=None)
4001-
4002-
product = prod
4003-
4004-
@Substitution(name='median', shortname='median', na_action=_doc_exclude_na,
4005-
extras='')
4006-
@Appender(_stat_doc)
4007-
def median(self, axis=0, skipna=True, level=None):
4008-
if level is not None:
4009-
return self._agg_by_level('median', axis=axis, level=level,
4010-
skipna=skipna)
4011-
return self._reduce(nanops.nanmedian, axis=axis, skipna=skipna,
4012-
numeric_only=None)
4013-
4014-
@Substitution(name='mean absolute deviation', shortname='mad',
4015-
na_action=_doc_exclude_na, extras='')
4016-
@Appender(_stat_doc)
4017-
def mad(self, axis=0, skipna=True, level=None):
4018-
if level is not None:
4019-
return self._agg_by_level('mad', axis=axis, level=level,
4020-
skipna=skipna)
4021-
4022-
frame = self._get_numeric_data()
4023-
4024-
axis = self._get_axis_number(axis)
4025-
if axis == 0:
4026-
demeaned = frame - frame.mean(axis=0)
4027-
else:
4028-
demeaned = frame.sub(frame.mean(axis=1), axis=0)
4029-
return np.abs(demeaned).mean(axis=axis, skipna=skipna)
4030-
4031-
@Substitution(name='variance', shortname='var',
4032-
na_action=_doc_exclude_na, extras='')
4033-
@Appender(_stat_doc +
4034-
"""
4035-
Normalized by N-1 (unbiased estimator).
4036-
""")
4037-
def var(self, axis=0, skipna=True, level=None, ddof=1):
4038-
if level is not None:
4039-
return self._agg_by_level('var', axis=axis, level=level,
4040-
skipna=skipna, ddof=ddof)
4041-
return self._reduce(nanops.nanvar, axis=axis, skipna=skipna,
4042-
numeric_only=None, ddof=ddof)
4043-
4044-
@Substitution(name='standard deviation', shortname='std',
4045-
na_action=_doc_exclude_na, extras='')
4046-
@Appender(_stat_doc +
4047-
"""
4048-
Normalized by N-1 (unbiased estimator).
4049-
""")
4050-
def std(self, axis=0, skipna=True, level=None, ddof=1):
4051-
if level is not None:
4052-
return self._agg_by_level('std', axis=axis, level=level,
4053-
skipna=skipna, ddof=ddof)
4054-
return np.sqrt(self.var(axis=axis, skipna=skipna, ddof=ddof))
4055-
4056-
@Substitution(name='unbiased skewness', shortname='skew',
4057-
na_action=_doc_exclude_na, extras='')
4058-
@Appender(_stat_doc)
4059-
def skew(self, axis=0, skipna=True, level=None):
4060-
if level is not None:
4061-
return self._agg_by_level('skew', axis=axis, level=level,
4062-
skipna=skipna)
4063-
return self._reduce(nanops.nanskew, axis=axis, skipna=skipna,
4064-
numeric_only=None)
4065-
4066-
@Substitution(name='unbiased kurtosis', shortname='kurt',
4067-
na_action=_doc_exclude_na, extras='')
4068-
@Appender(_stat_doc)
4069-
def kurt(self, axis=0, skipna=True, level=None):
4070-
if level is not None:
4071-
return self._agg_by_level('kurt', axis=axis, level=level,
4072-
skipna=skipna)
4073-
return self._reduce(nanops.nankurt, axis=axis, skipna=skipna,
4074-
numeric_only=None)
4075-
4076-
def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwds):
4077-
grouped = self.groupby(level=level, axis=axis)
4078-
if hasattr(grouped, name) and skipna:
4079-
return getattr(grouped, name)(**kwds)
4080-
axis = self._get_axis_number(axis)
4081-
method = getattr(type(self), name)
4082-
applyf = lambda x: method(x, axis=axis, skipna=skipna, **kwds)
4083-
return grouped.aggregate(applyf)
4084-
40853910
def _reduce(self, op, axis=0, skipna=True, numeric_only=None,
40863911
filter_type=None, **kwds):
40873912
axis = self._get_axis_number(axis)
@@ -4440,7 +4265,7 @@ def combineMult(self, other):
44404265

44414266
DataFrame._setup_axes(
44424267
['index', 'columns'], info_axis=1, stat_axis=0, axes_are_reversed=True)
4443-
4268+
DataFrame._add_numeric_operations()
44444269

44454270
_EMPTY_SERIES = Series([])
44464271

0 commit comments

Comments
 (0)