Skip to content

Commit 8fa5b03

Browse files
committed
CLN: refactor all numeric type stats methods to core/generic.py
from the object level modules (e.g. mean/sum/min/max....)
1 parent c420953 commit 8fa5b03

File tree

6 files changed

+229
-446
lines changed

6 files changed

+229
-446
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,8 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
348348
etc.) into a separate, cleaned up wrapper class. (:issue:`4613`)
349349
- Complex compat for ``Series`` with ``ndarray``. (:issue:`4819`)
350350
- Removed unnecessary ``rwproperty`` from codebase in favor of builtin property. (:issue:`4843`)
351+
- Refactor object level numeric methods (mean/sum/min/max...) from object level modules to
352+
``core/generic.py`` (:issue:`4435`)
351353
352354
.. _release.bug_fixes-0.13.0:
353355

pandas/core/frame.py

+1-202
Original file line numberDiff line numberDiff line change
@@ -63,28 +63,6 @@
6363
# Docstring templates
6464

6565

66-
_stat_doc = """
67-
Return %(name)s over requested axis.
68-
%(na_action)s
69-
70-
Parameters
71-
----------
72-
axis : {0, 1}
73-
0 for row-wise, 1 for column-wise
74-
skipna : boolean, default True
75-
Exclude NA/null values. If an entire row/column is NA, the result
76-
will be NA
77-
level : int, default None
78-
If the axis is a MultiIndex (hierarchical), count along a
79-
particular level, collapsing into a DataFrame
80-
%(extras)s
81-
Returns
82-
-------
83-
%(shortname)s : Series (or DataFrame if level specified)
84-
"""
85-
86-
_doc_exclude_na = "NA/null values are excluded"
87-
8866
_numeric_only_doc = """numeric_only : boolean, default None
8967
Include only float, int, boolean data. If None, will attempt to use
9068
everything, then use only numeric data
@@ -3929,185 +3907,6 @@ def all(self, axis=None, bool_only=None, skipna=True, level=None, **kwargs):
39293907
return self._reduce(nanops.nanall, axis=axis, skipna=skipna,
39303908
numeric_only=bool_only, filter_type='bool')
39313909

3932-
@Substitution(name='sum', shortname='sum', na_action=_doc_exclude_na,
3933-
extras=_numeric_only_doc)
3934-
@Appender(_stat_doc)
3935-
def sum(self, axis=None, numeric_only=None, skipna=True, level=None, **kwargs):
3936-
if axis is None:
3937-
axis = self._stat_axis_number
3938-
if level is not None:
3939-
return self._agg_by_level('sum', axis=axis, level=level,
3940-
skipna=skipna)
3941-
return self._reduce(nanops.nansum, axis=axis, skipna=skipna,
3942-
numeric_only=numeric_only)
3943-
3944-
@Substitution(name='mean', shortname='mean', na_action=_doc_exclude_na,
3945-
extras='')
3946-
@Appender(_stat_doc)
3947-
def mean(self, axis=None, skipna=True, level=None, **kwargs):
3948-
if axis is None:
3949-
axis = self._stat_axis_number
3950-
if level is not None:
3951-
return self._agg_by_level('mean', axis=axis, level=level,
3952-
skipna=skipna)
3953-
return self._reduce(nanops.nanmean, axis=axis, skipna=skipna,
3954-
numeric_only=None)
3955-
3956-
@Substitution(name='minimum', shortname='min', na_action=_doc_exclude_na,
3957-
extras='')
3958-
@Appender(_stat_doc)
3959-
def min(self, axis=None, skipna=True, level=None, **kwargs):
3960-
"""
3961-
Notes
3962-
-----
3963-
This method returns the minimum of the values in the DataFrame. If you
3964-
want the *index* of the minimum, use ``DataFrame.idxmin``. This is the
3965-
equivalent of the ``numpy.ndarray`` method ``argmin``.
3966-
3967-
See Also
3968-
--------
3969-
DataFrame.idxmin
3970-
Series.idxmin
3971-
"""
3972-
if axis is None:
3973-
axis = self._stat_axis_number
3974-
if level is not None:
3975-
return self._agg_by_level('min', axis=axis, level=level,
3976-
skipna=skipna)
3977-
return self._reduce(nanops.nanmin, axis=axis, skipna=skipna,
3978-
numeric_only=None)
3979-
3980-
@Substitution(name='maximum', shortname='max', na_action=_doc_exclude_na,
3981-
extras='')
3982-
@Appender(_stat_doc)
3983-
def max(self, axis=None, skipna=True, level=None, **kwargs):
3984-
"""
3985-
Notes
3986-
-----
3987-
This method returns the maximum of the values in the DataFrame. If you
3988-
want the *index* of the maximum, use ``DataFrame.idxmax``. This is the
3989-
equivalent of the ``numpy.ndarray`` method ``argmax``.
3990-
3991-
See Also
3992-
--------
3993-
DataFrame.idxmax
3994-
Series.idxmax
3995-
"""
3996-
if axis is None:
3997-
axis = self._stat_axis_number
3998-
if level is not None:
3999-
return self._agg_by_level('max', axis=axis, level=level,
4000-
skipna=skipna)
4001-
return self._reduce(nanops.nanmax, axis=axis, skipna=skipna,
4002-
numeric_only=None)
4003-
4004-
@Substitution(name='product', shortname='product',
4005-
na_action='NA/null values are treated as 1', extras='')
4006-
@Appender(_stat_doc)
4007-
def prod(self, axis=None, skipna=True, level=None, **kwargs):
4008-
if axis is None:
4009-
axis = self._stat_axis_number
4010-
if level is not None:
4011-
return self._agg_by_level('prod', axis=axis, level=level,
4012-
skipna=skipna)
4013-
return self._reduce(nanops.nanprod, axis=axis, skipna=skipna,
4014-
numeric_only=None)
4015-
4016-
product = prod
4017-
4018-
@Substitution(name='median', shortname='median', na_action=_doc_exclude_na,
4019-
extras='')
4020-
@Appender(_stat_doc)
4021-
def median(self, axis=None, skipna=True, level=None, **kwargs):
4022-
if axis is None:
4023-
axis = self._stat_axis_number
4024-
if level is not None:
4025-
return self._agg_by_level('median', axis=axis, level=level,
4026-
skipna=skipna)
4027-
return self._reduce(nanops.nanmedian, axis=axis, skipna=skipna,
4028-
numeric_only=None)
4029-
4030-
@Substitution(name='mean absolute deviation', shortname='mad',
4031-
na_action=_doc_exclude_na, extras='')
4032-
@Appender(_stat_doc)
4033-
def mad(self, axis=None, skipna=True, level=None, **kwargs):
4034-
if axis is None:
4035-
axis = self._stat_axis_number
4036-
if level is not None:
4037-
return self._agg_by_level('mad', axis=axis, level=level,
4038-
skipna=skipna)
4039-
4040-
frame = self._get_numeric_data()
4041-
4042-
axis = self._get_axis_number(axis)
4043-
if axis == 0:
4044-
demeaned = frame - frame.mean(axis=0)
4045-
else:
4046-
demeaned = frame.sub(frame.mean(axis=1), axis=0)
4047-
return np.abs(demeaned).mean(axis=axis, skipna=skipna)
4048-
4049-
@Substitution(name='variance', shortname='var',
4050-
na_action=_doc_exclude_na, extras='')
4051-
@Appender(_stat_doc +
4052-
"""
4053-
Normalized by N-1 (unbiased estimator).
4054-
""")
4055-
def var(self, axis=None, skipna=True, level=None, ddof=1, **kwargs):
4056-
if axis is None:
4057-
axis = self._stat_axis_number
4058-
if level is not None:
4059-
return self._agg_by_level('var', axis=axis, level=level,
4060-
skipna=skipna, ddof=ddof)
4061-
return self._reduce(nanops.nanvar, axis=axis, skipna=skipna,
4062-
numeric_only=None, ddof=ddof)
4063-
4064-
@Substitution(name='standard deviation', shortname='std',
4065-
na_action=_doc_exclude_na, extras='')
4066-
@Appender(_stat_doc +
4067-
"""
4068-
Normalized by N-1 (unbiased estimator).
4069-
""")
4070-
def std(self, axis=None, skipna=True, level=None, ddof=1, **kwargs):
4071-
if axis is None:
4072-
axis = self._stat_axis_number
4073-
if level is not None:
4074-
return self._agg_by_level('std', axis=axis, level=level,
4075-
skipna=skipna, ddof=ddof)
4076-
return np.sqrt(self.var(axis=axis, skipna=skipna, ddof=ddof))
4077-
4078-
@Substitution(name='unbiased skewness', shortname='skew',
4079-
na_action=_doc_exclude_na, extras='')
4080-
@Appender(_stat_doc)
4081-
def skew(self, axis=None, skipna=True, level=None, **kwargs):
4082-
if axis is None:
4083-
axis = self._stat_axis_number
4084-
if level is not None:
4085-
return self._agg_by_level('skew', axis=axis, level=level,
4086-
skipna=skipna)
4087-
return self._reduce(nanops.nanskew, axis=axis, skipna=skipna,
4088-
numeric_only=None)
4089-
4090-
@Substitution(name='unbiased kurtosis', shortname='kurt',
4091-
na_action=_doc_exclude_na, extras='')
4092-
@Appender(_stat_doc)
4093-
def kurt(self, axis=None, skipna=True, level=None, **kwargs):
4094-
if axis is None:
4095-
axis = self._stat_axis_number
4096-
if level is not None:
4097-
return self._agg_by_level('kurt', axis=axis, level=level,
4098-
skipna=skipna)
4099-
return self._reduce(nanops.nankurt, axis=axis, skipna=skipna,
4100-
numeric_only=None)
4101-
4102-
def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwds):
4103-
grouped = self.groupby(level=level, axis=axis)
4104-
if hasattr(grouped, name) and skipna:
4105-
return getattr(grouped, name)(**kwds)
4106-
axis = self._get_axis_number(axis)
4107-
method = getattr(type(self), name)
4108-
applyf = lambda x: method(x, axis=axis, skipna=skipna, **kwds)
4109-
return grouped.aggregate(applyf)
4110-
41113910
def _reduce(self, op, axis=0, skipna=True, numeric_only=None,
41123911
filter_type=None, **kwds):
41133912
axis = self._get_axis_number(axis)
@@ -4466,7 +4265,7 @@ def combineMult(self, other):
44664265

44674266
DataFrame._setup_axes(
44684267
['index', 'columns'], info_axis=1, stat_axis=0, axes_are_reversed=True)
4469-
4268+
DataFrame._add_numeric_operations()
44704269

44714270
_EMPTY_SERIES = Series([])
44724271

0 commit comments

Comments
 (0)