From c420953df9547fcc4fc15ed61708f14b8c4108ab Mon Sep 17 00:00:00 2001 From: jreback Date: Sat, 28 Sep 2013 16:46:26 -0400 Subject: [PATCH 1/4] COMPAT: provide numpy compat with 1.7 for numeric methods when calling like: np.prod(obj) as numpy is now passing extra args to the pandas methods! --- doc/source/release.rst | 3 + pandas/core/frame.py | 52 +++-- pandas/core/generic.py | 402 ++++++++++++++++++----------------- pandas/core/panel.py | 40 +++- pandas/core/series.py | 16 +- pandas/tests/test_generic.py | 21 +- 6 files changed, 303 insertions(+), 231 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 73e7e3affd944..39dc5d192f523 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -267,6 +267,9 @@ API Changes ``SparsePanel``, etc.), now support the entire set of arithmetic operators and arithmetic flex methods (add, sub, mul, etc.). ``SparsePanel`` does not support ``pow`` or ``mod`` with non-scalars. (:issue:`3765`) + - Provide numpy compatibility with 1.7 for a calling convention like ``np.prod(pandas_object)`` as numpy + passes additional keyword args (:issue:`4435`) + Internal Refactoring ~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c6727f91644fc..44bb1ce199bc0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3869,7 +3869,7 @@ def _count_level(self, level, axis=0, numeric_only=False): else: return result - def any(self, axis=0, bool_only=None, skipna=True, level=None): + def any(self, axis=None, bool_only=None, skipna=True, level=None, **kwargs): """ Return whether any element is True over requested axis. 
%(na_action)s @@ -3891,13 +3891,15 @@ def any(self, axis=0, bool_only=None, skipna=True, level=None): ------- any : Series (or DataFrame if level specified) """ + if axis is None: + axis = self._stat_axis_number if level is not None: return self._agg_by_level('any', axis=axis, level=level, skipna=skipna) return self._reduce(nanops.nanany, axis=axis, skipna=skipna, numeric_only=bool_only, filter_type='bool') - def all(self, axis=0, bool_only=None, skipna=True, level=None): + def all(self, axis=None, bool_only=None, skipna=True, level=None, **kwargs): """ Return whether all elements are True over requested axis. %(na_action)s @@ -3919,6 +3921,8 @@ def all(self, axis=0, bool_only=None, skipna=True, level=None): ------- any : Series (or DataFrame if level specified) """ + if axis is None: + axis = self._stat_axis_number if level is not None: return self._agg_by_level('all', axis=axis, level=level, skipna=skipna) @@ -3928,7 +3932,9 @@ def all(self, axis=0, bool_only=None, skipna=True, level=None): @Substitution(name='sum', shortname='sum', na_action=_doc_exclude_na, extras=_numeric_only_doc) @Appender(_stat_doc) - def sum(self, axis=0, numeric_only=None, skipna=True, level=None): + def sum(self, axis=None, numeric_only=None, skipna=True, level=None, **kwargs): + if axis is None: + axis = self._stat_axis_number if level is not None: return self._agg_by_level('sum', axis=axis, level=level, skipna=skipna) @@ -3938,7 +3944,9 @@ def sum(self, axis=0, numeric_only=None, skipna=True, level=None): @Substitution(name='mean', shortname='mean', na_action=_doc_exclude_na, extras='') @Appender(_stat_doc) - def mean(self, axis=0, skipna=True, level=None): + def mean(self, axis=None, skipna=True, level=None, **kwargs): + if axis is None: + axis = self._stat_axis_number if level is not None: return self._agg_by_level('mean', axis=axis, level=level, skipna=skipna) @@ -3948,7 +3956,7 @@ def mean(self, axis=0, skipna=True, level=None): @Substitution(name='minimum', shortname='min', 
na_action=_doc_exclude_na, extras='') @Appender(_stat_doc) - def min(self, axis=0, skipna=True, level=None): + def min(self, axis=None, skipna=True, level=None, **kwargs): """ Notes ----- @@ -3961,6 +3969,8 @@ def min(self, axis=0, skipna=True, level=None): DataFrame.idxmin Series.idxmin """ + if axis is None: + axis = self._stat_axis_number if level is not None: return self._agg_by_level('min', axis=axis, level=level, skipna=skipna) @@ -3970,7 +3980,7 @@ def min(self, axis=0, skipna=True, level=None): @Substitution(name='maximum', shortname='max', na_action=_doc_exclude_na, extras='') @Appender(_stat_doc) - def max(self, axis=0, skipna=True, level=None): + def max(self, axis=None, skipna=True, level=None, **kwargs): """ Notes ----- @@ -3983,6 +3993,8 @@ def max(self, axis=0, skipna=True, level=None): DataFrame.idxmax Series.idxmax """ + if axis is None: + axis = self._stat_axis_number if level is not None: return self._agg_by_level('max', axis=axis, level=level, skipna=skipna) @@ -3992,7 +4004,9 @@ def max(self, axis=0, skipna=True, level=None): @Substitution(name='product', shortname='product', na_action='NA/null values are treated as 1', extras='') @Appender(_stat_doc) - def prod(self, axis=0, skipna=True, level=None): + def prod(self, axis=None, skipna=True, level=None, **kwargs): + if axis is None: + axis = self._stat_axis_number if level is not None: return self._agg_by_level('prod', axis=axis, level=level, skipna=skipna) @@ -4004,7 +4018,9 @@ def prod(self, axis=0, skipna=True, level=None): @Substitution(name='median', shortname='median', na_action=_doc_exclude_na, extras='') @Appender(_stat_doc) - def median(self, axis=0, skipna=True, level=None): + def median(self, axis=None, skipna=True, level=None, **kwargs): + if axis is None: + axis = self._stat_axis_number if level is not None: return self._agg_by_level('median', axis=axis, level=level, skipna=skipna) @@ -4014,7 +4030,9 @@ def median(self, axis=0, skipna=True, level=None): @Substitution(name='mean 
absolute deviation', shortname='mad', na_action=_doc_exclude_na, extras='') @Appender(_stat_doc) - def mad(self, axis=0, skipna=True, level=None): + def mad(self, axis=None, skipna=True, level=None, **kwargs): + if axis is None: + axis = self._stat_axis_number if level is not None: return self._agg_by_level('mad', axis=axis, level=level, skipna=skipna) @@ -4034,7 +4052,9 @@ def mad(self, axis=0, skipna=True, level=None): """ Normalized by N-1 (unbiased estimator). """) - def var(self, axis=0, skipna=True, level=None, ddof=1): + def var(self, axis=None, skipna=True, level=None, ddof=1, **kwargs): + if axis is None: + axis = self._stat_axis_number if level is not None: return self._agg_by_level('var', axis=axis, level=level, skipna=skipna, ddof=ddof) @@ -4047,7 +4067,9 @@ def var(self, axis=0, skipna=True, level=None, ddof=1): """ Normalized by N-1 (unbiased estimator). """) - def std(self, axis=0, skipna=True, level=None, ddof=1): + def std(self, axis=None, skipna=True, level=None, ddof=1, **kwargs): + if axis is None: + axis = self._stat_axis_number if level is not None: return self._agg_by_level('std', axis=axis, level=level, skipna=skipna, ddof=ddof) @@ -4056,7 +4078,9 @@ def std(self, axis=0, skipna=True, level=None, ddof=1): @Substitution(name='unbiased skewness', shortname='skew', na_action=_doc_exclude_na, extras='') @Appender(_stat_doc) - def skew(self, axis=0, skipna=True, level=None): + def skew(self, axis=None, skipna=True, level=None, **kwargs): + if axis is None: + axis = self._stat_axis_number if level is not None: return self._agg_by_level('skew', axis=axis, level=level, skipna=skipna) @@ -4066,7 +4090,9 @@ def skew(self, axis=0, skipna=True, level=None): @Substitution(name='unbiased kurtosis', shortname='kurt', na_action=_doc_exclude_na, extras='') @Appender(_stat_doc) - def kurt(self, axis=0, skipna=True, level=None): + def kurt(self, axis=None, skipna=True, level=None, **kwargs): + if axis is None: + axis = self._stat_axis_number if level is not 
None: return self._agg_by_level('kurt', axis=axis, level=level, skipna=skipna) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 705679136c3d2..060873e7281f2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1949,33 +1949,6 @@ def interpolate(self, to_replace, method='pad', axis=0, inplace=False, #---------------------------------------------------------------------- # Action Methods - def abs(self): - """ - Return an object with absolute value taken. Only applicable to objects - that are all numeric - - Returns - ------- - abs: type of caller - """ - obj = np.abs(self) - - # suprimo numpy 1.6 hacking - if _np_version_under1p7: - if self.ndim == 1: - if obj.dtype == 'm8[us]': - obj = obj.astype('m8[ns]') - elif self.ndim == 2: - def f(x): - if x.dtype == 'm8[us]': - x = x.astype('m8[ns]') - return x - - if 'm8[us]' in obj.dtypes.values: - obj = obj.apply(f) - - return obj - def clip(self, lower=None, upper=None, out=None): """ Trim values at input threshold(s) @@ -2550,178 +2523,6 @@ def mask(self, cond): """ return self.where(~cond, np.nan) - def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, - **kwds): - """ - Percent change over given number of periods - - Parameters - ---------- - periods : int, default 1 - Periods to shift for forming percent change - fill_method : str, default 'pad' - How to handle NAs before computing percent changes - limit : int, default None - The number of consecutive NAs to fill before stopping - freq : DateOffset, timedelta, or offset alias string, optional - Increment to use from time series API (e.g. 
'M' or BDay()) - - Returns - ------- - chg : Series or DataFrame - """ - if fill_method is None: - data = self - else: - data = self.fillna(method=fill_method, limit=limit) - rs = data / data.shift(periods=periods, freq=freq, **kwds) - 1 - if freq is None: - mask = com.isnull(_values_from_object(self)) - np.putmask(rs.values, mask, np.nan) - return rs - - def cumsum(self, axis=None, skipna=True): - """ - Return DataFrame of cumulative sums over requested axis. - - Parameters - ---------- - axis : {0, 1} - 0 for row-wise, 1 for column-wise - skipna : boolean, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA - - Returns - ------- - y : DataFrame - """ - if axis is None: - axis = self._stat_axis_number - else: - axis = self._get_axis_number(axis) - - y = _values_from_object(self).copy() - if not issubclass(y.dtype.type, np.integer): - mask = np.isnan(_values_from_object(self)) - - if skipna: - np.putmask(y, mask, 0.) - - result = y.cumsum(axis) - - if skipna: - np.putmask(result, mask, np.nan) - else: - result = y.cumsum(axis) - return self._wrap_array(result, self.axes, copy=False) - - def cumprod(self, axis=None, skipna=True): - """ - Return cumulative product over requested axis as DataFrame - - Parameters - ---------- - axis : {0, 1} - 0 for row-wise, 1 for column-wise - skipna : boolean, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA - - Returns - ------- - y : DataFrame - """ - if axis is None: - axis = self._stat_axis_number - else: - axis = self._get_axis_number(axis) - - y = _values_from_object(self).copy() - if not issubclass(y.dtype.type, np.integer): - mask = np.isnan(_values_from_object(self)) - - if skipna: - np.putmask(y, mask, 1.) 
- result = y.cumprod(axis) - - if skipna: - np.putmask(result, mask, np.nan) - else: - result = y.cumprod(axis) - return self._wrap_array(result, self.axes, copy=False) - - def cummax(self, axis=None, skipna=True): - """ - Return DataFrame of cumulative max over requested axis. - - Parameters - ---------- - axis : {0, 1} - 0 for row-wise, 1 for column-wise - skipna : boolean, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA - - Returns - ------- - y : DataFrame - """ - if axis is None: - axis = self._stat_axis_number - else: - axis = self._get_axis_number(axis) - - y = _values_from_object(self).copy() - if not issubclass(y.dtype.type, np.integer): - mask = np.isnan(_values_from_object(self)) - - if skipna: - np.putmask(y, mask, -np.inf) - - result = np.maximum.accumulate(y, axis) - - if skipna: - np.putmask(result, mask, np.nan) - else: - result = np.maximum.accumulate(y, axis) - return self._wrap_array(result, self.axes, copy=False) - - def cummin(self, axis=None, skipna=True): - """ - Return DataFrame of cumulative min over requested axis. - - Parameters - ---------- - axis : {0, 1} - 0 for row-wise, 1 for column-wise - skipna : boolean, default True - Exclude NA/null values. 
If an entire row/column is NA, the result - will be NA - - Returns - ------- - y : DataFrame - """ - if axis is None: - axis = self._stat_axis_number - else: - axis = self._get_axis_number(axis) - - y = _values_from_object(self).copy() - if not issubclass(y.dtype.type, np.integer): - mask = np.isnan(_values_from_object(self)) - - if skipna: - np.putmask(y, mask, np.inf) - - result = np.minimum.accumulate(y, axis) - - if skipna: - np.putmask(result, mask, np.nan) - else: - result = np.minimum.accumulate(y, axis) - return self._wrap_array(result, self.axes, copy=False) def shift(self, periods=1, freq=None, axis=0, **kwds): """ @@ -2928,6 +2729,209 @@ def tz_localize(self, tz, axis=0, copy=True): return new_obj + #---------------------------------------------------------------------- + # Numeric Methods + + def abs(self): + """ + Return an object with absolute value taken. Only applicable to objects + that are all numeric + + Returns + ------- + abs: type of caller + """ + obj = np.abs(self) + + # suprimo numpy 1.6 hacking + if _np_version_under1p7: + if self.ndim == 1: + if obj.dtype == 'm8[us]': + obj = obj.astype('m8[ns]') + elif self.ndim == 2: + def f(x): + if x.dtype == 'm8[us]': + x = x.astype('m8[ns]') + return x + + if 'm8[us]' in obj.dtypes.values: + obj = obj.apply(f) + + return obj + + def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, + **kwds): + """ + Percent change over given number of periods + + Parameters + ---------- + periods : int, default 1 + Periods to shift for forming percent change + fill_method : str, default 'pad' + How to handle NAs before computing percent changes + limit : int, default None + The number of consecutive NAs to fill before stopping + freq : DateOffset, timedelta, or offset alias string, optional + Increment to use from time series API (e.g. 
'M' or BDay()) + + Returns + ------- + chg : Series or DataFrame + """ + if fill_method is None: + data = self + else: + data = self.fillna(method=fill_method, limit=limit) + rs = data / data.shift(periods=periods, freq=freq, **kwds) - 1 + if freq is None: + mask = com.isnull(_values_from_object(self)) + np.putmask(rs.values, mask, np.nan) + return rs + + def cumsum(self, axis=None, skipna=True, **kwargs): + """ + Return DataFrame of cumulative sums over requested axis. + + Parameters + ---------- + axis : {0, 1} + 0 for row-wise, 1 for column-wise + skipna : boolean, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA + + Returns + ------- + y : DataFrame + """ + if axis is None: + axis = self._stat_axis_number + else: + axis = self._get_axis_number(axis) + + y = _values_from_object(self).copy() + if not issubclass(y.dtype.type, np.integer): + mask = np.isnan(_values_from_object(self)) + + if skipna: + np.putmask(y, mask, 0.) + + result = y.cumsum(axis) + + if skipna: + np.putmask(result, mask, np.nan) + else: + result = y.cumsum(axis) + return self._wrap_array(result, self.axes, copy=False) + + def cumprod(self, axis=None, skipna=True, **kwargs): + """ + Return cumulative product over requested axis as DataFrame + + Parameters + ---------- + axis : {0, 1} + 0 for row-wise, 1 for column-wise + skipna : boolean, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA + + Returns + ------- + y : DataFrame + """ + if axis is None: + axis = self._stat_axis_number + else: + axis = self._get_axis_number(axis) + + y = _values_from_object(self).copy() + if not issubclass(y.dtype.type, np.integer): + mask = np.isnan(_values_from_object(self)) + + if skipna: + np.putmask(y, mask, 1.) 
+ result = y.cumprod(axis) + + if skipna: + np.putmask(result, mask, np.nan) + else: + result = y.cumprod(axis) + return self._wrap_array(result, self.axes, copy=False) + + def cummax(self, axis=None, skipna=True, **kwargs): + """ + Return DataFrame of cumulative max over requested axis. + + Parameters + ---------- + axis : {0, 1} + 0 for row-wise, 1 for column-wise + skipna : boolean, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA + + Returns + ------- + y : DataFrame + """ + if axis is None: + axis = self._stat_axis_number + else: + axis = self._get_axis_number(axis) + + y = _values_from_object(self).copy() + if not issubclass(y.dtype.type, np.integer): + mask = np.isnan(_values_from_object(self)) + + if skipna: + np.putmask(y, mask, -np.inf) + + result = np.maximum.accumulate(y, axis) + + if skipna: + np.putmask(result, mask, np.nan) + else: + result = np.maximum.accumulate(y, axis) + return self._wrap_array(result, self.axes, copy=False) + + def cummin(self, axis=None, skipna=True, **kwargs): + """ + Return DataFrame of cumulative min over requested axis. + + Parameters + ---------- + axis : {0, 1} + 0 for row-wise, 1 for column-wise + skipna : boolean, default True + Exclude NA/null values. 
If an entire row/column is NA, the result + will be NA + + Returns + ------- + y : DataFrame + """ + if axis is None: + axis = self._stat_axis_number + else: + axis = self._get_axis_number(axis) + + y = _values_from_object(self).copy() + if not issubclass(y.dtype.type, np.integer): + mask = np.isnan(_values_from_object(self)) + + if skipna: + np.putmask(y, mask, np.inf) + + result = np.minimum.accumulate(y, axis) + + if skipna: + np.putmask(result, mask, np.nan) + else: + result = np.minimum.accumulate(y, axis) + return self._wrap_array(result, self.axes, copy=False) + # install the indexerse for _name, _indexer in indexing.get_indexers_list(): NDFrame._create_indexer(_name, _indexer) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 7208ceff7d1a7..b4748c86a25b1 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1233,61 +1233,81 @@ def f(self, other, axis=0): @Substitution(desc='sum', outname='sum') @Appender(_agg_doc) - def sum(self, axis='major', skipna=True): + def sum(self, axis=None, skipna=True, **kwargs): + if axis is None: + axis = self._stat_axis_number return self._reduce(nanops.nansum, axis=axis, skipna=skipna) cls.sum = sum @Substitution(desc='mean', outname='mean') @Appender(_agg_doc) - def mean(self, axis='major', skipna=True): + def mean(self, axis=None, skipna=True, **kwargs): + if axis is None: + axis = self._stat_axis_number return self._reduce(nanops.nanmean, axis=axis, skipna=skipna) cls.mean = mean @Substitution(desc='unbiased variance', outname='variance') @Appender(_agg_doc) - def var(self, axis='major', skipna=True): + def var(self, axis=None, skipna=True, **kwargs): + if axis is None: + axis = self._stat_axis_number return self._reduce(nanops.nanvar, axis=axis, skipna=skipna) cls.var = var @Substitution(desc='unbiased standard deviation', outname='stdev') @Appender(_agg_doc) - def std(self, axis='major', skipna=True): + def std(self, axis=None, skipna=True, **kwargs): + if axis is None: + axis = 
self._stat_axis_number return self.var(axis=axis, skipna=skipna).apply(np.sqrt) cls.std = std @Substitution(desc='unbiased skewness', outname='skew') @Appender(_agg_doc) - def skew(self, axis='major', skipna=True): + def skew(self, axis=None, skipna=True, **wwargs): + if axis is None: + axis = self._stat_axis_number return self._reduce(nanops.nanskew, axis=axis, skipna=skipna) cls.skew = skew @Substitution(desc='product', outname='prod') @Appender(_agg_doc) - def prod(self, axis='major', skipna=True): + def prod(self, axis=None, skipna=True, **kwargs): + if axis is None: + axis = self._stat_axis_number return self._reduce(nanops.nanprod, axis=axis, skipna=skipna) cls.prod = prod @Substitution(desc='compounded percentage', outname='compounded') @Appender(_agg_doc) - def compound(self, axis='major', skipna=True): + def compound(self, axis=None, skipna=True, **kwargs): + if axis is None: + axis = self._stat_axis_number return (1 + self).prod(axis=axis, skipna=skipna) - 1 cls.compound = compound @Substitution(desc='median', outname='median') @Appender(_agg_doc) - def median(self, axis='major', skipna=True): + def median(self, axis=None, skipna=True, **kwargs): + if axis is None: + axis = self._stat_axis_number return self._reduce(nanops.nanmedian, axis=axis, skipna=skipna) cls.median = median @Substitution(desc='maximum', outname='maximum') @Appender(_agg_doc) - def max(self, axis='major', skipna=True): + def max(self, axis=None, skipna=True, **kwargs): + if axis is None: + axis = self._stat_axis_number return self._reduce(nanops.nanmax, axis=axis, skipna=skipna) cls.max = max @Substitution(desc='minimum', outname='minimum') @Appender(_agg_doc) - def min(self, axis='major', skipna=True): + def min(self, axis=None, skipna=True, **kwargs): + if axis is None: + axis = self._stat_axis_number return self._reduce(nanops.nanmin, axis=axis, skipna=skipna) cls.min = min diff --git a/pandas/core/series.py b/pandas/core/series.py index 38e22e7a9ed3a..972f654fe8640 100644 --- 
a/pandas/core/series.py +++ b/pandas/core/series.py @@ -104,7 +104,7 @@ def _make_stat_func(nanop, name, shortname, na_action=_doc_exclude_na, @Substitution(name=name, shortname=shortname, na_action=na_action, extras=extras) @Appender(_stat_doc) - def f(self, axis=0, dtype=None, out=None, skipna=True, level=None): + def f(self, axis=0, dtype=None, out=None, skipna=True, level=None, **kwargs): if level is not None: return self._agg_by_level(shortname, level=level, skipna=skipna) return nanop(_values_from_object(self), skipna=skipna) @@ -1202,7 +1202,7 @@ def duplicated(self, take_last=False): @Substitution(name='mean absolute deviation', shortname='mad', na_action=_doc_exclude_na, extras='') @Appender(_stat_doc) - def mad(self, skipna=True, level=None): + def mad(self, skipna=True, level=None, **kwargs): if level is not None: return self._agg_by_level('mad', level=level, skipna=skipna) @@ -1212,7 +1212,7 @@ def mad(self, skipna=True, level=None): @Substitution(name='minimum', shortname='min', na_action=_doc_exclude_na, extras='') @Appender(_stat_doc) - def min(self, axis=None, out=None, skipna=True, level=None): + def min(self, axis=None, out=None, skipna=True, level=None, **kwargs): """ Notes ----- @@ -1232,7 +1232,7 @@ def min(self, axis=None, out=None, skipna=True, level=None): @Substitution(name='maximum', shortname='max', na_action=_doc_exclude_na, extras='') @Appender(_stat_doc) - def max(self, axis=None, out=None, skipna=True, level=None): + def max(self, axis=None, out=None, skipna=True, level=None, **kwargs): """ Notes ----- @@ -1256,7 +1256,7 @@ def max(self, axis=None, out=None, skipna=True, level=None): Normalized by N-1 (unbiased estimator). 
""") def std(self, axis=None, dtype=None, out=None, ddof=1, skipna=True, - level=None): + level=None, **kwargs): if level is not None: return self._agg_by_level('std', level=level, skipna=skipna, ddof=ddof) @@ -1269,7 +1269,7 @@ def std(self, axis=None, dtype=None, out=None, ddof=1, skipna=True, Normalized by N-1 (unbiased estimator). """) def var(self, axis=None, dtype=None, out=None, ddof=1, skipna=True, - level=None): + level=None, **kwargs): if level is not None: return self._agg_by_level('var', level=level, skipna=skipna, ddof=ddof) @@ -1278,7 +1278,7 @@ def var(self, axis=None, dtype=None, out=None, ddof=1, skipna=True, @Substitution(name='unbiased skewness', shortname='skew', na_action=_doc_exclude_na, extras='') @Appender(_stat_doc) - def skew(self, skipna=True, level=None): + def skew(self, skipna=True, level=None, **kwargs): if level is not None: return self._agg_by_level('skew', level=level, skipna=skipna) @@ -1287,7 +1287,7 @@ def skew(self, skipna=True, level=None): @Substitution(name='unbiased kurtosis', shortname='kurt', na_action=_doc_exclude_na, extras='') @Appender(_stat_doc) - def kurt(self, skipna=True, level=None): + def kurt(self, skipna=True, level=None, **kwargs): if level is not None: return self._agg_by_level('kurt', level=level, skipna=skipna) diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 6ea58ec997e23..5a215d405b277 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -9,7 +9,7 @@ import pandas as pd from pandas import (Index, Series, DataFrame, Panel, - isnull, notnull,date_range) + isnull, notnull,date_range, _np_version_under1p7) from pandas.core.index import Index, MultiIndex from pandas.tseries.index import Timestamp, DatetimeIndex @@ -118,6 +118,7 @@ def test_get_numeric_data(self): self._compare(result, o) # _get_numeric_data is includes _get_bool_data, so can't test for non-inclusion + def test_nonzero(self): # GH 4633 @@ -154,6 +155,24 @@ def f(): 
self.assertRaises(ValueError, lambda : obj1 or obj2) self.assertRaises(ValueError, lambda : not obj1) + def test_numpy_1_7_compat_numeric_methods(self): + if _np_version_under1p7: + raise nose.SkipTest("numpy < 1.7") + + # GH 4435 + # numpy in 1.7 tries to pass additional arguments to pandas functions + + o = self._construct(shape=4) + for op in ['min','max','max','var','std','prod','sum', + 'median','skew','kurt','compound','cummax','cummin','all','any']: + f = getattr(np,op,None) + if f is not None: + f(o) + + # numpy broken methods, since these are not passed by keywords, they + # won't work + #'cumsum','cumprod', + class TestSeries(unittest.TestCase, Generic): _typ = Series _comparator = lambda self, x, y: assert_series_equal(x,y) From 8fa5b03d1f6669bde02a7fb1823b95d5020aebbe Mon Sep 17 00:00:00 2001 From: jreback Date: Sat, 28 Sep 2013 20:50:45 -0400 Subject: [PATCH 2/4] CLN: refactor all numeric type stats methods to core/generic.py from the object level modules (e.g. mean/sum/min/max....) --- doc/source/release.rst | 2 + pandas/core/frame.py | 203 +------------------------------------- pandas/core/generic.py | 214 ++++++++++++++++++++++++++++++++++++++++- pandas/core/panel.py | 106 +------------------- pandas/core/panelnd.py | 1 + pandas/core/series.py | 149 ++-------------------------- 6 files changed, 229 insertions(+), 446 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 39dc5d192f523..fe3fb726d79f2 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -348,6 +348,8 @@ See :ref:`Internal Refactoring` etc.) into a separate, cleaned up wrapper class. (:issue:`4613`) - Complex compat for ``Series`` with ``ndarray``. (:issue:`4819`) - Removed unnecessary ``rwproperty`` from codebase in favor of builtin property. (:issue:`4843`) +- Refactor object level numeric methods (mean/sum/min/max...) from object level modules to + ``core/generic.py`` (:issue:`4435`) .. 
_release.bug_fixes-0.13.0: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 44bb1ce199bc0..935dff44ad49e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -63,28 +63,6 @@ # Docstring templates -_stat_doc = """ -Return %(name)s over requested axis. -%(na_action)s - -Parameters ----------- -axis : {0, 1} - 0 for row-wise, 1 for column-wise -skipna : boolean, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA -level : int, default None - If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a DataFrame -%(extras)s -Returns -------- -%(shortname)s : Series (or DataFrame if level specified) -""" - -_doc_exclude_na = "NA/null values are excluded" - _numeric_only_doc = """numeric_only : boolean, default None Include only float, int, boolean data. If None, will attempt to use everything, then use only numeric data @@ -3929,185 +3907,6 @@ def all(self, axis=None, bool_only=None, skipna=True, level=None, **kwargs): return self._reduce(nanops.nanall, axis=axis, skipna=skipna, numeric_only=bool_only, filter_type='bool') - @Substitution(name='sum', shortname='sum', na_action=_doc_exclude_na, - extras=_numeric_only_doc) - @Appender(_stat_doc) - def sum(self, axis=None, numeric_only=None, skipna=True, level=None, **kwargs): - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('sum', axis=axis, level=level, - skipna=skipna) - return self._reduce(nanops.nansum, axis=axis, skipna=skipna, - numeric_only=numeric_only) - - @Substitution(name='mean', shortname='mean', na_action=_doc_exclude_na, - extras='') - @Appender(_stat_doc) - def mean(self, axis=None, skipna=True, level=None, **kwargs): - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('mean', axis=axis, level=level, - skipna=skipna) - return self._reduce(nanops.nanmean, axis=axis, skipna=skipna, - numeric_only=None) - - 
@Substitution(name='minimum', shortname='min', na_action=_doc_exclude_na, - extras='') - @Appender(_stat_doc) - def min(self, axis=None, skipna=True, level=None, **kwargs): - """ - Notes - ----- - This method returns the minimum of the values in the DataFrame. If you - want the *index* of the minimum, use ``DataFrame.idxmin``. This is the - equivalent of the ``numpy.ndarray`` method ``argmin``. - - See Also - -------- - DataFrame.idxmin - Series.idxmin - """ - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('min', axis=axis, level=level, - skipna=skipna) - return self._reduce(nanops.nanmin, axis=axis, skipna=skipna, - numeric_only=None) - - @Substitution(name='maximum', shortname='max', na_action=_doc_exclude_na, - extras='') - @Appender(_stat_doc) - def max(self, axis=None, skipna=True, level=None, **kwargs): - """ - Notes - ----- - This method returns the maximum of the values in the DataFrame. If you - want the *index* of the maximum, use ``DataFrame.idxmax``. This is the - equivalent of the ``numpy.ndarray`` method ``argmax``. 
- - See Also - -------- - DataFrame.idxmax - Series.idxmax - """ - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('max', axis=axis, level=level, - skipna=skipna) - return self._reduce(nanops.nanmax, axis=axis, skipna=skipna, - numeric_only=None) - - @Substitution(name='product', shortname='product', - na_action='NA/null values are treated as 1', extras='') - @Appender(_stat_doc) - def prod(self, axis=None, skipna=True, level=None, **kwargs): - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('prod', axis=axis, level=level, - skipna=skipna) - return self._reduce(nanops.nanprod, axis=axis, skipna=skipna, - numeric_only=None) - - product = prod - - @Substitution(name='median', shortname='median', na_action=_doc_exclude_na, - extras='') - @Appender(_stat_doc) - def median(self, axis=None, skipna=True, level=None, **kwargs): - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('median', axis=axis, level=level, - skipna=skipna) - return self._reduce(nanops.nanmedian, axis=axis, skipna=skipna, - numeric_only=None) - - @Substitution(name='mean absolute deviation', shortname='mad', - na_action=_doc_exclude_na, extras='') - @Appender(_stat_doc) - def mad(self, axis=None, skipna=True, level=None, **kwargs): - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('mad', axis=axis, level=level, - skipna=skipna) - - frame = self._get_numeric_data() - - axis = self._get_axis_number(axis) - if axis == 0: - demeaned = frame - frame.mean(axis=0) - else: - demeaned = frame.sub(frame.mean(axis=1), axis=0) - return np.abs(demeaned).mean(axis=axis, skipna=skipna) - - @Substitution(name='variance', shortname='var', - na_action=_doc_exclude_na, extras='') - @Appender(_stat_doc + - """ - Normalized by N-1 (unbiased estimator). 
- """) - def var(self, axis=None, skipna=True, level=None, ddof=1, **kwargs): - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('var', axis=axis, level=level, - skipna=skipna, ddof=ddof) - return self._reduce(nanops.nanvar, axis=axis, skipna=skipna, - numeric_only=None, ddof=ddof) - - @Substitution(name='standard deviation', shortname='std', - na_action=_doc_exclude_na, extras='') - @Appender(_stat_doc + - """ - Normalized by N-1 (unbiased estimator). - """) - def std(self, axis=None, skipna=True, level=None, ddof=1, **kwargs): - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('std', axis=axis, level=level, - skipna=skipna, ddof=ddof) - return np.sqrt(self.var(axis=axis, skipna=skipna, ddof=ddof)) - - @Substitution(name='unbiased skewness', shortname='skew', - na_action=_doc_exclude_na, extras='') - @Appender(_stat_doc) - def skew(self, axis=None, skipna=True, level=None, **kwargs): - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('skew', axis=axis, level=level, - skipna=skipna) - return self._reduce(nanops.nanskew, axis=axis, skipna=skipna, - numeric_only=None) - - @Substitution(name='unbiased kurtosis', shortname='kurt', - na_action=_doc_exclude_na, extras='') - @Appender(_stat_doc) - def kurt(self, axis=None, skipna=True, level=None, **kwargs): - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('kurt', axis=axis, level=level, - skipna=skipna) - return self._reduce(nanops.nankurt, axis=axis, skipna=skipna, - numeric_only=None) - - def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwds): - grouped = self.groupby(level=level, axis=axis) - if hasattr(grouped, name) and skipna: - return getattr(grouped, name)(**kwds) - axis = self._get_axis_number(axis) - method = getattr(type(self), name) - applyf = lambda x: method(x, axis=axis, skipna=skipna, 
**kwds) - return grouped.aggregate(applyf) - def _reduce(self, op, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds): axis = self._get_axis_number(axis) @@ -4466,7 +4265,7 @@ def combineMult(self, other): DataFrame._setup_axes( ['index', 'columns'], info_axis=1, stat_axis=0, axes_are_reversed=True) - +DataFrame._add_numeric_operations() _EMPTY_SERIES = Series([]) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 060873e7281f2..6c7165abe4daf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -21,6 +21,8 @@ _values_from_object, _infer_dtype_from_scalar, _maybe_promote, ABCSeries) +import pandas.core.nanops as nanops +from pandas.util.decorators import Appender, Substitution def is_dictlike(x): return isinstance(x, (dict, com.ABCSeries)) @@ -2731,7 +2733,6 @@ def tz_localize(self, tz, axis=0, copy=True): #---------------------------------------------------------------------- # Numeric Methods - def abs(self): """ Return an object with absolute value taken. 
Only applicable to objects @@ -2789,6 +2790,217 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, np.putmask(rs.values, mask, np.nan) return rs + def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwds): + grouped = self.groupby(level=level, axis=axis) + if hasattr(grouped, name) and skipna: + return getattr(grouped, name)(**kwds) + axis = self._get_axis_number(axis) + method = getattr(type(self), name) + applyf = lambda x: method(x, axis=axis, skipna=skipna, **kwds) + return grouped.aggregate(applyf) + + @classmethod + def _add_numeric_operations(cls): + """ add the operations to the cls; evaluate the doc strings again """ + + axis_descr = "{" + ', '.join([ "{0} ({1})".format(a,i) for i, a in enumerate(cls._AXIS_ORDERS)]) + "}" + name = cls._constructor_sliced.__name__ if cls._AXIS_LEN > 1 else 'scalar' + _num_doc = """ + +Parameters +---------- +axis : """ + axis_descr + """ +skipna : boolean, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA +level : int, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a """ + name + """ + +Returns +------- +%(outname)s : """ + name + "\n" + + @Substitution(outname='sum') + @Appender(_num_doc) + def sum(self, axis=None, skipna=True, level=None, numeric_only=None, + **kwargs): + """ Return sum over requested axis """ + + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level('sum', axis=axis, level=level, + skipna=skipna) + return self._reduce(nanops.nansum, axis=axis, + skipna=skipna, numeric_only=numeric_only) + cls.sum = sum + + @Substitution(outname='mad') + @Appender(_num_doc) + def mad(self, axis=None, skipna=True, level=None, **kwargs): + """ Return the mean absolute deviation of the values for the requested axis """ + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level('mad', axis=axis, 
level=level, + skipna=skipna) + + data = self._get_numeric_data() + if axis == 0: + demeaned = data - data.mean(axis=0) + else: + demeaned = data.sub(data.mean(axis=1), axis=0) + return np.abs(demeaned).mean(axis=axis, skipna=skipna) + cls.mad = mad + + @Substitution(outname='mean') + @Appender(_num_doc) + def mean(self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs): + """ Return mean over requested axis """ + + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level('mean', axis=axis, level=level, + skipna=skipna) + return self._reduce(nanops.nanmean, axis=axis, + skipna=skipna, numeric_only=numeric_only) + cls.mean = mean + + @Substitution(outname='variance') + @Appender(_num_doc) + def var(self, axis=None, skipna=True, level=None, ddof=1, **kwargs): + """ + Return unbiased variance over requested axis + Normalized by N-1 (unbiased estimator). + """ + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level('var', axis=axis, level=level, + skipna=skipna, ddof=ddof) + + return self._reduce(nanops.nanvar, axis=axis, skipna=skipna, ddof=ddof) + cls.var = var + + @Substitution(outname='stdev') + @Appender(_num_doc) + def std(self, axis=None, skipna=True, level=None, ddof=1, **kwargs): + """ + Return unbiased standard deviation over requested axis + Normalized by N-1 (unbiased estimator). + """ + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level('std', axis=axis, level=level, + skipna=skipna, ddof=ddof) + result = self.var(axis=axis, skipna=skipna, ddof=ddof) + if getattr(result,'ndim',0) > 0: + return result.apply(np.sqrt) + return np.sqrt(result) + cls.std = std + + @Substitution(outname='skew') + @Appender(_num_doc) + def skew(self, axis=None, skipna=True, level=None, **wwargs): + """ + Return unbiased standard skewness over requested axis + Normalized by N-1 (unbiased estimator). 
+ """ + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level('skew', axis=axis, level=level, + skipna=skipna) + return self._reduce(nanops.nanskew, axis=axis, skipna=skipna) + cls.skew = skew + + @Substitution(outname='kurt') + @Appender(_num_doc) + def kurt(self, axis=None, skipna=True, level=None, **kwargs): + """ + Return unbiased standard kurtosis over requested axis + Normalized by N-1 (unbiased estimator). + """ + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level('kurt', axis=axis, level=level, + skipna=skipna) + return self._reduce(nanops.nankurt, axis=axis, skipna=skipna, + numeric_only=None) + cls.kurt = kurt + + @Substitution(outname='prod') + @Appender(_num_doc) + def prod(self, axis=None, skipna=True, level=None, **kwargs): + """ + Return product of the values over requested axis + """ + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level('prod', axis=axis, level=level, + skipna=skipna) + return self._reduce(nanops.nanprod, axis=axis, skipna=skipna) + cls.prod = prod + cls.product = prod + + @Substitution(outname='compounded') + @Appender(_num_doc) + def compound(self, axis=None, skipna=True, level=None, **kwargs): + """ return the compound percentage of the values for the requested axis """ + return (1 + self).prod(axis=axis, skipna=skipna, level=level) - 1 + cls.compound = compound + + @Substitution(outname='median') + @Appender(_num_doc) + def median(self, axis=None, skipna=True, level=None, **kwargs): + """ + Return median of the values over requested axis + """ + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level('median', axis=axis, level=level, + skipna=skipna) + return self._reduce(nanops.nanmedian, axis=axis, skipna=skipna) + cls.median = median + + @Substitution(outname='maximum') + @Appender(_num_doc) + def max(self, axis=None, skipna=True, 
level=None, **kwargs): + """ + This method returns the maximum of the values in the object. If you + want the *index* of the maximum, use ``idxmax``. This is the + equivalent of the ``numpy.ndarray`` method ``argmax``. + """ + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level('max', axis=axis, level=level, + skipna=skipna) + return self._reduce(nanops.nanmax, axis=axis, skipna=skipna) + cls.max = max + + @Substitution(outname='minimum') + @Appender(_num_doc) + def min(self, axis=None, skipna=True, level=None, **kwargs): + """ + This method returns the minimum of the values in the object. If you + want the *index* of the minimum, use ``idxmin``. This is the + equivalent of the ``numpy.ndarray`` method ``argmin``. + """ + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level('min', axis=axis, level=level, + skipna=skipna) + return self._reduce(nanops.nanmin, axis=axis, skipna=skipna) + cls.min = min + def cumsum(self, axis=None, skipna=True, **kwargs): """ Return DataFrame of cumulative sums over requested axis. diff --git a/pandas/core/panel.py b/pandas/core/panel.py index b4748c86a25b1..f0bad6b796e7c 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -831,10 +831,11 @@ def apply(self, func, axis='major'): result = np.apply_along_axis(func, i, self.values) return self._wrap_result(result, axis=axis) - def _reduce(self, op, axis=0, skipna=True): + def _reduce(self, op, axis=0, skipna=True, numeric_only=None, + filter_type=None, **kwds): axis_name = self._get_axis_name(axis) axis_number = self._get_axis_number(axis_name) - f = lambda x: op(x, axis=axis_number, skipna=skipna) + f = lambda x: op(x, axis=axis_number, skipna=skipna, **kwds) result = f(self.values) @@ -1207,109 +1208,11 @@ def f(self, other, axis=0): return self._combine(other, na_op, axis=axis) f.__name__ = name return f + # add `div`, `mul`, `pow`, etc.. 
ops.add_flex_arithmetic_methods(cls, _panel_arith_method, use_numexpr=use_numexpr, flex_comp_method=ops._comp_method_PANEL) - _agg_doc = """ -Return %(desc)s over requested axis - -Parameters ----------- -axis : {""" + ', '.join(cls._AXIS_ORDERS) + "} or {" \ - + ', '.join([str(i) for i in range(cls._AXIS_LEN)]) + """} -skipna : boolean, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA - -Returns -------- -%(outname)s : """ + cls._constructor_sliced.__name__ + "\n" - - _na_info = """ - -NA/null values are %s. -If all values are NA, result will be NA""" - - @Substitution(desc='sum', outname='sum') - @Appender(_agg_doc) - def sum(self, axis=None, skipna=True, **kwargs): - if axis is None: - axis = self._stat_axis_number - return self._reduce(nanops.nansum, axis=axis, skipna=skipna) - cls.sum = sum - - @Substitution(desc='mean', outname='mean') - @Appender(_agg_doc) - def mean(self, axis=None, skipna=True, **kwargs): - if axis is None: - axis = self._stat_axis_number - return self._reduce(nanops.nanmean, axis=axis, skipna=skipna) - cls.mean = mean - - @Substitution(desc='unbiased variance', outname='variance') - @Appender(_agg_doc) - def var(self, axis=None, skipna=True, **kwargs): - if axis is None: - axis = self._stat_axis_number - return self._reduce(nanops.nanvar, axis=axis, skipna=skipna) - cls.var = var - - @Substitution(desc='unbiased standard deviation', outname='stdev') - @Appender(_agg_doc) - def std(self, axis=None, skipna=True, **kwargs): - if axis is None: - axis = self._stat_axis_number - return self.var(axis=axis, skipna=skipna).apply(np.sqrt) - cls.std = std - - @Substitution(desc='unbiased skewness', outname='skew') - @Appender(_agg_doc) - def skew(self, axis=None, skipna=True, **wwargs): - if axis is None: - axis = self._stat_axis_number - return self._reduce(nanops.nanskew, axis=axis, skipna=skipna) - cls.skew = skew - - @Substitution(desc='product', outname='prod') - @Appender(_agg_doc) - def prod(self, 
axis=None, skipna=True, **kwargs): - if axis is None: - axis = self._stat_axis_number - return self._reduce(nanops.nanprod, axis=axis, skipna=skipna) - cls.prod = prod - - @Substitution(desc='compounded percentage', outname='compounded') - @Appender(_agg_doc) - def compound(self, axis=None, skipna=True, **kwargs): - if axis is None: - axis = self._stat_axis_number - return (1 + self).prod(axis=axis, skipna=skipna) - 1 - cls.compound = compound - - @Substitution(desc='median', outname='median') - @Appender(_agg_doc) - def median(self, axis=None, skipna=True, **kwargs): - if axis is None: - axis = self._stat_axis_number - return self._reduce(nanops.nanmedian, axis=axis, skipna=skipna) - cls.median = median - - @Substitution(desc='maximum', outname='maximum') - @Appender(_agg_doc) - def max(self, axis=None, skipna=True, **kwargs): - if axis is None: - axis = self._stat_axis_number - return self._reduce(nanops.nanmax, axis=axis, skipna=skipna) - cls.max = max - - @Substitution(desc='minimum', outname='minimum') - @Appender(_agg_doc) - def min(self, axis=None, skipna=True, **kwargs): - if axis is None: - axis = self._stat_axis_number - return self._reduce(nanops.nanmin, axis=axis, skipna=skipna) - cls.min = min Panel._setup_axes(axes=['items', 'major_axis', 'minor_axis'], info_axis=0, @@ -1321,6 +1224,7 @@ def min(self, axis=None, skipna=True, **kwargs): ops.add_special_arithmetic_methods(Panel, **ops.panel_special_funcs) Panel._add_aggregate_operations() +Panel._add_numeric_operations() WidePanel = Panel LongPanel = DataFrame diff --git a/pandas/core/panelnd.py b/pandas/core/panelnd.py index 8f427568a4102..9ccce1edc9067 100644 --- a/pandas/core/panelnd.py +++ b/pandas/core/panelnd.py @@ -108,5 +108,6 @@ def func(self, *args, **kwargs): # add the aggregate operations klass._add_aggregate_operations() + klass._add_numeric_operations() return klass diff --git a/pandas/core/series.py b/pandas/core/series.py index 972f654fe8640..01a11ed6dffab 100644 --- 
a/pandas/core/series.py +++ b/pandas/core/series.py @@ -77,40 +77,6 @@ def f(self, *args, **kwargs): f.__name__ = func.__name__ return f -_stat_doc = """ -Return %(name)s of values -%(na_action)s - -Parameters ----------- -skipna : boolean, default True - Exclude NA/null values -level : int, default None - If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a smaller Series -%(extras)s -Returns -------- -%(shortname)s : float (or Series if level specified) -""" -_doc_exclude_na = "NA/null values are excluded" -_doc_ndarray_interface = ("Extra parameters are to preserve ndarray" - "interface.\n") - - -def _make_stat_func(nanop, name, shortname, na_action=_doc_exclude_na, - extras=_doc_ndarray_interface): - - @Substitution(name=name, shortname=shortname, - na_action=na_action, extras=extras) - @Appender(_stat_doc) - def f(self, axis=0, dtype=None, out=None, skipna=True, level=None, **kwargs): - if level is not None: - return self._agg_by_level(shortname, level=level, skipna=skipna) - return nanop(_values_from_object(self), skipna=skipna) - f.__name__ = shortname - return f - #---------------------------------------------------------------------- # Series class @@ -1194,113 +1160,6 @@ def duplicated(self, take_last=False): duplicated = lib.duplicated(keys, take_last=take_last) return self._constructor(duplicated, index=self.index, name=self.name) - sum = _make_stat_func(nanops.nansum, 'sum', 'sum') - mean = _make_stat_func(nanops.nanmean, 'mean', 'mean') - median = _make_stat_func(nanops.nanmedian, 'median', 'median', extras='') - prod = _make_stat_func(nanops.nanprod, 'product', 'prod', extras='') - - @Substitution(name='mean absolute deviation', shortname='mad', - na_action=_doc_exclude_na, extras='') - @Appender(_stat_doc) - def mad(self, skipna=True, level=None, **kwargs): - if level is not None: - return self._agg_by_level('mad', level=level, skipna=skipna) - - demeaned = self - self.mean(skipna=skipna) - return 
np.abs(demeaned).mean(skipna=skipna) - - @Substitution(name='minimum', shortname='min', - na_action=_doc_exclude_na, extras='') - @Appender(_stat_doc) - def min(self, axis=None, out=None, skipna=True, level=None, **kwargs): - """ - Notes - ----- - This method returns the minimum of the values in the Series. If you - want the *index* of the minimum, use ``Series.idxmin``. This is the - equivalent of the ``numpy.ndarray`` method ``argmin``. - - See Also - -------- - Series.idxmin - DataFrame.idxmin - """ - if level is not None: - return self._agg_by_level('min', level=level, skipna=skipna) - return nanops.nanmin(_values_from_object(self), skipna=skipna) - - @Substitution(name='maximum', shortname='max', - na_action=_doc_exclude_na, extras='') - @Appender(_stat_doc) - def max(self, axis=None, out=None, skipna=True, level=None, **kwargs): - """ - Notes - ----- - This method returns the maximum of the values in the Series. If you - want the *index* of the maximum, use ``Series.idxmax``. This is the - equivalent of the ``numpy.ndarray`` method ``argmax``. - - See Also - -------- - Series.idxmax - DataFrame.idxmax - """ - if level is not None: - return self._agg_by_level('max', level=level, skipna=skipna) - return nanops.nanmax(_values_from_object(self), skipna=skipna) - - @Substitution(name='standard deviation', shortname='stdev', - na_action=_doc_exclude_na, extras='') - @Appender(_stat_doc + - """ - Normalized by N-1 (unbiased estimator). - """) - def std(self, axis=None, dtype=None, out=None, ddof=1, skipna=True, - level=None, **kwargs): - if level is not None: - return self._agg_by_level('std', level=level, skipna=skipna, - ddof=ddof) - return np.sqrt(nanops.nanvar(_values_from_object(self), skipna=skipna, ddof=ddof)) - - @Substitution(name='variance', shortname='var', - na_action=_doc_exclude_na, extras='') - @Appender(_stat_doc + - """ - Normalized by N-1 (unbiased estimator). 
- """) - def var(self, axis=None, dtype=None, out=None, ddof=1, skipna=True, - level=None, **kwargs): - if level is not None: - return self._agg_by_level('var', level=level, skipna=skipna, - ddof=ddof) - return nanops.nanvar(_values_from_object(self), skipna=skipna, ddof=ddof) - - @Substitution(name='unbiased skewness', shortname='skew', - na_action=_doc_exclude_na, extras='') - @Appender(_stat_doc) - def skew(self, skipna=True, level=None, **kwargs): - if level is not None: - return self._agg_by_level('skew', level=level, skipna=skipna) - - return nanops.nanskew(_values_from_object(self), skipna=skipna) - - @Substitution(name='unbiased kurtosis', shortname='kurt', - na_action=_doc_exclude_na, extras='') - @Appender(_stat_doc) - def kurt(self, skipna=True, level=None, **kwargs): - if level is not None: - return self._agg_by_level('kurt', level=level, skipna=skipna) - - return nanops.nankurt(_values_from_object(self), skipna=skipna) - - def _agg_by_level(self, name, level=0, skipna=True, **kwds): - grouped = self.groupby(level=level) - if hasattr(grouped, name) and skipna: - return getattr(grouped, name)(**kwds) - method = getattr(type(self), name) - applyf = lambda x: method(x, skipna=skipna, **kwds) - return grouped.aggregate(applyf) - def idxmin(self, axis=None, out=None, skipna=True): """ Index of first occurrence of minimum of values. 
@@ -2208,6 +2067,11 @@ def apply(self, func, convert_dtype=True, args=(), **kwds): else: return self._constructor(mapped, index=self.index, name=self.name) + def _reduce(self, op, axis=0, skipna=True, numeric_only=None, + filter_type=None, **kwds): + """ perform a reduction operation """ + return op(_values_from_object(self), skipna=skipna, **kwds) + def _reindex_indexer(self, new_index, indexer, copy): if indexer is None: if copy: @@ -2647,7 +2511,8 @@ def to_period(self, freq=None, copy=True): new_index = self.index.to_period(freq=freq) return self._constructor(new_values, index=new_index, name=self.name) -Series._setup_axes(['index'], info_axis=0) +Series._setup_axes(['index'], info_axis=0, stat_axis=0) +Series._add_numeric_operations() _INDEX_TYPES = ndarray, Index, list, tuple # reinstall the SeriesIndexer From e944d01d47fb8413c5a74e64dd58d69e9452421b Mon Sep 17 00:00:00 2001 From: jreback Date: Sat, 28 Sep 2013 21:46:40 -0400 Subject: [PATCH 3/4] CLN: refactor cum methods to core/generic.py --- doc/source/release.rst | 4 +- pandas/core/generic.py | 181 ++++++++++------------------------------- pandas/core/series.py | 118 --------------------------- 3 files changed, 46 insertions(+), 257 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index fe3fb726d79f2..2c975e58d9575 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -349,7 +349,9 @@ See :ref:`Internal Refactoring` - Complex compat for ``Series`` with ``ndarray``. (:issue:`4819`) - Removed unnecessary ``rwproperty`` from codebase in favor of builtin property. (:issue:`4843`) - Refactor object level numeric methods (mean/sum/min/max...) from object level modules to - ``core/generic.py``(:issue:`4435`) + ``core/generic.py`` (:issue:`4435`). +- Refactor the cumulative methods (``cumsum``, ``cumprod``, ``cummin``, ``cummax``) to ``core/generic.py`` (:issue:`4435`); note that these now have a more numpy-like + function signature. .. 
_release.bug_fixes-0.13.0: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6c7165abe4daf..819a56c5bc1f7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2816,6 +2816,22 @@ def _add_numeric_operations(cls): level : int, default None If the axis is a MultiIndex (hierarchical), count along a particular level, collapsing into a """ + name + """ +numeric_only : boolean, default None + Include only float, int, boolean data. If None, will attempt to use + everything, then use only numeric data + +Returns +------- +%(outname)s : """ + name + "\n" + + _cnum_doc = """ + +Parameters +---------- +axis : """ + axis_descr + """ +skipna : boolean, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA Returns ------- @@ -3001,148 +3017,37 @@ def min(self, axis=None, skipna=True, level=None, **kwargs): return self._reduce(nanops.nanmin, axis=axis, skipna=skipna) cls.min = min - def cumsum(self, axis=None, skipna=True, **kwargs): - """ - Return DataFrame of cumulative sums over requested axis. - - Parameters - ---------- - axis : {0, 1} - 0 for row-wise, 1 for column-wise - skipna : boolean, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA - - Returns - ------- - y : DataFrame - """ - if axis is None: - axis = self._stat_axis_number - else: - axis = self._get_axis_number(axis) - - y = _values_from_object(self).copy() - if not issubclass(y.dtype.type, np.integer): - mask = np.isnan(_values_from_object(self)) - - if skipna: - np.putmask(y, mask, 0.) 
- - result = y.cumsum(axis) - - if skipna: - np.putmask(result, mask, np.nan) - else: - result = y.cumsum(axis) - return self._wrap_array(result, self.axes, copy=False) - - def cumprod(self, axis=None, skipna=True, **kwargs): - """ - Return cumulative product over requested axis as DataFrame - - Parameters - ---------- - axis : {0, 1} - 0 for row-wise, 1 for column-wise - skipna : boolean, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA - - Returns - ------- - y : DataFrame - """ - if axis is None: - axis = self._stat_axis_number - else: - axis = self._get_axis_number(axis) - - y = _values_from_object(self).copy() - if not issubclass(y.dtype.type, np.integer): - mask = np.isnan(_values_from_object(self)) - - if skipna: - np.putmask(y, mask, 1.) - result = y.cumprod(axis) - - if skipna: - np.putmask(result, mask, np.nan) - else: - result = y.cumprod(axis) - return self._wrap_array(result, self.axes, copy=False) - - def cummax(self, axis=None, skipna=True, **kwargs): - """ - Return DataFrame of cumulative max over requested axis. - - Parameters - ---------- - axis : {0, 1} - 0 for row-wise, 1 for column-wise - skipna : boolean, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA - - Returns - ------- - y : DataFrame - """ - if axis is None: - axis = self._stat_axis_number - else: - axis = self._get_axis_number(axis) - - y = _values_from_object(self).copy() - if not issubclass(y.dtype.type, np.integer): - mask = np.isnan(_values_from_object(self)) + def _make_cum_function(name, accum_func, mask_a, mask_b): - if skipna: - np.putmask(y, mask, -np.inf) - - result = np.maximum.accumulate(y, axis) - - if skipna: - np.putmask(result, mask, np.nan) - else: - result = np.maximum.accumulate(y, axis) - return self._wrap_array(result, self.axes, copy=False) - - def cummin(self, axis=None, skipna=True, **kwargs): - """ - Return DataFrame of cumulative min over requested axis. 
- - Parameters - ---------- - axis : {0, 1} - 0 for row-wise, 1 for column-wise - skipna : boolean, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA - - Returns - ------- - y : DataFrame - """ - if axis is None: - axis = self._stat_axis_number - else: - axis = self._get_axis_number(axis) - - y = _values_from_object(self).copy() - if not issubclass(y.dtype.type, np.integer): - mask = np.isnan(_values_from_object(self)) + @Substitution(outname=name) + @Appender("Return cumulative {0} over requested axis.".format(name) + _cnum_doc) + def func(self, axis=None, dtype=None, out=None, skipna=True, **kwargs): + if axis is None: + axis = self._stat_axis_number + else: + axis = self._get_axis_number(axis) + + y = _values_from_object(self).copy() + if not issubclass(y.dtype.type, (np.integer,np.bool_)): + mask = isnull(self) + if skipna: + np.putmask(y, mask, mask_a) + result = accum_func(y, axis) + if skipna: + np.putmask(result, mask, mask_b) + else: + result = accum_func(y, axis) - if skipna: - np.putmask(y, mask, np.inf) + d = self._construct_axes_dict() + d['copy'] = False + return self._constructor(result, **d)._propogate_attributes(self) + return func - result = np.minimum.accumulate(y, axis) - if skipna: - np.putmask(result, mask, np.nan) - else: - result = np.minimum.accumulate(y, axis) - return self._wrap_array(result, self.axes, copy=False) + cls.cummin = _make_cum_function('min', lambda y, axis: np.minimum.accumulate(y, axis), np.inf, np.nan) + cls.cumsum = _make_cum_function('sum', lambda y, axis: y.cumsum(axis), 0., np.nan) + cls.cumprod = _make_cum_function('prod', lambda y, axis: y.cumprod(axis), 1., np.nan) + cls.cummax = _make_cum_function('max', lambda y, axis: np.maximum.accumulate(y, axis), -np.inf, np.nan) # install the indexerse for _name, _indexer in indexing.get_indexers_list(): diff --git a/pandas/core/series.py b/pandas/core/series.py index 01a11ed6dffab..90d535e51580c 100644 --- a/pandas/core/series.py 
+++ b/pandas/core/series.py @@ -1216,124 +1216,6 @@ def idxmax(self, axis=None, out=None, skipna=True): argmin = idxmin argmax = idxmax - def cumsum(self, axis=0, dtype=None, out=None, skipna=True): - """ - Cumulative sum of values. Preserves locations of NaN values - - Extra parameters are to preserve ndarray interface. - - Parameters - ---------- - skipna : boolean, default True - Exclude NA/null values - - Returns - ------- - cumsum : Series - """ - arr = _values_from_object(self).copy() - - do_mask = skipna and not issubclass(self.dtype.type, - (np.integer, np.bool_)) - if do_mask: - mask = isnull(arr) - np.putmask(arr, mask, 0.) - - result = arr.cumsum() - - if do_mask: - np.putmask(result, mask, pa.NA) - - return self._constructor(result, index=self.index, name=self.name) - - def cumprod(self, axis=0, dtype=None, out=None, skipna=True): - """ - Cumulative product of values. Preserves locations of NaN values - - Extra parameters are to preserve ndarray interface. - - Parameters - ---------- - skipna : boolean, default True - Exclude NA/null values - - Returns - ------- - cumprod : Series - """ - arr = _values_from_object(self).copy() - - do_mask = skipna and not issubclass(self.dtype.type, - (np.integer, np.bool_)) - if do_mask: - mask = isnull(arr) - np.putmask(arr, mask, 1.) - - result = arr.cumprod() - - if do_mask: - np.putmask(result, mask, pa.NA) - - return self._constructor(result, index=self.index, name=self.name) - - def cummax(self, axis=0, dtype=None, out=None, skipna=True): - """ - Cumulative max of values. Preserves locations of NaN values - - Extra parameters are to preserve ndarray interface. 
- - Parameters - ---------- - skipna : boolean, default True - Exclude NA/null values - - Returns - ------- - cummax : Series - """ - arr = _values_from_object(self).copy() - - do_mask = skipna and not issubclass(self.dtype.type, np.integer) - if do_mask: - mask = isnull(arr) - np.putmask(arr, mask, -np.inf) - - result = np.maximum.accumulate(arr) - - if do_mask: - np.putmask(result, mask, pa.NA) - - return self._constructor(result, index=self.index, name=self.name) - - def cummin(self, axis=0, dtype=None, out=None, skipna=True): - """ - Cumulative min of values. Preserves locations of NaN values - - Extra parameters are to preserve ndarray interface. - - Parameters - ---------- - skipna : boolean, default True - Exclude NA/null values - - Returns - ------- - cummin : Series - """ - arr = _values_from_object(self).copy() - - do_mask = skipna and not issubclass(self.dtype.type, np.integer) - if do_mask: - mask = isnull(arr) - np.putmask(arr, mask, np.inf) - - result = np.minimum.accumulate(arr) - - if do_mask: - np.putmask(result, mask, pa.NA) - - return self._constructor(result, index=self.index, name=self.name) - @Appender(pa.Array.round.__doc__) def round(self, decimals=0, out=None): """ From cf57d644780d9305999aba8088a11433e4f61605 Mon Sep 17 00:00:00 2001 From: jreback Date: Sat, 28 Sep 2013 22:16:17 -0400 Subject: [PATCH 4/4] CLN: make_stat_function for numeric methods in core/generic.py --- pandas/core/generic.py | 194 ++++++++++------------------------- pandas/tests/test_frame.py | 2 +- pandas/tests/test_generic.py | 6 +- pandas/tests/test_groupby.py | 2 +- 4 files changed, 58 insertions(+), 146 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 819a56c5bc1f7..18a03eb313dd2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2807,6 +2807,8 @@ def _add_numeric_operations(cls): name = cls._constructor_sliced.__name__ if cls._AXIS_LEN > 1 else 'scalar' _num_doc = """ +%(desc)s + Parameters ---------- axis : """ + 
axis_descr + """ @@ -2822,7 +2824,7 @@ def _add_numeric_operations(cls): Returns ------- -%(outname)s : """ + name + "\n" +%(outname)s : """ + name + " or " + cls.__name__ + " (if level specified)\n" _cnum_doc = """ @@ -2837,25 +2839,46 @@ def _add_numeric_operations(cls): ------- %(outname)s : """ + name + "\n" - @Substitution(outname='sum') - @Appender(_num_doc) - def sum(self, axis=None, skipna=True, level=None, numeric_only=None, - **kwargs): - """ Return sum over requested axis """ + def _make_stat_function(name, desc, f): - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('sum', axis=axis, level=level, - skipna=skipna) - return self._reduce(nanops.nansum, axis=axis, - skipna=skipna, numeric_only=numeric_only) - cls.sum = sum - - @Substitution(outname='mad') + @Substitution(outname=name, desc=desc) + @Appender(_num_doc) + def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None, + **kwargs): + if skipna is None: + skipna = True + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level(name, axis=axis, level=level, + skipna=skipna) + return self._reduce(f, axis=axis, + skipna=skipna, numeric_only=numeric_only) + stat_func.__name__ = name + return stat_func + + cls.sum = _make_stat_function('sum',"Return the sum of the values for the requested axis", nanops.nansum) + cls.mean = _make_stat_function('mean',"Return the mean of the values for the requested axis", nanops.nanmean) + cls.skew = _make_stat_function('skew',"Return unbiased skew over requested axis\nNormalized by N-1", nanops.nanskew) + cls.kurt = _make_stat_function('kurt',"Return unbiased kurtosis over requested axis\nNormalized by N-1", nanops.nankurt) + cls.kurtosis = cls.kurt + cls.prod = _make_stat_function('prod',"Return the product of the values for the requested axis", nanops.nanprod) + cls.product = cls.prod + cls.median = _make_stat_function('median',"Return the median of the values 
for the requested axis", nanops.nanmedian) + cls.max = _make_stat_function('max',""" +This method returns the maximum of the values in the object. If you +want the *index* of the maximum, use ``idxmax``. This is the +equivalent of the ``numpy.ndarray`` method ``argmax``.""", nanops.nanmax) + cls.min = _make_stat_function('min',""" +This method returns the minimum of the values in the object. If you +want the *index* of the minimum, use ``idxmin``. This is the +equivalent of the ``numpy.ndarray`` method ``argmin``.""", nanops.nanmin) + + @Substitution(outname='mad', desc="Return the mean absolute deviation of the values for the requested axis") @Appender(_num_doc) - def mad(self, axis=None, skipna=True, level=None, **kwargs): - """ Return the mean absolute deviation of the values for the requested axis """ + def mad(self, axis=None, skipna=None, level=None, **kwargs): + if skipna is None: + skipna = True if axis is None: axis = self._stat_axis_number if level is not None: @@ -2870,27 +2893,11 @@ def mad(self, axis=None, skipna=True, level=None, **kwargs): return np.abs(demeaned).mean(axis=axis, skipna=skipna) cls.mad = mad - @Substitution(outname='mean') + @Substitution(outname='variance',desc="Return unbiased variance over requested axis\nNormalized by N-1") @Appender(_num_doc) - def mean(self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs): - """ Return mean over requested axis """ - - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('mean', axis=axis, level=level, - skipna=skipna) - return self._reduce(nanops.nanmean, axis=axis, - skipna=skipna, numeric_only=numeric_only) - cls.mean = mean - - @Substitution(outname='variance') - @Appender(_num_doc) - def var(self, axis=None, skipna=True, level=None, ddof=1, **kwargs): - """ - Return unbiased variance over requested axis - Normalized by N-1 (unbiased estimator). 
- """ + def var(self, axis=None, skipna=None, level=None, ddof=1, **kwargs): + if skipna is None: + skipna = True if axis is None: axis = self._stat_axis_number if level is not None: @@ -2900,13 +2907,11 @@ def var(self, axis=None, skipna=True, level=None, ddof=1, **kwargs): return self._reduce(nanops.nanvar, axis=axis, skipna=skipna, ddof=ddof) cls.var = var - @Substitution(outname='stdev') + @Substitution(outname='stdev',desc="Return unbiased standard deviation over requested axis\nNormalized by N-1") @Appender(_num_doc) - def std(self, axis=None, skipna=True, level=None, ddof=1, **kwargs): - """ - Return unbiased standard deviation over requested axis - Normalized by N-1 (unbiased estimator). - """ + def std(self, axis=None, skipna=None, level=None, ddof=1, **kwargs): + if skipna is None: + skipna = True if axis is None: axis = self._stat_axis_number if level is not None: @@ -2918,105 +2923,14 @@ def std(self, axis=None, skipna=True, level=None, ddof=1, **kwargs): return np.sqrt(result) cls.std = std - @Substitution(outname='skew') - @Appender(_num_doc) - def skew(self, axis=None, skipna=True, level=None, **wwargs): - """ - Return unbiased standard skewness over requested axis - Normalized by N-1 (unbiased estimator). - """ - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('skew', axis=axis, level=level, - skipna=skipna) - return self._reduce(nanops.nanskew, axis=axis, skipna=skipna) - cls.skew = skew - - @Substitution(outname='kurt') - @Appender(_num_doc) - def kurt(self, axis=None, skipna=True, level=None, **kwargs): - """ - Return unbiased standard kurtosis over requested axis - Normalized by N-1 (unbiased estimator). 
- """ - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('kurt', axis=axis, level=level, - skipna=skipna) - return self._reduce(nanops.nankurt, axis=axis, skipna=skipna, - numeric_only=None) - cls.kurt = kurt - - @Substitution(outname='prod') - @Appender(_num_doc) - def prod(self, axis=None, skipna=True, level=None, **kwargs): - """ - Return product of the values over requested axis - """ - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('prod', axis=axis, level=level, - skipna=skipna) - return self._reduce(nanops.nanprod, axis=axis, skipna=skipna) - cls.prod = prod - cls.product = prod - - @Substitution(outname='compounded') + @Substitution(outname='compounded',desc="Return the compound percentage of the values for the requested axis") @Appender(_num_doc) - def compound(self, axis=None, skipna=True, level=None, **kwargs): - """ return the compound percentage of the values for the requested axis """ + def compound(self, axis=None, skipna=None, level=None, **kwargs): + if skipna is None: + skipna = True return (1 + self).prod(axis=axis, skipna=skipna, level=level) - 1 cls.compound = compound - @Substitution(outname='median') - @Appender(_num_doc) - def median(self, axis=None, skipna=True, level=None, **kwargs): - """ - Return median of the values over requested axis - """ - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('median', axis=axis, level=level, - skipna=skipna) - return self._reduce(nanops.nanmedian, axis=axis, skipna=skipna) - cls.median = median - - @Substitution(outname='maximum') - @Appender(_num_doc) - def max(self, axis=None, skipna=True, level=None, **kwargs): - """ - This method returns the maximum of the values in the objec. If you - want the *index* of the maximum, use ``idxmax``. This is the - equivalent of the ``numpy.ndarray`` method ``argmax``. 
- """ - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('max', axis=axis, level=level, - skipna=skipna) - return self._reduce(nanops.nanmax, axis=axis, skipna=skipna) - cls.max = max - - @Substitution(outname='minimum') - @Appender(_num_doc) - def min(self, axis=None, skipna=True, level=None, **kwargs): - """ - This method returns the minimum of the values in the object. If you - want the *index* of the minimum, use ``idxmin``. This is the - equivalent of the ``numpy.ndarray`` method ``argmin``. - """ - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level('min', axis=axis, level=level, - skipna=skipna) - return self._reduce(nanops.nanmin, axis=axis, skipna=skipna) - cls.min = min - def _make_cum_function(name, accum_func, mask_a, mask_b): @Substitution(outname=name) @@ -3041,6 +2955,8 @@ def func(self, axis=None, dtype=None, out=None, skipna=True, **kwargs): d = self._construct_axes_dict() d['copy'] = False return self._constructor(result, **d)._propogate_attributes(self) + + func.__name__ = name return func diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index a41072d97ddc3..fd37717e73ba0 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -17,7 +17,7 @@ map, zip, range, long, lrange, lmap, lzip, OrderedDict, cPickle as pickle, u, StringIO ) -from pandas import compat +from pandas import compat, _np_version_under1p7 from numpy import random, nan from numpy.random import randn, rand diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 5a215d405b277..7f50cb2453a21 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -163,16 +163,12 @@ def test_numpy_1_7_compat_numeric_methods(self): # numpy in 1.7 tries to pass addtional arguments to pandas functions o = self._construct(shape=4) - for op in ['min','max','max','var','std','prod','sum', + for op in 
['min','max','max','var','std','prod','sum','cumsum','cumprod', 'median','skew','kurt','compound','cummax','cummin','all','any']: f = getattr(np,op,None) if f is not None: f(o) - # numpy broken methods, since these are not passed by keywords, they - # won't work - #'cumsum','cumprod', - class TestSeries(unittest.TestCase, Generic): _typ = Series _comparator = lambda self, x, y: assert_series_equal(x,y) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 46ab0fe022e78..fec6460ea31f3 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -18,7 +18,7 @@ from pandas.compat import( range, long, lrange, StringIO, lmap, lzip, map, zip, builtins, OrderedDict ) -from pandas import compat +from pandas import compat, _np_version_under1p7 from pandas.core.panel import Panel from pandas.tools.merge import concat from collections import defaultdict