Skip to content

Commit a0c3a57

Browse files
committed
ENH: use nanops in Panel aggregation methods, GH #536
1 parent 6203957 commit a0c3a57

File tree

4 files changed

+28
-64
lines changed

4 files changed

+28
-64
lines changed

RELEASE.rst

+2
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ pandas 0.6.2
6868
- Avoid Index dict creation in some cases (i.e. when getting slices, etc.),
6969
regression from prior versions
7070
- Friendlier error message in setup.py if NumPy not installed
71+
- Use common set of NA-handling operations (sum, mean, etc.) in Panel class
72+
also (GH #536)
7173

7274
**Bug fixes**
7375

pandas/core/frame.py

-2
Original file line numberDiff line numberDiff line change
@@ -3105,8 +3105,6 @@ def _reduce(self, op, axis=0, skipna=True, numeric_only=None):
31053105
if numeric_only is None:
31063106
try:
31073107
values = self.values
3108-
if not self._is_mixed_type:
3109-
values = values.copy()
31103108
result = f(values)
31113109
except Exception:
31123110
data = self._get_numeric_data()

pandas/core/nanops.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,8 @@ def get_median(x):
5454
if values.dtype != np.float64:
5555
values = values.astype('f8')
5656

57-
if axis == 0:
58-
values = values.T
59-
6057
if values.ndim > 1:
61-
return np.asarray([get_median(arr) for arr in values])
58+
return np.apply_along_axis(get_median, axis, values)
6259
else:
6360
return get_median(values)
6461

pandas/core/panel.py

+25-58
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from pandas.util import py3compat
1717
from pandas.util.decorators import deprecate
1818
import pandas.core.common as com
19+
import pandas.core.nanops as nanops
1920
import pandas._tseries as lib
2021

2122

@@ -939,6 +940,19 @@ def _array_method(self, func, axis='major', fill_value=None, skipna=True):
939940
result = self._values_aggregate(func, axis, fill_value, skipna=skipna)
940941
return self._wrap_result(result, axis=axis)
941942

943+
def _reduce(self, op, axis=0, skipna=True):
944+
axis_name = self._get_axis_name(axis)
945+
axis_number = self._get_axis_number(axis_name)
946+
f = lambda x: op(x, axis=axis_number, skipna=skipna, copy=True)
947+
948+
result = f(self.values)
949+
950+
index, columns = self._get_plane_axes(axis_name)
951+
if axis_name != 'items':
952+
result = result.T
953+
954+
return DataFrame(result, index=index, columns=columns)
955+
942956
def _wrap_result(self, result, axis):
943957
axis = self._get_axis_name(axis)
944958
index, columns = self._get_plane_axes(axis)
@@ -969,90 +983,43 @@ def count(self, axis='major'):
969983
return self._wrap_result(result, axis)
970984

971985
def sum(self, axis='major', skipna=True):
972-
return self._array_method(np.sum, axis=axis, fill_value=0,
973-
skipna=skipna)
974-
986+
return self._reduce(nanops.nansum, axis=axis, skipna=skipna)
975987
_add_docs(sum, 'sum', 'sum')
976988

977989
def mean(self, axis='major', skipna=True):
978-
the_sum = self.sum(axis=axis, skipna=skipna)
979-
the_count = self.count(axis=axis)
980-
return the_sum / the_count
981-
990+
return self._reduce(nanops.nanmean, axis=axis, skipna=skipna)
982991
_add_docs(mean, 'mean', 'mean')
983992

984993
def var(self, axis='major', skipna=True):
985-
i = self._get_axis_number(axis)
986-
y = np.array(self.values)
987-
mask = np.isnan(y)
988-
989-
count = (-mask).sum(axis=i).astype(float)
990-
991-
if skipna:
992-
y[mask] = 0
993-
994-
X = y.sum(axis=i)
995-
XX = (y ** 2).sum(axis=i)
996-
997-
theVar = (XX - X**2 / count) / (count - 1)
998-
999-
return self._wrap_result(theVar, axis)
1000-
994+
return self._reduce(nanops.nanvar, axis=axis, skipna=skipna)
1001995
_add_docs(var, 'unbiased variance', 'variance')
1002996

1003997
def std(self, axis='major', skipna=True):
1004998
return self.var(axis=axis, skipna=skipna).apply(np.sqrt)
1005-
1006999
_add_docs(std, 'unbiased standard deviation', 'stdev')
10071000

1008-
def prod(self, axis='major', skipna=True):
1009-
return self._array_method(np.prod, axis=axis, fill_value=1,
1010-
skipna=skipna)
1001+
def skew(self, axis='major', skipna=True):
1002+
return self._reduce(nanops.nanskew, axis=axis, skipna=skipna)
1003+
_add_docs(skew, 'unbiased skewness', 'skew')
10111004

1005+
def prod(self, axis='major', skipna=True):
1006+
return self._reduce(nanops.nanprod, axis=axis, skipna=skipna)
10121007
_add_docs(prod, 'product', 'prod')
10131008

10141009
def compound(self, axis='major', skipna=True):
10151010
return (1 + self).prod(axis=axis, skipna=skipna) - 1
1016-
10171011
_add_docs(compound, 'compounded percentage', 'compounded')
10181012

10191013
def median(self, axis='major', skipna=True):
1020-
def f(arr):
1021-
mask = com.notnull(arr)
1022-
if skipna:
1023-
return lib.median(arr[mask])
1024-
else:
1025-
if not mask.all():
1026-
return np.nan
1027-
return lib.median(arr)
1028-
return self.apply(f, axis=axis)
1029-
1014+
return self._reduce(nanops.nanmedian, axis=axis, skipna=skipna)
10301015
_add_docs(median, 'median', 'median')
10311016

10321017
def max(self, axis='major', skipna=True):
1033-
i = self._get_axis_number(axis)
1034-
1035-
y = np.array(self.values)
1036-
if skipna:
1037-
np.putmask(y, np.isnan(y), -np.inf)
1038-
1039-
result = y.max(axis=i)
1040-
result = np.where(np.isneginf(result), np.nan, result)
1041-
return self._wrap_result(result, axis)
1042-
1018+
return self._reduce(nanops.nanmax, axis=axis, skipna=skipna)
10431019
_add_docs(max, 'maximum', 'maximum')
10441020

10451021
def min(self, axis='major', skipna=True):
1046-
i = self._get_axis_number(axis)
1047-
1048-
y = np.array(self.values)
1049-
if skipna:
1050-
np.putmask(y, np.isnan(y), np.inf)
1051-
1052-
result = y.min(axis=i)
1053-
result = np.where(np.isinf(result), np.nan, result)
1054-
return self._wrap_result(result, axis)
1055-
1022+
return self._reduce(nanops.nanmin, axis=axis, skipna=skipna)
10561023
_add_docs(min, 'minimum', 'minimum')
10571024

10581025
def shift(self, lags, axis='major'):

0 commit comments

Comments
 (0)