Skip to content

Commit 232be97

Browse files
committed
ENH: more nanops refactoring, tests pass, pandas-dev#382
1 parent e0da712 commit 232be97

File tree

3 files changed

+42
-36
lines changed

3 files changed

+42
-36
lines changed

pandas/core/frame.py

+9-35
Original file line numberDiff line numberDiff line change
@@ -2747,16 +2747,8 @@ def prod(self, axis=0, skipna=True, level=None):
27472747
if level is not None:
27482748
return self._agg_by_level('prod', axis=axis, level=level,
27492749
skipna=skipna)
2750-
2751-
values, axis_labels = self._get_agg_data(axis, numeric_only=True)
2752-
2753-
if skipna and not issubclass(values.dtype.type, np.integer):
2754-
values[np.isnan(values)] = 1
2755-
result = values.prod(axis)
2756-
count = self.count(axis, numeric_only=True)
2757-
result[count == 0] = nan
2758-
2759-
return Series(result, index=axis_labels)
2750+
return self._reduce(nanops.nanprod, axis=axis, skipna=skipna,
2751+
numeric_only=None)
27602752
_add_stat_doc(prod, 'product', 'product',
27612753
na_action='NA/null values are treated as 1')
27622754
product = prod
@@ -2765,30 +2757,8 @@ def median(self, axis=0, skipna=True, level=None):
27652757
if level is not None:
27662758
return self._agg_by_level('median', axis=axis, level=level,
27672759
skipna=skipna)
2768-
2769-
frame = self._get_numeric_data()
2770-
2771-
if axis == 0:
2772-
values = frame.values.T
2773-
result_index = frame.columns
2774-
elif axis == 1:
2775-
values = frame.values
2776-
result_index = self.index
2777-
else:
2778-
raise ValueError('axis must be in {0, 1}')
2779-
2780-
def get_median(x):
2781-
mask = notnull(x)
2782-
if not skipna and not mask.all():
2783-
return np.nan
2784-
return lib.median(x[mask])
2785-
2786-
if values.dtype != np.float64:
2787-
values = values.astype('f8')
2788-
2789-
medians = [get_median(arr) for arr in values]
2790-
return Series(medians, index=result_index)
2791-
2760+
return self._reduce(nanops.nanmedian, axis=axis, skipna=skipna,
2761+
numeric_only=None)
27922762
_add_stat_doc(median, 'median', 'median')
27932763

27942764
def mad(self, axis=0, skipna=True, level=None):
@@ -2852,7 +2822,11 @@ def _reduce(self, op, axis=0, skipna=True, numeric_only=None):
28522822
result = f(values)
28532823

28542824
if result.dtype == np.object_:
2855-
result = result.astype('f8')
2825+
try:
2826+
result = result.astype('f8')
2827+
except (ValueError, TypeError):
2828+
pass
2829+
28562830
return Series(result, index=labels)
28572831

28582832
def idxmin(self, axis=0, skipna=True):

pandas/core/nanops.py

+24
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import numpy as np
22

33
from pandas.core.common import isnull, notnull
4+
import pandas._tseries as lib
45

56
def nansum(values, axis=0, skipna=True, copy=True):
67
if values.dtype == np.object_:
@@ -43,6 +44,21 @@ def nanmean(values, axis=0, skipna=True, copy=True):
4344

4445
return the_mean
4546

47+
def nanmedian(values, axis=0, skipna=True, copy=True):
    """
    Compute one median per slice of a 2-D array, optionally skipping nulls.

    Parameters
    ----------
    values : ndarray
        Input data. Anything that is not already float64 is cast to
        float64 before the medians are computed.
    axis : int, default 0
        With ``axis == 0`` the array is transposed first, so one median
        is produced per column; any other value yields one median per
        row of ``values`` as given.
    skipna : bool, default True
        If True, null entries are dropped from each slice before taking
        the median. If False, a slice containing any null yields NaN.
    copy : bool, default True
        Accepted for signature parity with the other reductions in this
        module; not consulted by this function.

    Returns
    -------
    ndarray
        One median per slice, in slice order.
    """
    def _median_1d(vec):
        valid = notnull(vec)
        # Nulls are only fatal when the caller asked not to skip them.
        if skipna or valid.all():
            # NOTE(review): behaviour of lib.median on an empty selection
            # (an all-null slice) is not visible here -- confirm.
            return lib.median(vec[valid])
        return np.nan

    if values.dtype != np.float64:
        values = values.astype('f8')

    # Iterating a 2-D array walks its rows, so transpose to walk columns.
    slices = values.T if axis == 0 else values

    return np.asarray([_median_1d(vec) for vec in slices])
61+
4662
def nanvar(values, axis=0, skipna=True, copy=True):
4763
mask = isnull(values)
4864
count = (values.shape[axis] - mask.sum(axis)).astype(float)
@@ -98,3 +114,11 @@ def nanmax(values, axis=0, skipna=True, copy=True):
98114

99115
return values.max(axis)
100116

117+
def nanprod(values, axis=0, skipna=True, copy=True):
    """
    Product of array elements along ``axis``, treating nulls as 1.

    Parameters
    ----------
    values : ndarray
        Input data.
    axis : int, default 0
        Axis along which the product is taken.
    skipna : bool, default True
        If True, null entries are replaced by 1 (the multiplicative
        identity) before multiplying. If False, the data is multiplied
        as-is.
    copy : bool, default True
        If True, the null replacement is done on a copy so the caller's
        array is left untouched. If False, ``values`` is modified in
        place when it contains nulls and ``skipna`` is True.

    Returns
    -------
    ndarray or scalar
        The products. Any position whose slice holds no non-null values
        is NaN. A 1-D input yields a scalar.
    """
    mask = isnull(values)

    # Only touch the data when there is something to replace; this also
    # avoids a needless copy for integer input, which cannot hold nulls.
    if skipna and mask.any():
        if copy:
            values = values.copy()
        values[mask] = 1

    result = values.prod(axis)

    # Slices with zero non-null observations have no meaningful product.
    all_missing = (mask.shape[axis] - mask.sum(axis)) == 0

    # 1-D input reduces to a scalar, which cannot be assigned into.
    if np.ndim(result) == 0:
        return np.nan if all_missing else result

    if all_missing.any():
        # NaN is not representable in integer/boolean results; upcast first.
        if not issubclass(result.dtype.type, (np.inexact, np.object_)):
            result = result.astype('f8')
        result[all_missing] = np.nan

    return result

pandas/tests/test_frame.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -3028,7 +3028,7 @@ def test_stat_ops_attempt_obj_array(self):
30283028
}
30293029
df = DataFrame(data, index=['foo', 'bar', 'baz'],
30303030
dtype='O')
3031-
methods = ['sum', 'mean', 'var', 'std', 'skew', 'min', 'max']
3031+
methods = ['sum', 'mean', 'prod', 'var', 'std', 'skew', 'min', 'max']
30323032

30333033
for meth in methods:
30343034
self.assert_(df.values.dtype == np.object_)
@@ -3122,6 +3122,14 @@ def wrapper(x):
31223122
getattr(self.mixed_frame, name)(axis=0)
31233123
getattr(self.mixed_frame, name)(axis=1)
31243124

3125+
# all NA case
3126+
# if has_skipna:
3127+
# all_na = self.frame * np.NaN
3128+
# r0 = getattr(all_na, name)(axis=0)
3129+
# r1 = getattr(all_na, name)(axis=1)
3130+
# self.assert_(np.isnan(r0).all())
3131+
# self.assert_(np.isnan(r1).all())
3132+
31253133
def test_sum_corner(self):
31263134
axis0 = self.empty.sum(0)
31273135
axis1 = self.empty.sum(1)

0 commit comments

Comments
 (0)