Skip to content

Commit b84efc1

Browse files
committed
BUG: catch zero division errors in nanops from object dtype arrays in all NA case, GH #676
1 parent 4d29e47 commit b84efc1

File tree

2 files changed

+28
-10
lines changed

2 files changed

+28
-10
lines changed

pandas/core/nanops.py

+17-4
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import numpy as np
44

55
from pandas.core.common import isnull, notnull
6+
import pandas.core.common as com
67
import pandas._tseries as lib
78

89
try:
@@ -18,7 +19,7 @@ def _bottleneck_switch(bn_name, alt, **kwargs):
1819
bn_func = None
1920
def f(values, axis=None, skipna=True):
2021
try:
21-
if _USE_BOTTLENECK and skipna:
22+
if _USE_BOTTLENECK and skipna and values.dtype != np.object_:
2223
result = bn_func(values, axis=axis, **kwargs)
2324
# prefer to treat inf/-inf as NA
2425
if _has_infs(result):
@@ -62,7 +63,7 @@ def _nanmean(values, axis=None, skipna=True):
6263
values = values.copy()
6364
np.putmask(values, mask, 0)
6465

65-
the_sum = values.sum(axis)
66+
the_sum = _ensure_numeric(values.sum(axis))
6667
count = _get_counts(mask, axis)
6768

6869
if axis is not None:
@@ -101,8 +102,8 @@ def _nanvar(values, axis=None, skipna=True, ddof=1):
101102
values = values.copy()
102103
np.putmask(values, mask, 0)
103104

104-
X = values.sum(axis)
105-
XX = (values ** 2).sum(axis)
105+
X = _ensure_numeric(values.sum(axis))
106+
XX = _ensure_numeric((values ** 2).sum(axis))
106107
return (XX - X ** 2 / count) / (count - ddof)
107108

108109
def _nanmin(values, axis=None, skipna=True):
@@ -307,6 +308,18 @@ def nancov(a, b):
307308

308309
return np.cov(a, b)[0, 1]
309310

311+
def _ensure_numeric(x):
312+
if isinstance(x, np.ndarray):
313+
if x.dtype == np.object_:
314+
x = x.astype(np.float64)
315+
elif not (com.is_float(x) or com.is_integer(x)):
316+
try:
317+
x = float(x)
318+
except Exception:
319+
raise TypeError('Could not convert %s to numeric' % str(x))
320+
321+
return x
322+
310323
# NA-friendly array comparisons
311324

312325
import operator

pandas/tests/test_frame.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -3750,15 +3750,20 @@ def test_stat_operators_attempt_obj_array(self):
37503750
'c': [0.00031111847529610595, 0.0014902627951905339,
37513751
-0.00094099200035979691]
37523752
}
3753-
df = DataFrame(data, index=['foo', 'bar', 'baz'],
3753+
df1 = DataFrame(data, index=['foo', 'bar', 'baz'],
37543754
dtype='O')
37553755
methods = ['sum', 'mean', 'prod', 'var', 'std', 'skew', 'min', 'max']
37563756

3757-
for meth in methods:
3758-
self.assert_(df.values.dtype == np.object_)
3759-
result = getattr(df, meth)(1)
3760-
expected = getattr(df.astype('f8'), meth)(1)
3761-
assert_series_equal(result, expected)
3757+
# GH #676
3758+
df2 = DataFrame({0: [np.nan, 2], 1: [np.nan, 3],
3759+
2: [np.nan, 4]}, dtype=object)
3760+
3761+
for df in [df1, df2]:
3762+
for meth in methods:
3763+
self.assert_(df.values.dtype == np.object_)
3764+
result = getattr(df, meth)(1)
3765+
expected = getattr(df.astype('f8'), meth)(1)
3766+
assert_series_equal(result, expected)
37623767

37633768
def test_mean(self):
37643769
self._check_stat_op('mean', np.mean)

0 commit comments

Comments
 (0)