diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index de15fa29de8dd..48a1924d84041 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -486,3 +486,5 @@ Bug Fixes - Fixed bug with reading CSV files from Amazon S3 on python 3 raising a TypeError (:issue:`9452`) - Bug in the Google BigQuery reader where the 'jobComplete' key may be present but False in the query results (:issue:`8728`) - Bug in ``Series.values_counts`` with excluding ``NaN`` for categorical type ``Series`` with ``dropna=True`` (:issue:`9443`) + +- Fixed missing ``numeric_only`` option for ``DataFrame.std/var/sem`` (:issue:`9201`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 67a9ab67c0a98..feebb3efaa9d0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4090,7 +4090,7 @@ def _make_stat_function_ddof(name, desc, f): @Substitution(outname=name, desc=desc) @Appender(_num_doc) def stat_func(self, axis=None, skipna=None, level=None, ddof=1, - **kwargs): + numeric_only=None, **kwargs): if skipna is None: skipna = True if axis is None: @@ -4099,6 +4099,7 @@ def stat_func(self, axis=None, skipna=None, level=None, ddof=1, return self._agg_by_level(name, axis=axis, level=level, skipna=skipna, ddof=ddof) return self._reduce(f, name, axis=axis, + numeric_only=numeric_only, skipna=skipna, ddof=ddof) stat_func.__name__ = name return stat_func diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 602850d859d27..f68f4f9037d97 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -332,7 +332,7 @@ def _get_counts_nanvar(mask, axis, ddof): def _nanvar(values, axis=None, skipna=True, ddof=1): # private nanvar calculator mask = isnull(values) - if not is_floating_dtype(values): + if is_any_int_dtype(values): values = values.astype('f8') count, d = _get_counts_nanvar(mask, axis, ddof) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index e1687fd6a67cf..1acad4cf978a8 100644 ---
a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -11503,6 +11503,32 @@ def test_var_std(self): self.assertFalse((result < 0).any()) nanops._USE_BOTTLENECK = True + def test_numeric_only_flag(self): + # GH #9201 + methods = ['sem', 'var', 'std'] + df1 = DataFrame(np.random.randn(5, 3), columns=['foo', 'bar', 'baz']) + # set one entry to a number in str format + df1.ix[0, 'foo'] = '100' + + df2 = DataFrame(np.random.randn(5, 3), columns=['foo', 'bar', 'baz']) + # set one entry to a non-number str + df2.ix[0, 'foo'] = 'a' + + for meth in methods: + result = getattr(df1, meth)(axis=1, numeric_only=True) + expected = getattr(df1[['bar', 'baz']], meth)(axis=1) + assert_series_equal(expected, result) + + result = getattr(df2, meth)(axis=1, numeric_only=True) + expected = getattr(df2[['bar', 'baz']], meth)(axis=1) + assert_series_equal(expected, result) + + assertRaisesRegexp(TypeError, 'float', + getattr(df1, meth), axis=1, numeric_only=False) + + assertRaisesRegexp(TypeError, 'float', + getattr(df2, meth), axis=1, numeric_only=False) + def test_sem(self): alt = lambda x: np.std(x, ddof=1)/np.sqrt(len(x)) self._check_stat_op('sem', alt)