diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 1980e00f1073d..a94cd041448e5 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -769,6 +769,7 @@ Groupby/resample/rolling - Bug in :meth:`pandas.core.groupby.GroupBy.agg` where incorrect results are returned for uint64 columns. (:issue:`26310`) - Bug in :meth:`pandas.core.window.Rolling.median` and :meth:`pandas.core.window.Rolling.quantile` where MemoryError is raised with empty window (:issue:`26005`) - Bug in :meth:`pandas.core.window.Rolling.median` and :meth:`pandas.core.window.Rolling.quantile` where incorrect results are returned with ``closed='left'`` and ``closed='neither'`` (:issue:`26005`) +- Improved :class:`pandas.core.window.Rolling`, :class:`pandas.core.window.Window` and :class:`pandas.core.window.EWM` functions to exclude nuisance columns from results instead of raising errors; a ``DataError`` is now raised only if all columns are nuisance columns (:issue:`12537`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/window.py b/pandas/core/window.py index 2b3cc4f0bf00a..8f888ba510b0e 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -22,7 +22,7 @@ ABCDataFrame, ABCDateOffset, ABCDatetimeIndex, ABCPeriodIndex, ABCSeries, ABCTimedeltaIndex) -from pandas.core.base import PandasObject, SelectionMixin +from pandas.core.base import DataError, PandasObject, SelectionMixin import pandas.core.common as com from pandas.core.generic import _shared_docs from pandas.core.groupby.base import GroupByMixin @@ -243,7 +243,7 @@ def _wrap_result(self, result, block=None, obj=None): return type(obj)(result, index=index, columns=block.columns) return result - def _wrap_results(self, results, blocks, obj): + def _wrap_results(self, results, blocks, obj, exclude=None): """ Wrap the results. 
@@ -252,6 +252,7 @@ def _wrap_results(self, results, blocks, obj): results : list of ndarrays blocks : list of blocks obj : conformed data (may be resampled) + exclude: list of columns to exclude, default to None """ from pandas import Series, concat @@ -285,6 +286,13 @@ def _wrap_results(self, results, blocks, obj): indexer = columns.get_indexer(selection.tolist() + [name]) columns = columns.take(sorted(indexer)) + # exclude nuisance columns so that they are not reindexed + if exclude is not None and exclude: + columns = [c for c in columns if c not in exclude] + + if not columns: + raise DataError('No numeric types to aggregate') + if not len(final): return obj.astype('float64') return concat(final, axis=1).reindex(columns=columns, copy=False) @@ -672,13 +680,21 @@ def _apply_window(self, mean=True, **kwargs): center = self.center blocks, obj, index = self._create_blocks() + block_list = list(blocks) + results = [] - for b in blocks: + exclude = [] + for i, b in enumerate(blocks): try: values = self._prep_values(b.values) - except TypeError: - results.append(b.values.copy()) - continue + + except (TypeError, NotImplementedError): + if isinstance(obj, ABCDataFrame): + exclude.extend(b.columns) + del block_list[i] + continue + else: + raise DataError('No numeric types to aggregate') if values.size == 0: results.append(values.copy()) @@ -700,7 +716,7 @@ def f(arg, *args, **kwargs): result = self._center_window(result, window) results.append(result) - return self._wrap_results(results, blocks, obj) + return self._wrap_results(results, block_list, obj, exclude) _agg_see_also_doc = dedent(""" See Also @@ -843,10 +859,22 @@ def _apply(self, func, name=None, window=None, center=None, check_minp = _use_window blocks, obj, index = self._create_blocks() + block_list = list(blocks) index, indexi = self._get_index(index=index) + results = [] - for b in blocks: - values = self._prep_values(b.values) + exclude = [] + for i, b in enumerate(blocks): + try: + values = 
self._prep_values(b.values) + + except (TypeError, NotImplementedError): + if isinstance(obj, ABCDataFrame): + exclude.extend(b.columns) + del block_list[i] + continue + else: + raise DataError('No numeric types to aggregate') if values.size == 0: results.append(values.copy()) @@ -892,7 +920,7 @@ def calc(x): results.append(result) - return self._wrap_results(results, blocks, obj) + return self._wrap_results(results, block_list, obj, exclude) class _Rolling_and_Expanding(_Rolling): @@ -2291,13 +2319,21 @@ def _apply(self, func, **kwargs): y : same type as input argument """ blocks, obj, index = self._create_blocks() + block_list = list(blocks) + results = [] - for b in blocks: + exclude = [] + for i, b in enumerate(blocks): try: values = self._prep_values(b.values) - except TypeError: - results.append(b.values.copy()) - continue + + except (TypeError, NotImplementedError): + if isinstance(obj, ABCDataFrame): + exclude.extend(b.columns) + del block_list[i] + continue + else: + raise DataError('No numeric types to aggregate') if values.size == 0: results.append(values.copy()) @@ -2316,7 +2352,7 @@ def func(arg): results.append(np.apply_along_axis(func, self.axis, values)) - return self._wrap_results(results, blocks, obj) + return self._wrap_results(results, block_list, obj, exclude) @Substitution(name='ewm') @Appender(_doc_template) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 4dfdd1c96728b..889754841a078 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -14,7 +14,7 @@ import pandas as pd from pandas import ( DataFrame, Index, Series, Timestamp, bdate_range, concat, isna, notna) -from pandas.core.base import SpecificationError +from pandas.core.base import DataError, SpecificationError from pandas.core.sorting import safe_sort import pandas.core.window as rwindow import pandas.util.testing as tm @@ -118,9 +118,11 @@ def tests_skip_nuisance(self): def test_skip_sum_object_raises(self): df = DataFrame({'A': 
range(5), 'B': range(5, 10), 'C': 'foo'}) r = df.rolling(window=3) - - with pytest.raises(TypeError, match='cannot handle this type'): - r.sum() + result = r.sum() + expected = DataFrame({'A': [np.nan, np.nan, 3, 6, 9], + 'B': [np.nan, np.nan, 18, 21, 24]}, + columns=list('AB')) + tm.assert_frame_equal(result, expected) def test_agg(self): df = DataFrame({'A': range(5), 'B': range(0, 10, 2)}) @@ -1069,15 +1071,12 @@ class DatetimeLike(Dtype): def check_dtypes(self, f, f_name, d, d_name, exp): roll = d.rolling(window=self.window) - if f_name == 'count': result = f(roll) tm.assert_almost_equal(result, exp) else: - - # other methods not Implemented ATM - with pytest.raises(NotImplementedError): + with pytest.raises(DataError): f(roll)