Skip to content

ENH: Exclude nuisance columns from result of window functions #27044

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jul 1, 2019
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,7 @@ Groupby/Resample/Rolling
- Bug in :meth:`pandas.core.groupby.GroupBy.agg` where incorrect results are returned for uint64 columns. (:issue:`26310`)
- Bug in :meth:`pandas.core.window.Rolling.median` and :meth:`pandas.core.window.Rolling.quantile` where MemoryError is raised with empty window (:issue:`26005`)
- Bug in :meth:`pandas.core.window.Rolling.median` and :meth:`pandas.core.window.Rolling.quantile` where incorrect results are returned with ``closed='left'`` and ``closed='neither'`` (:issue:`26005`)
- Improved :class:`pandas.core.window.Rolling`, :class:`pandas.core.window.Window` and :class:`pandas.core.window.EWM` functions to exclude nuisance columns from results instead of raising errors, and to raise a ``DataError`` only if all columns are nuisance (:issue:`12537`)

Reshaping
^^^^^^^^^
Expand Down
48 changes: 36 additions & 12 deletions pandas/core/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
ABCDataFrame, ABCDateOffset, ABCDatetimeIndex, ABCPeriodIndex, ABCSeries,
ABCTimedeltaIndex)

from pandas.core.base import PandasObject, SelectionMixin
from pandas.core.base import DataError, PandasObject, SelectionMixin
import pandas.core.common as com
from pandas.core.generic import _shared_docs
from pandas.core.groupby.base import GroupByMixin
Expand Down Expand Up @@ -243,7 +243,7 @@ def _wrap_result(self, result, block=None, obj=None):
return type(obj)(result, index=index, columns=block.columns)
return result

def _wrap_results(self, results, blocks, obj):
def _wrap_results(self, results, blocks, obj, exclude=None):
"""
Wrap the results.

Expand All @@ -252,6 +252,7 @@ def _wrap_results(self, results, blocks, obj):
results : list of ndarrays
blocks : list of blocks
obj : conformed data (may be resampled)
exclude : list of columns to exclude, defaults to None
"""

from pandas import Series, concat
Expand Down Expand Up @@ -285,6 +286,13 @@ def _wrap_results(self, results, blocks, obj):
indexer = columns.get_indexer(selection.tolist() + [name])
columns = columns.take(sorted(indexer))

# exclude nuisance columns from final result
if exclude is not None and exclude:
columns = [c for c in columns if c not in exclude]

if not columns:
raise DataError('No numeric types to aggregate')

if not len(final):
return obj.astype('float64')
return concat(final, axis=1).reindex(columns=columns, copy=False)
Expand Down Expand Up @@ -673,12 +681,16 @@ def _apply_window(self, mean=True, **kwargs):

blocks, obj, index = self._create_blocks()
results = []
exclude = []
for b in blocks:
try:
values = self._prep_values(b.values)
except TypeError:
results.append(b.values.copy())
continue
except (TypeError, NotImplementedError):
if hasattr(b, 'columns'):
exclude.extend(b.columns)
continue
else:
raise DataError('No numeric types to aggregate')

if values.size == 0:
results.append(values.copy())
Expand All @@ -700,7 +712,7 @@ def f(arg, *args, **kwargs):
result = self._center_window(result, window)
results.append(result)

return self._wrap_results(results, blocks, obj)
return self._wrap_results(results, blocks, obj, exclude)

_agg_see_also_doc = dedent("""
See Also
Expand Down Expand Up @@ -845,8 +857,16 @@ def _apply(self, func, name=None, window=None, center=None,
blocks, obj, index = self._create_blocks()
index, indexi = self._get_index(index=index)
results = []
exclude = []
for b in blocks:
values = self._prep_values(b.values)
try:
values = self._prep_values(b.values)
except (TypeError, NotImplementedError):
if hasattr(b, 'columns'):
exclude.extend(b.columns)
continue
else:
raise DataError('No numeric types to aggregate')

if values.size == 0:
results.append(values.copy())
Expand Down Expand Up @@ -892,7 +912,7 @@ def calc(x):

results.append(result)

return self._wrap_results(results, blocks, obj)
return self._wrap_results(results, blocks, obj, exclude)


class _Rolling_and_Expanding(_Rolling):
Expand Down Expand Up @@ -2292,12 +2312,16 @@ def _apply(self, func, **kwargs):
"""
blocks, obj, index = self._create_blocks()
results = []
exclude = []
for b in blocks:
try:
values = self._prep_values(b.values)
except TypeError:
results.append(b.values.copy())
continue
except (TypeError, NotImplementedError):
if hasattr(b, 'columns'):
exclude.extend(b.columns)
continue
else:
raise DataError('No numeric types to aggregate')

if values.size == 0:
results.append(values.copy())
Expand All @@ -2316,7 +2340,7 @@ def func(arg):

results.append(np.apply_along_axis(func, self.axis, values))

return self._wrap_results(results, blocks, obj)
return self._wrap_results(results, blocks, obj, exclude)

@Substitution(name='ewm')
@Appender(_doc_template)
Expand Down
15 changes: 7 additions & 8 deletions pandas/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import pandas as pd
from pandas import (
DataFrame, Index, Series, Timestamp, bdate_range, concat, isna, notna)
from pandas.core.base import SpecificationError
from pandas.core.base import DataError, SpecificationError
from pandas.core.sorting import safe_sort
import pandas.core.window as rwindow
import pandas.util.testing as tm
Expand Down Expand Up @@ -118,9 +118,11 @@ def tests_skip_nuisance(self):
def test_skip_sum_object_raises(self):
df = DataFrame({'A': range(5), 'B': range(5, 10), 'C': 'foo'})
r = df.rolling(window=3)

with pytest.raises(TypeError, match='cannot handle this type'):
r.sum()
result = r.sum()
expected = DataFrame({'A': [np.nan, np.nan, 3, 6, 9],
'B': [np.nan, np.nan, 18, 21, 24]},
columns=list('AB'))
tm.assert_frame_equal(result, expected)

def test_agg(self):
df = DataFrame({'A': range(5), 'B': range(0, 10, 2)})
Expand Down Expand Up @@ -1069,15 +1071,12 @@ class DatetimeLike(Dtype):
def check_dtypes(self, f, f_name, d, d_name, exp):

roll = d.rolling(window=self.window)

if f_name == 'count':
result = f(roll)
tm.assert_almost_equal(result, exp)

else:

# other methods not Implemented ATM
with pytest.raises(NotImplementedError):
with pytest.raises(DataError):
f(roll)


Expand Down