Skip to content

ENH: Exclude nuisance columns from result of window functions #27044

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jul 1, 2019
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,7 @@ Groupby/Resample/Rolling
- Bug in :meth:`pandas.core.groupby.GroupBy.agg` where incorrect results are returned for uint64 columns. (:issue:`26310`)
- Bug in :meth:`pandas.core.window.Rolling.median` and :meth:`pandas.core.window.Rolling.quantile` where MemoryError is raised with empty window (:issue:`26005`)
- Bug in :meth:`pandas.core.window.Rolling.median` and :meth:`pandas.core.window.Rolling.quantile` where incorrect results are returned with ``closed='left'`` and ``closed='neither'`` (:issue:`26005`)
- Improved :class:`pandas.core.window.Rolling`, :class:`pandas.core.window.Window` and :class:`pandas.core.window.EWM` functions to exclude nuisance columns from the result instead of raising an error; if all columns are nuisance columns, an empty ``Series`` is returned (:issue:`12537`)

Reshaping
^^^^^^^^^
Expand Down
49 changes: 38 additions & 11 deletions pandas/core/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def _wrap_result(self, result, block=None, obj=None):
return type(obj)(result, index=index, columns=block.columns)
return result

def _wrap_results(self, results, blocks, obj):
def _wrap_results(self, results, blocks, obj, exclude=None):
"""
Wrap the results.

Expand All @@ -252,6 +252,7 @@ def _wrap_results(self, results, blocks, obj):
results : list of ndarrays
blocks : list of blocks
obj : conformed data (may be resampled)
exclude : list of columns to exclude, default None
"""

from pandas import Series, concat
Expand Down Expand Up @@ -285,6 +286,13 @@ def _wrap_results(self, results, blocks, obj):
indexer = columns.get_indexer(selection.tolist() + [name])
columns = columns.take(sorted(indexer))

# exclude nuisance columns from final result
if exclude is not None and exclude:
columns = [c for c in columns if c not in exclude]

if not columns:
return Series()

if not len(final):
return obj.astype('float64')
return concat(final, axis=1).reindex(columns=columns, copy=False)
Expand Down Expand Up @@ -673,12 +681,17 @@ def _apply_window(self, mean=True, **kwargs):

blocks, obj, index = self._create_blocks()
results = []
exclude = []
for b in blocks:
try:
values = self._prep_values(b.values)
except TypeError:
results.append(b.values.copy())
continue
except (TypeError, NotImplementedError):
if hasattr(b, 'columns'):
exclude.extend(b.columns)
continue
else:
from pandas import Series
return Series()

if values.size == 0:
results.append(values.copy())
Expand All @@ -700,7 +713,7 @@ def f(arg, *args, **kwargs):
result = self._center_window(result, window)
results.append(result)

return self._wrap_results(results, blocks, obj)
return self._wrap_results(results, blocks, obj, exclude)

_agg_see_also_doc = dedent("""
See Also
Expand Down Expand Up @@ -845,8 +858,17 @@ def _apply(self, func, name=None, window=None, center=None,
blocks, obj, index = self._create_blocks()
index, indexi = self._get_index(index=index)
results = []
exclude = []
for b in blocks:
values = self._prep_values(b.values)
try:
values = self._prep_values(b.values)
except (TypeError, NotImplementedError):
if hasattr(b, 'columns'):
exclude.extend(b.columns)
continue
else:
from pandas import Series
return Series()

if values.size == 0:
results.append(values.copy())
Expand Down Expand Up @@ -892,7 +914,7 @@ def calc(x):

results.append(result)

return self._wrap_results(results, blocks, obj)
return self._wrap_results(results, blocks, obj, exclude)


class _Rolling_and_Expanding(_Rolling):
Expand Down Expand Up @@ -2292,12 +2314,17 @@ def _apply(self, func, **kwargs):
"""
blocks, obj, index = self._create_blocks()
results = []
exclude = []
for b in blocks:
try:
values = self._prep_values(b.values)
except TypeError:
results.append(b.values.copy())
continue
except (TypeError, NotImplementedError):
if hasattr(b, 'columns'):
exclude.extend(b.columns)
continue
else:
from pandas import Series
return Series()

if values.size == 0:
results.append(values.copy())
Expand All @@ -2316,7 +2343,7 @@ def func(arg):

results.append(np.apply_along_axis(func, self.axis, values))

return self._wrap_results(results, blocks, obj)
return self._wrap_results(results, blocks, obj, exclude)

@Substitution(name='ewm')
@Appender(_doc_template)
Expand Down
16 changes: 8 additions & 8 deletions pandas/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,11 @@ def tests_skip_nuisance(self):
def test_skip_sum_object_raises(self):
df = DataFrame({'A': range(5), 'B': range(5, 10), 'C': 'foo'})
r = df.rolling(window=3)

with pytest.raises(TypeError, match='cannot handle this type'):
r.sum()
result = r.sum()
expected = DataFrame({'A': [np.nan, np.nan, 3, 6, 9],
'B': [np.nan, np.nan, 18, 21, 24]},
columns=list('AB'))
tm.assert_frame_equal(result, expected)

def test_agg(self):
df = DataFrame({'A': range(5), 'B': range(0, 10, 2)})
Expand Down Expand Up @@ -1069,16 +1071,14 @@ class DatetimeLike(Dtype):
def check_dtypes(self, f, f_name, d, d_name, exp):

roll = d.rolling(window=self.window)
result = f(roll)

if f_name == 'count':
result = f(roll)
tm.assert_almost_equal(result, exp)

else:

# other methods not Implemented ATM
with pytest.raises(NotImplementedError):
f(roll)
exp = Series()
tm.assert_equal(result, exp)


class TestDtype_timedelta(DatetimeLike):
Expand Down