Skip to content

Commit 355e322

Browse files
ihsansecer authored and WillAyd committed
ENH: Exclude nuisance columns from result of window functions (#27044)
1 parent 46adc5b commit 355e322

File tree

3 files changed

+59
-23
lines changed

3 files changed

+59
-23
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -854,6 +854,7 @@ Groupby/resample/rolling
854854
- Bug in :meth:`pandas.core.groupby.GroupBy.agg` where incorrect results are returned for uint64 columns. (:issue:`26310`)
855855
- Bug in :meth:`pandas.core.window.Rolling.median` and :meth:`pandas.core.window.Rolling.quantile` where MemoryError is raised with empty window (:issue:`26005`)
856856
- Bug in :meth:`pandas.core.window.Rolling.median` and :meth:`pandas.core.window.Rolling.quantile` where incorrect results are returned with ``closed='left'`` and ``closed='neither'`` (:issue:`26005`)
857+
- :class:`pandas.core.window.Rolling`, :class:`pandas.core.window.Window` and :class:`pandas.core.window.EWM` functions now exclude nuisance columns from results instead of raising errors; a ``DataError`` is raised only if all columns are nuisance columns (:issue:`12537`)
857858

858859
Reshaping
859860
^^^^^^^^^

pandas/core/window.py

+51-15
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@
2222
ABCDataFrame, ABCDateOffset, ABCDatetimeIndex, ABCPeriodIndex, ABCSeries,
2323
ABCTimedeltaIndex)
2424

25-
from pandas.core.base import PandasObject, SelectionMixin
25+
from pandas.core.base import DataError, PandasObject, SelectionMixin
2626
import pandas.core.common as com
2727
from pandas.core.generic import _shared_docs
2828
from pandas.core.groupby.base import GroupByMixin
@@ -243,7 +243,7 @@ def _wrap_result(self, result, block=None, obj=None):
243243
return type(obj)(result, index=index, columns=block.columns)
244244
return result
245245

246-
def _wrap_results(self, results, blocks, obj):
246+
def _wrap_results(self, results, blocks, obj, exclude=None):
247247
"""
248248
Wrap the results.
249249
@@ -252,6 +252,7 @@ def _wrap_results(self, results, blocks, obj):
252252
results : list of ndarrays
253253
blocks : list of blocks
254254
obj : conformed data (may be resampled)
255+
exclude : list of columns to exclude, defaults to None
255256
"""
256257

257258
from pandas import Series, concat
@@ -285,6 +286,13 @@ def _wrap_results(self, results, blocks, obj):
285286
indexer = columns.get_indexer(selection.tolist() + [name])
286287
columns = columns.take(sorted(indexer))
287288

289+
# exclude nuisance columns so that they are not reindexed
290+
if exclude is not None and exclude:
291+
columns = [c for c in columns if c not in exclude]
292+
293+
if not columns:
294+
raise DataError('No numeric types to aggregate')
295+
288296
if not len(final):
289297
return obj.astype('float64')
290298
return concat(final, axis=1).reindex(columns=columns, copy=False)
@@ -672,13 +680,21 @@ def _apply_window(self, mean=True, **kwargs):
672680
center = self.center
673681

674682
blocks, obj, index = self._create_blocks()
683+
block_list = list(blocks)
684+
675685
results = []
676-
for b in blocks:
686+
exclude = []
687+
for i, b in enumerate(blocks):
677688
try:
678689
values = self._prep_values(b.values)
679-
except TypeError:
680-
results.append(b.values.copy())
681-
continue
690+
691+
except (TypeError, NotImplementedError):
692+
if isinstance(obj, ABCDataFrame):
693+
exclude.extend(b.columns)
694+
del block_list[i]
695+
continue
696+
else:
697+
raise DataError('No numeric types to aggregate')
682698

683699
if values.size == 0:
684700
results.append(values.copy())
@@ -700,7 +716,7 @@ def f(arg, *args, **kwargs):
700716
result = self._center_window(result, window)
701717
results.append(result)
702718

703-
return self._wrap_results(results, blocks, obj)
719+
return self._wrap_results(results, block_list, obj, exclude)
704720

705721
_agg_see_also_doc = dedent("""
706722
See Also
@@ -843,10 +859,22 @@ def _apply(self, func, name=None, window=None, center=None,
843859
check_minp = _use_window
844860

845861
blocks, obj, index = self._create_blocks()
862+
block_list = list(blocks)
846863
index, indexi = self._get_index(index=index)
864+
847865
results = []
848-
for b in blocks:
849-
values = self._prep_values(b.values)
866+
exclude = []
867+
for i, b in enumerate(blocks):
868+
try:
869+
values = self._prep_values(b.values)
870+
871+
except (TypeError, NotImplementedError):
872+
if isinstance(obj, ABCDataFrame):
873+
exclude.extend(b.columns)
874+
del block_list[i]
875+
continue
876+
else:
877+
raise DataError('No numeric types to aggregate')
850878

851879
if values.size == 0:
852880
results.append(values.copy())
@@ -892,7 +920,7 @@ def calc(x):
892920

893921
results.append(result)
894922

895-
return self._wrap_results(results, blocks, obj)
923+
return self._wrap_results(results, block_list, obj, exclude)
896924

897925

898926
class _Rolling_and_Expanding(_Rolling):
@@ -2291,13 +2319,21 @@ def _apply(self, func, **kwargs):
22912319
y : same type as input argument
22922320
"""
22932321
blocks, obj, index = self._create_blocks()
2322+
block_list = list(blocks)
2323+
22942324
results = []
2295-
for b in blocks:
2325+
exclude = []
2326+
for i, b in enumerate(blocks):
22962327
try:
22972328
values = self._prep_values(b.values)
2298-
except TypeError:
2299-
results.append(b.values.copy())
2300-
continue
2329+
2330+
except (TypeError, NotImplementedError):
2331+
if isinstance(obj, ABCDataFrame):
2332+
exclude.extend(b.columns)
2333+
del block_list[i]
2334+
continue
2335+
else:
2336+
raise DataError('No numeric types to aggregate')
23012337

23022338
if values.size == 0:
23032339
results.append(values.copy())
@@ -2316,7 +2352,7 @@ def func(arg):
23162352

23172353
results.append(np.apply_along_axis(func, self.axis, values))
23182354

2319-
return self._wrap_results(results, blocks, obj)
2355+
return self._wrap_results(results, block_list, obj, exclude)
23202356

23212357
@Substitution(name='ewm')
23222358
@Appender(_doc_template)

pandas/tests/test_window.py

+7-8
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
import pandas as pd
1515
from pandas import (
1616
DataFrame, Index, Series, Timestamp, bdate_range, concat, isna, notna)
17-
from pandas.core.base import SpecificationError
17+
from pandas.core.base import DataError, SpecificationError
1818
from pandas.core.sorting import safe_sort
1919
import pandas.core.window as rwindow
2020
import pandas.util.testing as tm
@@ -118,9 +118,11 @@ def tests_skip_nuisance(self):
118118
def test_skip_sum_object_raises(self):
119119
df = DataFrame({'A': range(5), 'B': range(5, 10), 'C': 'foo'})
120120
r = df.rolling(window=3)
121-
122-
with pytest.raises(TypeError, match='cannot handle this type'):
123-
r.sum()
121+
result = r.sum()
122+
expected = DataFrame({'A': [np.nan, np.nan, 3, 6, 9],
123+
'B': [np.nan, np.nan, 18, 21, 24]},
124+
columns=list('AB'))
125+
tm.assert_frame_equal(result, expected)
124126

125127
def test_agg(self):
126128
df = DataFrame({'A': range(5), 'B': range(0, 10, 2)})
@@ -1069,15 +1071,12 @@ class DatetimeLike(Dtype):
10691071
def check_dtypes(self, f, f_name, d, d_name, exp):
10701072

10711073
roll = d.rolling(window=self.window)
1072-
10731074
if f_name == 'count':
10741075
result = f(roll)
10751076
tm.assert_almost_equal(result, exp)
10761077

10771078
else:
1078-
1079-
# other methods not Implemented ATM
1080-
with pytest.raises(NotImplementedError):
1079+
with pytest.raises(DataError):
10811080
f(roll)
10821081

10831082

0 commit comments

Comments
 (0)