Skip to content

Commit 3bf35c6

Browse files
mroeschke authored and jreback committed
REF: pandas/core/window.py into multiple files (#27736)
1 parent 9724ace commit 3bf35c6

File tree

12 files changed

+979
-955
lines changed

12 files changed

+979
-955
lines changed

doc/source/reference/window.rst

+6-1
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,15 @@
55
======
66
Window
77
======
8-
.. currentmodule:: pandas.core.window
98

109
Rolling objects are returned by ``.rolling`` calls: :func:`pandas.DataFrame.rolling`, :func:`pandas.Series.rolling`, etc.
1110
Expanding objects are returned by ``.expanding`` calls: :func:`pandas.DataFrame.expanding`, :func:`pandas.Series.expanding`, etc.
1211
EWM objects are returned by ``.ewm`` calls: :func:`pandas.DataFrame.ewm`, :func:`pandas.Series.ewm`, etc.
1312

1413
Standard moving window functions
1514
--------------------------------
15+
.. currentmodule:: pandas.core.window.rolling
16+
1617
.. autosummary::
1718
:toctree: api/
1819

@@ -38,6 +39,8 @@ Standard moving window functions
3839

3940
Standard expanding window functions
4041
-----------------------------------
42+
.. currentmodule:: pandas.core.window.expanding
43+
4144
.. autosummary::
4245
:toctree: api/
4346

@@ -59,6 +62,8 @@ Standard expanding window functions
5962

6063
Exponentially-weighted moving window functions
6164
----------------------------------------------
65+
.. currentmodule:: pandas.core.window.ewm
66+
6267
.. autosummary::
6368
:toctree: api/
6469

pandas/core/generic.py

+20-9
Original file line numberDiff line numberDiff line change
@@ -10683,9 +10683,9 @@ def _add_series_or_dataframe_operations(cls):
1068310683
the doc strings again.
1068410684
"""
1068510685

10686-
from pandas.core import window as rwindow
10686+
from pandas.core.window import EWM, Expanding, Rolling, Window
1068710687

10688-
@Appender(rwindow.rolling.__doc__)
10688+
@Appender(Rolling.__doc__)
1068910689
def rolling(
1069010690
self,
1069110691
window,
@@ -10697,7 +10697,20 @@ def rolling(
1069710697
closed=None,
1069810698
):
1069910699
axis = self._get_axis_number(axis)
10700-
return rwindow.rolling(
10700+
10701+
if win_type is not None:
10702+
return Window(
10703+
self,
10704+
window=window,
10705+
min_periods=min_periods,
10706+
center=center,
10707+
win_type=win_type,
10708+
on=on,
10709+
axis=axis,
10710+
closed=closed,
10711+
)
10712+
10713+
return Rolling(
1070110714
self,
1070210715
window=window,
1070310716
min_periods=min_periods,
@@ -10710,16 +10723,14 @@ def rolling(
1071010723

1071110724
cls.rolling = rolling
1071210725

10713-
@Appender(rwindow.expanding.__doc__)
10726+
@Appender(Expanding.__doc__)
1071410727
def expanding(self, min_periods=1, center=False, axis=0):
1071510728
axis = self._get_axis_number(axis)
10716-
return rwindow.expanding(
10717-
self, min_periods=min_periods, center=center, axis=axis
10718-
)
10729+
return Expanding(self, min_periods=min_periods, center=center, axis=axis)
1071910730

1072010731
cls.expanding = expanding
1072110732

10722-
@Appender(rwindow.ewm.__doc__)
10733+
@Appender(EWM.__doc__)
1072310734
def ewm(
1072410735
self,
1072510736
com=None,
@@ -10732,7 +10743,7 @@ def ewm(
1073210743
axis=0,
1073310744
):
1073410745
axis = self._get_axis_number(axis)
10735-
return rwindow.ewm(
10746+
return EWM(
1073610747
self,
1073710748
com=com,
1073810749
span=span,

pandas/core/window/__init__.py

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from pandas.core.window.ewm import EWM # noqa:F401
2+
from pandas.core.window.expanding import Expanding, ExpandingGroupby # noqa:F401
3+
from pandas.core.window.rolling import Rolling, RollingGroupby, Window # noqa:F401

pandas/core/window/common.py

+276
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,276 @@
1+
"""Common utility functions for rolling operations"""
2+
from collections import defaultdict
3+
import warnings
4+
5+
import numpy as np
6+
7+
from pandas.core.dtypes.common import is_integer
8+
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
9+
10+
import pandas.core.common as com
11+
from pandas.core.generic import _shared_docs
12+
from pandas.core.groupby.base import GroupByMixin
13+
from pandas.core.index import MultiIndex
14+
15+
_shared_docs = dict(**_shared_docs)
16+
_doc_template = """
17+
Returns
18+
-------
19+
Series or DataFrame
20+
Return type is determined by the caller.
21+
22+
See Also
23+
--------
24+
Series.%(name)s : Series %(name)s.
25+
DataFrame.%(name)s : DataFrame %(name)s.
26+
"""
27+
28+
29+
class _GroupByMixin(GroupByMixin):
    """
    Mixin that routes window operations through a groupby object.
    """

    def __init__(self, obj, *args, **kwargs):
        # "parent" is consumed here so it is not forwarded to the next
        # __init__ in the MRO; its value is not used.
        kwargs.pop("parent", None)
        groupby = kwargs.pop("groupby", None)
        if groupby is None:
            # No explicit groupby given: the first positional argument is the
            # groupby itself, and the object to operate on is its ``.obj``.
            groupby, obj = obj, obj.obj
        self._groupby = groupby
        self._groupby.mutated = True
        self._groupby.grouper.mutated = True
        super().__init__(obj, *args, **kwargs)

    count = GroupByMixin._dispatch("count")
    corr = GroupByMixin._dispatch("corr", other=None, pairwise=None)
    cov = GroupByMixin._dispatch("cov", other=None, pairwise=None)

    def _apply(
        self, func, name=None, window=None, center=None, check_minp=None, **kwargs
    ):
        """
        Run the requested operation once per group via ``self._groupby.apply``.

        Only ``name`` and ``**kwargs`` are used; ``func``, ``window``,
        ``center`` and ``check_minp`` are accepted and ignored.
        """

        def f(x, name=name, *args):
            windowed = self._shallow_copy(x)

            if not isinstance(name, str):
                # Non-string ``name`` is handed to ``apply`` on the copy.
                return windowed.apply(name, *args, **kwargs)

            # String ``name``: call the method of that name on the copy.
            return getattr(windowed, name)(*args, **kwargs)

        return self._groupby.apply(f)
65+
66+
67+
def _flex_binary_moment(arg1, arg2, f, pairwise=False):
    """
    Apply the binary function ``f`` to ``arg1`` and ``arg2``, dispatching on
    whether each argument is an ndarray/Series or a DataFrame.

    Parameters
    ----------
    arg1, arg2 : np.ndarray, Series or DataFrame
    f : callable
        Called with two positional arguments (a pair of prepared inputs).
    pairwise : bool, default False
        Only consulted when both arguments are DataFrames. ``False`` applies
        ``f`` to matching columns; ``True`` applies it to every pairing of a
        column of ``arg1`` with a column of ``arg2``. The check uses ``is``,
        so only the literal ``True``/``False`` objects are accepted.

    Returns
    -------
    object
        ``f(X, Y)`` when neither argument is a DataFrame, otherwise a
        DataFrame assembled from the per-column results.

    Raises
    ------
    TypeError
        If either argument is not an ndarray, Series or DataFrame.
    ValueError
        If both arguments are distinct DataFrames, ``pairwise`` is False and
        either has non-unique columns; or if ``pairwise`` is neither ``True``
        nor ``False``.
    """

    if not (
        isinstance(arg1, (np.ndarray, ABCSeries, ABCDataFrame))
        and isinstance(arg2, (np.ndarray, ABCSeries, ABCDataFrame))
    ):
        raise TypeError(
            "arguments to moment function must be of type "
            "np.ndarray/Series/DataFrame"
        )

    # Neither argument is a DataFrame: prepare the pair and call f once.
    if isinstance(arg1, (np.ndarray, ABCSeries)) and isinstance(
        arg2, (np.ndarray, ABCSeries)
    ):
        X, Y = _prep_binary(arg1, arg2)
        return f(X, Y)

    elif isinstance(arg1, ABCDataFrame):
        from pandas import DataFrame

        def dataframe_from_int_dict(data, frame_template):
            # ``data`` is keyed by column position; map those integer labels
            # back to the template's column labels.
            result = DataFrame(data, index=frame_template.index)
            if len(result.columns) > 0:
                result.columns = frame_template.columns[result.columns]
            return result

        results = {}
        if isinstance(arg2, ABCDataFrame):
            if pairwise is False:
                if arg1 is arg2:
                    # special case in order to handle duplicate column names
                    for i, col in enumerate(arg1.columns):
                        results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i])
                    return dataframe_from_int_dict(results, arg1)
                else:
                    if not arg1.columns.is_unique:
                        raise ValueError("'arg1' columns are not unique")
                    if not arg2.columns.is_unique:
                        raise ValueError("'arg2' columns are not unique")
                    with warnings.catch_warnings(record=True):
                        warnings.simplefilter("ignore", RuntimeWarning)
                        X, Y = arg1.align(arg2, join="outer")
                    X = X + 0 * Y
                    Y = Y + 0 * X

                    with warnings.catch_warnings(record=True):
                        warnings.simplefilter("ignore", RuntimeWarning)
                        res_columns = arg1.columns.union(arg2.columns)
                    for col in res_columns:
                        # Only columns present in both aligned frames get a
                        # computed result.
                        if col in X and col in Y:
                            results[col] = f(X[col], Y[col])
                    return DataFrame(results, index=X.index, columns=res_columns)
            elif pairwise is True:
                # results[i][j] holds f applied to column i of arg1 and
                # column j of arg2 (positions, not labels).
                results = defaultdict(dict)
                for i, k1 in enumerate(arg1.columns):
                    for j, k2 in enumerate(arg2.columns):
                        if j < i and arg2 is arg1:
                            # Symmetric case
                            results[i][j] = results[j][i]
                        else:
                            results[i][j] = f(
                                *_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j])
                            )

                from pandas import concat

                result_index = arg1.index.union(arg2.index)
                if len(result_index):

                    # construct result frame
                    result = concat(
                        [
                            concat(
                                [results[i][j] for j, c in enumerate(arg2.columns)],
                                ignore_index=True,
                            )
                            for i, c in enumerate(arg1.columns)
                        ],
                        ignore_index=True,
                        axis=1,
                    )
                    result.columns = arg1.columns

                    # set the index and reorder
                    if arg2.columns.nlevels > 1:
                        result.index = MultiIndex.from_product(
                            arg2.columns.levels + [result_index]
                        )
                        result = result.reorder_levels([2, 0, 1]).sort_index()
                    else:
                        result.index = MultiIndex.from_product(
                            [range(len(arg2.columns)), range(len(result_index))]
                        )
                        result = result.swaplevel(1, 0).sort_index()
                        result.index = MultiIndex.from_product(
                            [result_index] + [arg2.columns]
                        )
                else:

                    # empty result
                    result = DataFrame(
                        index=MultiIndex(
                            levels=[arg1.index, arg2.columns], codes=[[], []]
                        ),
                        columns=arg2.columns,
                        dtype="float64",
                    )

                # reset our index names to arg1 names
                # reset our column names to arg2 names
                # careful not to mutate the original names
                result.columns = result.columns.set_names(arg1.columns.names)
                result.index = result.index.set_names(
                    result_index.names + arg2.columns.names
                )

                return result

            else:
                raise ValueError("'pairwise' is not True/False")
        else:
            # arg2 is an ndarray/Series: pair it with each column of arg1.
            results = {
                i: f(*_prep_binary(arg1.iloc[:, i], arg2))
                for i, col in enumerate(arg1.columns)
            }
            return dataframe_from_int_dict(results, arg1)

    else:
        # arg1 is not a DataFrame, so arg2 must be: retry with the arguments
        # swapped. ``pairwise`` is not forwarded (the default is used).
        return _flex_binary_moment(arg2, arg1, f)
196+
197+
198+
def _get_center_of_mass(comass, span, halflife, alpha):
199+
valid_count = com.count_not_none(comass, span, halflife, alpha)
200+
if valid_count > 1:
201+
raise ValueError("comass, span, halflife, and alpha are mutually exclusive")
202+
203+
# Convert to center of mass; domain checks ensure 0 < alpha <= 1
204+
if comass is not None:
205+
if comass < 0:
206+
raise ValueError("comass must satisfy: comass >= 0")
207+
elif span is not None:
208+
if span < 1:
209+
raise ValueError("span must satisfy: span >= 1")
210+
comass = (span - 1) / 2.0
211+
elif halflife is not None:
212+
if halflife <= 0:
213+
raise ValueError("halflife must satisfy: halflife > 0")
214+
decay = 1 - np.exp(np.log(0.5) / halflife)
215+
comass = 1 / decay - 1
216+
elif alpha is not None:
217+
if alpha <= 0 or alpha > 1:
218+
raise ValueError("alpha must satisfy: 0 < alpha <= 1")
219+
comass = (1.0 - alpha) / alpha
220+
else:
221+
raise ValueError("Must pass one of comass, span, halflife, or alpha")
222+
223+
return float(comass)
224+
225+
226+
def _offset(window, center):
227+
if not is_integer(window):
228+
window = len(window)
229+
offset = (window - 1) / 2.0 if center else 0
230+
try:
231+
return int(offset)
232+
except TypeError:
233+
return offset.astype(int)
234+
235+
236+
def _require_min_periods(p):
237+
def _check_func(minp, window):
238+
if minp is None:
239+
return window
240+
else:
241+
return max(p, minp)
242+
243+
return _check_func
244+
245+
246+
def _use_window(minp, window):
247+
if minp is None:
248+
return window
249+
else:
250+
return minp
251+
252+
253+
def _zsqrt(x):
254+
with np.errstate(all="ignore"):
255+
result = np.sqrt(x)
256+
mask = x < 0
257+
258+
if isinstance(x, ABCDataFrame):
259+
if mask.values.any():
260+
result[mask] = 0
261+
else:
262+
if mask.any():
263+
result[mask] = 0
264+
265+
return result
266+
267+
268+
def _prep_binary(arg1, arg2):
269+
if not isinstance(arg2, type(arg1)):
270+
raise Exception("Input arrays must be of the same type!")
271+
272+
# mask out values, this also makes a common index...
273+
X = arg1 + 0 * arg2
274+
Y = arg2 + 0 * arg1
275+
276+
return X, Y

0 commit comments

Comments
 (0)