Skip to content

REF: pandas/core/window.py into multiple files #27736

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Aug 7, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion doc/source/reference/window.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@
======
Window
======
.. currentmodule:: pandas.core.window

Rolling objects are returned by ``.rolling`` calls: :func:`pandas.DataFrame.rolling`, :func:`pandas.Series.rolling`, etc.
Expanding objects are returned by ``.expanding`` calls: :func:`pandas.DataFrame.expanding`, :func:`pandas.Series.expanding`, etc.
EWM objects are returned by ``.ewm`` calls: :func:`pandas.DataFrame.ewm`, :func:`pandas.Series.ewm`, etc.

Standard moving window functions
--------------------------------
.. currentmodule:: pandas.core.window.rolling

.. autosummary::
:toctree: api/

Expand All @@ -38,6 +39,8 @@ Standard moving window functions

Standard expanding window functions
-----------------------------------
.. currentmodule:: pandas.core.window.expanding

.. autosummary::
:toctree: api/

Expand All @@ -59,6 +62,8 @@ Standard expanding window functions

Exponentially-weighted moving window functions
----------------------------------------------
.. currentmodule:: pandas.core.window.ewm

.. autosummary::
:toctree: api/

Expand Down
18 changes: 11 additions & 7 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -10690,9 +10690,13 @@ def _add_series_or_dataframe_operations(cls):
the doc strings again.
"""

from pandas.core import window as rwindow
from pandas.core.window import (
ewm as lib_ewm,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We usually name the `_libs` imports like `libwindow` (etc.), so is there a reason you can't just use
`from pandas.core.window import ewm, expanding, rolling`? (Otherwise, I guess it would be OK to call them `libewm`, etc.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or just directly import what you need from these modules?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This caused a RecursionError since the defined functions here are:

def rolling(...):
    return rolling(...)

cls.rolling = rolling

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I was able to use more direct imports, and all checks are green.

expanding as lib_expanding,
rolling as lib_rolling,
)

@Appender(rwindow.rolling.__doc__)
@Appender(lib_rolling.rolling.__doc__)
def rolling(
self,
window,
Expand All @@ -10704,7 +10708,7 @@ def rolling(
closed=None,
):
axis = self._get_axis_number(axis)
return rwindow.rolling(
return lib_rolling.rolling(
self,
window=window,
min_periods=min_periods,
Expand All @@ -10717,16 +10721,16 @@ def rolling(

cls.rolling = rolling

@Appender(rwindow.expanding.__doc__)
@Appender(lib_expanding.expanding.__doc__)
def expanding(self, min_periods=1, center=False, axis=0):
axis = self._get_axis_number(axis)
return rwindow.expanding(
return lib_expanding.expanding(
self, min_periods=min_periods, center=center, axis=axis
)

cls.expanding = expanding

@Appender(rwindow.ewm.__doc__)
@Appender(lib_ewm.ewm.__doc__)
def ewm(
self,
com=None,
Expand All @@ -10739,7 +10743,7 @@ def ewm(
axis=0,
):
axis = self._get_axis_number(axis)
return rwindow.ewm(
return lib_ewm.ewm(
self,
com=com,
span=span,
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1563,7 +1563,7 @@ def rolling(self, *args, **kwargs):
"""
Return a rolling grouper, providing rolling functionality per group.
"""
from pandas.core.window import RollingGroupby
from pandas.core.window.rolling import RollingGroupby

return RollingGroupby(self, *args, **kwargs)

Expand All @@ -1574,7 +1574,7 @@ def expanding(self, *args, **kwargs):
Return an expanding grouper, providing expanding
functionality per group.
"""
from pandas.core.window import ExpandingGroupby
from pandas.core.window.expanding import ExpandingGroupby

return ExpandingGroupby(self, *args, **kwargs)

Expand Down
Empty file added pandas/core/window/__init__.py
Empty file.
276 changes: 276 additions & 0 deletions pandas/core/window/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,276 @@
"""Common utility functions for rolling operations"""
from collections import defaultdict
import warnings

import numpy as np

from pandas.core.dtypes.common import is_integer
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries

import pandas.core.common as com
from pandas.core.generic import _shared_docs
from pandas.core.groupby.base import GroupByMixin
from pandas.core.index import MultiIndex

# Rebind ``_shared_docs`` to a new dict holding the same entries as the one
# imported from pandas.core.generic (presumably so that entries added by the
# window modules do not leak back into the original -- TODO confirm).
_shared_docs = dict(**_shared_docs)
# Docstring fragment with ``%(name)s`` placeholders covering the
# "Returns" and "See Also" sections.
_doc_template = """
Returns
-------
Series or DataFrame
Return type is determined by the caller.

See Also
--------
Series.%(name)s : Series %(name)s.
DataFrame.%(name)s : DataFrame %(name)s.
"""


class _GroupByMixin(GroupByMixin):
    """
    Mixin that routes window operations through a groupby object.
    """

    def __init__(self, obj, *args, **kwargs):
        # "parent" is accepted for signature compatibility but is not used;
        # it still has to be removed before kwargs are forwarded.
        kwargs.pop("parent", None)
        groupby = kwargs.pop("groupby", None)
        if groupby is None:
            # Called with the groupby object itself as ``obj``: keep it as the
            # groupby and take the object it wraps as the data.
            groupby, obj = obj, obj.obj
        self._groupby = groupby
        self._groupby.mutated = True
        self._groupby.grouper.mutated = True
        super().__init__(obj, *args, **kwargs)

    count = GroupByMixin._dispatch("count")
    corr = GroupByMixin._dispatch("corr", other=None, pairwise=None)
    cov = GroupByMixin._dispatch("cov", other=None, pairwise=None)

    def _apply(
        self, func, name=None, window=None, center=None, check_minp=None, **kwargs
    ):
        """
        Run the requested operation on each group via the groupby's ``apply``.

        The ``_apply``-specific arguments (``func``, ``window``, ``center``,
        ``check_minp``) are dropped; only ``name`` and the remaining keyword
        arguments reach the per-group call.
        """

        def f(x, name=name, *args):
            per_group = self._shallow_copy(x)

            if not isinstance(name, str):
                # Not a method name: hand it to the object's own ``apply``.
                return per_group.apply(name, *args, **kwargs)

            return getattr(per_group, name)(*args, **kwargs)

        return self._groupby.apply(f)


def _flex_binary_moment(arg1, arg2, f, pairwise=False):
    """
    Apply the binary function ``f`` to ``arg1`` and ``arg2``, dispatching on
    the types of the two arguments.

    Parameters
    ----------
    arg1, arg2 : np.ndarray, Series or DataFrame
        The two operands.
    f : callable
        Called with two positional operands, e.g. ``f(X, Y)``.
    pairwise : bool, default False
        Only consulted when both arguments are DataFrames. ``False`` applies
        ``f`` column by column; ``True`` applies ``f`` to every combination of
        a column of ``arg1`` with a column of ``arg2``.

    Returns
    -------
    The return value of ``f`` when both arguments are ndarray/Series,
    otherwise a DataFrame assembled from the individual results of ``f``.

    Raises
    ------
    TypeError
        If either argument is not an ndarray, Series or DataFrame.
    ValueError
        If ``pairwise`` is False, the two DataFrames are distinct objects and
        either has non-unique columns; or if ``pairwise`` is neither True nor
        False.
    """

    if not (
        isinstance(arg1, (np.ndarray, ABCSeries, ABCDataFrame))
        and isinstance(arg2, (np.ndarray, ABCSeries, ABCDataFrame))
    ):
        raise TypeError(
            "arguments to moment function must be of type "
            "np.ndarray/Series/DataFrame"
        )

    # Case 1: neither argument is a DataFrame.
    if isinstance(arg1, (np.ndarray, ABCSeries)) and isinstance(
        arg2, (np.ndarray, ABCSeries)
    ):
        X, Y = _prep_binary(arg1, arg2)
        return f(X, Y)

    # Case 2: arg1 is a DataFrame (arg2 may be anything accepted above).
    elif isinstance(arg1, ABCDataFrame):
        from pandas import DataFrame

        def dataframe_from_int_dict(data, frame_template):
            # ``data`` is keyed by integer column position; build the frame on
            # the template's index and map the positions back to its labels.
            result = DataFrame(data, index=frame_template.index)
            if len(result.columns) > 0:
                result.columns = frame_template.columns[result.columns]
            return result

        results = {}
        if isinstance(arg2, ABCDataFrame):
            if pairwise is False:
                if arg1 is arg2:
                    # special case in order to handle duplicate column names
                    for i, col in enumerate(arg1.columns):
                        results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i])
                    return dataframe_from_int_dict(results, arg1)
                else:
                    if not arg1.columns.is_unique:
                        raise ValueError("'arg1' columns are not unique")
                    if not arg2.columns.is_unique:
                        raise ValueError("'arg2' columns are not unique")
                    with warnings.catch_warnings(record=True):
                        warnings.simplefilter("ignore", RuntimeWarning)
                        X, Y = arg1.align(arg2, join="outer")
                    # Same "+ 0 * other" trick as in _prep_binary; presumably
                    # it propagates missing values from the other operand.
                    X = X + 0 * Y
                    Y = Y + 0 * X

                    with warnings.catch_warnings(record=True):
                        warnings.simplefilter("ignore", RuntimeWarning)
                        res_columns = arg1.columns.union(arg2.columns)
                    # Only columns present in both aligned frames get a result.
                    for col in res_columns:
                        if col in X and col in Y:
                            results[col] = f(X[col], Y[col])
                    return DataFrame(results, index=X.index, columns=res_columns)
            elif pairwise is True:
                # results[i][j] holds f(column i of arg1, column j of arg2).
                results = defaultdict(dict)
                for i, k1 in enumerate(arg1.columns):
                    for j, k2 in enumerate(arg2.columns):
                        if j < i and arg2 is arg1:
                            # Symmetric case
                            results[i][j] = results[j][i]
                        else:
                            results[i][j] = f(
                                *_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j])
                            )

                from pandas import concat

                result_index = arg1.index.union(arg2.index)
                if len(result_index):

                    # construct result frame
                    result = concat(
                        [
                            concat(
                                [results[i][j] for j, c in enumerate(arg2.columns)],
                                ignore_index=True,
                            )
                            for i, c in enumerate(arg1.columns)
                        ],
                        ignore_index=True,
                        axis=1,
                    )
                    result.columns = arg1.columns

                    # set the index and reorder
                    if arg2.columns.nlevels > 1:
                        result.index = MultiIndex.from_product(
                            arg2.columns.levels + [result_index]
                        )
                        result = result.reorder_levels([2, 0, 1]).sort_index()
                    else:
                        # Index by integer positions first, swap the two
                        # levels and sort, then replace the positions with the
                        # actual labels.
                        result.index = MultiIndex.from_product(
                            [range(len(arg2.columns)), range(len(result_index))]
                        )
                        result = result.swaplevel(1, 0).sort_index()
                        result.index = MultiIndex.from_product(
                            [result_index] + [arg2.columns]
                        )
                else:

                    # empty result
                    result = DataFrame(
                        index=MultiIndex(
                            levels=[arg1.index, arg2.columns], codes=[[], []]
                        ),
                        columns=arg2.columns,
                        dtype="float64",
                    )

                # column names are taken from arg1's columns
                # index names are the row-index names followed by the names
                # of arg2's columns
                # careful not to mutate the original names
                result.columns = result.columns.set_names(arg1.columns.names)
                result.index = result.index.set_names(
                    result_index.names + arg2.columns.names
                )

                return result

            else:
                raise ValueError("'pairwise' is not True/False")
        else:
            # arg2 is an ndarray/Series: pair it with every column of arg1.
            results = {
                i: f(*_prep_binary(arg1.iloc[:, i], arg2))
                for i, col in enumerate(arg1.columns)
            }
            return dataframe_from_int_dict(results, arg1)

    # Case 3: arg1 is not a DataFrame but arg2 is. Swap the operands and
    # recurse; ``pairwise`` is not forwarded, so it takes its default.
    else:
        return _flex_binary_moment(arg2, arg1, f)


def _get_center_of_mass(comass, span, halflife, alpha):
valid_count = com.count_not_none(comass, span, halflife, alpha)
if valid_count > 1:
raise ValueError("comass, span, halflife, and alpha are mutually exclusive")

# Convert to center of mass; domain checks ensure 0 < alpha <= 1
if comass is not None:
if comass < 0:
raise ValueError("comass must satisfy: comass >= 0")
elif span is not None:
if span < 1:
raise ValueError("span must satisfy: span >= 1")
comass = (span - 1) / 2.0
elif halflife is not None:
if halflife <= 0:
raise ValueError("halflife must satisfy: halflife > 0")
decay = 1 - np.exp(np.log(0.5) / halflife)
comass = 1 / decay - 1
elif alpha is not None:
if alpha <= 0 or alpha > 1:
raise ValueError("alpha must satisfy: 0 < alpha <= 1")
comass = (1.0 - alpha) / alpha
else:
raise ValueError("Must pass one of comass, span, halflife, or alpha")

return float(comass)


def _offset(window, center):
if not is_integer(window):
window = len(window)
offset = (window - 1) / 2.0 if center else 0
try:
return int(offset)
except TypeError:
return offset.astype(int)


def _require_min_periods(p):
def _check_func(minp, window):
if minp is None:
return window
else:
return max(p, minp)

return _check_func


def _use_window(minp, window):
if minp is None:
return window
else:
return minp


def _zsqrt(x):
with np.errstate(all="ignore"):
result = np.sqrt(x)
mask = x < 0

if isinstance(x, ABCDataFrame):
if mask.values.any():
result[mask] = 0
else:
if mask.any():
result[mask] = 0

return result


def _prep_binary(arg1, arg2):
if not isinstance(arg2, type(arg1)):
raise Exception("Input arrays must be of the same type!")

# mask out values, this also makes a common index...
X = arg1 + 0 * arg2
Y = arg2 + 0 * arg1

return X, Y
Loading