Skip to content

TST/REF: Fixturize constant functions in ConsistencyBase #33943

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
May 10, 2020
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
7e461a1
remove \n from docstring
charlesdong1991 Dec 3, 2018
1314059
fix conflicts
charlesdong1991 Jan 19, 2019
8bcb313
Merge remote-tracking branch 'upstream/master'
charlesdong1991 Jul 30, 2019
24c3ede
Merge remote-tracking branch 'upstream/master'
charlesdong1991 Jan 14, 2020
dea38f2
fix issue 17038
charlesdong1991 Jan 14, 2020
cd9e7ac
revert change
charlesdong1991 Jan 14, 2020
e5e912b
revert change
charlesdong1991 Jan 14, 2020
045a76f
Merge remote-tracking branch 'upstream/master'
charlesdong1991 Apr 6, 2020
800ec58
Merge remote-tracking branch 'upstream/master' into moment_test_2
charlesdong1991 May 2, 2020
d37b2c7
fixturize functions
charlesdong1991 May 2, 2020
494f94c
fixup
charlesdong1991 May 2, 2020
15188fc
Merge remote-tracking branch 'upstream/master' into moment_test_2
charlesdong1991 May 2, 2020
8f32d5f
Merge remote-tracking branch 'upstream/master' into moment_test_2
charlesdong1991 May 2, 2020
4918552
mark consistency cov as slow
charlesdong1991 May 3, 2020
6bf8c7f
remove slow for exapnding
charlesdong1991 May 3, 2020
88d8439
fixup
charlesdong1991 May 3, 2020
a8c9bac
run again
charlesdong1991 May 3, 2020
384844b
Merge remote-tracking branch 'upstream/master' into moment_test_2
charlesdong1991 May 3, 2020
ddc8160
run again
charlesdong1991 May 3, 2020
a602629
add comments
charlesdong1991 May 6, 2020
27b50c6
Merge remote-tracking branch 'upstream/master' into moment_test_2
charlesdong1991 May 6, 2020
2a485e1
mark slow
charlesdong1991 May 6, 2020
abf3276
Merge remote-tracking branch 'upstream/master' into moment_test_2
charlesdong1991 May 7, 2020
7b58306
Merge remote-tracking branch 'upstream/master' into moment_test_2
charlesdong1991 May 9, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 0 additions & 34 deletions pandas/tests/window/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,40 +25,6 @@ def _create_data(self):


class ConsistencyBase(Base):
base_functions = [
(lambda v: Series(v).count(), None, "count"),
(lambda v: Series(v).max(), None, "max"),
(lambda v: Series(v).min(), None, "min"),
(lambda v: Series(v).sum(), None, "sum"),
(lambda v: Series(v).mean(), None, "mean"),
(lambda v: Series(v).std(), 1, "std"),
(lambda v: Series(v).cov(Series(v)), None, "cov"),
(lambda v: Series(v).corr(Series(v)), None, "corr"),
(lambda v: Series(v).var(), 1, "var"),
# restore once GH 8086 is fixed
# lambda v: Series(v).skew(), 3, 'skew'),
# (lambda v: Series(v).kurt(), 4, 'kurt'),
# restore once GH 8084 is fixed
# lambda v: Series(v).quantile(0.3), None, 'quantile'),
(lambda v: Series(v).median(), None, "median"),
(np.nanmax, 1, "max"),
(np.nanmin, 1, "min"),
(np.nansum, 1, "sum"),
(np.nanmean, 1, "mean"),
(lambda v: np.nanstd(v, ddof=1), 1, "std"),
(lambda v: np.nanvar(v, ddof=1), 1, "var"),
(np.nanmedian, 1, "median"),
]
no_nan_functions = [
(np.max, None, "max"),
(np.min, None, "min"),
(np.sum, None, "sum"),
(np.mean, None, "mean"),
(lambda v: np.std(v, ddof=1), 1, "std"),
(lambda v: np.var(v, ddof=1), 1, "var"),
(np.median, None, "median"),
]

def _create_data(self):
super()._create_data()

Expand Down
48 changes: 48 additions & 0 deletions pandas/tests/window/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,3 +242,51 @@ def no_nans(x):
def consistency_data(request):
"""Create consistency data"""
return request.param


base_functions_list = [
(lambda v: Series(v).count(), None, "count"),
(lambda v: Series(v).max(), None, "max"),
(lambda v: Series(v).min(), None, "min"),
(lambda v: Series(v).sum(), None, "sum"),
(lambda v: Series(v).mean(), None, "mean"),
(lambda v: Series(v).std(), 1, "std"),
(lambda v: Series(v).cov(Series(v)), None, "cov"),
(lambda v: Series(v).corr(Series(v)), None, "corr"),
(lambda v: Series(v).var(), 1, "var"),
# restore once GH 8086 is fixed
# lambda v: Series(v).skew(), 3, 'skew'),
# (lambda v: Series(v).kurt(), 4, 'kurt'),
# restore once GH 8084 is fixed
# lambda v: Series(v).quantile(0.3), None, 'quantile'),
(lambda v: Series(v).median(), None, "median"),
(np.nanmax, 1, "max"),
(np.nanmin, 1, "min"),
(np.nansum, 1, "sum"),
(np.nanmean, 1, "mean"),
(lambda v: np.nanstd(v, ddof=1), 1, "std"),
(lambda v: np.nanvar(v, ddof=1), 1, "var"),
(np.nanmedian, 1, "median"),
]

no_nan_functions_list = [
(np.max, None, "max"),
(np.min, None, "min"),
(np.sum, None, "sum"),
(np.mean, None, "mean"),
(lambda v: np.std(v, ddof=1), 1, "std"),
(lambda v: np.var(v, ddof=1), 1, "var"),
(np.median, None, "median"),
]


@pytest.fixture()
def base_functions():
"""Fixture for base functions."""
return base_functions_list


@pytest.fixture()
def no_nan_functions():
"""Fixture for no nan functions."""
return no_nan_functions_list
88 changes: 45 additions & 43 deletions pandas/tests/window/moments/test_moments_expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,50 +145,52 @@ def _check_expanding_has_min_periods(self, func, static_comp, has_min_periods):
result = func(ser)
tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50]))

@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
def test_expanding_apply_consistency(self, consistency_data, min_periods):
x, is_constant, no_nans = consistency_data
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message=".*(empty slice|0 for slice).*",
category=RuntimeWarning,
)
# test consistency between expanding_xyz() and either (a)
# expanding_apply of Series.xyz(), or (b) expanding_apply of
# np.nanxyz()
functions = self.base_functions

# GH 8269
if no_nans:
functions = self.base_functions + self.no_nan_functions
for (f, require_min_periods, name) in functions:
expanding_f = getattr(x.expanding(min_periods=min_periods), name)

if (
require_min_periods
and (min_periods is not None)
and (min_periods < require_min_periods)
):
continue

if name == "count":
expanding_f_result = expanding_f()
expanding_apply_f_result = x.expanding(min_periods=0).apply(
func=f, raw=True
)

@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
def test_expanding_apply_consistency(
consistency_data, base_functions, no_nan_functions, min_periods
):
x, is_constant, no_nans = consistency_data

with warnings.catch_warnings():
warnings.filterwarnings(
"ignore", message=".*(empty slice|0 for slice).*", category=RuntimeWarning,
)
# test consistency between expanding_xyz() and either (a)
# expanding_apply of Series.xyz(), or (b) expanding_apply of
# np.nanxyz()
functions = base_functions

# GH 8269
if no_nans:
functions = no_nan_functions + base_functions
for (f, require_min_periods, name) in functions:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

e.g in an ideal case we don't have this loop here

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

now it may be that this increases the number of tests by 10x so we don'twant to do this, but have a look see

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, refer to the comment above

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep sounds good

expanding_f = getattr(x.expanding(min_periods=min_periods), name)

if (
require_min_periods
and (min_periods is not None)
and (min_periods < require_min_periods)
):
continue

if name == "count":
expanding_f_result = expanding_f()
expanding_apply_f_result = x.expanding(min_periods=0).apply(
func=f, raw=True
)
else:
if name in ["cov", "corr"]:
expanding_f_result = expanding_f(pairwise=False)
else:
if name in ["cov", "corr"]:
expanding_f_result = expanding_f(pairwise=False)
else:
expanding_f_result = expanding_f()
expanding_apply_f_result = x.expanding(
min_periods=min_periods
).apply(func=f, raw=True)

# GH 9422
if name in ["sum", "prod"]:
tm.assert_equal(expanding_f_result, expanding_apply_f_result)
expanding_f_result = expanding_f()
expanding_apply_f_result = x.expanding(min_periods=min_periods).apply(
func=f, raw=True
)

# GH 9422
if name in ["sum", "prod"]:
tm.assert_equal(expanding_f_result, expanding_apply_f_result)


@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
Expand Down
104 changes: 52 additions & 52 deletions pandas/tests/window/moments/test_moments_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -946,58 +946,6 @@ class TestRollingMomentsConsistency(ConsistencyBase):
def setup_method(self, method):
self._create_data()

@pytest.mark.parametrize(
"window,min_periods,center", list(_rolling_consistency_cases())
)
def test_rolling_apply_consistency(
self, consistency_data, window, min_periods, center
):
x, is_constant, no_nans = consistency_data
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message=".*(empty slice|0 for slice).*",
category=RuntimeWarning,
)
# test consistency between rolling_xyz() and either (a)
# rolling_apply of Series.xyz(), or (b) rolling_apply of
# np.nanxyz()
functions = self.base_functions

# GH 8269
if no_nans:
functions = self.base_functions + self.no_nan_functions
for (f, require_min_periods, name) in functions:
rolling_f = getattr(
x.rolling(window=window, center=center, min_periods=min_periods),
name,
)

if (
require_min_periods
and (min_periods is not None)
and (min_periods < require_min_periods)
):
continue

if name == "count":
rolling_f_result = rolling_f()
rolling_apply_f_result = x.rolling(
window=window, min_periods=min_periods, center=center
).apply(func=f, raw=True)
else:
if name in ["cov", "corr"]:
rolling_f_result = rolling_f(pairwise=False)
else:
rolling_f_result = rolling_f()
rolling_apply_f_result = x.rolling(
window=window, min_periods=min_periods, center=center
).apply(func=f, raw=True)

# GH 9422
if name in ["sum", "prod"]:
tm.assert_equal(rolling_f_result, rolling_apply_f_result)

# binary moments
def test_rolling_cov(self):
A = self.series
Expand Down Expand Up @@ -1052,6 +1000,57 @@ def test_flex_binary_frame(self, method):
tm.assert_frame_equal(res3, exp)


@pytest.mark.parametrize(
"window,min_periods,center", list(_rolling_consistency_cases())
)
def test_rolling_apply_consistency(
consistency_data, base_functions, no_nan_functions, window, min_periods, center
):
x, is_constant, no_nans = consistency_data

with warnings.catch_warnings():
warnings.filterwarnings(
"ignore", message=".*(empty slice|0 for slice).*", category=RuntimeWarning,
)
# test consistency between rolling_xyz() and either (a)
# rolling_apply of Series.xyz(), or (b) rolling_apply of
# np.nanxyz()
functions = base_functions

# GH 8269
if no_nans:
functions = no_nan_functions + base_functions
for (f, require_min_periods, name) in functions:
rolling_f = getattr(
x.rolling(window=window, center=center, min_periods=min_periods), name,
)

if (
require_min_periods
and (min_periods is not None)
and (min_periods < require_min_periods)
):
pass

if name == "count":
rolling_f_result = rolling_f()
rolling_apply_f_result = x.rolling(
window=window, min_periods=min_periods, center=center
).apply(func=f, raw=True)
else:
if name in ["cov", "corr"]:
rolling_f_result = rolling_f(pairwise=False)
else:
rolling_f_result = rolling_f()
rolling_apply_f_result = x.rolling(
window=window, min_periods=min_periods, center=center
).apply(func=f, raw=True)

# GH 9422
if name in ["sum", "prod"]:
tm.assert_equal(rolling_f_result, rolling_apply_f_result)


@pytest.mark.parametrize("window", range(7))
def test_rolling_corr_with_zero_variance(window):
# GH 18430
Expand Down Expand Up @@ -1477,6 +1476,7 @@ def test_rolling_consistency_std(consistency_data, window, min_periods, center):
)


@pytest.mark.slow
Copy link
Member Author

@charlesdong1991 charlesdong1991 May 3, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jbrockmendel
thanks for your comment in #33813 (comment) , indeed, i tested the execution time and seems around 40s which is quite long and should mark as slow. And the execution time of expanding_consistency_cov test is okay (~4 to 5 s)

since this change is very tiny, i think it is okay to just change it here although it is out of scope of this PR.

@pytest.mark.parametrize(
"window,min_periods,center", list(_rolling_consistency_cases())
)
Expand Down