From a4d6ae4812bfa368e8999aada023ee6cfd7c9a03 Mon Sep 17 00:00:00 2001 From: jnecus Date: Thu, 14 May 2020 15:41:14 +0100 Subject: [PATCH 01/53] Update test_period_range.py Attempting to implement test for resolved issue #21793 --- pandas/tests/indexes/period/test_period_range.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index 2c3d22198df9f..42c42c085fda3 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -51,6 +51,9 @@ def test_construction_from_period(self): result = period_range(start=start, end=end, freq="Q", name="foo") tm.assert_index_equal(result, expected) + result = period_range(start=start, end=end, freq="Q", name="foo") + tm.assert_equal(result ,result.values) + # empty expected = PeriodIndex([], freq="W", name="foo") From 64de146ccc6270cfcd1693359290b3320f785b1f Mon Sep 17 00:00:00 2001 From: jnecus Date: Thu, 14 May 2020 15:51:04 +0100 Subject: [PATCH 02/53] Update test_period_range.py Fixing PEP formatting --- pandas/tests/indexes/period/test_period_range.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index 42c42c085fda3..2d997bd7359b2 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -50,13 +50,13 @@ def test_construction_from_period(self): ).to_period() result = period_range(start=start, end=end, freq="Q", name="foo") tm.assert_index_equal(result, expected) - + result = period_range(start=start, end=end, freq="Q", name="foo") - tm.assert_equal(result ,result.values) + tm.assert_equal(result, result.values) # empty expected = PeriodIndex([], freq="W", name="foo") - + result = period_range(start=start, periods=0, freq="W", name="foo") tm.assert_index_equal(result, expected) From 80e7f263147fc11c6a6d5674307d26ff04cf37c2 Mon Sep 17 00:00:00 2001 From: jnecus Date: Thu, 14 May 2020 16:14:37 +0100 Subject: [PATCH 03/53] Update test_period_range.py --- pandas/tests/indexes/period/test_period_range.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index 2d997bd7359b2..9fb44343f91bc 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -51,7 +51,7 @@ def test_construction_from_period(self): result = period_range(start=start, end=end, freq="Q", name="foo") tm.assert_index_equal(result, expected) - result = period_range(start=start, end=end, freq="Q", name="foo") + result = period_range(start=start, end=end, freq="Q", name="foo") tm.assert_equal(result, result.values) # empty From d4ac552cf1f5f56a4d3577582da303755b424a70 Mon Sep 17 00:00:00 2001 From: jnecus Date: Thu, 14 May 2020 16:49:14 +0100 Subject: [PATCH 04/53] Update test_period_range.py --- pandas/tests/indexes/period/test_period_range.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index 9fb44343f91bc..ce1c439b04986 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -50,13 +50,13 @@ def test_construction_from_period(self): ).to_period() result = period_range(start=start, end=end, freq="Q", name="foo") tm.assert_index_equal(result, expected) - + result = period_range(start=start, end=end, freq="Q", name="foo") tm.assert_equal(result, result.values) - + # empty expected = PeriodIndex([], freq="W", name="foo") - + result = period_range(start=start, periods=0, freq="W", name="foo") tm.assert_index_equal(result, expected) From 24365ab9a8534a5170b908681864f161854a8381 Mon Sep 17 00:00:00 2001 From: jnecus Date: Thu, 14 May 2020 17:01:06 +0100 Subject: [PATCH 05/53] Update test_period_range.py --- pandas/tests/indexes/period/test_period_range.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index ce1c439b04986..e7ce451103fce 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -54,6 +54,10 @@ def test_construction_from_period(self): result = period_range(start=start, end=end, freq="Q", name="foo") tm.assert_equal(result, result.values) + idx = period_range(start=start, end=end, freq="Q", name="foo") + result = idx == idx.values + assert result.all() + # empty expected = PeriodIndex([], freq="W", name="foo") From 6266771a26ef8bdc044933918900c8a1815892cf Mon Sep 17 00:00:00 2001 From: jnecus Date: Thu, 14 May 2020 17:54:18 +0100 Subject: [PATCH 06/53] Update test_period_range.py --- pandas/tests/indexes/period/test_period_range.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index e7ce451103fce..6662f45b08f69 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -51,9 +51,6 @@ def test_construction_from_period(self): result = period_range(start=start, end=end, freq="Q", name="foo") tm.assert_index_equal(result, expected) - result = period_range(start=start, end=end, freq="Q", name="foo") - tm.assert_equal(result, result.values) - idx = period_range(start=start, end=end, freq="Q", name="foo") result = idx == idx.values assert result.all() From e1ccaaecebf3b38eeccb1ebf8fcecb0772617d37 Mon Sep 17 00:00:00 2001 From: jnecus Date: Thu, 14 May 2020 18:08:58 +0100 Subject: [PATCH 07/53] Update test_period_range.py added comment above test for resolved issue #21793 --- pandas/tests/indexes/period/test_period_range.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index 6662f45b08f69..0a8a9964dfc59 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -51,6 +51,7 @@ def test_construction_from_period(self): result = period_range(start=start, end=end, freq="Q", name="foo") tm.assert_index_equal(result, expected) + #test for issue #21793 idx = period_range(start=start, end=end, freq="Q", name="foo") result = idx == idx.values assert result.all() From 4df4bc78bdd7aaaead5c8c425f1dd71142d5af6f Mon Sep 17 00:00:00 2001 From: jnecus Date: Thu, 14 May 2020 18:10:15 +0100 Subject: [PATCH 08/53] Update test_period_range.py --- pandas/tests/indexes/period/test_period_range.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index 0a8a9964dfc59..4159d85c6a54a 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -51,7 +51,7 @@ def test_construction_from_period(self): result = period_range(start=start, end=end, freq="Q", name="foo") tm.assert_index_equal(result, expected) - #test for issue #21793 + # test for issue #21793 idx = period_range(start=start, end=end, freq="Q", name="foo") result = idx == idx.values assert result.all() From abce52f5d4741191646be471a601f284432f5b36 Mon Sep 17 00:00:00 2001 From: jnecus Date: Thu, 14 May 2020 18:18:16 +0100 Subject: [PATCH 09/53] Update test_period_range.py --- pandas/tests/indexes/period/test_period_range.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index 4159d85c6a54a..e59aa12fa9ea6 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -2,7 +2,7 @@ from pandas import NaT, Period, PeriodIndex, date_range, period_range import pandas._testing as tm - +import numpy as np class TestPeriodRange: @pytest.mark.parametrize("freq", ["D", "W", "M", "Q", "A"]) @@ -53,8 +53,13 @@ def test_construction_from_period(self): # test for issue #21793 idx = period_range(start=start, end=end, freq="Q", name="foo") - result = idx == idx.values - assert result.all() + result = idx.values + expected = np.array([Period('2017Q1', 'Q-DEC'), + Period('2017Q2', 'Q-DEC'), + Period('2017Q3', 'Q-DEC'), + Period('2017Q4', 'Q-DEC'), + Period('2018Q1', 'Q-DEC')]) + tm.assert_numpy_array_equal(result, expected) # empty expected = PeriodIndex([], freq="W", name="foo") From a0de23bb16272f219f570d0afe2880a81e3e8585 Mon Sep 17 00:00:00 2001 From: jnecus Date: Thu, 14 May 2020 18:44:07 +0100 Subject: [PATCH 10/53] Update test_period_range.py --- pandas/tests/indexes/period/test_period_range.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index e59aa12fa9ea6..21568426b53c3 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -51,14 +51,10 @@ def test_construction_from_period(self): result = period_range(start=start, end=end, freq="Q", name="foo") tm.assert_index_equal(result, expected) - # test for issue #21793 + # test for issue # 21793 idx = period_range(start=start, end=end, freq="Q", name="foo") - result = idx.values - expected = np.array([Period('2017Q1', 'Q-DEC'), - Period('2017Q2', 'Q-DEC'), - Period('2017Q3', 'Q-DEC'), - Period('2017Q4', 'Q-DEC'), - Period('2018Q1', 'Q-DEC')]) + result = idx == idx.values + expected = np.array([True, True, True, True, True]) tm.assert_numpy_array_equal(result, expected) # empty From ae631915b4e3ea911989e3fa0f77143613ce20ec Mon Sep 17 00:00:00 2001 From: jnecus Date: Thu, 14 May 2020 18:49:22 +0100 Subject: [PATCH 11/53] Update test_period_range.py --- pandas/tests/indexes/period/test_period_range.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index 21568426b53c3..251fba562fab2 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -1,8 +1,8 @@ import pytest - +import numpy as np from pandas import NaT, Period, PeriodIndex, date_range, period_range import pandas._testing as tm -import numpy as np + class TestPeriodRange: @pytest.mark.parametrize("freq", ["D", "W", "M", "Q", "A"]) From 054ade50f7b12d6390a80fc7051b0fbc8f755e28 Mon Sep 17 00:00:00 2001 From: jnecus Date: Thu, 14 May 2020 19:18:50 +0100 Subject: [PATCH 12/53] Update test_period_range.py --- pandas/tests/indexes/period/test_period_range.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index 251fba562fab2..595973f3f54e6 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -1,5 +1,5 @@ -import pytest import numpy as np +import pytest from pandas import NaT, Period, PeriodIndex, date_range, period_range import pandas._testing as tm From a40ab465f6a62686e4d1a3b4df53d81e87b7cc80 Mon Sep 17 00:00:00 2001 From: jnecus Date: Thu, 14 May 2020 19:42:19 +0100 Subject: [PATCH 13/53] Update test_period_range.py --- pandas/tests/indexes/period/test_period_range.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index 595973f3f54e6..65cdd1fc56214 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -1,5 +1,6 @@ import numpy as np import pytest + from pandas import NaT, Period, PeriodIndex, date_range, period_range import pandas._testing as tm From b749f8e1df3caa9315b29c91a0be5748ad8c8f0e Mon Sep 17 00:00:00 2001 From: jnecus Date: Thu, 14 May 2020 20:51:24 +0100 Subject: [PATCH 14/53] Update test_period_range.py --- pandas/tests/indexes/period/test_period_range.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index 65cdd1fc56214..4685850e75ba2 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -53,6 +53,10 @@ def test_construction_from_period(self): tm.assert_index_equal(result, expected) # test for issue # 21793 + start, end = Period("2017Q1", freq="Q"), Period("2018Q1", freq="Q") + expected = date_range( + start="2017-03-31", end="2018-03-31", freq="M", name="foo" + ).to_period() idx = period_range(start=start, end=end, freq="Q", name="foo") result = idx == idx.values expected = np.array([True, True, True, True, True]) From 14e2c7901ab139079a625bc435cb288a1724ddf8 Mon Sep 17 00:00:00 2001 From: jnecus Date: Fri, 15 May 2020 10:06:59 +0100 Subject: [PATCH 15/53] Update test_period_range.py --- pandas/tests/indexes/period/test_period_range.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index 4685850e75ba2..68b48a55957ff 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -54,9 +54,6 @@ def test_construction_from_period(self): # test for issue # 21793 start, end = Period("2017Q1", freq="Q"), Period("2018Q1", freq="Q") - expected = date_range( - start="2017-03-31", end="2018-03-31", freq="M", name="foo" - ).to_period() idx = period_range(start=start, end=end, freq="Q", name="foo") result = idx == idx.values expected = np.array([True, True, True, True, True]) From dce777b68716b62939d521fa56abad7d224fde3e Mon Sep 17 00:00:00 2001 From: jnecus Date: Mon, 18 May 2020 12:02:49 +0100 Subject: [PATCH 16/53] Revert "Merge remote-tracking branch 'upstream/master'" This reverts commit 0d68e313113271413809ef5df7c88a601132e1c6, reversing changes made to 14e2c7901ab139079a625bc435cb288a1724ddf8. --- .travis.yml | 3 +- README.md | 1 - doc/source/ecosystem.rst | 18 +- doc/source/reference/offset_frequency.rst | 8 - doc/source/user_guide/computation.rst | 18 - doc/source/whatsnew/v1.1.0.rst | 6 - pandas/_libs/groupby.pyx | 4 +- pandas/_libs/hashtable.pyx | 5 +- pandas/_libs/index.pyx | 9 +- pandas/_libs/internals.pyx | 3 +- pandas/_libs/interval.pyx | 11 +- pandas/_libs/lib.pyx | 3 +- pandas/_libs/missing.pyx | 2 +- pandas/_libs/parsers.pyx | 2 +- pandas/_libs/reduction.pyx | 4 +- pandas/_libs/reshape.pyx | 5 +- pandas/_libs/testing.pyx | 8 +- pandas/_libs/tslibs/frequencies.pxd | 1 + pandas/_libs/tslibs/frequencies.pyx | 19 + pandas/_libs/tslibs/np_datetime.pxd | 2 + pandas/_libs/tslibs/np_datetime.pyx | 9 + pandas/_libs/tslibs/offsets.pyx | 108 +- pandas/_libs/tslibs/strptime.pyx | 4 +- pandas/_libs/tslibs/timedeltas.pyx | 2 +- pandas/_libs/tslibs/timestamps.pxd | 2 +- pandas/_libs/tslibs/timestamps.pyx | 32 +- pandas/core/arrays/categorical.py | 4 - pandas/core/arrays/datetimelike.py | 6 - pandas/core/arrays/datetimes.py | 3 - pandas/core/arrays/integer.py | 4 - pandas/core/arrays/period.py | 3 +- pandas/core/arrays/timedeltas.py | 23 +- pandas/core/base.py | 3 +- pandas/core/groupby/groupby.py | 197 +- pandas/core/groupby/ops.py | 15 +- pandas/core/indexes/base.py | 36 +- pandas/core/indexes/category.py | 7 + pandas/core/indexes/extension.py | 46 +- pandas/core/indexes/numeric.py | 4 + pandas/core/indexes/period.py | 10 +- pandas/core/indexes/range.py | 12 +- pandas/core/internals/blocks.py | 2 +- pandas/core/ops/common.py | 24 +- pandas/core/strings.py | 17 +- pandas/core/window/rolling.py | 20 +- pandas/plotting/_matplotlib/converter.py | 8 +- pandas/plotting/_matplotlib/timeseries.py | 6 +- pandas/tests/arithmetic/test_datetime64.py | 2 +- pandas/tests/arithmetic/test_numeric.py | 2 +- pandas/tests/arithmetic/test_timedelta64.py | 16 +- pandas/tests/arrays/boolean/test_logical.py | 2 +- .../tests/arrays/integer/test_arithmetic.py | 5 +- pandas/tests/arrays/string_/test_string.py | 2 +- pandas/tests/arrays/test_datetimelike.py | 3 +- .../indexes/categorical/test_category.py | 2 +- .../tests/indexes/interval/test_interval.py | 2 +- pandas/tests/indexes/test_base.py | 10 - .../tests/scalar/timestamp/test_arithmetic.py | 2 +- .../tseries/frequencies/test_freq_code.py | 47 +- pandas/tests/tseries/offsets/test_offsets.py | 44 +- pandas/tests/tslibs/test_period_asfreq.py | 13 +- pandas/tests/window/common.py | 22 +- pandas/tests/window/conftest.py | 62 +- .../moments/test_moments_consistency_ewm.py | 11 +- .../test_moments_consistency_expanding.py | 202 +- .../test_moments_consistency_rolling.py | 81 +- .../tests/window/moments/test_moments_ewm.py | 481 ++--- .../window/moments/test_moments_rolling.py | 1691 ++++++++--------- pandas/tests/window/test_api.py | 565 +++--- pandas/tests/window/test_apply.py | 27 +- pandas/tests/window/test_ewm.py | 92 +- pandas/tests/window/test_expanding.py | 264 +-- pandas/tests/window/test_rolling.py | 888 ++++----- pandas/tests/window/test_window.py | 126 +- pandas/tseries/frequencies.py | 2 +- pandas/tseries/offsets.py | 152 +- pandas/util/_decorators.py | 6 +- 77 files changed, 2609 insertions(+), 2954 deletions(-) diff --git a/.travis.yml b/.travis.yml index c5dbddacc6a43..7943ca370af1a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -75,7 +75,8 @@ matrix: before_install: - echo "before_install" - # Use blocking IO on travis. Ref: https://github.com/travis-ci/travis-ci/issues/8920#issuecomment-352661024 + # set non-blocking IO on travis + # https://github.com/travis-ci/travis-ci/issues/8920#issuecomment-352661024 - python -c 'import os,sys,fcntl; flags = fcntl.fcntl(sys.stdout, fcntl.F_GETFL); fcntl.fcntl(sys.stdout, fcntl.F_SETFL, flags&~os.O_NONBLOCK);' - source ci/travis_process_gbq_encryption.sh - export PATH="$HOME/miniconda3/bin:$PATH" diff --git a/README.md b/README.md index 7edee8d3feeed..33dfbf10ff743 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,6 @@ [![Downloads](https://anaconda.org/conda-forge/pandas/badges/downloads.svg)](https://pandas.pydata.org) [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/pydata/pandas) [![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](https://numfocus.org) -[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) ## What is it? diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index 6c6a7f42d4b7e..fd5e7c552fe0a 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -30,7 +30,7 @@ substantial projects that you feel should be on this list, please let us know. Data cleaning and validation ---------------------------- -`Pyjanitor `__ +`pyjanitor `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Pyjanitor provides a clean API for cleaning data, using method chaining. @@ -115,7 +115,7 @@ It is very similar to the matplotlib plotting backend, but provides interactive web-based charts and maps. -`Seaborn `__ +`seaborn `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Seaborn is a Python visualization library based on @@ -136,7 +136,7 @@ provides a powerful, declarative and extremely general way to generate bespoke p Various implementations to other languages are available. A good implementation for Python users is `has2k1/plotnine `__. -`IPython vega `__ +`IPython Vega `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `IPython Vega `__ leverages `Vega @@ -147,7 +147,7 @@ A good implementation for Python users is `has2k1/plotnine `__ `Python API `__ enables interactive figures and web shareability. Maps, 2D, 3D, and live-streaming graphs are rendered with WebGL and `D3.js `__. The library supports plotting directly from a pandas DataFrame and cloud-based collaboration. Users of `matplotlib, ggplot for Python, and Seaborn `__ can convert figures into interactive web-based plots. Plots can be drawn in `IPython Notebooks `__ , edited with R or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly is free for unlimited sharing, and has `cloud `__, `offline `__, or `on-premise `__ accounts for private use. -`Qtpandas `__ +`QtPandas `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Spun off from the main pandas library, the `qtpandas `__ @@ -187,7 +187,7 @@ See :ref:`Options and Settings ` and :ref:`Available Options ` for pandas ``display.`` settings. -`Quantopian/qgrid `__ +`quantopian/qgrid `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ qgrid is "an interactive grid for sorting and filtering @@ -249,12 +249,12 @@ The following data feeds are available: * Stooq Index Data * MOEX Data -`Quandl/Python `__ +`quandl/Python `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Quandl API for Python wraps the Quandl REST API to return Pandas DataFrames with timeseries indexes. -`Pydatastream `__ +`pydatastream `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PyDatastream is a Python interface to the `Refinitiv Datastream (DWS) `__ @@ -384,7 +384,7 @@ Pandas provides an interface for defining system. The following libraries implement that interface to provide types not found in NumPy or pandas, which work well with pandas' data containers. -`Cyberpandas`_ +`cyberpandas`_ ~~~~~~~~~~~~~~ Cyberpandas provides an extension type for storing arrays of IP Addresses. These @@ -411,4 +411,4 @@ Library Accessor Classes Description .. _pdvega: https://altair-viz.github.io/pdvega/ .. _Altair: https://altair-viz.github.io/ .. _pandas_path: https://github.com/drivendataorg/pandas-path/ -.. _pathlib.Path: https://docs.python.org/3/library/pathlib.html +.. _pathlib.Path: https://docs.python.org/3/library/pathlib.html \ No newline at end of file diff --git a/doc/source/reference/offset_frequency.rst b/doc/source/reference/offset_frequency.rst index 9b2753ca02495..6240181708f97 100644 --- a/doc/source/reference/offset_frequency.rst +++ b/doc/source/reference/offset_frequency.rst @@ -1044,7 +1044,6 @@ Properties Tick.nanos Tick.normalize Tick.rule_code - Tick.n Methods ~~~~~~~ @@ -1078,7 +1077,6 @@ Properties Day.nanos Day.normalize Day.rule_code - Day.n Methods ~~~~~~~ @@ -1112,7 +1110,6 @@ Properties Hour.nanos Hour.normalize Hour.rule_code - Hour.n Methods ~~~~~~~ @@ -1146,7 +1143,6 @@ Properties Minute.nanos Minute.normalize Minute.rule_code - Minute.n Methods ~~~~~~~ @@ -1180,7 +1176,6 @@ Properties Second.nanos Second.normalize Second.rule_code - Second.n Methods ~~~~~~~ @@ -1214,7 +1209,6 @@ Properties Milli.nanos Milli.normalize Milli.rule_code - Milli.n Methods ~~~~~~~ @@ -1248,7 +1242,6 @@ Properties Micro.nanos Micro.normalize Micro.rule_code - Micro.n Methods ~~~~~~~ @@ -1282,7 +1275,6 @@ Properties Nano.nanos Nano.normalize Nano.rule_code - Nano.n Methods ~~~~~~~ diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst index cf630a9671013..d371f6d5f273c 100644 --- a/doc/source/user_guide/computation.rst +++ b/doc/source/user_guide/computation.rst @@ -648,24 +648,6 @@ from present information back to past information. This allows the rolling windo Currently, this feature is only implemented for time-based windows. For fixed windows, the closed parameter cannot be set and the rolling window will always have both endpoints closed. -.. _stats.iter_rolling_window: - -Iteration over window: -~~~~~~~~~~~~~~~~~~~~~~ - -.. versionadded:: 1.1.0 - -``Rolling`` and ``Expanding`` objects now support iteration. Be noted that ``min_periods`` is ignored in iteration. - -.. ipython:: - - In [1]: df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - - In [2]: for i in df.rolling(2): - ...: print(i) - ...: - - .. _stats.moments.ts-versus-resampling: Time-aware rolling vs. resampling diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index eaf8c19b9a21b..1437006ee3fb8 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -235,7 +235,6 @@ Other enhancements :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`, and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`26599`). - :meth:`HDFStore.put` now accepts `track_times` parameter. Parameter is passed to ``create_table`` method of ``PyTables`` (:issue:`32682`). -- Make :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Expanding` iterable(:issue:`11704`) .. --------------------------------------------------------------------------- @@ -586,7 +585,6 @@ Deprecations - :func:`pandas.api.types.is_categorical` is deprecated and will be removed in a future version; use `:func:pandas.api.types.is_categorical_dtype` instead (:issue:`33385`) - :meth:`Index.get_value` is deprecated and will be removed in a future version (:issue:`19728`) -- :meth:`DateOffset.__call__` is deprecated and will be removed in a future version, use ``offset + other`` instead (:issue:`34171`) .. --------------------------------------------------------------------------- @@ -607,8 +605,6 @@ Performance improvements sparse values from ``scipy.sparse`` matrices using the :meth:`DataFrame.sparse.from_spmatrix` constructor (:issue:`32821`, :issue:`32825`, :issue:`32826`, :issue:`32856`, :issue:`32858`). -- Performance improvement for groupby methods :meth:`~pandas.core.groupby.groupby.Groupby.first` - and :meth:`~pandas.core.groupby.groupby.Groupby.last` (:issue:`34178`) - Performance improvement in :func:`factorize` for nullable (integer and boolean) dtypes (:issue:`33064`). - Performance improvement in reductions (sum, prod, min, max) for nullable (integer and boolean) dtypes (:issue:`30982`, :issue:`33261`, :issue:`33442`). @@ -817,8 +813,6 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.first` and :meth:`GroupBy.last` where None is not preserved in object dtype (:issue:`32800`) - Bug in :meth:`Rolling.min` and :meth:`Rolling.max`: Growing memory usage after multiple calls when using a fixed window (:issue:`30726`) - Bug in :meth:`GroupBy.agg`, :meth:`GroupBy.transform`, and :meth:`GroupBy.resample` where subclasses are not preserved (:issue:`28330`) -- Bug in :meth:`GroupBy.rolling.apply` ignores args and kwargs parameters (:issue:`33433`) - Reshaping ^^^^^^^^^ diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index d5d706650bb34..53e66c4b8723d 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -9,9 +9,11 @@ cimport numpy as cnp from numpy cimport (ndarray, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t, float32_t, float64_t, complex64_t, complex128_t) -from numpy.math cimport NAN cnp.import_array() +cdef extern from "numpy/npy_math.h": + float64_t NAN "NPY_NAN" + from pandas._libs.util cimport numeric, get_nat from pandas._libs.algos cimport (swap, TiebreakEnumType, TIEBREAK_AVERAGE, diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index c3dcbb942d7fe..e80f134290a7e 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -8,9 +8,10 @@ from libc.stdlib cimport malloc, free import numpy as np cimport numpy as cnp from numpy cimport ndarray, uint8_t, uint32_t, float64_t -from numpy.math cimport NAN cnp.import_array() +cdef extern from "numpy/npy_math.h": + float64_t NAN "NPY_NAN" from pandas._libs.khash cimport ( khiter_t, @@ -53,7 +54,7 @@ from pandas._libs.khash cimport ( ) -from pandas._libs cimport util +cimport pandas._libs.util as util from pandas._libs.missing cimport checknull diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index b4dcdaa10d0ef..245c554570ce4 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -19,10 +19,11 @@ from numpy cimport ( cnp.import_array() -from pandas._libs cimport util +cimport pandas._libs.util as util +from pandas._libs.tslibs import Period, Timedelta from pandas._libs.tslibs.nattype cimport c_NaT as NaT -from pandas._libs.tslibs.base cimport ABCTimestamp, ABCTimedelta, ABCPeriod +from pandas._libs.tslibs.base cimport ABCTimestamp from pandas._libs.hashtable cimport HashTable @@ -469,7 +470,7 @@ cdef class TimedeltaEngine(DatetimeEngine): return 'm8[ns]' cdef int64_t _unbox_scalar(self, scalar) except? -1: - if not (isinstance(scalar, ABCTimedelta) or scalar is NaT): + if not (isinstance(scalar, Timedelta) or scalar is NaT): raise TypeError(scalar) return scalar.value @@ -479,7 +480,7 @@ cdef class PeriodEngine(Int64Engine): cdef int64_t _unbox_scalar(self, scalar) except? -1: if scalar is NaT: return scalar.value - if isinstance(scalar, ABCPeriod): + if isinstance(scalar, Period): # NB: we assume that we have the correct freq here. return scalar.ordinal raise TypeError(scalar) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 1aa95e92b73d1..1e53b789aa05c 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -1,6 +1,5 @@ -from collections import defaultdict - import cython +from collections import defaultdict from cython import Py_ssize_t from cpython.slice cimport PySlice_GetIndicesEx diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 3850b24fdf519..657a2798f7267 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -33,7 +33,7 @@ from numpy cimport ( cnp.import_array() -from pandas._libs cimport util +cimport pandas._libs.util as util from pandas._libs.hashtable cimport Int64Vector from pandas._libs.tslibs.util cimport ( @@ -42,7 +42,8 @@ from pandas._libs.tslibs.util cimport ( is_timedelta64_object, ) -from pandas._libs.tslibs.base cimport ABCTimestamp, ABCTimedelta +from pandas._libs.tslibs import Timestamp +from pandas._libs.tslibs.timedeltas import Timedelta from pandas._libs.tslibs.timezones cimport tz_compare @@ -328,7 +329,7 @@ cdef class Interval(IntervalMixin): raise ValueError(f"invalid option for 'closed': {closed}") if not left <= right: raise ValueError("left side of interval must be <= right side") - if (isinstance(left, ABCTimestamp) and + if (isinstance(left, Timestamp) and not tz_compare(left.tzinfo, right.tzinfo)): # GH 18538 raise ValueError("left and right must have the same time zone, got " @@ -340,7 +341,7 @@ cdef class Interval(IntervalMixin): def _validate_endpoint(self, endpoint): # GH 23013 if not (is_integer_object(endpoint) or is_float_object(endpoint) or - isinstance(endpoint, (ABCTimestamp, ABCTimedelta))): + isinstance(endpoint, (Timestamp, Timedelta))): raise ValueError("Only numeric, Timestamp and Timedelta endpoints " "are allowed when constructing an Interval.") @@ -370,7 +371,7 @@ cdef class Interval(IntervalMixin): right = self.right # TODO: need more general formatting methodology here - if isinstance(left, ABCTimestamp) and isinstance(right, ABCTimestamp): + if isinstance(left, Timestamp) and isinstance(right, Timestamp): left = left._short_repr right = right._short_repr diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index bd623a39010f6..658f7fb202531 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1,5 +1,6 @@ from collections import abc from decimal import Decimal + import warnings import cython @@ -62,7 +63,7 @@ cdef extern from "numpy/arrayobject.h": cdef extern from "src/parse_helper.h": int floatify(object, float64_t *result, int *maybe_int) except -1 -from pandas._libs cimport util +cimport pandas._libs.util as util from pandas._libs.util cimport is_nan, UINT64_MAX, INT64_MAX, INT64_MIN from pandas._libs.tslib import array_to_datetime diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 6d4d1e95fe8c3..6bca5e370ac89 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -8,7 +8,7 @@ cimport numpy as cnp from numpy cimport ndarray, int64_t, uint8_t, float64_t cnp.import_array() -from pandas._libs cimport util +cimport pandas._libs.util as util from pandas._libs.tslibs.np_datetime cimport get_datetime64_value, get_timedelta64_value diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 461419239c730..9bb5e10348e47 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -34,7 +34,7 @@ cimport numpy as cnp from numpy cimport ndarray, uint8_t, uint64_t, int64_t, float64_t cnp.import_array() -from pandas._libs cimport util +cimport pandas._libs.util as util from pandas._libs.util cimport UINT64_MAX, INT64_MAX, INT64_MIN import pandas._libs.lib as lib diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 0988cd7ff0dde..a7b2d5d5491d5 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -14,7 +14,7 @@ from numpy cimport (ndarray, flatiter) cnp.import_array() -from pandas._libs cimport util +cimport pandas._libs.util as util from pandas._libs.lib import maybe_convert_objects, is_scalar @@ -603,7 +603,7 @@ cdef class BlockSlider: arr.shape[1] = 0 -def compute_reduction(arr: ndarray, f, axis: int = 0, dummy=None, labels=None): +def compute_reduction(arr: np.ndarray, f, axis: int = 0, dummy=None, labels=None): """ Parameters diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx index da4dd00027395..aed5e1d612088 100644 --- a/pandas/_libs/reshape.pyx +++ b/pandas/_libs/reshape.pyx @@ -15,11 +15,10 @@ from numpy cimport ( uint64_t, ) -import numpy as np cimport numpy as cnp -cnp.import_array() - +import numpy as np from pandas._libs.lib cimport c_is_list_like +cnp.import_array() ctypedef fused reshape_t: uint8_t diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index 9d3959d0a070a..0460a69f366c4 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -1,8 +1,4 @@ import numpy as np -from numpy cimport import_array -import_array() - -from pandas._libs.util cimport is_array from pandas.core.dtypes.missing import isna, array_equivalent from pandas.core.dtypes.common import is_dtype_equal @@ -120,8 +116,8 @@ cpdef assert_almost_equal(a, b, assert a == b, f"{a} != {b}" return True - a_is_ndarray = is_array(a) - b_is_ndarray = is_array(b) + a_is_ndarray = isinstance(a, np.ndarray) + b_is_ndarray = isinstance(b, np.ndarray) if obj is None: if a_is_ndarray or b_is_ndarray: diff --git a/pandas/_libs/tslibs/frequencies.pxd b/pandas/_libs/tslibs/frequencies.pxd index d6bae78576f50..1b7efb8c5dfdf 100644 --- a/pandas/_libs/tslibs/frequencies.pxd +++ b/pandas/_libs/tslibs/frequencies.pxd @@ -1,6 +1,7 @@ cpdef str get_rule_month(object source, str default=*) cpdef get_freq_code(freqstr) +cpdef object get_freq(object freq) cpdef str get_base_alias(freqstr) cpdef int get_to_timestamp_base(int base) cpdef str get_freq_str(base, mult=*) diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index c1f10b3dda612..d97a9fa0ba2fa 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -306,6 +306,25 @@ cpdef int get_to_timestamp_base(int base): return base +cpdef object get_freq(object freq): + """ + Return frequency code of given frequency str. + If input is not string, return input as it is. + + Examples + -------- + >>> get_freq('A') + 1000 + + >>> get_freq('3A') + 1000 + """ + if isinstance(freq, str): + base, mult = get_freq_code(freq) + freq = base + return freq + + # ---------------------------------------------------------------------- # Frequency comparison diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 038632e1575c3..c936d42b34db5 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -53,6 +53,8 @@ cdef extern from "src/datetime/np_datetime.h": npy_datetimestruct *result) nogil +cdef int reverse_ops[6] + cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1 cdef check_dts_bounds(npy_datetimestruct *dts) diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 5ac0e4fa44bee..9a8a8fdae6d2f 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -68,6 +68,15 @@ cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil: # ---------------------------------------------------------------------- # Comparison +cdef int reverse_ops[6] + +reverse_ops[Py_LT] = Py_GT +reverse_ops[Py_LE] = Py_GE +reverse_ops[Py_EQ] = Py_EQ +reverse_ops[Py_NE] = Py_NE +reverse_ops[Py_GT] = Py_LT +reverse_ops[Py_GE] = Py_LE + cdef inline bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1: """ diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index c113897e4fe82..5efb9b3534f14 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -19,11 +19,11 @@ cnp.import_array() from pandas._libs.tslibs cimport util -from pandas._libs.tslibs.util cimport is_integer_object, is_datetime64_object +from pandas._libs.tslibs.util cimport is_integer_object from pandas._libs.tslibs.base cimport ABCTick, ABCTimestamp, is_tick_object -from pandas._libs.tslibs.ccalendar import MONTHS, DAYS, weekday_to_int, int_to_weekday +from pandas._libs.tslibs.ccalendar import MONTHS, DAYS from pandas._libs.tslibs.ccalendar cimport get_days_in_month, dayofweek from pandas._libs.tslibs.conversion cimport ( convert_datetime_to_tsobject, @@ -35,7 +35,6 @@ from pandas._libs.tslibs.np_datetime cimport ( from pandas._libs.tslibs.timezones cimport utc_pytz as UTC from pandas._libs.tslibs.tzconversion cimport tz_convert_single -from pandas._libs.tslibs.timedeltas import Timedelta from pandas._libs.tslibs.timestamps import Timestamp # --------------------------------------------------------------------- @@ -162,7 +161,7 @@ def apply_wraps(func): elif isinstance(other, (timedelta, BaseOffset)): # timedelta path return func(self, other) - elif isinstance(other, (datetime, date)) or is_datetime64_object(other): + elif isinstance(other, (np.datetime64, datetime, date)): other = Timestamp(other) else: # This will end up returning NotImplemented back in __add__ @@ -487,12 +486,6 @@ class _BaseOffset: return NotImplemented def __call__(self, other): - warnings.warn( - "DateOffset.__call__ is deprecated and will be removed in a future " - "version. Use `offset + other` instead.", - FutureWarning, - stacklevel=1, - ) return self.apply(other) def __mul__(self, other): @@ -650,10 +643,7 @@ class _BaseOffset: # ------------------------------------------------------------------ - # Staticmethod so we can call from _Tick.__init__, will be unnecessary - # once BaseOffset is a cdef class and is inherited by _Tick - @staticmethod - def _validate_n(n): + def _validate_n(self, n): """ Require that `n` be an integer. @@ -770,33 +760,6 @@ cdef class _Tick(ABCTick): # ensure that reversed-ops with numpy scalars return NotImplemented __array_priority__ = 1000 _adjust_dst = False - _inc = Timedelta(microseconds=1000) - _prefix = "undefined" - _attributes = frozenset(["n", "normalize"]) - - cdef readonly: - int64_t n - bint normalize - dict _cache - - def __init__(self, n=1, normalize=False): - n = _BaseOffset._validate_n(n) - self.n = n - self.normalize = False - self._cache = {} - if normalize: - # GH#21427 - raise ValueError( - "Tick offset with `normalize=True` are not allowed." - ) - - @property - def delta(self) -> Timedelta: - return self.n * self._inc - - @property - def nanos(self) -> int64_t: - return self.delta.value def is_on_offset(self, dt) -> bool: return True @@ -804,35 +767,6 @@ cdef class _Tick(ABCTick): def is_anchored(self) -> bool: return False - # -------------------------------------------------------------------- - # Comparison and Arithmetic Methods - - def __eq__(self, other): - if isinstance(other, str): - try: - # GH#23524 if to_offset fails, we are dealing with an - # incomparable type so == is False and != is True - other = to_offset(other) - except ValueError: - # e.g. "infer" - return False - return self.delta == other - - def __ne__(self, other): - return not (self == other) - - def __le__(self, other): - return self.delta.__le__(other) - - def __lt__(self, other): - return self.delta.__lt__(other) - - def __ge__(self, other): - return self.delta.__ge__(other) - - def __gt__(self, other): - return self.delta.__gt__(other) - def __truediv__(self, other): if not isinstance(self, _Tick): # cython semantics mean the args are sometimes swapped @@ -841,24 +775,17 @@ cdef class _Tick(ABCTick): result = self.delta.__truediv__(other) return _wrap_timedelta_result(result) - # -------------------------------------------------------------------- - # Pickle Methods - def __reduce__(self): return (type(self), (self.n,)) def __setstate__(self, state): - self.n = state["n"] - self.normalize = False + object.__setattr__(self, "n", state["n"]) -class BusinessMixin(BaseOffset): +class BusinessMixin: """ Mixin to business types to provide related functions. """ - def __init__(self, n=1, normalize=False, offset=timedelta(0)): - BaseOffset.__init__(self, n, normalize) - object.__setattr__(self, "_offset", offset) @property def offset(self): @@ -882,11 +809,7 @@ class BusinessMixin(BaseOffset): class BusinessHourMixin(BusinessMixin): _adjust_dst = False - def __init__( - self, n=1, normalize=False, start="09:00", end="17:00", offset=timedelta(0) - ): - BusinessMixin.__init__(self, n, normalize, offset) - + def __init__(self, start="09:00", end="17:00", offset=timedelta(0)): # must be validated here to equality check if np.ndim(start) == 0: # i.e. not is_list_like @@ -930,6 +853,7 @@ class BusinessHourMixin(BusinessMixin): object.__setattr__(self, "start", start) object.__setattr__(self, "end", end) + object.__setattr__(self, "_offset", offset) def _repr_attrs(self) -> str: out = super()._repr_attrs() @@ -992,16 +916,10 @@ class CustomMixin: object.__setattr__(self, "calendar", calendar) -class WeekOfMonthMixin(BaseOffset): +class WeekOfMonthMixin: """ Mixin for methods common to WeekOfMonth and LastWeekOfMonth. """ - def __init__(self, n=1, normalize=False, weekday=0): - BaseOffset.__init__(self, n, normalize) - object.__setattr__(self, "weekday", weekday) - - if weekday < 0 or weekday > 6: - raise ValueError(f"Day must be 0<=day<=6, got {weekday}") @apply_wraps def apply(self, other): @@ -1022,14 +940,6 @@ class WeekOfMonthMixin(BaseOffset): return False return dt.day == self._get_offset_day(dt) - @property - def rule_code(self) -> str: - weekday = int_to_weekday.get(self.weekday, "") - if self.week == -1: - # LastWeekOfMonth - return f"{self._prefix}-{weekday}" - return f"{self._prefix}-{self.week + 1}{weekday}" - # ---------------------------------------------------------------------- # RelativeDelta Arithmetic diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 884578df3e00b..a209f71dd0676 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -5,8 +5,6 @@ import locale import calendar import re -from cpython cimport datetime - from _thread import allocate_lock as _thread_allocate_lock import pytz @@ -14,6 +12,8 @@ import pytz import numpy as np from numpy cimport int64_t +cimport cpython.datetime as datetime + from pandas._libs.tslibs.np_datetime cimport ( check_dts_bounds, dtstruct_to_dt64, npy_datetimestruct) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 7a3af169a960e..03419a6267983 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -26,7 +26,7 @@ from pandas._libs.tslibs.base cimport ABCTimedelta, ABCTimestamp, is_tick_object from pandas._libs.tslibs.ccalendar cimport DAY_NANOS from pandas._libs.tslibs.np_datetime cimport ( - cmp_scalar, td64_to_tdstruct, pandas_timedeltastruct) + cmp_scalar, reverse_ops, td64_to_tdstruct, pandas_timedeltastruct) from pandas._libs.tslibs.nattype cimport ( checknull_with_nat, diff --git a/pandas/_libs/tslibs/timestamps.pxd b/pandas/_libs/tslibs/timestamps.pxd index 88d21b19e1e37..90f50e3af503c 100644 --- a/pandas/_libs/tslibs/timestamps.pxd +++ b/pandas/_libs/tslibs/timestamps.pxd @@ -15,7 +15,7 @@ cdef class _Timestamp(ABCTimestamp): cdef readonly: int64_t value, nanosecond object freq - + list _date_attributes cpdef bint _get_start_end_field(self, str field) cpdef _get_date_name_field(self, object field, object locale) cdef int64_t _maybe_convert_value_to_local(self) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 4f8b85240c79f..ab8f9b6c30eb1 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -25,7 +25,7 @@ from pandas._libs.tslibs.util cimport ( is_timedelta64_object, is_array, ) -from pandas._libs.tslibs.base cimport ABCTimedelta, ABCTimestamp, is_tick_object +from pandas._libs.tslibs.base cimport ABCTimestamp, is_tick_object from pandas._libs.tslibs cimport ccalendar @@ -355,10 +355,10 @@ cdef class _Timestamp(ABCTimestamp): elif PyDelta_Check(other): # logic copied from delta_to_nanoseconds to prevent circular import - if isinstance(other, ABCTimedelta): + if hasattr(other, 'delta'): # pd.Timedelta nanos = other.value - else: + elif PyDelta_Check(other): nanos = (other.days * 24 * 60 * 60 * 1000000 + other.seconds * 1000000 + other.microseconds) * 1000 @@ -387,10 +387,6 @@ cdef class _Timestamp(ABCTimestamp): dtype=object, ) - elif not isinstance(self, _Timestamp): - # cython semantics, args have been switched and this is __radd__ - return other.__add__(self) - return NotImplemented def __sub__(self, other): @@ -1055,7 +1051,7 @@ timedelta}, default 'raise' return Period(self, freq=freq) @property - def dayofweek(self) -> int: + def dayofweek(self): """ Return day of the week. """ @@ -1096,7 +1092,7 @@ timedelta}, default 'raise' return self._get_date_name_field('month_name', locale) @property - def dayofyear(self) -> int: + def dayofyear(self): """ Return the day of the year. """ @@ -1119,7 +1115,7 @@ timedelta}, default 'raise' return ((self.month - 1) // 3) + 1 @property - def days_in_month(self) -> int: + def days_in_month(self): """ Return the number of days in the month. """ @@ -1432,7 +1428,16 @@ default 'raise' return base1 + base2 - def to_julian_date(self) -> np.float64: + def _has_time_component(self) -> bool: + """ + Returns if the Timestamp has a time component + in addition to the date part + """ + return (self.time() != _zero_time + or self.tzinfo is not None + or self.nanosecond != 0) + + def to_julian_date(self): """ Convert TimeStamp to a Julian Date. 0 Julian date is noon January 1, 4713 BC. @@ -1469,6 +1474,11 @@ default 'raise' np.array([self.value], dtype='i8'), tz=self.tz)[0] return Timestamp(normalized_value).tz_localize(self.tz) + def __radd__(self, other): + # __radd__ on cython extension types like _Timestamp is not used, so + # define it here instead + return self + other + # Add the min and max fields at the class level cdef int64_t _NS_UPPER_BOUND = np.iinfo(np.int64).max diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 2a01ab3802e62..737c130161246 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -64,10 +64,6 @@ def _cat_compare_op(op): @unpack_zerodim_and_defer(opname) def func(self, other): - if is_list_like(other) and len(other) != len(self): - # TODO: Could this fail if the categories are listlike objects? - raise ValueError("Lengths must match.") - if not self.ordered: if opname in ["__lt__", "__gt__", "__le__", "__ge__"]: raise TypeError( diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 145654805cc6b..bbaa64dae3eea 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -84,9 +84,6 @@ def _validate_comparison_value(self, other): elif not is_list_like(other): raise InvalidComparison(other) - elif len(other) != len(self): - raise ValueError("Lengths must match") - else: try: other = self._validate_listlike(other, opname, allow_object=True) @@ -1237,9 +1234,6 @@ def _add_timedelta_arraylike(self, other): """ # overridden by PeriodArray - if len(self) != len(other): - raise ValueError("cannot add indices of unequal length") - if isinstance(other, np.ndarray): # ndarray[timedelta64]; wrap in TimedeltaIndex for op from pandas.core.arrays import TimedeltaArray diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 90088c370697e..897c53c5c75d1 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -654,9 +654,6 @@ def _assert_tzawareness_compat(self, other): def _sub_datetime_arraylike(self, other): """subtract DatetimeArray/Index or ndarray[datetime64]""" - if len(self) != len(other): - raise ValueError("cannot add indices of unequal length") - if isinstance(other, np.ndarray): assert is_datetime64_dtype(other) other = type(self)(other) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 5a90ea4a36a21..3ca7e028913c6 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -517,8 +517,6 @@ def cmp_method(self, other): raise NotImplementedError( "can only perform ops with 1-d structures" ) - if len(self) != len(other): - raise ValueError("Lengths must match to compare") if other is libmissing.NA: # numpy does not handle pd.NA well as "other" scalar (it returns @@ -622,8 +620,6 @@ def integer_arithmetic_method(self, other): raise NotImplementedError( "can only perform ops with 1-d structures" ) - if len(self) != len(other): - raise ValueError("Lengths must match") if not (is_float_dtype(other) or is_integer_dtype(other)): raise TypeError("can only perform ops with numeric values") diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 3978161829481..5c700a53ceac4 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -746,7 +746,8 @@ def _check_timedeltalike_freq_compat(self, other): IncompatibleFrequency """ assert isinstance(self.freq, Tick) # checked by calling function - base_nanos = self.freq.base.nanos + own_offset = frequencies.to_offset(self.freq.rule_code) + base_nanos = delta_to_nanoseconds(own_offset) if isinstance(other, (timedelta, np.timedelta64, Tick)): nanos = delta_to_nanoseconds(other) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index bc215eec4c345..4b84b3ea8b46a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -471,10 +471,6 @@ def __mul__(self, other): if not hasattr(other, "dtype"): # list, tuple other = np.array(other) - if len(other) != len(self) and not is_timedelta64_dtype(other.dtype): - # Exclude timedelta64 here so we correctly raise TypeError - # for that instead of ValueError - raise ValueError("Cannot multiply with unequal lengths") if is_object_dtype(other.dtype): # this multiplication will succeed only if all elements of other @@ -518,10 +514,7 @@ def __truediv__(self, other): # e.g. list, tuple other = np.array(other) - if len(other) != len(self): - raise ValueError("Cannot divide vectors with unequal lengths") - - elif is_timedelta64_dtype(other.dtype): + if is_timedelta64_dtype(other.dtype): # let numpy handle it return self._data / other @@ -571,10 +564,7 @@ def __rtruediv__(self, other): # e.g. list, tuple other = np.array(other) - if len(other) != len(self): - raise ValueError("Cannot divide vectors with unequal lengths") - - elif is_timedelta64_dtype(other.dtype): + if is_timedelta64_dtype(other.dtype): # let numpy handle it return other / self._data @@ -623,10 +613,8 @@ def __floordiv__(self, other): if not hasattr(other, "dtype"): # list, tuple other = np.array(other) - if len(other) != len(self): - raise ValueError("Cannot divide with unequal lengths") - elif is_timedelta64_dtype(other.dtype): + if is_timedelta64_dtype(other.dtype): other = type(self)(other) # numpy timedelta64 does not natively support floordiv, so operate @@ -678,10 +666,7 @@ def __rfloordiv__(self, other): # list, tuple other = np.array(other) - if len(other) != len(self): - raise ValueError("Cannot divide with unequal lengths") - - elif is_timedelta64_dtype(other.dtype): + if is_timedelta64_dtype(other.dtype): other = type(self)(other) # numpy timedelta64 does not natively support floordiv, so operate diff --git a/pandas/core/base.py b/pandas/core/base.py index a8a736b6aafdf..309b6e0ad5e1a 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1257,7 +1257,8 @@ def value_counts( def unique(self): values = self._values - if not isinstance(values, np.ndarray): + if hasattr(values, "unique"): + result = values.unique() if self.dtype.kind in ["m", "M"] and isinstance(self, ABCSeries): # GH#31182 Series._values returns EA, unpack for backward-compat diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 55b9c28c74cb2..b9b403ffdc69a 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -36,6 +36,7 @@ class providing the base-class of operations. from pandas._libs import Timestamp import pandas._libs.groupby as libgroupby from pandas._typing import FrameOrSeries, Scalar +from pandas.compat import set_function_name from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, Substitution, cache_readonly, doc @@ -191,24 +192,6 @@ class providing the base-class of operations. """, ) -_groupby_agg_method_template = """ -Compute {fname} of group values. - -Parameters ----------- -numeric_only : bool, default {no} - Include only float, int, boolean columns. If None, will attempt to use - everything, then use only numeric data. -min_count : int, default {mc} - The required number of valid values to perform the operation. If fewer - than ``min_count`` non-NA values are present the result will be NA. - -Returns -------- -Series or DataFrame - Computed {fname} of values within each group. -""" - _pipe_template = """ Apply a function `func` with arguments to this %(klass)s object and return the function's result. @@ -962,37 +945,6 @@ def _wrap_transformed_output(self, output: Mapping[base.OutputKey, np.ndarray]): def _wrap_applied_output(self, keys, values, not_indexed_same: bool = False): raise AbstractMethodError(self) - def _agg_general( - self, - numeric_only: bool = True, - min_count: int = -1, - *, - alias: str, - npfunc: Callable, - ): - self._set_group_selection() - - # try a cython aggregation if we can - try: - return self._cython_agg_general( - how=alias, alt=npfunc, numeric_only=numeric_only, min_count=min_count, - ) - except DataError: - pass - except NotImplementedError as err: - if "function is not implemented for this dtype" in str( - err - ) or "category dtype not supported" in str(err): - # raised in _get_cython_function, in some cases can - # be trimmed by implementing cython funcs for more dtypes - pass - else: - raise - - # apply a non-cython aggregation - result = self.aggregate(lambda x: npfunc(x, axis=self.axis)) - return result - def _cython_agg_general( self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 ): @@ -1486,79 +1438,105 @@ def size(self): result = self._obj_1d_constructor(result) return self._reindex_output(result, fill_value=0) - @doc(_groupby_agg_method_template, fname="sum", no=True, mc=0) - def sum(self, numeric_only: bool = True, min_count: int = 0): - return self._agg_general( - numeric_only=numeric_only, min_count=min_count, alias="add", npfunc=np.sum - ) - - @doc(_groupby_agg_method_template, fname="prod", no=True, mc=0) - def prod(self, numeric_only: bool = True, min_count: int = 0): - return self._agg_general( - numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod - ) - - @doc(_groupby_agg_method_template, fname="min", no=False, mc=-1) - def min(self, numeric_only: bool = False, min_count: int = -1): - return self._agg_general( - numeric_only=numeric_only, min_count=min_count, alias="min", npfunc=np.min - ) + @classmethod + def _add_numeric_operations(cls): + """ + Add numeric operations to the GroupBy generically. + """ - @doc(_groupby_agg_method_template, fname="max", no=False, mc=-1) - def max(self, numeric_only: bool = False, min_count: int = -1): - return self._agg_general( - numeric_only=numeric_only, min_count=min_count, alias="max", npfunc=np.max - ) + def groupby_function( + name: str, + alias: str, + npfunc, + numeric_only: bool = True, + min_count: int = -1, + ): - @doc(_groupby_agg_method_template, fname="first", no=False, mc=-1) - def first(self, numeric_only: bool = False, min_count: int = -1): - def first_compat(obj: FrameOrSeries, axis: int = 0): - def first(x: Series): - """Helper function for first item that isn't NA. - """ - x = x.array[notna(x.array)] + _local_template = """ + Compute %(f)s of group values. + + Parameters + ---------- + numeric_only : bool, default %(no)s + Include only float, int, boolean columns. If None, will attempt to use + everything, then use only numeric data. + min_count : int, default %(mc)s + The required number of valid values to perform the operation. If fewer + than ``min_count`` non-NA values are present the result will be NA. + + Returns + ------- + Series or DataFrame + Computed %(f)s of values within each group. + """ + + @Substitution(name="groupby", f=name, no=numeric_only, mc=min_count) + @Appender(_common_see_also) + @Appender(_local_template) + def func(self, numeric_only=numeric_only, min_count=min_count): + self._set_group_selection() + + # try a cython aggregation if we can + try: + return self._cython_agg_general( + how=alias, + alt=npfunc, + numeric_only=numeric_only, + min_count=min_count, + ) + except DataError: + pass + except NotImplementedError as err: + if "function is not implemented for this dtype" in str( + err + ) or "category dtype not supported" in str(err): + # raised in _get_cython_function, in some cases can + # be trimmed by implementing cython funcs for more dtypes + pass + else: + raise + + # apply a non-cython aggregation + result = self.aggregate(lambda x: npfunc(x, axis=self.axis)) + return result + + set_function_name(func, name, cls) + + return func + + def first_compat(x, axis=0): + def first(x): + x = x.to_numpy() + + x = x[notna(x)] if len(x) == 0: return np.nan return x[0] - if isinstance(obj, DataFrame): - return obj.apply(first, axis=axis) - elif isinstance(obj, Series): - return first(obj) + if isinstance(x, DataFrame): + return x.apply(first, axis=axis) else: - raise TypeError(type(obj)) - - return self._agg_general( - numeric_only=numeric_only, - min_count=min_count, - alias="first", - npfunc=first_compat, - ) + return first(x) - @doc(_groupby_agg_method_template, fname="last", no=False, mc=-1) - def last(self, numeric_only: bool = False, min_count: int = -1): - def last_compat(obj: FrameOrSeries, axis: int = 0): - def last(x: Series): - """Helper function for last item that isn't NA. - """ - x = x.array[notna(x.array)] + def last_compat(x, axis=0): + def last(x): + x = x.to_numpy() + x = x[notna(x)] if len(x) == 0: return np.nan return x[-1] - if isinstance(obj, DataFrame): - return obj.apply(last, axis=axis) - elif isinstance(obj, Series): - return last(obj) + if isinstance(x, DataFrame): + return x.apply(last, axis=axis) else: - raise TypeError(type(obj)) + return last(x) - return self._agg_general( - numeric_only=numeric_only, - min_count=min_count, - alias="last", - npfunc=last_compat, - ) + cls.sum = groupby_function("sum", "add", np.sum, min_count=0) + cls.prod = groupby_function("prod", "prod", np.prod, min_count=0) + cls.min = groupby_function("min", "min", np.min, numeric_only=False) + cls.max = groupby_function("max", "max", np.max, numeric_only=False) + cls.first = groupby_function("first", "first", first_compat, numeric_only=False) + cls.last = groupby_function("last", "last", last_compat, numeric_only=False) @Substitution(name="groupby") @Appender(_common_see_also) @@ -2658,6 +2636,9 @@ def _reindex_output( return output.reset_index(drop=True) +GroupBy._add_numeric_operations() + + @doc(GroupBy) def get_groupby( obj: NDFrame, diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 74db87f46c5e2..597a160995eef 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -952,9 +952,7 @@ def _chop(self, sdata, slice_obj: slice) -> NDFrame: class SeriesSplitter(DataSplitter): def _chop(self, sdata: Series, slice_obj: slice) -> Series: - # fastpath equivalent to `sdata.iloc[slice_obj]` - mgr = sdata._mgr.get_slice(slice_obj) - return type(sdata)(mgr, name=sdata.name, fastpath=True) + return sdata.iloc[slice_obj] class FrameSplitter(DataSplitter): @@ -964,13 +962,10 @@ def fast_apply(self, f: F, sdata: FrameOrSeries, names): return libreduction.apply_frame_axis0(sdata, f, names, starts, ends) def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: - # Fastpath equivalent to: - # if self.axis == 0: - # return sdata.iloc[slice_obj] - # else: - # return sdata.iloc[:, slice_obj] - mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis) - return type(sdata)(mgr) + if self.axis == 0: + return sdata.iloc[slice_obj] + else: + return sdata.iloc[:, slice_obj] def get_splitter( diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d9828707b6164..b8a9827b5effd 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -54,6 +54,7 @@ ABCCategorical, ABCDataFrame, ABCDatetimeIndex, + ABCIntervalIndex, ABCMultiIndex, ABCPandasArray, ABCPeriodIndex, @@ -74,6 +75,7 @@ from pandas.core.indexes.frozen import FrozenList import pandas.core.missing as missing from pandas.core.ops import get_op_result_name +from pandas.core.ops.common import unpack_zerodim_and_defer from pandas.core.ops.invalid import make_invalid_op from pandas.core.sorting import ensure_key_mapped from pandas.core.strings import StringMethods @@ -107,10 +109,8 @@ def _make_comparison_op(op, cls): + @unpack_zerodim_and_defer(op.__name__) def cmp_method(self, other): - if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)): - if other.ndim > 0 and len(self) != len(other): - raise ValueError("Lengths must match to compare") if is_object_dtype(self.dtype) and isinstance(other, ABCCategorical): left = type(other)(self._values, dtype=other.dtype) @@ -4101,13 +4101,37 @@ def append(self, other): return self._concat(to_concat, name) def _concat(self, to_concat, name): + + typs = _concat.get_dtype_kinds(to_concat) + + if len(typs) == 1: + return self._concat_same_dtype(to_concat, name=name) + return Index._concat_same_dtype(self, to_concat, name=name) + + def _concat_same_dtype(self, to_concat, name): """ - Concatenate multiple Index objects. + Concatenate to_concat which has the same class. """ + # must be overridden in specific classes + klasses = ( + ABCDatetimeIndex, + ABCTimedeltaIndex, + ABCPeriodIndex, + ExtensionArray, + ABCIntervalIndex, + ) + to_concat = [ + x.astype(object) if isinstance(x, klasses) else x for x in to_concat + ] + + self = to_concat[0] + attribs = self._get_attributes_dict() + attribs["name"] = name + to_concat = [x._values if isinstance(x, Index) else x for x in to_concat] - result = _concat.concat_compat(to_concat) - return Index(result, name=name) + res_values = np.concatenate(to_concat) + return Index(res_values, name=name) def putmask(self, mask, value): """ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 2a79c83de7ef2..25df4a0bee737 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -738,6 +738,13 @@ def insert(self, loc: int, item): def _concat(self, to_concat, name): # if calling index is category, don't check dtype of others + return CategoricalIndex._concat_same_dtype(self, to_concat, name) + + def _concat_same_dtype(self, to_concat, name): + """ + Concatenate to_concat which has the same class + ValueError if other is not in the categories + """ codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat]) result = self._create_from_codes(codes, name=name) # if name is None, _create_from_codes sets self.name diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index badf6502aa723..6e965ecea7cd8 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -9,7 +9,11 @@ from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly, doc -from pandas.core.dtypes.common import is_dtype_equal, is_object_dtype +from pandas.core.dtypes.common import ( + ensure_platform_int, + is_dtype_equal, + is_object_dtype, +) from pandas.core.dtypes.generic import ABCSeries from pandas.core.arrays import ExtensionArray @@ -219,14 +223,29 @@ def __getitem__(self, key): deprecate_ndim_indexing(result) return result + def __iter__(self): + return self._data.__iter__() + # --------------------------------------------------------------------- + def __array__(self, dtype=None) -> np.ndarray: + return np.asarray(self._data, dtype=dtype) + def _get_engine_target(self) -> np.ndarray: # NB: _values_for_argsort happens to match the desired engine targets # for all of our existing EA-backed indexes, but in general # cannot be relied upon to exist. return self._data._values_for_argsort() + @doc(Index.dropna) + def dropna(self, how="any"): + if how not in ("any", "all"): + raise ValueError(f"invalid how option: {how}") + + if self.hasnans: + return self._shallow_copy(self._data[~self._isnan]) + return self._shallow_copy() + def repeat(self, repeats, axis=None): nv.validate_repeat(tuple(), dict(axis=axis)) result = self._data.repeat(repeats, axis=axis) @@ -236,6 +255,31 @@ def insert(self, loc: int, item): # ExtensionIndex subclasses must override Index.insert raise AbstractMethodError(self) + def _concat_same_dtype(self, to_concat, name): + arr = type(self._data)._concat_same_type(to_concat) + return type(self)._simple_new(arr, name=name) + + @doc(Index.take) + def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): + nv.validate_take(tuple(), kwargs) + indices = ensure_platform_int(indices) + + taken = self._assert_take_fillable( + self._data, + indices, + allow_fill=allow_fill, + fill_value=fill_value, + na_value=self._na_value, + ) + return type(self)(taken, name=self.name) + + def unique(self, level=None): + if level is not None: + self._validate_index_level(level) + + result = self._data.unique() + return self._shallow_copy(result) + def _get_unique_index(self, dropna=False): if self.is_unique and not dropna: return self diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 5020a25c88ff4..06040166d0f9e 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -147,6 +147,10 @@ def _assert_safe_casting(cls, data, subarr): """ pass + def _concat_same_dtype(self, indexes, name): + result = type(indexes[0])(np.concatenate([x._values for x in indexes])) + return result.rename(name) + @property def is_all_dates(self) -> bool: """ diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index be243d7014233..b0b85f69396ba 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -5,7 +5,7 @@ from pandas._libs import index as libindex from pandas._libs.lib import no_default -from pandas._libs.tslibs import Period, resolution +from pandas._libs.tslibs import Period, frequencies as libfrequencies, resolution from pandas._libs.tslibs.parsing import parse_time_string from pandas._typing import DtypeObj, Label from pandas.util._decorators import Appender, cache_readonly, doc @@ -44,6 +44,7 @@ from pandas.core.ops import get_op_result_name from pandas.core.tools.datetimes import DateParseError +from pandas.tseries import frequencies from pandas.tseries.offsets import DateOffset, Tick _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -277,12 +278,15 @@ def _maybe_convert_timedelta(self, other): of self.freq. Note IncompatibleFrequency subclasses ValueError. """ if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)): - if isinstance(self.freq, Tick): + offset = frequencies.to_offset(self.freq.rule_code) + if isinstance(offset, Tick): # _check_timedeltalike_freq_compat will raise if incompatible delta = self._data._check_timedeltalike_freq_compat(other) return delta elif isinstance(other, DateOffset): - if other.base == self.freq.base: + freqstr = other.rule_code + base = libfrequencies.get_base_alias(freqstr) + if base == self.freq.rule_code: return other.n raise raise_on_incompatible(self, other) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 49a0f0fb7ae92..c34b8965ca36a 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -627,18 +627,14 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False) return super().join(other, how, level, return_indexers, sort) - def _concat(self, indexes, name): + def _concat_same_dtype(self, indexes, name): """ - Overriding parent method for the case of all RangeIndex instances. - - When all members of "indexes" are of type RangeIndex: result will be - RangeIndex if possible, Int64Index otherwise. E.g.: + Concatenates multiple RangeIndex instances. All members of "indexes" must + be of type RangeIndex; result will be RangeIndex if possible, Int64Index + otherwise. E.g.: indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6) indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5]) """ - if not all(isinstance(x, RangeIndex) for x in indexes): - return super()._concat(indexes, name) - start = step = next_ = None # Filter the empty indexes diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c052c6c9d7d1d..3e2b5bdccd5d1 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -251,7 +251,7 @@ def make_block_same_class(self, values, placement=None, ndim=None): placement = self.mgr_locs if ndim is None: ndim = self.ndim - return type(self)(values, placement=placement, ndim=ndim) + return make_block(values, placement=placement, ndim=ndim, klass=type(self)) def __repr__(self) -> str: # don't want to print out all of the items here diff --git a/pandas/core/ops/common.py b/pandas/core/ops/common.py index 515a0a5198d74..1fb9398083884 100644 --- a/pandas/core/ops/common.py +++ b/pandas/core/ops/common.py @@ -1,10 +1,13 @@ """ Boilerplate functions used in defining binary operations. """ +from collections import UserDict from functools import wraps from typing import Callable -from pandas._libs.lib import item_from_zerodim +import numpy as np + +from pandas._libs.lib import is_list_like, item_from_zerodim from pandas._typing import F from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries @@ -62,6 +65,25 @@ def new_method(self, other): other = item_from_zerodim(other) + if isinstance(self, (ABCSeries, ABCDataFrame)) and isinstance( + other, (ABCSeries, ABCDataFrame) + ): + # we dont require length matches + pass + elif is_list_like(other, allow_sets=False) and not isinstance( + other, (dict, UserDict) + ): + if len(other) != len(self): + if len(other) == 1 and not hasattr(other, "dtype"): + # i.e. unpack scalar list, but leave e.g. Categorical, + # for which the scalar behavior doesnt match the + # array behavior + other = other[0] + else: + raise ValueError( + "Lengths must match", self.shape, np.shape(other), type(other) + ) + return method(self, other) return new_method diff --git a/pandas/core/strings.py b/pandas/core/strings.py index b27ad744dbdba..bb62cd6b34722 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2975,7 +2975,7 @@ def encode(self, encoding, errors="strict"): _shared_docs[ "str_strip" ] = r""" - Remove %(position)s characters. + Remove leading and trailing characters. Strip whitespaces (including newlines) or a set of specified characters from each string in the Series/Index from %(side)s. @@ -3038,29 +3038,20 @@ def encode(self, encoding, errors="strict"): """ @Appender( - _shared_docs["str_strip"] - % dict( - side="left and right sides", method="strip", position="leading and trailing" - ) + _shared_docs["str_strip"] % dict(side="left and right sides", method="strip") ) @forbid_nonstring_types(["bytes"]) def strip(self, to_strip=None): result = str_strip(self._parent, to_strip, side="both") return self._wrap_result(result) - @Appender( - _shared_docs["str_strip"] - % dict(side="left side", method="lstrip", position="leading") - ) + @Appender(_shared_docs["str_strip"] % dict(side="left side", method="lstrip")) @forbid_nonstring_types(["bytes"]) def lstrip(self, to_strip=None): result = str_strip(self._parent, to_strip, side="left") return self._wrap_result(result) - @Appender( - _shared_docs["str_strip"] - % dict(side="right side", method="rstrip", position="trailing") - ) + @Appender(_shared_docs["str_strip"] % dict(side="right side", method="rstrip")) @forbid_nonstring_types(["bytes"]) def rstrip(self, to_strip=None): result = str_strip(self._parent, to_strip, side="right") diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index c615e18af68e6..166ab13344816 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -247,22 +247,8 @@ def __repr__(self) -> str: return f"{self._window_type} [{attrs}]" def __iter__(self): - window = self._get_window(win_type=None) - blocks, obj = self._create_blocks() - index = self._get_window_indexer(window=window) - - start, end = index.get_window_bounds( - num_values=len(obj), - min_periods=self.min_periods, - center=self.center, - closed=self.closed, - ) - # From get_window_bounds, those two should be equal in length of array - assert len(start) == len(end) - - for s, e in zip(start, end): - result = obj.iloc[slice(s, e)] - yield result + url = "https://github.com/pandas-dev/pandas/issues/11704" + raise NotImplementedError(f"See issue #11704 {url}") def _prep_values(self, values: Optional[np.ndarray] = None) -> np.ndarray: """Convert input to numpy arrays for Cython routines""" @@ -1316,8 +1302,6 @@ def apply( use_numba_cache=engine == "numba", raw=raw, original_func=func, - args=args, - kwargs=kwargs, ) def _generate_cython_apply_func(self, args, kwargs, raw, offset, func): diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index 132cbdb160bec..c6d159d3d016b 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -12,7 +12,7 @@ from pandas._libs import lib, tslibs from pandas._libs.tslibs import resolution -from pandas._libs.tslibs.frequencies import FreqGroup, get_freq_code +from pandas._libs.tslibs.frequencies import FreqGroup, get_freq from pandas.core.dtypes.common import ( is_datetime64_ns_dtype, @@ -887,7 +887,7 @@ def _annual_finder(vmin, vmax, freq): def get_finder(freq): if isinstance(freq, str): - freq = get_freq_code(freq)[0] + freq = get_freq(freq) fgroup = resolution.get_freq_group(freq) if fgroup == FreqGroup.FR_ANN: @@ -932,7 +932,7 @@ def __init__( plot_obj=None, ): if isinstance(freq, str): - freq = get_freq_code(freq)[0] + freq = get_freq(freq) self.freq = freq self.base = base (self.quarter, self.month, self.day) = (quarter, month, day) @@ -1011,7 +1011,7 @@ class TimeSeries_DateFormatter(Formatter): def __init__(self, freq, minor_locator=False, dynamic_mode=True, plot_obj=None): if isinstance(freq, str): - freq = get_freq_code(freq)[0] + freq = get_freq(freq) self.format = None self.freq = freq self.locs = [] diff --git a/pandas/plotting/_matplotlib/timeseries.py b/pandas/plotting/_matplotlib/timeseries.py index f6e120e2f91e7..3abce690cbe6b 100644 --- a/pandas/plotting/_matplotlib/timeseries.py +++ b/pandas/plotting/_matplotlib/timeseries.py @@ -7,7 +7,7 @@ from pandas._libs.tslibs.frequencies import ( FreqGroup, get_base_alias, - get_freq_code, + get_freq, is_subperiod, is_superperiod, ) @@ -209,9 +209,9 @@ def _use_dynamic_x(ax, data): if freq is None: return False - # FIXME: hack this for 0.10.1, creating more technical debt...sigh + # hack this for 0.10.1, creating more technical debt...sigh if isinstance(data.index, ABCDatetimeIndex): - base = get_freq_code(freq)[0] + base = get_freq(freq) x = data.index if base <= FreqGroup.FR_DAY: return x[:1].is_normalized diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 8c480faa4ee81..0fb3cb1025639 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -2206,7 +2206,7 @@ def test_sub_dti_dti(self): # different length raises ValueError dti1 = date_range("20130101", periods=3) dti2 = date_range("20130101", periods=4) - msg = "cannot add indices of unequal length" + msg = "Lengths must match" with pytest.raises(ValueError, match=msg): dti1 - dti2 diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index a37339c66bf6e..b6456a2141c06 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -649,7 +649,7 @@ def test_mul_datelike_raises(self, numeric_idx): def test_mul_size_mismatch_raises(self, numeric_idx): idx = numeric_idx - msg = "operands could not be broadcast together" + msg = "Lengths must match" with pytest.raises(ValueError, match=msg): idx * idx[0:3] with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 65e3c6a07d4f3..180364420b021 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -451,7 +451,7 @@ def test_addition_ops(self): tm.assert_index_equal(result, expected) # unequal length - msg = "cannot add indices of unequal length" + msg = "Lengths must match" with pytest.raises(ValueError, match=msg): tdi + dti[0:1] with pytest.raises(ValueError, match=msg): @@ -1723,7 +1723,7 @@ def test_tdarr_div_length_mismatch(self, box_with_array): mismatched = [1, 2, 3, 4] rng = tm.box_expected(rng, box_with_array) - msg = "Cannot divide vectors|Unable to coerce to Series" + msg = "Lengths must match|Unable to coerce to Series" for obj in [mismatched, mismatched[:2]]: # one shorter, one longer for other in [obj, np.array(obj), pd.Index(obj)]: @@ -1905,12 +1905,14 @@ def test_td64arr_mul_tdscalar_invalid(self, box_with_array, scalar_td): def test_td64arr_mul_too_short_raises(self, box_with_array): idx = TimedeltaIndex(np.arange(5, dtype="int64")) idx = tm.box_expected(idx, box_with_array) - msg = ( - "cannot use operands with types dtype|" - "Cannot multiply with unequal lengths|" - "Unable to coerce to Series" + msg = "|".join( + [ + "Lengths must match", # <- EA, Index, Series + "cannot use operands with types dtype", # <- DataFrame + "Unable to coerce to Series", # <- Series + ] ) - with pytest.raises(TypeError, match=msg): + with pytest.raises((ValueError, TypeError), match=msg): # length check before dtype check idx * idx[:3] with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/arrays/boolean/test_logical.py b/pandas/tests/arrays/boolean/test_logical.py index bf4775bbd7b32..a61746d46daeb 100644 --- a/pandas/tests/arrays/boolean/test_logical.py +++ b/pandas/tests/arrays/boolean/test_logical.py @@ -46,7 +46,7 @@ def test_empty_ok(self, all_logical_operators): def test_logical_length_mismatch_raises(self, all_logical_operators): op_name = all_logical_operators a = pd.array([True, False, None], dtype="boolean") - msg = "Lengths must match to compare" + msg = "Lengths must match" with pytest.raises(ValueError, match=msg): getattr(a, op_name)([True, False]) diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py index 18f1dac3c13b2..b7fdd8581101b 100644 --- a/pandas/tests/arrays/integer/test_arithmetic.py +++ b/pandas/tests/arrays/integer/test_arithmetic.py @@ -232,8 +232,9 @@ def test_error(self, data, all_arithmetic_operators): result = opa(pd.DataFrame({"A": s})) assert result is NotImplemented - msg = r"can only perform ops with 1-d structures" - with pytest.raises(NotImplementedError, match=msg): + # msg = r"can only perform ops with 1-d structures" + msg = "Lengths must match" + with pytest.raises(ValueError, match=msg): opa(np.arange(len(s)).reshape(-1, len(s))) @pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)]) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 6f9a1a5be4c43..2e047b5c4eb60 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -98,7 +98,7 @@ def test_add_2d(): a + b s = pd.Series(a) - with pytest.raises(ValueError, match="3 != 1"): + with pytest.raises(ValueError, match="Lengths must match"): s + b diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index d0bf5bb41bb2c..61d78034f0747 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -168,8 +168,7 @@ def test_concat_same_type(self): arr = self.array_cls(idx) result = arr._concat_same_type([arr[:-1], arr[1:], arr]) - arr2 = arr.astype(object) - expected = self.index_cls(np.concatenate([arr2[:-1], arr2[1:], arr2]), None) + expected = idx._concat_same_dtype([idx[:-1], idx[1:], idx], None) tm.assert_index_equal(self.index_cls(result), expected) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 8a84090ea6e94..9765c77c6b60c 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -136,7 +136,7 @@ def test_append(self): tm.assert_index_equal(result, expected, exact=True) def test_append_to_another(self): - # hits Index._concat + # hits Index._concat_same_dtype fst = Index(["a", "b"]) snd = CategoricalIndex(["d", "e"]) result = fst.append(snd) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 997887cc18d61..fac9eb1c34dbf 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -579,7 +579,7 @@ def test_comparison(self): with pytest.raises(TypeError, match=msg): self.index > np.arange(2) - msg = "Lengths must match to compare" + msg = "Lengths must match" with pytest.raises(ValueError, match=msg): self.index > np.arange(3) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 466b491eb7a2c..9f235dcdbb295 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2423,16 +2423,6 @@ def test_index_repr_bool_nan(self): out2 = "Index([True, False, nan], dtype='object')" assert out2 == exp2 - @pytest.mark.filterwarnings("ignore:elementwise comparison failed:FutureWarning") - def test_index_with_tuple_bool(self): - # GH34123 - # TODO: remove tupleize_cols=False once correct behaviour is restored - # TODO: also this op right now produces FutureWarning from numpy - idx = Index([("a", "b"), ("b", "c"), ("c", "a")], tupleize_cols=False) - result = idx == ("c", "a",) - expected = np.array([False, False, True]) - tm.assert_numpy_array_equal(result, expected) - class TestIndexUtils: @pytest.mark.parametrize( diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index ed0045bcab989..b038ee1aee106 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -52,7 +52,7 @@ def test_overflow_offset_raises(self): # used to crash, so check for proper overflow exception stamp = Timestamp("2000/1/1") - offset_overflow = to_offset("D") * 100 ** 5 + offset_overflow = to_offset("D") * 100 ** 25 with pytest.raises(OverflowError, match=msg): stamp + offset_overflow diff --git a/pandas/tests/tseries/frequencies/test_freq_code.py b/pandas/tests/tseries/frequencies/test_freq_code.py index 273e03925dd36..be07f829dbae8 100644 --- a/pandas/tests/tseries/frequencies/test_freq_code.py +++ b/pandas/tests/tseries/frequencies/test_freq_code.py @@ -1,7 +1,12 @@ import pytest from pandas._libs.tslibs import frequencies as libfrequencies, resolution -from pandas._libs.tslibs.frequencies import FreqGroup, _period_code_map, get_freq_code +from pandas._libs.tslibs.frequencies import ( + FreqGroup, + _period_code_map, + get_freq, + get_freq_code, +) import pandas.tseries.offsets as offsets @@ -26,12 +31,12 @@ def period_code_item(request): ], ) def test_freq_code(freqstr, expected): - assert get_freq_code(freqstr)[0] == expected + assert get_freq(freqstr) == expected def test_freq_code_match(period_code_item): freqstr, code = period_code_item - assert get_freq_code(freqstr)[0] == code + assert get_freq(freqstr) == code @pytest.mark.parametrize( @@ -151,31 +156,31 @@ def test_cat(args): "freq_input,expected", [ # Frequency string. - ("A", (get_freq_code("A")[0], 1)), - ("3D", (get_freq_code("D")[0], 3)), - ("-2M", (get_freq_code("M")[0], -2)), + ("A", (get_freq("A"), 1)), + ("3D", (get_freq("D"), 3)), + ("-2M", (get_freq("M"), -2)), # Tuple. - (("D", 1), (get_freq_code("D")[0], 1)), - (("A", 3), (get_freq_code("A")[0], 3)), - (("M", -2), (get_freq_code("M")[0], -2)), + (("D", 1), (get_freq("D"), 1)), + (("A", 3), (get_freq("A"), 3)), + (("M", -2), (get_freq("M"), -2)), ((5, "T"), (FreqGroup.FR_MIN, 5)), # Numeric Tuple. ((1000, 1), (1000, 1)), # Offsets. - (offsets.Day(), (get_freq_code("D")[0], 1)), - (offsets.Day(3), (get_freq_code("D")[0], 3)), - (offsets.Day(-2), (get_freq_code("D")[0], -2)), - (offsets.MonthEnd(), (get_freq_code("M")[0], 1)), - (offsets.MonthEnd(3), (get_freq_code("M")[0], 3)), - (offsets.MonthEnd(-2), (get_freq_code("M")[0], -2)), - (offsets.Week(), (get_freq_code("W")[0], 1)), - (offsets.Week(3), (get_freq_code("W")[0], 3)), - (offsets.Week(-2), (get_freq_code("W")[0], -2)), + (offsets.Day(), (get_freq("D"), 1)), + (offsets.Day(3), (get_freq("D"), 3)), + (offsets.Day(-2), (get_freq("D"), -2)), + (offsets.MonthEnd(), (get_freq("M"), 1)), + (offsets.MonthEnd(3), (get_freq("M"), 3)), + (offsets.MonthEnd(-2), (get_freq("M"), -2)), + (offsets.Week(), (get_freq("W"), 1)), + (offsets.Week(3), (get_freq("W"), 3)), + (offsets.Week(-2), (get_freq("W"), -2)), (offsets.Hour(), (FreqGroup.FR_HR, 1)), # Monday is weekday=0. - (offsets.Week(weekday=1), (get_freq_code("W-TUE")[0], 1)), - (offsets.Week(3, weekday=0), (get_freq_code("W-MON")[0], 3)), - (offsets.Week(-2, weekday=4), (get_freq_code("W-FRI")[0], -2)), + (offsets.Week(weekday=1), (get_freq("W-TUE"), 1)), + (offsets.Week(3, weekday=0), (get_freq("W-MON"), 3)), + (offsets.Week(-2, weekday=4), (get_freq("W-FRI"), -2)), ], ) def test_get_freq_code(freq_input, expected): diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index f0dcef4dbc967..0a7eaa7b7be3e 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -756,9 +756,7 @@ def test_hash(self): assert hash(self.offset2) == hash(self.offset2) def test_call(self): - with tm.assert_produces_warning(FutureWarning): - # GH#34171 DateOffset.__call__ is deprecated - assert self.offset2(self.d) == datetime(2008, 1, 3) + assert self.offset2(self.d) == datetime(2008, 1, 3) def testRollback1(self): assert BDay(10).rollback(self.d) == self.d @@ -1042,15 +1040,13 @@ def test_hash(self, offset_name): assert offset == offset def test_call(self): - with tm.assert_produces_warning(FutureWarning): - # GH#34171 DateOffset.__call__ is deprecated - assert self.offset1(self.d) == datetime(2014, 7, 1, 11) - assert self.offset2(self.d) == datetime(2014, 7, 1, 13) - assert self.offset3(self.d) == datetime(2014, 6, 30, 17) - assert self.offset4(self.d) == datetime(2014, 6, 30, 14) - assert self.offset8(self.d) == datetime(2014, 7, 1, 11) - assert self.offset9(self.d) == datetime(2014, 7, 1, 22) - assert self.offset10(self.d) == datetime(2014, 7, 1, 1) + assert self.offset1(self.d) == datetime(2014, 7, 1, 11) + assert self.offset2(self.d) == datetime(2014, 7, 1, 13) + assert self.offset3(self.d) == datetime(2014, 6, 30, 17) + assert self.offset4(self.d) == datetime(2014, 6, 30, 14) + assert self.offset8(self.d) == datetime(2014, 7, 1, 11) + assert self.offset9(self.d) == datetime(2014, 7, 1, 22) + assert self.offset10(self.d) == datetime(2014, 7, 1, 1) def test_sub(self): # we have to override test_sub here because self.offset2 is not @@ -2381,10 +2377,8 @@ def test_hash(self): assert hash(self.offset2) == hash(self.offset2) def test_call(self): - with tm.assert_produces_warning(FutureWarning): - # GH#34171 DateOffset.__call__ is deprecated - assert self.offset1(self.d) == datetime(2014, 7, 1, 11) - assert self.offset2(self.d) == datetime(2014, 7, 1, 11) + assert self.offset1(self.d) == datetime(2014, 7, 1, 11) + assert self.offset2(self.d) == datetime(2014, 7, 1, 11) def testRollback1(self): assert self.offset1.rollback(self.d) == self.d @@ -2648,10 +2642,8 @@ def test_hash(self): assert hash(self.offset2) == hash(self.offset2) def test_call(self): - with tm.assert_produces_warning(FutureWarning): - # GH#34171 DateOffset.__call__ is deprecated - assert self.offset2(self.d) == datetime(2008, 1, 3) - assert self.offset2(self.nd) == datetime(2008, 1, 3) + assert self.offset2(self.d) == datetime(2008, 1, 3) + assert self.offset2(self.nd) == datetime(2008, 1, 3) def testRollback1(self): assert CDay(10).rollback(self.d) == self.d @@ -2900,10 +2892,8 @@ def test_repr(self): assert repr(self.offset) == "" assert repr(self.offset2) == "<2 * CustomBusinessMonthEnds>" - def test_call(self): - with tm.assert_produces_warning(FutureWarning): - # GH#34171 DateOffset.__call__ is deprecated - assert self.offset2(self.d) == datetime(2008, 2, 29) + def testCall(self): + assert self.offset2(self.d) == datetime(2008, 2, 29) def testRollback1(self): assert CDay(10).rollback(datetime(2007, 12, 31)) == datetime(2007, 12, 31) @@ -3051,10 +3041,8 @@ def test_repr(self): assert repr(self.offset) == "" assert repr(self.offset2) == "<2 * CustomBusinessMonthBegins>" - def test_call(self): - with tm.assert_produces_warning(FutureWarning): - # GH#34171 DateOffset.__call__ is deprecated - assert self.offset2(self.d) == datetime(2008, 3, 3) + def testCall(self): + assert self.offset2(self.d) == datetime(2008, 3, 3) def testRollback1(self): assert CDay(10).rollback(datetime(2007, 12, 31)) == datetime(2007, 12, 31) diff --git a/pandas/tests/tslibs/test_period_asfreq.py b/pandas/tests/tslibs/test_period_asfreq.py index 7205c3cc676cf..5497cb65c5373 100644 --- a/pandas/tests/tslibs/test_period_asfreq.py +++ b/pandas/tests/tslibs/test_period_asfreq.py @@ -1,6 +1,6 @@ import pytest -from pandas._libs.tslibs.frequencies import get_freq_code +from pandas._libs.tslibs.frequencies import get_freq from pandas._libs.tslibs.period import period_asfreq, period_ordinal @@ -31,10 +31,7 @@ ], ) def test_intra_day_conversion_factors(freq1, freq2, expected): - assert ( - period_asfreq(1, get_freq_code(freq1)[0], get_freq_code(freq2)[0], False) - == expected - ) + assert period_asfreq(1, get_freq(freq1), get_freq(freq2), False) == expected @pytest.mark.parametrize( @@ -42,7 +39,7 @@ def test_intra_day_conversion_factors(freq1, freq2, expected): ) def test_period_ordinal_start_values(freq, expected): # information for Jan. 1, 1970. - assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq_code(freq)[0]) == expected + assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq(freq)) == expected @pytest.mark.parametrize( @@ -55,7 +52,7 @@ def test_period_ordinal_start_values(freq, expected): ], ) def test_period_ordinal_week(dt, expected): - args = dt + (get_freq_code("W")[0],) + args = dt + (get_freq("W"),) assert period_ordinal(*args) == expected @@ -77,5 +74,5 @@ def test_period_ordinal_week(dt, expected): ], ) def test_period_ordinal_business_day(day, expected): - args = (2013, 10, day, 0, 0, 0, 0, 0, get_freq_code("B")[0]) + args = (2013, 10, day, 0, 0, 0, 0, 0, get_freq("B")) assert period_ordinal(*args) == expected diff --git a/pandas/tests/window/common.py b/pandas/tests/window/common.py index 7e0be331ec8d5..5dca26df49930 100644 --- a/pandas/tests/window/common.py +++ b/pandas/tests/window/common.py @@ -1,8 +1,28 @@ +from datetime import datetime + import numpy as np +from numpy.random import randn -from pandas import Series +from pandas import DataFrame, Series, bdate_range import pandas._testing as tm +N, K = 100, 10 + + +class Base: + + _nan_locs = np.arange(20, 40) + _inf_locs = np.array([]) + + def _create_data(self): + arr = randn(N) + arr[self._nan_locs] = np.NaN + + self.arr = arr + self.rng = bdate_range(datetime(2009, 1, 1), periods=N) + self.series = Series(arr.copy(), index=self.rng) + self.frame = DataFrame(randn(N, K), index=self.rng, columns=np.arange(K)) + def check_pairwise_moment(frame, dispatch, name, **kwargs): def get_result(obj, obj2=None): diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index 74f3406d30225..4f462a09a60a3 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -1,12 +1,9 @@ -from datetime import datetime - import numpy as np -from numpy.random import randn import pytest import pandas.util._test_decorators as td -from pandas import DataFrame, Series, bdate_range, notna +from pandas import DataFrame, Series, notna @pytest.fixture(params=[True, False]) @@ -245,60 +242,3 @@ def no_nans(x): def consistency_data(request): """Create consistency data""" return request.param - - -def _create_arr(): - """Internal function to mock an array.""" - arr = randn(100) - locs = np.arange(20, 40) - arr[locs] = np.NaN - return arr - - -def _create_rng(): - """Internal function to mock date range.""" - rng = bdate_range(datetime(2009, 1, 1), periods=100) - return rng - - -def _create_series(): - """Internal function to mock Series.""" - arr = _create_arr() - series = Series(arr.copy(), index=_create_rng()) - return series - - -def _create_frame(): - """Internal function to mock DataFrame.""" - rng = _create_rng() - return DataFrame(randn(100, 10), index=rng, columns=np.arange(10)) - - -@pytest.fixture -def nan_locs(): - """Make a range as loc fixture.""" - return np.arange(20, 40) - - -@pytest.fixture -def arr(): - """Make an array as fixture.""" - return _create_arr() - - -@pytest.fixture -def frame(): - """Make mocked frame as fixture.""" - return _create_frame() - - -@pytest.fixture -def series(): - """Make mocked series as fixture.""" - return _create_series() - - -@pytest.fixture(params=[_create_series(), _create_frame()]) -def which(request): - """Turn parametrized which as fixture for series and frame""" - return request.param diff --git a/pandas/tests/window/moments/test_moments_consistency_ewm.py b/pandas/tests/window/moments/test_moments_consistency_ewm.py index f143278e12ec5..3b3a9d59cb6e7 100644 --- a/pandas/tests/window/moments/test_moments_consistency_ewm.py +++ b/pandas/tests/window/moments/test_moments_consistency_ewm.py @@ -4,6 +4,7 @@ from pandas import DataFrame, Series, concat from pandas.tests.window.common import ( + Base, check_binary_ew, check_binary_ew_min_periods, check_pairwise_moment, @@ -18,9 +19,13 @@ ) -@pytest.mark.parametrize("func", ["cov", "corr"]) -def test_ewm_pairwise_cov_corr(func, frame): - check_pairwise_moment(frame, "ewm", func, span=10, min_periods=5) +class TestEwmMomentsConsistency(Base): + def setup_method(self, method): + self._create_data() + + @pytest.mark.parametrize("func", ["cov", "corr"]) + def test_ewm_pairwise_cov_corr(self, func): + check_pairwise_moment(self.frame, "ewm", func, span=10, min_periods=5) @pytest.mark.parametrize("name", ["cov", "corr"]) diff --git a/pandas/tests/window/moments/test_moments_consistency_expanding.py b/pandas/tests/window/moments/test_moments_consistency_expanding.py index ee3579d76d1db..09cd2ff218c2b 100644 --- a/pandas/tests/window/moments/test_moments_consistency_expanding.py +++ b/pandas/tests/window/moments/test_moments_consistency_expanding.py @@ -7,6 +7,7 @@ from pandas import DataFrame, Index, MultiIndex, Series, isna, notna import pandas._testing as tm from pandas.tests.window.common import ( + Base, moments_consistency_cov_data, moments_consistency_is_constant, moments_consistency_mock_mean, @@ -17,145 +18,132 @@ ) -def _check_expanding( - func, static_comp, preserve_nan=True, series=None, frame=None, nan_locs=None -): - - series_result = func(series) - assert isinstance(series_result, Series) - frame_result = func(frame) - assert isinstance(frame_result, DataFrame) - - result = func(series) - tm.assert_almost_equal(result[10], static_comp(series[:11])) - - if preserve_nan: - assert result.iloc[nan_locs].isna().all() +class TestExpandingMomentsConsistency(Base): + def setup_method(self, method): + self._create_data() + def test_expanding_corr(self): + A = self.series.dropna() + B = (A + randn(len(A)))[:-5] -def _check_expanding_has_min_periods(func, static_comp, has_min_periods): - ser = Series(randn(50)) + result = A.expanding().corr(B) - if has_min_periods: - result = func(ser, min_periods=30) - assert result[:29].isna().all() - tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) + rolling_result = A.rolling(window=len(A), min_periods=1).corr(B) - # min_periods is working correctly - result = func(ser, min_periods=15) - assert isna(result.iloc[13]) - assert notna(result.iloc[14]) + tm.assert_almost_equal(rolling_result, result) - ser2 = Series(randn(20)) - result = func(ser2, min_periods=5) - assert isna(result[3]) - assert notna(result[4]) - - # min_periods=0 - result0 = func(ser, min_periods=0) - result1 = func(ser, min_periods=1) - tm.assert_almost_equal(result0, result1) - else: - result = func(ser) - tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) - - -def test_expanding_corr(series): - A = series.dropna() - B = (A + randn(len(A)))[:-5] + def test_expanding_count(self): + result = self.series.expanding(min_periods=0).count() + tm.assert_almost_equal( + result, self.series.rolling(window=len(self.series), min_periods=0).count() + ) - result = A.expanding().corr(B) + def test_expanding_quantile(self): + result = self.series.expanding().quantile(0.5) - rolling_result = A.rolling(window=len(A), min_periods=1).corr(B) + rolling_result = self.series.rolling( + window=len(self.series), min_periods=1 + ).quantile(0.5) - tm.assert_almost_equal(rolling_result, result) + tm.assert_almost_equal(result, rolling_result) + def test_expanding_cov(self): + A = self.series + B = (A + randn(len(A)))[:-5] -def test_expanding_count(series): - result = series.expanding(min_periods=0).count() - tm.assert_almost_equal( - result, series.rolling(window=len(series), min_periods=0).count() - ) + result = A.expanding().cov(B) + rolling_result = A.rolling(window=len(A), min_periods=1).cov(B) -def test_expanding_quantile(series): - result = series.expanding().quantile(0.5) + tm.assert_almost_equal(rolling_result, result) - rolling_result = series.rolling(window=len(series), min_periods=1).quantile(0.5) + def test_expanding_cov_pairwise(self): + result = self.frame.expanding().corr() - tm.assert_almost_equal(result, rolling_result) + rolling_result = self.frame.rolling( + window=len(self.frame), min_periods=1 + ).corr() + tm.assert_frame_equal(result, rolling_result) -def test_expanding_cov(series): - A = series - B = (A + randn(len(A)))[:-5] + def test_expanding_corr_pairwise(self): + result = self.frame.expanding().corr() - result = A.expanding().cov(B) + rolling_result = self.frame.rolling( + window=len(self.frame), min_periods=1 + ).corr() + tm.assert_frame_equal(result, rolling_result) - rolling_result = A.rolling(window=len(A), min_periods=1).cov(B) + @pytest.mark.parametrize("has_min_periods", [True, False]) + @pytest.mark.parametrize( + "func,static_comp", + [("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)], + ids=["sum", "mean", "max", "min"], + ) + def test_expanding_func(self, func, static_comp, has_min_periods): + def expanding_func(x, min_periods=1, center=False, axis=0): + exp = x.expanding(min_periods=min_periods, center=center, axis=axis) + return getattr(exp, func)() + + self._check_expanding(expanding_func, static_comp, preserve_nan=False) + self._check_expanding_has_min_periods( + expanding_func, static_comp, has_min_periods + ) - tm.assert_almost_equal(rolling_result, result) + @pytest.mark.parametrize("has_min_periods", [True, False]) + def test_expanding_apply(self, engine_and_raw, has_min_periods): + engine, raw = engine_and_raw -def test_expanding_cov_pairwise(frame): - result = frame.expanding().cov() + def expanding_mean(x, min_periods=1): - rolling_result = frame.rolling(window=len(frame), min_periods=1).cov() + exp = x.expanding(min_periods=min_periods) + result = exp.apply(lambda x: x.mean(), raw=raw, engine=engine) + return result - tm.assert_frame_equal(result, rolling_result) + # TODO(jreback), needed to add preserve_nan=False + # here to make this pass + self._check_expanding(expanding_mean, np.mean, preserve_nan=False) + self._check_expanding_has_min_periods(expanding_mean, np.mean, has_min_periods) + def _check_expanding(self, func, static_comp, preserve_nan=True): -def test_expanding_corr_pairwise(frame): - result = frame.expanding().corr() + series_result = func(self.series) + assert isinstance(series_result, Series) + frame_result = func(self.frame) + assert isinstance(frame_result, DataFrame) - rolling_result = frame.rolling(window=len(frame), min_periods=1).corr() - tm.assert_frame_equal(result, rolling_result) + result = func(self.series) + tm.assert_almost_equal(result[10], static_comp(self.series[:11])) + if preserve_nan: + assert result.iloc[self._nan_locs].isna().all() -@pytest.mark.parametrize("has_min_periods", [True, False]) -@pytest.mark.parametrize( - "func,static_comp", - [("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)], - ids=["sum", "mean", "max", "min"], -) -def test_expanding_func(func, static_comp, has_min_periods, series, frame, nan_locs): - def expanding_func(x, min_periods=1, center=False, axis=0): - exp = x.expanding(min_periods=min_periods, center=center, axis=axis) - return getattr(exp, func)() - - _check_expanding( - expanding_func, - static_comp, - preserve_nan=False, - series=series, - frame=frame, - nan_locs=nan_locs, - ) - _check_expanding_has_min_periods(expanding_func, static_comp, has_min_periods) + def _check_expanding_has_min_periods(self, func, static_comp, has_min_periods): + ser = Series(randn(50)) + if has_min_periods: + result = func(ser, min_periods=30) + assert result[:29].isna().all() + tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) -@pytest.mark.parametrize("has_min_periods", [True, False]) -def test_expanding_apply(engine_and_raw, has_min_periods, series, frame, nan_locs): + # min_periods is working correctly + result = func(ser, min_periods=15) + assert isna(result.iloc[13]) + assert notna(result.iloc[14]) - engine, raw = engine_and_raw + ser2 = Series(randn(20)) + result = func(ser2, min_periods=5) + assert isna(result[3]) + assert notna(result[4]) - def expanding_mean(x, min_periods=1): - - exp = x.expanding(min_periods=min_periods) - result = exp.apply(lambda x: x.mean(), raw=raw, engine=engine) - return result - - # TODO(jreback), needed to add preserve_nan=False - # here to make this pass - _check_expanding( - expanding_mean, - np.mean, - preserve_nan=False, - series=series, - frame=frame, - nan_locs=nan_locs, - ) - _check_expanding_has_min_periods(expanding_mean, np.mean, has_min_periods) + # min_periods=0 + result0 = func(ser, min_periods=0) + result1 = func(ser, min_periods=1) + tm.assert_almost_equal(result0, result1) + else: + result = func(ser) + tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) diff --git a/pandas/tests/window/moments/test_moments_consistency_rolling.py b/pandas/tests/window/moments/test_moments_consistency_rolling.py index a3de8aa69f840..2c37baeae13b7 100644 --- a/pandas/tests/window/moments/test_moments_consistency_rolling.py +++ b/pandas/tests/window/moments/test_moments_consistency_rolling.py @@ -12,6 +12,7 @@ import pandas._testing as tm from pandas.core.window.common import _flex_binary_moment from pandas.tests.window.common import ( + Base, check_pairwise_moment, moments_consistency_cov_data, moments_consistency_is_constant, @@ -32,56 +33,60 @@ def _rolling_consistency_cases(): yield window, min_periods, center -# binary moments -def test_rolling_cov(series): - A = series - B = A + randn(len(A)) +class TestRollingMomentsConsistency(Base): + def setup_method(self, method): + self._create_data() - result = A.rolling(window=50, min_periods=25).cov(B) - tm.assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1]) + # binary moments + def test_rolling_cov(self): + A = self.series + B = A + randn(len(A)) + result = A.rolling(window=50, min_periods=25).cov(B) + tm.assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1]) -def test_rolling_corr(series): - A = series - B = A + randn(len(A)) + def test_rolling_corr(self): + A = self.series + B = A + randn(len(A)) - result = A.rolling(window=50, min_periods=25).corr(B) - tm.assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1]) + result = A.rolling(window=50, min_periods=25).corr(B) + tm.assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1]) - # test for correct bias correction - a = tm.makeTimeSeries() - b = tm.makeTimeSeries() - a[:5] = np.nan - b[:10] = np.nan + # test for correct bias correction + a = tm.makeTimeSeries() + b = tm.makeTimeSeries() + a[:5] = np.nan + b[:10] = np.nan - result = a.rolling(window=len(a), min_periods=1).corr(b) - tm.assert_almost_equal(result[-1], a.corr(b)) + result = a.rolling(window=len(a), min_periods=1).corr(b) + tm.assert_almost_equal(result[-1], a.corr(b)) + @pytest.mark.parametrize("func", ["cov", "corr"]) + def test_rolling_pairwise_cov_corr(self, func): + check_pairwise_moment(self.frame, "rolling", func, window=10, min_periods=5) -@pytest.mark.parametrize("func", ["cov", "corr"]) -def test_rolling_pairwise_cov_corr(func, frame): - check_pairwise_moment(frame, "rolling", func, window=10, min_periods=5) + @pytest.mark.parametrize("method", ["corr", "cov"]) + def test_flex_binary_frame(self, method): + series = self.frame[1] + res = getattr(series.rolling(window=10), method)(self.frame) + res2 = getattr(self.frame.rolling(window=10), method)(series) + exp = self.frame.apply(lambda x: getattr(series.rolling(window=10), method)(x)) -@pytest.mark.parametrize("method", ["corr", "cov"]) -def test_flex_binary_frame(method, frame): - series = frame[1] + tm.assert_frame_equal(res, exp) + tm.assert_frame_equal(res2, exp) - res = getattr(series.rolling(window=10), method)(frame) - res2 = getattr(frame.rolling(window=10), method)(series) - exp = frame.apply(lambda x: getattr(series.rolling(window=10), method)(x)) + frame2 = self.frame.copy() + frame2.values[:] = np.random.randn(*frame2.shape) - tm.assert_frame_equal(res, exp) - tm.assert_frame_equal(res2, exp) - - frame2 = frame.copy() - frame2.values[:] = np.random.randn(*frame2.shape) - - res3 = getattr(frame.rolling(window=10), method)(frame2) - exp = DataFrame( - {k: getattr(frame[k].rolling(window=10), method)(frame2[k]) for k in frame} - ) - tm.assert_frame_equal(res3, exp) + res3 = getattr(self.frame.rolling(window=10), method)(frame2) + exp = DataFrame( + { + k: getattr(self.frame[k].rolling(window=10), method)(frame2[k]) + for k in self.frame + } + ) + tm.assert_frame_equal(res3, exp) @pytest.mark.slow diff --git a/pandas/tests/window/moments/test_moments_ewm.py b/pandas/tests/window/moments/test_moments_ewm.py index c6a92c0ad47b6..162917fff9363 100644 --- a/pandas/tests/window/moments/test_moments_ewm.py +++ b/pandas/tests/window/moments/test_moments_ewm.py @@ -5,248 +5,257 @@ import pandas as pd from pandas import DataFrame, Series import pandas._testing as tm +from pandas.tests.window.common import Base -def check_ew(name=None, preserve_nan=False, series=None, frame=None, nan_locs=None): - series_result = getattr(series.ewm(com=10), name)() - assert isinstance(series_result, Series) - - frame_result = getattr(frame.ewm(com=10), name)() - assert type(frame_result) == DataFrame - - result = getattr(series.ewm(com=10), name)() - if preserve_nan: - assert result[nan_locs].isna().all() - - -def test_ewma(series, frame, nan_locs): - check_ew(name="mean", frame=frame, series=series, nan_locs=nan_locs) - - vals = pd.Series(np.zeros(1000)) - vals[5] = 1 - result = vals.ewm(span=100, adjust=False).mean().sum() - assert np.abs(result - 1) < 1e-2 - - -@pytest.mark.parametrize("adjust", [True, False]) -@pytest.mark.parametrize("ignore_na", [True, False]) -def test_ewma_cases(adjust, ignore_na): - # try adjust/ignore_na args matrix - - s = Series([1.0, 2.0, 4.0, 8.0]) - - if adjust: - expected = Series([1.0, 1.6, 2.736842, 4.923077]) - else: - expected = Series([1.0, 1.333333, 2.222222, 4.148148]) - - result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean() - tm.assert_series_equal(result, expected) - - -def test_ewma_nan_handling(): - s = Series([1.0] + [np.nan] * 5 + [1.0]) - result = s.ewm(com=5).mean() - tm.assert_series_equal(result, Series([1.0] * len(s))) - - s = Series([np.nan] * 2 + [1.0] + [np.nan] * 2 + [1.0]) - result = s.ewm(com=5).mean() - tm.assert_series_equal(result, Series([np.nan] * 2 + [1.0] * 4)) - - # GH 7603 - s0 = Series([np.nan, 1.0, 101.0]) - s1 = Series([1.0, np.nan, 101.0]) - s2 = Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]) - s3 = Series([1.0, np.nan, 101.0, 50.0]) - com = 2.0 - alpha = 1.0 / (1.0 + com) - - def simple_wma(s, w): - return (s.multiply(w).cumsum() / w.cumsum()).fillna(method="ffill") - - for (s, adjust, ignore_na, w) in [ - (s0, True, False, [np.nan, (1.0 - alpha), 1.0]), - (s0, True, True, [np.nan, (1.0 - alpha), 1.0]), - (s0, False, False, [np.nan, (1.0 - alpha), alpha]), - (s0, False, True, [np.nan, (1.0 - alpha), alpha]), - (s1, True, False, [(1.0 - alpha) ** 2, np.nan, 1.0]), - (s1, True, True, [(1.0 - alpha), np.nan, 1.0]), - (s1, False, False, [(1.0 - alpha) ** 2, np.nan, alpha]), - (s1, False, True, [(1.0 - alpha), np.nan, alpha]), - (s2, True, False, [np.nan, (1.0 - alpha) ** 3, np.nan, np.nan, 1.0, np.nan],), - (s2, True, True, [np.nan, (1.0 - alpha), np.nan, np.nan, 1.0, np.nan]), - ( - s2, - False, - False, - [np.nan, (1.0 - alpha) ** 3, np.nan, np.nan, alpha, np.nan], - ), - (s2, False, True, [np.nan, (1.0 - alpha), np.nan, np.nan, alpha, np.nan]), - (s3, True, False, [(1.0 - alpha) ** 3, np.nan, (1.0 - alpha), 1.0]), - (s3, True, True, [(1.0 - alpha) ** 2, np.nan, (1.0 - alpha), 1.0]), - ( - s3, - False, - False, - [ - (1.0 - alpha) ** 3, - np.nan, - (1.0 - alpha) * alpha, - alpha * ((1.0 - alpha) ** 2 + alpha), - ], - ), - (s3, False, True, [(1.0 - alpha) ** 2, np.nan, (1.0 - alpha) * alpha, alpha],), - ]: - expected = simple_wma(s, Series(w)) - result = s.ewm(com=com, adjust=adjust, ignore_na=ignore_na).mean() +@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") +class TestMoments(Base): + def setup_method(self, method): + self._create_data() + def test_ewma(self): + self._check_ew(name="mean") + + vals = pd.Series(np.zeros(1000)) + vals[5] = 1 + result = vals.ewm(span=100, adjust=False).mean().sum() + assert np.abs(result - 1) < 1e-2 + + @pytest.mark.parametrize("adjust", [True, False]) + @pytest.mark.parametrize("ignore_na", [True, False]) + def test_ewma_cases(self, adjust, ignore_na): + # try adjust/ignore_na args matrix + + s = Series([1.0, 2.0, 4.0, 8.0]) + + if adjust: + expected = Series([1.0, 1.6, 2.736842, 4.923077]) + else: + expected = Series([1.0, 1.333333, 2.222222, 4.148148]) + + result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean() tm.assert_series_equal(result, expected) - if ignore_na is False: - # check that ignore_na defaults to False - result = s.ewm(com=com, adjust=adjust).mean() - tm.assert_series_equal(result, expected) + def test_ewma_nan_handling(self): + s = Series([1.0] + [np.nan] * 5 + [1.0]) + result = s.ewm(com=5).mean() + tm.assert_series_equal(result, Series([1.0] * len(s))) + + s = Series([np.nan] * 2 + [1.0] + [np.nan] * 2 + [1.0]) + result = s.ewm(com=5).mean() + tm.assert_series_equal(result, Series([np.nan] * 2 + [1.0] * 4)) + + # GH 7603 + s0 = Series([np.nan, 1.0, 101.0]) + s1 = Series([1.0, np.nan, 101.0]) + s2 = Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]) + s3 = Series([1.0, np.nan, 101.0, 50.0]) + com = 2.0 + alpha = 1.0 / (1.0 + com) + + def simple_wma(s, w): + return (s.multiply(w).cumsum() / w.cumsum()).fillna(method="ffill") + + for (s, adjust, ignore_na, w) in [ + (s0, True, False, [np.nan, (1.0 - alpha), 1.0]), + (s0, True, True, [np.nan, (1.0 - alpha), 1.0]), + (s0, False, False, [np.nan, (1.0 - alpha), alpha]), + (s0, False, True, [np.nan, (1.0 - alpha), alpha]), + (s1, True, False, [(1.0 - alpha) ** 2, np.nan, 1.0]), + (s1, True, True, [(1.0 - alpha), np.nan, 1.0]), + (s1, False, False, [(1.0 - alpha) ** 2, np.nan, alpha]), + (s1, False, True, [(1.0 - alpha), np.nan, alpha]), + ( + s2, + True, + False, + [np.nan, (1.0 - alpha) ** 3, np.nan, np.nan, 1.0, np.nan], + ), + (s2, True, True, [np.nan, (1.0 - alpha), np.nan, np.nan, 1.0, np.nan]), + ( + s2, + False, + False, + [np.nan, (1.0 - alpha) ** 3, np.nan, np.nan, alpha, np.nan], + ), + (s2, False, True, [np.nan, (1.0 - alpha), np.nan, np.nan, alpha, np.nan]), + (s3, True, False, [(1.0 - alpha) ** 3, np.nan, (1.0 - alpha), 1.0]), + (s3, True, True, [(1.0 - alpha) ** 2, np.nan, (1.0 - alpha), 1.0]), + ( + s3, + False, + False, + [ + (1.0 - alpha) ** 3, + np.nan, + (1.0 - alpha) * alpha, + alpha * ((1.0 - alpha) ** 2 + alpha), + ], + ), + ( + s3, + False, + True, + [(1.0 - alpha) ** 2, np.nan, (1.0 - alpha) * alpha, alpha], + ), + ]: + expected = simple_wma(s, Series(w)) + result = s.ewm(com=com, adjust=adjust, ignore_na=ignore_na).mean() -@pytest.mark.parametrize("name", ["var", "vol"]) -def test_ewmvar_ewmvol(series, frame, nan_locs, name): - check_ew(name=name, frame=frame, series=series, nan_locs=nan_locs) - - -def test_ewma_span_com_args(series): - A = series.ewm(com=9.5).mean() - B = series.ewm(span=20).mean() - tm.assert_almost_equal(A, B) - - with pytest.raises(ValueError): - series.ewm(com=9.5, span=20) - with pytest.raises(ValueError): - series.ewm().mean() - - -def test_ewma_halflife_arg(series): - A = series.ewm(com=13.932726172912965).mean() - B = series.ewm(halflife=10.0).mean() - tm.assert_almost_equal(A, B) - - with pytest.raises(ValueError): - series.ewm(span=20, halflife=50) - with pytest.raises(ValueError): - series.ewm(com=9.5, halflife=50) - with pytest.raises(ValueError): - series.ewm(com=9.5, span=20, halflife=50) - with pytest.raises(ValueError): - series.ewm() - - -def test_ewm_alpha(arr): - # GH 10789 - s = Series(arr) - a = s.ewm(alpha=0.61722699889169674).mean() - b = s.ewm(com=0.62014947789973052).mean() - c = s.ewm(span=2.240298955799461).mean() - d = s.ewm(halflife=0.721792864318).mean() - tm.assert_series_equal(a, b) - tm.assert_series_equal(a, c) - tm.assert_series_equal(a, d) - - -def test_ewm_alpha_arg(series): - # GH 10789 - s = series - with pytest.raises(ValueError): - s.ewm() - with pytest.raises(ValueError): - s.ewm(com=10.0, alpha=0.5) - with pytest.raises(ValueError): - s.ewm(span=10.0, alpha=0.5) - with pytest.raises(ValueError): - s.ewm(halflife=10.0, alpha=0.5) - - -def test_ewm_domain_checks(arr): - # GH 12492 - s = Series(arr) - msg = "comass must satisfy: comass >= 0" - with pytest.raises(ValueError, match=msg): - s.ewm(com=-0.1) - s.ewm(com=0.0) - s.ewm(com=0.1) - - msg = "span must satisfy: span >= 1" - with pytest.raises(ValueError, match=msg): - s.ewm(span=-0.1) - with pytest.raises(ValueError, match=msg): - s.ewm(span=0.0) - with pytest.raises(ValueError, match=msg): - s.ewm(span=0.9) - s.ewm(span=1.0) - s.ewm(span=1.1) - - msg = "halflife must satisfy: halflife > 0" - with pytest.raises(ValueError, match=msg): - s.ewm(halflife=-0.1) - with pytest.raises(ValueError, match=msg): - s.ewm(halflife=0.0) - s.ewm(halflife=0.1) - - msg = "alpha must satisfy: 0 < alpha <= 1" - with pytest.raises(ValueError, match=msg): - s.ewm(alpha=-0.1) - with pytest.raises(ValueError, match=msg): - s.ewm(alpha=0.0) - s.ewm(alpha=0.1) - s.ewm(alpha=1.0) - with pytest.raises(ValueError, match=msg): - s.ewm(alpha=1.1) - - -@pytest.mark.parametrize("method", ["mean", "vol", "var"]) -def test_ew_empty_series(method): - vals = pd.Series([], dtype=np.float64) - - ewm = vals.ewm(3) - result = getattr(ewm, method)() - tm.assert_almost_equal(result, vals) - - -@pytest.mark.parametrize("min_periods", [0, 1]) -@pytest.mark.parametrize("name", ["mean", "var", "vol"]) -def test_ew_min_periods(min_periods, name): - # excluding NaNs correctly - arr = randn(50) - arr[:10] = np.NaN - arr[-10:] = np.NaN - s = Series(arr) - - # check min_periods - # GH 7898 - result = getattr(s.ewm(com=50, min_periods=2), name)() - assert result[:11].isna().all() - assert not result[11:].isna().any() - - result = getattr(s.ewm(com=50, min_periods=min_periods), name)() - if name == "mean": - assert result[:10].isna().all() - assert not result[10:].isna().any() - else: - # ewm.std, ewm.vol, ewm.var (with bias=False) require at least - # two values + tm.assert_series_equal(result, expected) + if ignore_na is False: + # check that ignore_na defaults to False + result = s.ewm(com=com, adjust=adjust).mean() + tm.assert_series_equal(result, expected) + + def test_ewmvar(self): + self._check_ew(name="var") + + def test_ewmvol(self): + self._check_ew(name="vol") + + def test_ewma_span_com_args(self): + A = self.series.ewm(com=9.5).mean() + B = self.series.ewm(span=20).mean() + tm.assert_almost_equal(A, B) + + with pytest.raises(ValueError): + self.series.ewm(com=9.5, span=20) + with pytest.raises(ValueError): + self.series.ewm().mean() + + def test_ewma_halflife_arg(self): + A = self.series.ewm(com=13.932726172912965).mean() + B = self.series.ewm(halflife=10.0).mean() + tm.assert_almost_equal(A, B) + + with pytest.raises(ValueError): + self.series.ewm(span=20, halflife=50) + with pytest.raises(ValueError): + self.series.ewm(com=9.5, halflife=50) + with pytest.raises(ValueError): + self.series.ewm(com=9.5, span=20, halflife=50) + with pytest.raises(ValueError): + self.series.ewm() + + def test_ewm_alpha(self): + # GH 10789 + s = Series(self.arr) + a = s.ewm(alpha=0.61722699889169674).mean() + b = s.ewm(com=0.62014947789973052).mean() + c = s.ewm(span=2.240298955799461).mean() + d = s.ewm(halflife=0.721792864318).mean() + tm.assert_series_equal(a, b) + tm.assert_series_equal(a, c) + tm.assert_series_equal(a, d) + + def test_ewm_alpha_arg(self): + # GH 10789 + s = self.series + with pytest.raises(ValueError): + s.ewm() + with pytest.raises(ValueError): + s.ewm(com=10.0, alpha=0.5) + with pytest.raises(ValueError): + s.ewm(span=10.0, alpha=0.5) + with pytest.raises(ValueError): + s.ewm(halflife=10.0, alpha=0.5) + + def test_ewm_domain_checks(self): + # GH 12492 + s = Series(self.arr) + msg = "comass must satisfy: comass >= 0" + with pytest.raises(ValueError, match=msg): + s.ewm(com=-0.1) + s.ewm(com=0.0) + s.ewm(com=0.1) + + msg = "span must satisfy: span >= 1" + with pytest.raises(ValueError, match=msg): + s.ewm(span=-0.1) + with pytest.raises(ValueError, match=msg): + s.ewm(span=0.0) + with pytest.raises(ValueError, match=msg): + s.ewm(span=0.9) + s.ewm(span=1.0) + s.ewm(span=1.1) + + msg = "halflife must satisfy: halflife > 0" + with pytest.raises(ValueError, match=msg): + s.ewm(halflife=-0.1) + with pytest.raises(ValueError, match=msg): + s.ewm(halflife=0.0) + s.ewm(halflife=0.1) + + msg = "alpha must satisfy: 0 < alpha <= 1" + with pytest.raises(ValueError, match=msg): + s.ewm(alpha=-0.1) + with pytest.raises(ValueError, match=msg): + s.ewm(alpha=0.0) + s.ewm(alpha=0.1) + s.ewm(alpha=1.0) + with pytest.raises(ValueError, match=msg): + s.ewm(alpha=1.1) + + @pytest.mark.parametrize("method", ["mean", "vol", "var"]) + def test_ew_empty_series(self, method): + vals = pd.Series([], dtype=np.float64) + + ewm = vals.ewm(3) + result = getattr(ewm, method)() + tm.assert_almost_equal(result, vals) + + def _check_ew(self, name=None, preserve_nan=False): + series_result = getattr(self.series.ewm(com=10), name)() + assert isinstance(series_result, Series) + + frame_result = getattr(self.frame.ewm(com=10), name)() + assert type(frame_result) == DataFrame + + result = getattr(self.series.ewm(com=10), name)() + if preserve_nan: + assert result[self._nan_locs].isna().all() + + @pytest.mark.parametrize("min_periods", [0, 1]) + @pytest.mark.parametrize("name", ["mean", "var", "vol"]) + def test_ew_min_periods(self, min_periods, name): + # excluding NaNs correctly + arr = randn(50) + arr[:10] = np.NaN + arr[-10:] = np.NaN + s = Series(arr) + + # check min_periods + # GH 7898 + result = getattr(s.ewm(com=50, min_periods=2), name)() assert result[:11].isna().all() assert not result[11:].isna().any() - # check series of length 0 - result = getattr(Series(dtype=object).ewm(com=50, min_periods=min_periods), name)() - tm.assert_series_equal(result, Series(dtype="float64")) - - # check series of length 1 - result = getattr(Series([1.0]).ewm(50, min_periods=min_periods), name)() - if name == "mean": - tm.assert_series_equal(result, Series([1.0])) - else: - # ewm.std, ewm.vol, ewm.var with bias=False require at least - # two values - tm.assert_series_equal(result, Series([np.NaN])) - - # pass in ints - result2 = getattr(Series(np.arange(50)).ewm(span=10), name)() - assert result2.dtype == np.float_ + result = getattr(s.ewm(com=50, min_periods=min_periods), name)() + if name == "mean": + assert result[:10].isna().all() + assert not result[10:].isna().any() + else: + # ewm.std, ewm.vol, ewm.var (with bias=False) require at least + # two values + assert result[:11].isna().all() + assert not result[11:].isna().any() + + # check series of length 0 + result = getattr( + Series(dtype=object).ewm(com=50, min_periods=min_periods), name + )() + tm.assert_series_equal(result, Series(dtype="float64")) + + # check series of length 1 + result = getattr(Series([1.0]).ewm(50, min_periods=min_periods), name)() + if name == "mean": + tm.assert_series_equal(result, Series([1.0])) + else: + # ewm.std, ewm.vol, ewm.var with bias=False require at least + # two values + tm.assert_series_equal(result, Series([np.NaN])) + + # pass in ints + result2 = getattr(Series(np.arange(50)).ewm(span=10), name)() + assert result2.dtype == np.float_ diff --git a/pandas/tests/window/moments/test_moments_rolling.py b/pandas/tests/window/moments/test_moments_rolling.py index 3e5475e6b274f..399b76e92fc4f 100644 --- a/pandas/tests/window/moments/test_moments_rolling.py +++ b/pandas/tests/window/moments/test_moments_rolling.py @@ -10,947 +10,912 @@ import pandas as pd from pandas import DataFrame, Series, isna, notna import pandas._testing as tm +from pandas.tests.window.common import Base import pandas.tseries.offsets as offsets -def _check_moment_func( - static_comp, - name, - raw, - has_min_periods=True, - has_center=True, - has_time_rule=True, - fill_value=None, - zero_min_periods_equal=True, - series=None, - frame=None, - **kwargs, -): - - # inject raw - if name == "apply": - kwargs = copy.copy(kwargs) - kwargs["raw"] = raw - - def get_result(obj, window, min_periods=None, center=False): - r = obj.rolling(window=window, min_periods=min_periods, center=center) - return getattr(r, name)(**kwargs) - - series_result = get_result(series, window=50) - assert isinstance(series_result, Series) - tm.assert_almost_equal(series_result.iloc[-1], static_comp(series[-50:])) - - frame_result = get_result(frame, window=50) - assert isinstance(frame_result, DataFrame) - tm.assert_series_equal( - frame_result.iloc[-1, :], - frame.iloc[-50:, :].apply(static_comp, axis=0, raw=raw), - check_names=False, - ) +@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") +class TestMoments(Base): + def setup_method(self, method): + self._create_data() - # check time_rule works - if has_time_rule: - win = 25 - minp = 10 - ser = series[::2].resample("B").mean() - frm = frame[::2].resample("B").mean() + def test_centered_axis_validation(self): - if has_min_periods: - series_result = get_result(ser, window=win, min_periods=minp) - frame_result = get_result(frm, window=win, min_periods=minp) - else: - series_result = get_result(ser, window=win, min_periods=0) - frame_result = get_result(frm, window=win, min_periods=0) + # ok + Series(np.ones(10)).rolling(window=3, center=True, axis=0).mean() - last_date = series_result.index[-1] - prev_date = last_date - 24 * offsets.BDay() + # bad axis + with pytest.raises(ValueError): + Series(np.ones(10)).rolling(window=3, center=True, axis=1).mean() - trunc_series = series[::2].truncate(prev_date, last_date) - trunc_frame = frame[::2].truncate(prev_date, last_date) + # ok ok + DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=0).mean() + DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=1).mean() - tm.assert_almost_equal(series_result[-1], static_comp(trunc_series)) + # bad axis + with pytest.raises(ValueError): + (DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=2).mean()) - tm.assert_series_equal( - frame_result.xs(last_date), - trunc_frame.apply(static_comp, raw=raw), - check_names=False, + def test_rolling_sum(self, raw): + self._check_moment_func( + np.nansum, name="sum", zero_min_periods_equal=False, raw=raw ) - # excluding NaNs correctly - obj = Series(randn(50)) - obj[:10] = np.NaN - obj[-10:] = np.NaN - if has_min_periods: - result = get_result(obj, 50, min_periods=30) - tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10])) - - # min_periods is working correctly - result = get_result(obj, 20, min_periods=15) - assert isna(result.iloc[23]) - assert not isna(result.iloc[24]) - - assert not isna(result.iloc[-6]) - assert isna(result.iloc[-5]) - - obj2 = Series(randn(20)) - result = get_result(obj2, 10, min_periods=5) - assert isna(result.iloc[3]) - assert notna(result.iloc[4]) - - if zero_min_periods_equal: - # min_periods=0 may be equivalent to min_periods=1 - result0 = get_result(obj, 20, min_periods=0) - result1 = get_result(obj, 20, min_periods=1) - tm.assert_almost_equal(result0, result1) - else: - result = get_result(obj, 50) - tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10])) - - # window larger than series length (#7297) - if has_min_periods: - for minp in (0, len(series) - 1, len(series)): - result = get_result(series, len(series) + 1, min_periods=minp) - expected = get_result(series, len(series), min_periods=minp) - nan_mask = isna(result) - tm.assert_series_equal(nan_mask, isna(expected)) - - nan_mask = ~nan_mask - tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) - else: - result = get_result(series, len(series) + 1, min_periods=0) - expected = get_result(series, len(series), min_periods=0) - nan_mask = isna(result) - tm.assert_series_equal(nan_mask, isna(expected)) - - nan_mask = ~nan_mask - tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) - - # check center=True - if has_center: - if has_min_periods: - result = get_result(obj, 20, min_periods=15, center=True) - expected = get_result( - pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=15 - )[9:].reset_index(drop=True) - else: - result = get_result(obj, 20, min_periods=0, center=True) - print(result) - expected = get_result( - pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=0 - )[9:].reset_index(drop=True) - - tm.assert_series_equal(result, expected) - - # shifter index - s = [f"x{x:d}" for x in range(12)] - - if has_min_periods: - minp = 10 + def test_rolling_count(self, raw): + counter = lambda x: np.isfinite(x).astype(float).sum() + self._check_moment_func( + counter, name="count", has_min_periods=False, fill_value=0, raw=raw + ) - series_xp = ( - get_result( - series.reindex(list(series.index) + s), window=25, min_periods=minp, - ) - .shift(-12) - .reindex(series.index) - ) - frame_xp = ( - get_result( - frame.reindex(list(frame.index) + s), window=25, min_periods=minp, - ) - .shift(-12) - .reindex(frame.index) - ) + def test_rolling_mean(self, raw): + self._check_moment_func(np.mean, name="mean", raw=raw) - series_rs = get_result(series, window=25, min_periods=minp, center=True) - frame_rs = get_result(frame, window=25, min_periods=minp, center=True) + @td.skip_if_no_scipy + def test_cmov_mean(self): + # GH 8238 + vals = np.array( + [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48] + ) + result = Series(vals).rolling(5, center=True).mean() + expected = Series( + [ + np.nan, + np.nan, + 9.962, + 11.27, + 11.564, + 12.516, + 12.818, + 12.952, + np.nan, + np.nan, + ] + ) + tm.assert_series_equal(expected, result) - else: - series_xp = ( - get_result( - series.reindex(list(series.index) + s), window=25, min_periods=0, - ) - .shift(-12) - .reindex(series.index) - ) - frame_xp = ( - get_result( - frame.reindex(list(frame.index) + s), window=25, min_periods=0, - ) - .shift(-12) - .reindex(frame.index) + @td.skip_if_no_scipy + def test_cmov_window(self): + # GH 8238 + vals = np.array( + [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48] + ) + result = Series(vals).rolling(5, win_type="boxcar", center=True).mean() + expected = Series( + [ + np.nan, + np.nan, + 9.962, + 11.27, + 11.564, + 12.516, + 12.818, + 12.952, + np.nan, + np.nan, + ] + ) + tm.assert_series_equal(expected, result) + + @td.skip_if_no_scipy + def test_cmov_window_corner(self): + # GH 8238 + # all nan + vals = pd.Series([np.nan] * 10) + result = vals.rolling(5, center=True, win_type="boxcar").mean() + assert np.isnan(result).all() + + # empty + vals = pd.Series([], dtype=object) + result = vals.rolling(5, center=True, win_type="boxcar").mean() + assert len(result) == 0 + + # shorter than window + vals = pd.Series(np.random.randn(5)) + result = vals.rolling(10, win_type="boxcar").mean() + assert np.isnan(result).all() + assert len(result) == 5 + + @td.skip_if_no_scipy + @pytest.mark.parametrize( + "f,xp", + [ + ( + "mean", + [ + [np.nan, np.nan], + [np.nan, np.nan], + [9.252, 9.392], + [8.644, 9.906], + [8.87, 10.208], + [6.81, 8.588], + [7.792, 8.644], + [9.05, 7.824], + [np.nan, np.nan], + [np.nan, np.nan], + ], + ), + ( + "std", + [ + [np.nan, np.nan], + [np.nan, np.nan], + [3.789706, 4.068313], + [3.429232, 3.237411], + [3.589269, 3.220810], + [3.405195, 2.380655], + [3.281839, 2.369869], + [3.676846, 1.801799], + [np.nan, np.nan], + [np.nan, np.nan], + ], + ), + ( + "var", + [ + [np.nan, np.nan], + [np.nan, np.nan], + [14.36187, 16.55117], + [11.75963, 10.48083], + [12.88285, 10.37362], + [11.59535, 5.66752], + [10.77047, 5.61628], + [13.51920, 3.24648], + [np.nan, np.nan], + [np.nan, np.nan], + ], + ), + ( + "sum", + [ + [np.nan, np.nan], + [np.nan, np.nan], + [46.26, 46.96], + [43.22, 49.53], + [44.35, 51.04], + [34.05, 42.94], + [38.96, 43.22], + [45.25, 39.12], + [np.nan, np.nan], + [np.nan, np.nan], + ], + ), + ], + ) + def test_cmov_window_frame(self, f, xp): + # Gh 8238 + df = DataFrame( + np.array( + [ + [12.18, 3.64], + [10.18, 9.16], + [13.24, 14.61], + [4.51, 8.11], + [6.15, 11.44], + [9.14, 6.21], + [11.31, 10.67], + [2.94, 6.51], + [9.42, 8.39], + [12.44, 7.34], + ] ) + ) + xp = DataFrame(np.array(xp)) - series_rs = get_result(series, window=25, min_periods=0, center=True) - frame_rs = get_result(frame, window=25, min_periods=0, center=True) - - if fill_value is not None: - series_xp = series_xp.fillna(fill_value) - frame_xp = frame_xp.fillna(fill_value) - tm.assert_series_equal(series_xp, series_rs) - tm.assert_frame_equal(frame_xp, frame_rs) - - -def test_centered_axis_validation(): - - # ok - Series(np.ones(10)).rolling(window=3, center=True, axis=0).mean() - - # bad axis - with pytest.raises(ValueError): - Series(np.ones(10)).rolling(window=3, center=True, axis=1).mean() - - # ok ok - DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=0).mean() - DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=1).mean() - - # bad axis - with pytest.raises(ValueError): - (DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=2).mean()) - - -def test_rolling_sum(raw, series, frame): - _check_moment_func( - np.nansum, - name="sum", - zero_min_periods_equal=False, - raw=raw, - series=series, - frame=frame, - ) + roll = df.rolling(5, win_type="boxcar", center=True) + rs = getattr(roll, f)() + tm.assert_frame_equal(xp, rs) -def test_rolling_count(raw, series, frame): - counter = lambda x: np.isfinite(x).astype(float).sum() - _check_moment_func( - counter, - name="count", - has_min_periods=False, - fill_value=0, - raw=raw, - series=series, - frame=frame, - ) + @td.skip_if_no_scipy + def test_cmov_window_na_min_periods(self): + # min_periods + vals = Series(np.random.randn(10)) + vals[4] = np.nan + vals[8] = np.nan + xp = vals.rolling(5, min_periods=4, center=True).mean() + rs = vals.rolling(5, win_type="boxcar", min_periods=4, center=True).mean() + tm.assert_series_equal(xp, rs) -def test_rolling_mean(raw, series, frame): - _check_moment_func(np.mean, name="mean", raw=raw, series=series, frame=frame) - - -@td.skip_if_no_scipy -def test_cmov_mean(): - # GH 8238 - vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) - result = Series(vals).rolling(5, center=True).mean() - expected_values = [ - np.nan, - np.nan, - 9.962, - 11.27, - 11.564, - 12.516, - 12.818, - 12.952, - np.nan, - np.nan, - ] - expected = Series(expected_values) - tm.assert_series_equal(expected, result) - - -@td.skip_if_no_scipy -def test_cmov_window(): - # GH 8238 - vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) - result = Series(vals).rolling(5, win_type="boxcar", center=True).mean() - expected_values = [ - np.nan, - np.nan, - 9.962, - 11.27, - 11.564, - 12.516, - 12.818, - 12.952, - np.nan, - np.nan, - ] - expected = Series(expected_values) - tm.assert_series_equal(expected, result) - - -@td.skip_if_no_scipy -def test_cmov_window_corner(): - # GH 8238 - # all nan - vals = pd.Series([np.nan] * 10) - result = vals.rolling(5, center=True, win_type="boxcar").mean() - assert np.isnan(result).all() - - # empty - vals = pd.Series([], dtype=object) - result = vals.rolling(5, center=True, win_type="boxcar").mean() - assert len(result) == 0 - - # shorter than window - vals = pd.Series(np.random.randn(5)) - result = vals.rolling(10, win_type="boxcar").mean() - assert np.isnan(result).all() - assert len(result) == 5 - - -@td.skip_if_no_scipy -@pytest.mark.parametrize( - "f,xp", - [ - ( - "mean", - [ - [np.nan, np.nan], - [np.nan, np.nan], - [9.252, 9.392], - [8.644, 9.906], - [8.87, 10.208], - [6.81, 8.588], - [7.792, 8.644], - [9.05, 7.824], - [np.nan, np.nan], - [np.nan, np.nan], + @td.skip_if_no_scipy + def test_cmov_window_regular(self, win_types): + # GH 8238 + vals = np.array( + [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48] + ) + xps = { + "hamming": [ + np.nan, + np.nan, + 8.71384, + 9.56348, + 12.38009, + 14.03687, + 13.8567, + 11.81473, + np.nan, + np.nan, ], - ), - ( - "std", - [ - [np.nan, np.nan], - [np.nan, np.nan], - [3.789706, 4.068313], - [3.429232, 3.237411], - [3.589269, 3.220810], - [3.405195, 2.380655], - [3.281839, 2.369869], - [3.676846, 1.801799], - [np.nan, np.nan], - [np.nan, np.nan], + "triang": [ + np.nan, + np.nan, + 9.28667, + 10.34667, + 12.00556, + 13.33889, + 13.38, + 12.33667, + np.nan, + np.nan, ], - ), - ( - "var", - [ - [np.nan, np.nan], - [np.nan, np.nan], - [14.36187, 16.55117], - [11.75963, 10.48083], - [12.88285, 10.37362], - [11.59535, 5.66752], - [10.77047, 5.61628], - [13.51920, 3.24648], - [np.nan, np.nan], - [np.nan, np.nan], + "barthann": [ + np.nan, + np.nan, + 8.4425, + 9.1925, + 12.5575, + 14.3675, + 14.0825, + 11.5675, + np.nan, + np.nan, ], - ), - ( - "sum", - [ - [np.nan, np.nan], - [np.nan, np.nan], - [46.26, 46.96], - [43.22, 49.53], - [44.35, 51.04], - [34.05, 42.94], - [38.96, 43.22], - [45.25, 39.12], - [np.nan, np.nan], - [np.nan, np.nan], + "bohman": [ + np.nan, + np.nan, + 7.61599, + 9.1764, + 12.83559, + 14.17267, + 14.65923, + 11.10401, + np.nan, + np.nan, ], - ), - ], -) -def test_cmov_window_frame(f, xp): - # Gh 8238 - df = DataFrame( - np.array( - [ - [12.18, 3.64], - [10.18, 9.16], - [13.24, 14.61], - [4.51, 8.11], - [6.15, 11.44], - [9.14, 6.21], - [11.31, 10.67], - [2.94, 6.51], - [9.42, 8.39], - [12.44, 7.34], - ] + "blackmanharris": [ + np.nan, + np.nan, + 6.97691, + 9.16438, + 13.05052, + 14.02156, + 15.10512, + 10.74574, + np.nan, + np.nan, + ], + "nuttall": [ + np.nan, + np.nan, + 7.04618, + 9.16786, + 13.02671, + 14.03559, + 15.05657, + 10.78514, + np.nan, + np.nan, + ], + "blackman": [ + np.nan, + np.nan, + 7.73345, + 9.17869, + 12.79607, + 14.20036, + 14.57726, + 11.16988, + np.nan, + np.nan, + ], + "bartlett": [ + np.nan, + np.nan, + 8.4425, + 9.1925, + 12.5575, + 14.3675, + 14.0825, + 11.5675, + np.nan, + np.nan, + ], + } + + xp = Series(xps[win_types]) + rs = Series(vals).rolling(5, win_type=win_types, center=True).mean() + tm.assert_series_equal(xp, rs) + + @td.skip_if_no_scipy + def test_cmov_window_regular_linear_range(self, win_types): + # GH 8238 + vals = np.array(range(10), dtype=np.float) + xp = vals.copy() + xp[:2] = np.nan + xp[-2:] = np.nan + xp = Series(xp) + + rs = Series(vals).rolling(5, win_type=win_types, center=True).mean() + tm.assert_series_equal(xp, rs) + + @td.skip_if_no_scipy + def test_cmov_window_regular_missing_data(self, win_types): + # GH 8238 + vals = np.array( + [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, np.nan, 10.63, 14.48] + ) + xps = { + "bartlett": [ + np.nan, + np.nan, + 9.70333, + 10.5225, + 8.4425, + 9.1925, + 12.5575, + 14.3675, + 15.61667, + 13.655, + ], + "blackman": [ + np.nan, + np.nan, + 9.04582, + 11.41536, + 7.73345, + 9.17869, + 12.79607, + 14.20036, + 15.8706, + 13.655, + ], + "barthann": [ + np.nan, + np.nan, + 9.70333, + 10.5225, + 8.4425, + 9.1925, + 12.5575, + 14.3675, + 15.61667, + 13.655, + ], + "bohman": [ + np.nan, + np.nan, + 8.9444, + 11.56327, + 7.61599, + 9.1764, + 12.83559, + 14.17267, + 15.90976, + 13.655, + ], + "hamming": [ + np.nan, + np.nan, + 9.59321, + 10.29694, + 8.71384, + 9.56348, + 12.38009, + 14.20565, + 15.24694, + 13.69758, + ], + "nuttall": [ + np.nan, + np.nan, + 8.47693, + 12.2821, + 7.04618, + 9.16786, + 13.02671, + 14.03673, + 16.08759, + 13.65553, + ], + "triang": [ + np.nan, + np.nan, + 9.33167, + 9.76125, + 9.28667, + 10.34667, + 12.00556, + 13.82125, + 14.49429, + 13.765, + ], + "blackmanharris": [ + np.nan, + np.nan, + 8.42526, + 12.36824, + 6.97691, + 9.16438, + 13.05052, + 14.02175, + 16.1098, + 13.65509, + ], + } + + xp = Series(xps[win_types]) + rs = Series(vals).rolling(5, win_type=win_types, min_periods=3).mean() + tm.assert_series_equal(xp, rs) + + @td.skip_if_no_scipy + def test_cmov_window_special(self, win_types_special): + # GH 8238 + kwds = { + "kaiser": {"beta": 1.0}, + "gaussian": {"std": 1.0}, + "general_gaussian": {"power": 2.0, "width": 2.0}, + "exponential": {"tau": 10}, + } + + vals = np.array( + [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48] ) - ) - xp = DataFrame(np.array(xp)) - - roll = df.rolling(5, win_type="boxcar", center=True) - rs = getattr(roll, f)() - - tm.assert_frame_equal(xp, rs) - - -@td.skip_if_no_scipy -def test_cmov_window_na_min_periods(): - # min_periods - vals = Series(np.random.randn(10)) - vals[4] = np.nan - vals[8] = np.nan - - xp = vals.rolling(5, min_periods=4, center=True).mean() - rs = vals.rolling(5, win_type="boxcar", min_periods=4, center=True).mean() - tm.assert_series_equal(xp, rs) - - -@td.skip_if_no_scipy -def test_cmov_window_regular(win_types): - # GH 8238 - vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) - xps = { - "hamming": [ - np.nan, - np.nan, - 8.71384, - 9.56348, - 12.38009, - 14.03687, - 13.8567, - 11.81473, - np.nan, - np.nan, - ], - "triang": [ - np.nan, - np.nan, - 9.28667, - 10.34667, - 12.00556, - 13.33889, - 13.38, - 12.33667, - np.nan, - np.nan, - ], - "barthann": [ - np.nan, - np.nan, - 8.4425, - 9.1925, - 12.5575, - 14.3675, - 14.0825, - 11.5675, - np.nan, - np.nan, - ], - "bohman": [ - np.nan, - np.nan, - 7.61599, - 9.1764, - 12.83559, - 14.17267, - 14.65923, - 11.10401, - np.nan, - np.nan, - ], - "blackmanharris": [ - np.nan, - np.nan, - 6.97691, - 9.16438, - 13.05052, - 14.02156, - 15.10512, - 10.74574, - np.nan, - np.nan, - ], - "nuttall": [ - np.nan, - np.nan, - 7.04618, - 9.16786, - 13.02671, - 14.03559, - 15.05657, - 10.78514, - np.nan, - np.nan, - ], - "blackman": [ - np.nan, - np.nan, - 7.73345, - 9.17869, - 12.79607, - 14.20036, - 14.57726, - 11.16988, - np.nan, - np.nan, - ], - "bartlett": [ - np.nan, - np.nan, - 8.4425, - 9.1925, - 12.5575, - 14.3675, - 14.0825, - 11.5675, - np.nan, - np.nan, - ], - } - - xp = Series(xps[win_types]) - rs = Series(vals).rolling(5, win_type=win_types, center=True).mean() - tm.assert_series_equal(xp, rs) - - -@td.skip_if_no_scipy -def test_cmov_window_regular_linear_range(win_types): - # GH 8238 - vals = np.array(range(10), dtype=np.float) - xp = vals.copy() - xp[:2] = np.nan - xp[-2:] = np.nan - xp = Series(xp) - - rs = Series(vals).rolling(5, win_type=win_types, center=True).mean() - tm.assert_series_equal(xp, rs) - -@td.skip_if_no_scipy -def test_cmov_window_regular_missing_data(win_types): - # GH 8238 - vals = np.array( - [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, np.nan, 10.63, 14.48] - ) - xps = { - "bartlett": [ - np.nan, - np.nan, - 9.70333, - 10.5225, - 8.4425, - 9.1925, - 12.5575, - 14.3675, - 15.61667, - 13.655, - ], - "blackman": [ - np.nan, - np.nan, - 9.04582, - 11.41536, - 7.73345, - 9.17869, - 12.79607, - 14.20036, - 15.8706, - 13.655, - ], - "barthann": [ - np.nan, - np.nan, - 9.70333, - 10.5225, - 8.4425, - 9.1925, - 12.5575, - 14.3675, - 15.61667, - 13.655, - ], - "bohman": [ - np.nan, - np.nan, - 8.9444, - 11.56327, - 7.61599, - 9.1764, - 12.83559, - 14.17267, - 15.90976, - 13.655, - ], - "hamming": [ - np.nan, - np.nan, - 9.59321, - 10.29694, - 8.71384, - 9.56348, - 12.38009, - 14.20565, - 15.24694, - 13.69758, - ], - "nuttall": [ - np.nan, - np.nan, - 8.47693, - 12.2821, - 7.04618, - 9.16786, - 13.02671, - 14.03673, - 16.08759, - 13.65553, - ], - "triang": [ - np.nan, - np.nan, - 9.33167, - 9.76125, - 9.28667, - 10.34667, - 12.00556, - 13.82125, - 14.49429, - 13.765, - ], - "blackmanharris": [ - np.nan, - np.nan, - 8.42526, - 12.36824, - 6.97691, - 9.16438, - 13.05052, - 14.02175, - 16.1098, - 13.65509, - ], - } - - xp = Series(xps[win_types]) - rs = Series(vals).rolling(5, win_type=win_types, min_periods=3).mean() - tm.assert_series_equal(xp, rs) - - -@td.skip_if_no_scipy -def test_cmov_window_special(win_types_special): - # GH 8238 - kwds = { - "kaiser": {"beta": 1.0}, - "gaussian": {"std": 1.0}, - "general_gaussian": {"power": 2.0, "width": 2.0}, - "exponential": {"tau": 10}, - } - - vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) - - xps = { - "gaussian": [ - np.nan, - np.nan, - 8.97297, - 9.76077, - 12.24763, - 13.89053, - 13.65671, - 12.01002, - np.nan, - np.nan, - ], - "general_gaussian": [ - np.nan, - np.nan, - 9.85011, - 10.71589, - 11.73161, - 13.08516, - 12.95111, - 12.74577, - np.nan, - np.nan, - ], - "kaiser": [ - np.nan, - np.nan, - 9.86851, - 11.02969, - 11.65161, - 12.75129, - 12.90702, - 12.83757, - np.nan, - np.nan, - ], - "exponential": [ - np.nan, - np.nan, - 9.83364, - 11.10472, - 11.64551, - 12.66138, - 12.92379, - 12.83770, - np.nan, - np.nan, - ], - } + xps = { + "gaussian": [ + np.nan, + np.nan, + 8.97297, + 9.76077, + 12.24763, + 13.89053, + 13.65671, + 12.01002, + np.nan, + np.nan, + ], + "general_gaussian": [ + np.nan, + np.nan, + 9.85011, + 10.71589, + 11.73161, + 13.08516, + 12.95111, + 12.74577, + np.nan, + np.nan, + ], + "kaiser": [ + np.nan, + np.nan, + 9.86851, + 11.02969, + 11.65161, + 12.75129, + 12.90702, + 12.83757, + np.nan, + np.nan, + ], + "exponential": [ + np.nan, + np.nan, + 9.83364, + 11.10472, + 11.64551, + 12.66138, + 12.92379, + 12.83770, + np.nan, + np.nan, + ], + } - xp = Series(xps[win_types_special]) - rs = ( - Series(vals) - .rolling(5, win_type=win_types_special, center=True) - .mean(**kwds[win_types_special]) - ) - tm.assert_series_equal(xp, rs) - - -@td.skip_if_no_scipy -def test_cmov_window_special_linear_range(win_types_special): - # GH 8238 - kwds = { - "kaiser": {"beta": 1.0}, - "gaussian": {"std": 1.0}, - "general_gaussian": {"power": 2.0, "width": 2.0}, - "slepian": {"width": 0.5}, - "exponential": {"tau": 10}, - } - - vals = np.array(range(10), dtype=np.float) - xp = vals.copy() - xp[:2] = np.nan - xp[-2:] = np.nan - xp = Series(xp) - - rs = ( - Series(vals) - .rolling(5, win_type=win_types_special, center=True) - .mean(**kwds[win_types_special]) - ) - tm.assert_series_equal(xp, rs) + xp = Series(xps[win_types_special]) + rs = ( + Series(vals) + .rolling(5, win_type=win_types_special, center=True) + .mean(**kwds[win_types_special]) + ) + tm.assert_series_equal(xp, rs) + + @td.skip_if_no_scipy + def test_cmov_window_special_linear_range(self, win_types_special): + # GH 8238 + kwds = { + "kaiser": {"beta": 1.0}, + "gaussian": {"std": 1.0}, + "general_gaussian": {"power": 2.0, "width": 2.0}, + "slepian": {"width": 0.5}, + "exponential": {"tau": 10}, + } + + vals = np.array(range(10), dtype=np.float) + xp = vals.copy() + xp[:2] = np.nan + xp[-2:] = np.nan + xp = Series(xp) + + rs = ( + Series(vals) + .rolling(5, win_type=win_types_special, center=True) + .mean(**kwds[win_types_special]) + ) + tm.assert_series_equal(xp, rs) + def test_rolling_median(self, raw): + self._check_moment_func(np.median, name="median", raw=raw) -def test_rolling_median(raw, series, frame): - _check_moment_func(np.median, name="median", raw=raw, series=series, frame=frame) + def test_rolling_min(self, raw): + self._check_moment_func(np.min, name="min", raw=raw) + a = pd.Series([1, 2, 3, 4, 5]) + result = a.rolling(window=100, min_periods=1).min() + expected = pd.Series(np.ones(len(a))) + tm.assert_series_equal(result, expected) -def test_rolling_min(raw, series, frame): - _check_moment_func(np.min, name="min", raw=raw, series=series, frame=frame) + with pytest.raises(ValueError): + pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).min() - a = pd.Series([1, 2, 3, 4, 5]) - result = a.rolling(window=100, min_periods=1).min() - expected = pd.Series(np.ones(len(a))) - tm.assert_series_equal(result, expected) + def test_rolling_max(self, raw): + self._check_moment_func(np.max, name="max", raw=raw) - with pytest.raises(ValueError): - pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).min() + a = pd.Series([1, 2, 3, 4, 5], dtype=np.float64) + b = a.rolling(window=100, min_periods=1).max() + tm.assert_almost_equal(a, b) + with pytest.raises(ValueError): + pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).max() -def test_rolling_max(raw, series, frame): - _check_moment_func(np.max, name="max", raw=raw, series=series, frame=frame) + @pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) + def test_rolling_quantile(self, q, raw): + def scoreatpercentile(a, per): + values = np.sort(a, axis=0) - a = pd.Series([1, 2, 3, 4, 5], dtype=np.float64) - b = a.rolling(window=100, min_periods=1).max() - tm.assert_almost_equal(a, b) + idx = int(per / 1.0 * (values.shape[0] - 1)) - with pytest.raises(ValueError): - pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).max() + if idx == values.shape[0] - 1: + retval = values[-1] + else: + qlow = float(idx) / float(values.shape[0] - 1) + qhig = float(idx + 1) / float(values.shape[0] - 1) + vlow = values[idx] + vhig = values[idx + 1] + retval = vlow + (vhig - vlow) * (per - qlow) / (qhig - qlow) -@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) -def test_rolling_quantile(q, raw, series, frame): - def scoreatpercentile(a, per): - values = np.sort(a, axis=0) + return retval - idx = int(per / 1.0 * (values.shape[0] - 1)) + def quantile_func(x): + return scoreatpercentile(x, q) - if idx == values.shape[0] - 1: - retval = values[-1] + self._check_moment_func(quantile_func, name="quantile", quantile=q, raw=raw) - else: - qlow = float(idx) / float(values.shape[0] - 1) - qhig = float(idx + 1) / float(values.shape[0] - 1) - vlow = values[idx] - vhig = values[idx + 1] - retval = vlow + (vhig - vlow) * (per - qlow) / (qhig - qlow) + def test_rolling_quantile_np_percentile(self): + # #9413: Tests that rolling window's quantile default behavior + # is analogous to Numpy's percentile + row = 10 + col = 5 + idx = pd.date_range("20100101", periods=row, freq="B") + df = DataFrame(np.random.rand(row * col).reshape((row, -1)), index=idx) - return retval + df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0) + np_percentile = np.percentile(df, [25, 50, 75], axis=0) - def quantile_func(x): - return scoreatpercentile(x, q) + tm.assert_almost_equal(df_quantile.values, np.array(np_percentile)) - _check_moment_func( - quantile_func, name="quantile", quantile=q, raw=raw, series=series, frame=frame + @pytest.mark.parametrize("quantile", [0.0, 0.1, 0.45, 0.5, 1]) + @pytest.mark.parametrize( + "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"] ) + @pytest.mark.parametrize( + "data", + [ + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], + [8.0, 1.0, 3.0, 4.0, 5.0, 2.0, 6.0, 7.0], + [0.0, np.nan, 0.2, np.nan, 0.4], + [np.nan, np.nan, np.nan, np.nan], + [np.nan, 0.1, np.nan, 0.3, 0.4, 0.5], + [0.5], + [np.nan, 0.7, 0.6], + ], + ) + def test_rolling_quantile_interpolation_options( + self, quantile, interpolation, data + ): + # Tests that rolling window's quantile behavior is analogous to + # Series' quantile for each interpolation option + s = Series(data) + + q1 = s.quantile(quantile, interpolation) + q2 = s.expanding(min_periods=1).quantile(quantile, interpolation).iloc[-1] + + if np.isnan(q1): + assert np.isnan(q2) + else: + assert q1 == q2 + def test_invalid_quantile_value(self): + data = np.arange(5) + s = Series(data) -def test_rolling_quantile_np_percentile(): - # #9413: Tests that rolling window's quantile default behavior - # is analogous to Numpy's percentile - row = 10 - col = 5 - idx = pd.date_range("20100101", periods=row, freq="B") - df = DataFrame(np.random.rand(row * col).reshape((row, -1)), index=idx) - - df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0) - np_percentile = np.percentile(df, [25, 50, 75], axis=0) - - tm.assert_almost_equal(df_quantile.values, np.array(np_percentile)) - - -@pytest.mark.parametrize("quantile", [0.0, 0.1, 0.45, 0.5, 1]) -@pytest.mark.parametrize( - "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"] -) -@pytest.mark.parametrize( - "data", - [ - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], - [8.0, 1.0, 3.0, 4.0, 5.0, 2.0, 6.0, 7.0], - [0.0, np.nan, 0.2, np.nan, 0.4], - [np.nan, np.nan, np.nan, np.nan], - [np.nan, 0.1, np.nan, 0.3, 0.4, 0.5], - [0.5], - [np.nan, 0.7, 0.6], - ], -) -def test_rolling_quantile_interpolation_options(quantile, interpolation, data): - # Tests that rolling window's quantile behavior is analogous to - # Series' quantile for each interpolation option - s = Series(data) - - q1 = s.quantile(quantile, interpolation) - q2 = s.expanding(min_periods=1).quantile(quantile, interpolation).iloc[-1] - - if np.isnan(q1): - assert np.isnan(q2) - else: - assert q1 == q2 + msg = "Interpolation 'invalid' is not supported" + with pytest.raises(ValueError, match=msg): + s.rolling(len(data), min_periods=1).quantile(0.5, interpolation="invalid") + def test_rolling_quantile_param(self): + ser = Series([0.0, 0.1, 0.5, 0.9, 1.0]) -def test_invalid_quantile_value(): - data = np.arange(5) - s = Series(data) + with pytest.raises(ValueError): + ser.rolling(3).quantile(-0.1) - msg = "Interpolation 'invalid' is not supported" - with pytest.raises(ValueError, match=msg): - s.rolling(len(data), min_periods=1).quantile(0.5, interpolation="invalid") + with pytest.raises(ValueError): + ser.rolling(3).quantile(10.0) + with pytest.raises(TypeError): + ser.rolling(3).quantile("foo") -def test_rolling_quantile_param(): - ser = Series([0.0, 0.1, 0.5, 0.9, 1.0]) + def test_rolling_apply(self, raw): + # suppress warnings about empty slices, as we are deliberately testing + # with a 0-length Series - with pytest.raises(ValueError): - ser.rolling(3).quantile(-0.1) + def f(x): + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message=".*(empty slice|0 for slice).*", + category=RuntimeWarning, + ) + return x[np.isfinite(x)].mean() - with pytest.raises(ValueError): - ser.rolling(3).quantile(10.0) + self._check_moment_func(np.mean, name="apply", func=f, raw=raw) - with pytest.raises(TypeError): - ser.rolling(3).quantile("foo") + def test_rolling_std(self, raw): + self._check_moment_func(lambda x: np.std(x, ddof=1), name="std", raw=raw) + self._check_moment_func( + lambda x: np.std(x, ddof=0), name="std", ddof=0, raw=raw + ) + def test_rolling_std_1obs(self): + vals = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0]) -def test_rolling_apply(raw, series, frame): - # suppress warnings about empty slices, as we are deliberately testing - # with a 0-length Series + result = vals.rolling(1, min_periods=1).std() + expected = pd.Series([np.nan] * 5) + tm.assert_series_equal(result, expected) - def f(x): - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - message=".*(empty slice|0 for slice).*", - category=RuntimeWarning, - ) - return x[np.isfinite(x)].mean() + result = vals.rolling(1, min_periods=1).std(ddof=0) + expected = pd.Series([0.0] * 5) + tm.assert_series_equal(result, expected) - _check_moment_func( - np.mean, name="apply", func=f, raw=raw, series=series, frame=frame - ) + result = pd.Series([np.nan, np.nan, 3, 4, 5]).rolling(3, min_periods=2).std() + assert np.isnan(result[2]) + def test_rolling_std_neg_sqrt(self): + # unit test from Bottleneck -def test_rolling_std(raw, series, frame): - _check_moment_func( - lambda x: np.std(x, ddof=1), name="std", raw=raw, series=series, frame=frame - ) - _check_moment_func( - lambda x: np.std(x, ddof=0), - name="std", - ddof=0, - raw=raw, - series=series, - frame=frame, - ) + # Test move_nanstd for neg sqrt. + a = pd.Series( + [ + 0.0011448196318903589, + 0.00028718669878572767, + 0.00028718669878572767, + 0.00028718669878572767, + 0.00028718669878572767, + ] + ) + b = a.rolling(window=3).std() + assert np.isfinite(b[2:]).all() -def test_rolling_std_1obs(): - vals = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0]) + b = a.ewm(span=3).std() + assert np.isfinite(b[2:]).all() - result = vals.rolling(1, min_periods=1).std() - expected = pd.Series([np.nan] * 5) - tm.assert_series_equal(result, expected) + def test_rolling_var(self, raw): + self._check_moment_func(lambda x: np.var(x, ddof=1), name="var", raw=raw) + self._check_moment_func( + lambda x: np.var(x, ddof=0), name="var", ddof=0, raw=raw + ) - result = vals.rolling(1, min_periods=1).std(ddof=0) - expected = pd.Series([0.0] * 5) - tm.assert_series_equal(result, expected) + @td.skip_if_no_scipy + def test_rolling_skew(self, raw): + from scipy.stats import skew + + self._check_moment_func(lambda x: skew(x, bias=False), name="skew", raw=raw) + + @td.skip_if_no_scipy + def test_rolling_kurt(self, raw): + from scipy.stats import kurtosis + + self._check_moment_func(lambda x: kurtosis(x, bias=False), name="kurt", raw=raw) + + def _check_moment_func( + self, + static_comp, + name, + raw, + has_min_periods=True, + has_center=True, + has_time_rule=True, + fill_value=None, + zero_min_periods_equal=True, + **kwargs, + ): + + # inject raw + if name == "apply": + kwargs = copy.copy(kwargs) + kwargs["raw"] = raw + + def get_result(obj, window, min_periods=None, center=False): + r = obj.rolling(window=window, min_periods=min_periods, center=center) + return getattr(r, name)(**kwargs) + + series_result = get_result(self.series, window=50) + assert isinstance(series_result, Series) + tm.assert_almost_equal(series_result.iloc[-1], static_comp(self.series[-50:])) + + frame_result = get_result(self.frame, window=50) + assert isinstance(frame_result, DataFrame) + tm.assert_series_equal( + frame_result.iloc[-1, :], + self.frame.iloc[-50:, :].apply(static_comp, axis=0, raw=raw), + check_names=False, + ) - result = pd.Series([np.nan, np.nan, 3, 4, 5]).rolling(3, min_periods=2).std() - assert np.isnan(result[2]) + # check time_rule works + if has_time_rule: + win = 25 + minp = 10 + series = self.series[::2].resample("B").mean() + frame = self.frame[::2].resample("B").mean() + if has_min_periods: + series_result = get_result(series, window=win, min_periods=minp) + frame_result = get_result(frame, window=win, min_periods=minp) + else: + series_result = get_result(series, window=win, min_periods=0) + frame_result = get_result(frame, window=win, min_periods=0) -def test_rolling_std_neg_sqrt(): - # unit test from Bottleneck + last_date = series_result.index[-1] + prev_date = last_date - 24 * offsets.BDay() - # Test move_nanstd for neg sqrt. + trunc_series = self.series[::2].truncate(prev_date, last_date) + trunc_frame = self.frame[::2].truncate(prev_date, last_date) - a = pd.Series( - [ - 0.0011448196318903589, - 0.00028718669878572767, - 0.00028718669878572767, - 0.00028718669878572767, - 0.00028718669878572767, - ] - ) - b = a.rolling(window=3).std() - assert np.isfinite(b[2:]).all() + tm.assert_almost_equal(series_result[-1], static_comp(trunc_series)) - b = a.ewm(span=3).std() - assert np.isfinite(b[2:]).all() + tm.assert_series_equal( + frame_result.xs(last_date), + trunc_frame.apply(static_comp, raw=raw), + check_names=False, + ) + # excluding NaNs correctly + obj = Series(randn(50)) + obj[:10] = np.NaN + obj[-10:] = np.NaN + if has_min_periods: + result = get_result(obj, 50, min_periods=30) + tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10])) + + # min_periods is working correctly + result = get_result(obj, 20, min_periods=15) + assert isna(result.iloc[23]) + assert not isna(result.iloc[24]) + + assert not isna(result.iloc[-6]) + assert isna(result.iloc[-5]) + + obj2 = Series(randn(20)) + result = get_result(obj2, 10, min_periods=5) + assert isna(result.iloc[3]) + assert notna(result.iloc[4]) + + if zero_min_periods_equal: + # min_periods=0 may be equivalent to min_periods=1 + result0 = get_result(obj, 20, min_periods=0) + result1 = get_result(obj, 20, min_periods=1) + tm.assert_almost_equal(result0, result1) + else: + result = get_result(obj, 50) + tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10])) -def test_rolling_var(raw, series, frame): - _check_moment_func( - lambda x: np.var(x, ddof=1), name="var", raw=raw, series=series, frame=frame - ) - _check_moment_func( - lambda x: np.var(x, ddof=0), - name="var", - ddof=0, - raw=raw, - series=series, - frame=frame, - ) + # window larger than series length (#7297) + if has_min_periods: + for minp in (0, len(self.series) - 1, len(self.series)): + result = get_result(self.series, len(self.series) + 1, min_periods=minp) + expected = get_result(self.series, len(self.series), min_periods=minp) + nan_mask = isna(result) + tm.assert_series_equal(nan_mask, isna(expected)) + + nan_mask = ~nan_mask + tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) + else: + result = get_result(self.series, len(self.series) + 1, min_periods=0) + expected = get_result(self.series, len(self.series), min_periods=0) + nan_mask = isna(result) + tm.assert_series_equal(nan_mask, isna(expected)) + nan_mask = ~nan_mask + tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) -@td.skip_if_no_scipy -def test_rolling_skew(raw, series, frame): - from scipy.stats import skew + # check center=True + if has_center: + if has_min_periods: + result = get_result(obj, 20, min_periods=15, center=True) + expected = get_result( + pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=15 + )[9:].reset_index(drop=True) + else: + result = get_result(obj, 20, min_periods=0, center=True) + print(result) + expected = get_result( + pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=0 + )[9:].reset_index(drop=True) + + tm.assert_series_equal(result, expected) + + # shifter index + s = [f"x{x:d}" for x in range(12)] + + if has_min_periods: + minp = 10 + + series_xp = ( + get_result( + self.series.reindex(list(self.series.index) + s), + window=25, + min_periods=minp, + ) + .shift(-12) + .reindex(self.series.index) + ) + frame_xp = ( + get_result( + self.frame.reindex(list(self.frame.index) + s), + window=25, + min_periods=minp, + ) + .shift(-12) + .reindex(self.frame.index) + ) - _check_moment_func( - lambda x: skew(x, bias=False), name="skew", raw=raw, series=series, frame=frame - ) + series_rs = get_result( + self.series, window=25, min_periods=minp, center=True + ) + frame_rs = get_result( + self.frame, window=25, min_periods=minp, center=True + ) + else: + series_xp = ( + get_result( + self.series.reindex(list(self.series.index) + s), + window=25, + min_periods=0, + ) + .shift(-12) + .reindex(self.series.index) + ) + frame_xp = ( + get_result( + self.frame.reindex(list(self.frame.index) + s), + window=25, + min_periods=0, + ) + .shift(-12) + .reindex(self.frame.index) + ) -@td.skip_if_no_scipy -def test_rolling_kurt(raw, series, frame): - from scipy.stats import kurtosis + series_rs = get_result( + self.series, window=25, min_periods=0, center=True + ) + frame_rs = get_result(self.frame, window=25, min_periods=0, center=True) - _check_moment_func( - lambda x: kurtosis(x, bias=False), - name="kurt", - raw=raw, - series=series, - frame=frame, - ) + if fill_value is not None: + series_xp = series_xp.fillna(fill_value) + frame_xp = frame_xp.fillna(fill_value) + tm.assert_series_equal(series_xp, series_rs) + tm.assert_frame_equal(frame_xp, frame_rs) diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index 33fb79d98a324..680237db0535b 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -9,341 +9,336 @@ from pandas import DataFrame, Index, Series, Timestamp, concat import pandas._testing as tm from pandas.core.base import SpecificationError +from pandas.tests.window.common import Base -def test_getitem(frame): - r = frame.rolling(window=5) - tm.assert_index_equal(r._selected_obj.columns, frame.columns) +class TestApi(Base): + def setup_method(self, method): + self._create_data() - r = frame.rolling(window=5)[1] - assert r._selected_obj.name == frame.columns[1] + def test_getitem(self): - # technically this is allowed - r = frame.rolling(window=5)[1, 3] - tm.assert_index_equal(r._selected_obj.columns, frame.columns[[1, 3]]) + r = self.frame.rolling(window=5) + tm.assert_index_equal(r._selected_obj.columns, self.frame.columns) - r = frame.rolling(window=5)[[1, 3]] - tm.assert_index_equal(r._selected_obj.columns, frame.columns[[1, 3]]) + r = self.frame.rolling(window=5)[1] + assert r._selected_obj.name == self.frame.columns[1] + # technically this is allowed + r = self.frame.rolling(window=5)[1, 3] + tm.assert_index_equal(r._selected_obj.columns, self.frame.columns[[1, 3]]) -def test_select_bad_cols(): - df = DataFrame([[1, 2]], columns=["A", "B"]) - g = df.rolling(window=5) - with pytest.raises(KeyError, match="Columns not found: 'C'"): - g[["C"]] - with pytest.raises(KeyError, match="^[^A]+$"): - # A should not be referenced as a bad column... - # will have to rethink regex if you change message! - g[["A", "C"]] + r = self.frame.rolling(window=5)[[1, 3]] + tm.assert_index_equal(r._selected_obj.columns, self.frame.columns[[1, 3]]) + def test_select_bad_cols(self): + df = DataFrame([[1, 2]], columns=["A", "B"]) + g = df.rolling(window=5) + with pytest.raises(KeyError, match="Columns not found: 'C'"): + g[["C"]] + with pytest.raises(KeyError, match="^[^A]+$"): + # A should not be referenced as a bad column... + # will have to rethink regex if you change message! + g[["A", "C"]] -def test_attribute_access(): + def test_attribute_access(self): - df = DataFrame([[1, 2]], columns=["A", "B"]) - r = df.rolling(window=5) - tm.assert_series_equal(r.A.sum(), r["A"].sum()) - msg = "'Rolling' object has no attribute 'F'" - with pytest.raises(AttributeError, match=msg): - r.F + df = DataFrame([[1, 2]], columns=["A", "B"]) + r = df.rolling(window=5) + tm.assert_series_equal(r.A.sum(), r["A"].sum()) + msg = "'Rolling' object has no attribute 'F'" + with pytest.raises(AttributeError, match=msg): + r.F + def tests_skip_nuisance(self): -def tests_skip_nuisance(): - - df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) - r = df.rolling(window=3) - result = r[["A", "B"]].sum() - expected = DataFrame( - {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, - columns=list("AB"), - ) - tm.assert_frame_equal(result, expected) - - -def test_skip_sum_object_raises(): - df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) - r = df.rolling(window=3) - result = r.sum() - expected = DataFrame( - {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, - columns=list("AB"), - ) - tm.assert_frame_equal(result, expected) - - -def test_agg(): - df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) - - r = df.rolling(window=3) - a_mean = r["A"].mean() - a_std = r["A"].std() - a_sum = r["A"].sum() - b_mean = r["B"].mean() - b_std = r["B"].std() - - result = r.aggregate([np.mean, np.std]) - expected = concat([a_mean, a_std, b_mean, b_std], axis=1) - expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) - tm.assert_frame_equal(result, expected) - - result = r.aggregate({"A": np.mean, "B": np.std}) - - expected = concat([a_mean, b_std], axis=1) - tm.assert_frame_equal(result, expected, check_like=True) + df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) + r = df.rolling(window=3) + result = r[["A", "B"]].sum() + expected = DataFrame( + {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, + columns=list("AB"), + ) + tm.assert_frame_equal(result, expected) + + def test_skip_sum_object_raises(self): + df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) + r = df.rolling(window=3) + result = r.sum() + expected = DataFrame( + {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, + columns=list("AB"), + ) + tm.assert_frame_equal(result, expected) - result = r.aggregate({"A": ["mean", "std"]}) - expected = concat([a_mean, a_std], axis=1) - expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "std")]) - tm.assert_frame_equal(result, expected) + def test_agg(self): + df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) - result = r["A"].aggregate(["mean", "sum"]) - expected = concat([a_mean, a_sum], axis=1) - expected.columns = ["mean", "sum"] - tm.assert_frame_equal(result, expected) + r = df.rolling(window=3) + a_mean = r["A"].mean() + a_std = r["A"].std() + a_sum = r["A"].sum() + b_mean = r["B"].mean() + b_std = r["B"].std() - msg = "nested renamer is not supported" - with pytest.raises(SpecificationError, match=msg): - # using a dict with renaming - r.aggregate({"A": {"mean": "mean", "sum": "sum"}}) + result = r.aggregate([np.mean, np.std]) + expected = concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) + tm.assert_frame_equal(result, expected) - with pytest.raises(SpecificationError, match=msg): - r.aggregate( - { - "A": {"mean": "mean", "sum": "sum"}, - "B": {"mean2": "mean", "sum2": "sum"}, - } - ) + result = r.aggregate({"A": np.mean, "B": np.std}) - result = r.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]}) - expected = concat([a_mean, a_std, b_mean, b_std], axis=1) + expected = concat([a_mean, b_std], axis=1) + tm.assert_frame_equal(result, expected, check_like=True) - exp_cols = [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")] - expected.columns = pd.MultiIndex.from_tuples(exp_cols) - tm.assert_frame_equal(result, expected, check_like=True) + result = r.aggregate({"A": ["mean", "std"]}) + expected = concat([a_mean, a_std], axis=1) + expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "std")]) + tm.assert_frame_equal(result, expected) + result = r["A"].aggregate(["mean", "sum"]) + expected = concat([a_mean, a_sum], axis=1) + expected.columns = ["mean", "sum"] + tm.assert_frame_equal(result, expected) -def test_agg_apply(raw): + msg = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + # using a dict with renaming + r.aggregate({"A": {"mean": "mean", "sum": "sum"}}) - # passed lambda - df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) + with pytest.raises(SpecificationError, match=msg): + r.aggregate( + { + "A": {"mean": "mean", "sum": "sum"}, + "B": {"mean2": "mean", "sum2": "sum"}, + } + ) - r = df.rolling(window=3) - a_sum = r["A"].sum() + result = r.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]}) + expected = concat([a_mean, a_std, b_mean, b_std], axis=1) - result = r.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)}) - rcustom = r["B"].apply(lambda x: np.std(x, ddof=1), raw=raw) - expected = concat([a_sum, rcustom], axis=1) - tm.assert_frame_equal(result, expected, check_like=True) + exp_cols = [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")] + expected.columns = pd.MultiIndex.from_tuples(exp_cols) + tm.assert_frame_equal(result, expected, check_like=True) + def test_agg_apply(self, raw): -def test_agg_consistency(): + # passed lambda + df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) - df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) - r = df.rolling(window=3) + r = df.rolling(window=3) + a_sum = r["A"].sum() - result = r.agg([np.sum, np.mean]).columns - expected = pd.MultiIndex.from_product([list("AB"), ["sum", "mean"]]) - tm.assert_index_equal(result, expected) + result = r.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)}) + rcustom = r["B"].apply(lambda x: np.std(x, ddof=1), raw=raw) + expected = concat([a_sum, rcustom], axis=1) + tm.assert_frame_equal(result, expected, check_like=True) - result = r["A"].agg([np.sum, np.mean]).columns - expected = Index(["sum", "mean"]) - tm.assert_index_equal(result, expected) + def test_agg_consistency(self): - result = r.agg({"A": [np.sum, np.mean]}).columns - expected = pd.MultiIndex.from_tuples([("A", "sum"), ("A", "mean")]) - tm.assert_index_equal(result, expected) + df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) + r = df.rolling(window=3) + result = r.agg([np.sum, np.mean]).columns + expected = pd.MultiIndex.from_product([list("AB"), ["sum", "mean"]]) + tm.assert_index_equal(result, expected) -def test_agg_nested_dicts(): + result = r["A"].agg([np.sum, np.mean]).columns + expected = Index(["sum", "mean"]) + tm.assert_index_equal(result, expected) - # API change for disallowing these types of nested dicts - df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) - r = df.rolling(window=3) + result = r.agg({"A": [np.sum, np.mean]}).columns + expected = pd.MultiIndex.from_tuples([("A", "sum"), ("A", "mean")]) + tm.assert_index_equal(result, expected) - msg = "nested renamer is not supported" - with pytest.raises(SpecificationError, match=msg): - r.aggregate({"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}}) + def test_agg_nested_dicts(self): - expected = concat( - [r["A"].mean(), r["A"].std(), r["B"].mean(), r["B"].std()], axis=1 - ) - expected.columns = pd.MultiIndex.from_tuples( - [("ra", "mean"), ("ra", "std"), ("rb", "mean"), ("rb", "std")] - ) - with pytest.raises(SpecificationError, match=msg): - r[["A", "B"]].agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}) - - with pytest.raises(SpecificationError, match=msg): - r.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}) - - -def test_count_nonnumeric_types(): - # GH12541 - cols = [ - "int", - "float", - "string", - "datetime", - "timedelta", - "periods", - "fl_inf", - "fl_nan", - "str_nan", - "dt_nat", - "periods_nat", - ] - dt_nat_col = [ - Timestamp("20170101"), - Timestamp("20170203"), - Timestamp(None), - ] - - df = DataFrame( - { - "int": [1, 2, 3], - "float": [4.0, 5.0, 6.0], - "string": list("abc"), - "datetime": pd.date_range("20170101", periods=3), - "timedelta": pd.timedelta_range("1 s", periods=3, freq="s"), - "periods": [ - pd.Period("2012-01"), - pd.Period("2012-02"), - pd.Period("2012-03"), - ], - "fl_inf": [1.0, 2.0, np.Inf], - "fl_nan": [1.0, 2.0, np.NaN], - "str_nan": ["aa", "bb", np.NaN], - "dt_nat": dt_nat_col, - "periods_nat": [ - pd.Period("2012-01"), - pd.Period("2012-02"), - pd.Period(None), - ], - }, - columns=cols, - ) - - expected = DataFrame( - { - "int": [1.0, 2.0, 2.0], - "float": [1.0, 2.0, 2.0], - "string": [1.0, 2.0, 2.0], - "datetime": [1.0, 2.0, 2.0], - "timedelta": [1.0, 2.0, 2.0], - "periods": [1.0, 2.0, 2.0], - "fl_inf": [1.0, 2.0, 2.0], - "fl_nan": [1.0, 2.0, 1.0], - "str_nan": [1.0, 2.0, 1.0], - "dt_nat": [1.0, 2.0, 1.0], - "periods_nat": [1.0, 2.0, 1.0], - }, - columns=cols, - ) + # API change for disallowing these types of nested dicts + df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) + r = df.rolling(window=3) - result = df.rolling(window=2, min_periods=0).count() - tm.assert_frame_equal(result, expected) + msg = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + r.aggregate({"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}}) - result = df.rolling(1, min_periods=0).count() - expected = df.notna().astype(float) - tm.assert_frame_equal(result, expected) + expected = concat( + [r["A"].mean(), r["A"].std(), r["B"].mean(), r["B"].std()], axis=1 + ) + expected.columns = pd.MultiIndex.from_tuples( + [("ra", "mean"), ("ra", "std"), ("rb", "mean"), ("rb", "std")] + ) + with pytest.raises(SpecificationError, match=msg): + r[["A", "B"]].agg( + {"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}} + ) + + with pytest.raises(SpecificationError, match=msg): + r.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}) + + def test_count_nonnumeric_types(self): + # GH12541 + cols = [ + "int", + "float", + "string", + "datetime", + "timedelta", + "periods", + "fl_inf", + "fl_nan", + "str_nan", + "dt_nat", + "periods_nat", + ] + + df = DataFrame( + { + "int": [1, 2, 3], + "float": [4.0, 5.0, 6.0], + "string": list("abc"), + "datetime": pd.date_range("20170101", periods=3), + "timedelta": pd.timedelta_range("1 s", periods=3, freq="s"), + "periods": [ + pd.Period("2012-01"), + pd.Period("2012-02"), + pd.Period("2012-03"), + ], + "fl_inf": [1.0, 2.0, np.Inf], + "fl_nan": [1.0, 2.0, np.NaN], + "str_nan": ["aa", "bb", np.NaN], + "dt_nat": [ + Timestamp("20170101"), + Timestamp("20170203"), + Timestamp(None), + ], + "periods_nat": [ + pd.Period("2012-01"), + pd.Period("2012-02"), + pd.Period(None), + ], + }, + columns=cols, + ) + expected = DataFrame( + { + "int": [1.0, 2.0, 2.0], + "float": [1.0, 2.0, 2.0], + "string": [1.0, 2.0, 2.0], + "datetime": [1.0, 2.0, 2.0], + "timedelta": [1.0, 2.0, 2.0], + "periods": [1.0, 2.0, 2.0], + "fl_inf": [1.0, 2.0, 2.0], + "fl_nan": [1.0, 2.0, 1.0], + "str_nan": [1.0, 2.0, 1.0], + "dt_nat": [1.0, 2.0, 1.0], + "periods_nat": [1.0, 2.0, 1.0], + }, + columns=cols, + ) -@td.skip_if_no_scipy -@pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning") -def test_window_with_args(): - # make sure that we are aggregating window functions correctly with arg - r = Series(np.random.randn(100)).rolling( - window=10, min_periods=1, win_type="gaussian" - ) - expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1) - expected.columns = ["", ""] - result = r.aggregate([lambda x: x.mean(std=10), lambda x: x.mean(std=0.01)]) - tm.assert_frame_equal(result, expected) + result = df.rolling(window=2, min_periods=0).count() + tm.assert_frame_equal(result, expected) - def a(x): - return x.mean(std=10) + result = df.rolling(1, min_periods=0).count() + expected = df.notna().astype(float) + tm.assert_frame_equal(result, expected) - def b(x): - return x.mean(std=0.01) + @td.skip_if_no_scipy + @pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning") + def test_window_with_args(self): + # make sure that we are aggregating window functions correctly with arg + r = Series(np.random.randn(100)).rolling( + window=10, min_periods=1, win_type="gaussian" + ) + expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1) + expected.columns = ["", ""] + result = r.aggregate([lambda x: x.mean(std=10), lambda x: x.mean(std=0.01)]) + tm.assert_frame_equal(result, expected) - expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1) - expected.columns = ["a", "b"] - result = r.aggregate([a, b]) - tm.assert_frame_equal(result, expected) + def a(x): + return x.mean(std=10) + def b(x): + return x.mean(std=0.01) -def test_preserve_metadata(): - # GH 10565 - s = Series(np.arange(100), name="foo") + expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1) + expected.columns = ["a", "b"] + result = r.aggregate([a, b]) + tm.assert_frame_equal(result, expected) - s2 = s.rolling(30).sum() - s3 = s.rolling(20).sum() - assert s2.name == "foo" - assert s3.name == "foo" + def test_preserve_metadata(self): + # GH 10565 + s = Series(np.arange(100), name="foo") + s2 = s.rolling(30).sum() + s3 = s.rolling(20).sum() + assert s2.name == "foo" + assert s3.name == "foo" -@pytest.mark.parametrize( - "func,window_size,expected_vals", - [ - ( - "rolling", - 2, - [ - [np.nan, np.nan, np.nan, np.nan], - [15.0, 20.0, 25.0, 20.0], - [25.0, 30.0, 35.0, 30.0], - [np.nan, np.nan, np.nan, np.nan], - [20.0, 30.0, 35.0, 30.0], - [35.0, 40.0, 60.0, 40.0], - [60.0, 80.0, 85.0, 80], - ], - ), - ( - "expanding", - None, - [ - [10.0, 10.0, 20.0, 20.0], - [15.0, 20.0, 25.0, 20.0], - [20.0, 30.0, 30.0, 20.0], - [10.0, 10.0, 30.0, 30.0], - [20.0, 30.0, 35.0, 30.0], - [26.666667, 40.0, 50.0, 30.0], - [40.0, 80.0, 60.0, 30.0], - ], - ), - ], -) -def test_multiple_agg_funcs(func, window_size, expected_vals): - # GH 15072 - df = pd.DataFrame( + @pytest.mark.parametrize( + "func,window_size,expected_vals", [ - ["A", 10, 20], - ["A", 20, 30], - ["A", 30, 40], - ["B", 10, 30], - ["B", 30, 40], - ["B", 40, 80], - ["B", 80, 90], + ( + "rolling", + 2, + [ + [np.nan, np.nan, np.nan, np.nan], + [15.0, 20.0, 25.0, 20.0], + [25.0, 30.0, 35.0, 30.0], + [np.nan, np.nan, np.nan, np.nan], + [20.0, 30.0, 35.0, 30.0], + [35.0, 40.0, 60.0, 40.0], + [60.0, 80.0, 85.0, 80], + ], + ), + ( + "expanding", + None, + [ + [10.0, 10.0, 20.0, 20.0], + [15.0, 20.0, 25.0, 20.0], + [20.0, 30.0, 30.0, 20.0], + [10.0, 10.0, 30.0, 30.0], + [20.0, 30.0, 35.0, 30.0], + [26.666667, 40.0, 50.0, 30.0], + [40.0, 80.0, 60.0, 30.0], + ], + ), ], - columns=["stock", "low", "high"], ) + def test_multiple_agg_funcs(self, func, window_size, expected_vals): + # GH 15072 + df = pd.DataFrame( + [ + ["A", 10, 20], + ["A", 20, 30], + ["A", 30, 40], + ["B", 10, 30], + ["B", 30, 40], + ["B", 40, 80], + ["B", 80, 90], + ], + columns=["stock", "low", "high"], + ) - f = getattr(df.groupby("stock"), func) - if window_size: - window = f(window_size) - else: - window = f() + f = getattr(df.groupby("stock"), func) + if window_size: + window = f(window_size) + else: + window = f() - index = pd.MultiIndex.from_tuples( - [("A", 0), ("A", 1), ("A", 2), ("B", 3), ("B", 4), ("B", 5), ("B", 6)], - names=["stock", None], - ) - columns = pd.MultiIndex.from_tuples( - [("low", "mean"), ("low", "max"), ("high", "mean"), ("high", "min")] - ) - expected = pd.DataFrame(expected_vals, index=index, columns=columns) + index = pd.MultiIndex.from_tuples( + [("A", 0), ("A", 1), ("A", 2), ("B", 3), ("B", 4), ("B", 5), ("B", 6)], + names=["stock", None], + ) + columns = pd.MultiIndex.from_tuples( + [("low", "mean"), ("low", "max"), ("high", "mean"), ("high", "min")] + ) + expected = pd.DataFrame(expected_vals, index=index, columns=columns) - result = window.agg( - OrderedDict((("low", ["mean", "max"]), ("high", ["mean", "min"]))) - ) + result = window.agg( + OrderedDict((("low", ["mean", "max"]), ("high", ["mean", "min"]))) + ) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_apply.py b/pandas/tests/window/test_apply.py index bc38634da8941..34cf0a3054889 100644 --- a/pandas/tests/window/test_apply.py +++ b/pandas/tests/window/test_apply.py @@ -4,7 +4,7 @@ from pandas.errors import NumbaUtilError import pandas.util._test_decorators as td -from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range +from pandas import DataFrame, Series, Timestamp, date_range import pandas._testing as tm @@ -139,28 +139,3 @@ def test_invalid_kwargs_nopython(): Series(range(1)).rolling(1).apply( lambda x: x, kwargs={"a": 1}, engine="numba", raw=True ) - - -@pytest.mark.parametrize("args_kwargs", [[None, {"par": 10}], [(10,), None]]) -def test_rolling_apply_args_kwargs(args_kwargs): - # GH 33433 - def foo(x, par): - return np.sum(x + par) - - df = DataFrame({"gr": [1, 1], "a": [1, 2]}) - - idx = Index(["gr", "a"]) - expected = DataFrame([[11.0, 11.0], [11.0, 12.0]], columns=idx) - - result = df.rolling(1).apply(foo, args=args_kwargs[0], kwargs=args_kwargs[1]) - tm.assert_frame_equal(result, expected) - - result = df.rolling(1).apply(foo, args=(10,)) - - midx = MultiIndex.from_tuples([(1, 0), (1, 1)], names=["gr", None]) - expected = Series([11.0, 12.0], index=midx, name="a") - - gb_rolling = df.groupby("gr")["a"].rolling(1) - - result = gb_rolling.apply(foo, args=args_kwargs[0], kwargs=args_kwargs[1]) - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 9ba194dcf0959..1683fda500f85 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -5,62 +5,66 @@ from pandas import DataFrame, Series from pandas.core.window import EWM +from pandas.tests.window.common import Base -def test_doc_string(): +class TestEWM(Base): + def setup_method(self, method): + self._create_data() - df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) - df - df.ewm(com=0.5).mean() + def test_doc_string(self): + df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) + df + df.ewm(com=0.5).mean() -def test_constructor(which): + @pytest.mark.parametrize("which", ["series", "frame"]) + def test_constructor(self, which): + o = getattr(self, which) + c = o.ewm - c = which.ewm + # valid + c(com=0.5) + c(span=1.5) + c(alpha=0.5) + c(halflife=0.75) + c(com=0.5, span=None) + c(alpha=0.5, com=None) + c(halflife=0.75, alpha=None) - # valid - c(com=0.5) - c(span=1.5) - c(alpha=0.5) - c(halflife=0.75) - c(com=0.5, span=None) - c(alpha=0.5, com=None) - c(halflife=0.75, alpha=None) - - # not valid: mutually exclusive - with pytest.raises(ValueError): - c(com=0.5, alpha=0.5) - with pytest.raises(ValueError): - c(span=1.5, halflife=0.75) - with pytest.raises(ValueError): - c(alpha=0.5, span=1.5) - - # not valid: com < 0 - with pytest.raises(ValueError): - c(com=-0.5) + # not valid: mutually exclusive + with pytest.raises(ValueError): + c(com=0.5, alpha=0.5) + with pytest.raises(ValueError): + c(span=1.5, halflife=0.75) + with pytest.raises(ValueError): + c(alpha=0.5, span=1.5) - # not valid: span < 1 - with pytest.raises(ValueError): - c(span=0.5) + # not valid: com < 0 + with pytest.raises(ValueError): + c(com=-0.5) - # not valid: halflife <= 0 - with pytest.raises(ValueError): - c(halflife=0) + # not valid: span < 1 + with pytest.raises(ValueError): + c(span=0.5) - # not valid: alpha <= 0 or alpha > 1 - for alpha in (-0.5, 1.5): + # not valid: halflife <= 0 with pytest.raises(ValueError): - c(alpha=alpha) + c(halflife=0) + # not valid: alpha <= 0 or alpha > 1 + for alpha in (-0.5, 1.5): + with pytest.raises(ValueError): + c(alpha=alpha) -@pytest.mark.parametrize("method", ["std", "mean", "var"]) -def test_numpy_compat(method): - # see gh-12811 - e = EWM(Series([2, 4, 6]), alpha=0.5) + @pytest.mark.parametrize("method", ["std", "mean", "var"]) + def test_numpy_compat(self, method): + # see gh-12811 + e = EWM(Series([2, 4, 6]), alpha=0.5) - msg = "numpy operations are not valid with window objects" + msg = "numpy operations are not valid with window objects" - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(e, method)(1, 2, 3) - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(e, method)(dtype=np.float64) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(dtype=np.float64) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index b57467385d371..6b6367fd80b26 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -7,102 +7,112 @@ from pandas import DataFrame, Series import pandas._testing as tm from pandas.core.window import Expanding +from pandas.tests.window.common import Base -def test_doc_string(): +class TestExpanding(Base): + def setup_method(self, method): + self._create_data() - df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) - df - df.expanding(2).sum() + def test_doc_string(self): + df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) + df + df.expanding(2).sum() -def test_constructor(which): - # GH 12669 + @pytest.mark.parametrize("which", ["series", "frame"]) + def test_constructor(self, which): + # GH 12669 - c = which.expanding + o = getattr(self, which) + c = o.expanding - # valid - c(min_periods=1) - c(min_periods=1, center=True) - c(min_periods=1, center=False) + # valid + c(min_periods=1) + c(min_periods=1, center=True) + c(min_periods=1, center=False) - # not valid - for w in [2.0, "foo", np.array([2])]: - with pytest.raises(ValueError): - c(min_periods=w) - with pytest.raises(ValueError): - c(min_periods=1, center=w) + # not valid + for w in [2.0, "foo", np.array([2])]: + with pytest.raises(ValueError): + c(min_periods=w) + with pytest.raises(ValueError): + c(min_periods=1, center=w) + @pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) + def test_numpy_compat(self, method): + # see gh-12811 + e = Expanding(Series([2, 4, 6]), window=2) -@pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) -def test_numpy_compat(method): - # see gh-12811 - e = Expanding(Series([2, 4, 6]), window=2) + msg = "numpy operations are not valid with window objects" - msg = "numpy operations are not valid with window objects" + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(dtype=np.float64) - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(e, method)(1, 2, 3) - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(e, method)(dtype=np.float64) - - -@pytest.mark.parametrize( - "expander", - [ - 1, - pytest.param( - "ls", - marks=pytest.mark.xfail( - reason="GH#16425 expanding with offset not supported" + @pytest.mark.parametrize( + "expander", + [ + 1, + pytest.param( + "ls", + marks=pytest.mark.xfail( + reason="GH#16425 expanding with offset not supported" + ), ), - ), - ], -) -def test_empty_df_expanding(expander): - # GH 15819 Verifies that datetime and integer expanding windows can be - # applied to empty DataFrames - - expected = DataFrame() - result = DataFrame().expanding(expander).sum() - tm.assert_frame_equal(result, expected) - - # Verifies that datetime and integer expanding windows can be applied - # to empty DataFrames with datetime index - expected = DataFrame(index=pd.DatetimeIndex([])) - result = DataFrame(index=pd.DatetimeIndex([])).expanding(expander).sum() - tm.assert_frame_equal(result, expected) - - -def test_missing_minp_zero(): - # https://github.com/pandas-dev/pandas/pull/18921 - # minp=0 - x = pd.Series([np.nan]) - result = x.expanding(min_periods=0).sum() - expected = pd.Series([0.0]) - tm.assert_series_equal(result, expected) - - # minp=1 - result = x.expanding(min_periods=1).sum() - expected = pd.Series([np.nan]) - tm.assert_series_equal(result, expected) - - -def test_expanding_axis(axis_frame): - # see gh-23372. - df = DataFrame(np.ones((10, 20))) - axis = df._get_axis_number(axis_frame) - - if axis == 0: - expected = DataFrame( - {i: [np.nan] * 2 + [float(j) for j in range(3, 11)] for i in range(20)} - ) - else: - # axis == 1 - expected = DataFrame([[np.nan] * 2 + [float(i) for i in range(3, 21)]] * 10) - - result = df.expanding(3, axis=axis_frame).sum() - tm.assert_frame_equal(result, expected) + ], + ) + def test_empty_df_expanding(self, expander): + # GH 15819 Verifies that datetime and integer expanding windows can be + # applied to empty DataFrames + + expected = DataFrame() + result = DataFrame().expanding(expander).sum() + tm.assert_frame_equal(result, expected) + + # Verifies that datetime and integer expanding windows can be applied + # to empty DataFrames with datetime index + expected = DataFrame(index=pd.DatetimeIndex([])) + result = DataFrame(index=pd.DatetimeIndex([])).expanding(expander).sum() + tm.assert_frame_equal(result, expected) + + def test_missing_minp_zero(self): + # https://github.com/pandas-dev/pandas/pull/18921 + # minp=0 + x = pd.Series([np.nan]) + result = x.expanding(min_periods=0).sum() + expected = pd.Series([0.0]) + tm.assert_series_equal(result, expected) + + # minp=1 + result = x.expanding(min_periods=1).sum() + expected = pd.Series([np.nan]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame]) + def test_iter_raises(self, klass): + # https://github.com/pandas-dev/pandas/issues/11704 + # Iteration over a Window + obj = klass([1, 2, 3, 4]) + with pytest.raises(NotImplementedError): + iter(obj.expanding(2)) + + def test_expanding_axis(self, axis_frame): + # see gh-23372. + df = DataFrame(np.ones((10, 20))) + axis = df._get_axis_number(axis_frame) + + if axis == 0: + expected = DataFrame( + {i: [np.nan] * 2 + [float(j) for j in range(3, 11)] for i in range(20)} + ) + else: + # axis == 1 + expected = DataFrame([[np.nan] * 2 + [float(i) for i in range(3, 21)]] * 10) + + result = df.expanding(3, axis=axis_frame).sum() + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("constructor", [Series, DataFrame]) @@ -122,91 +132,3 @@ def test_expanding_count_default_min_periods_with_null_values(constructor): result = constructor(values).expanding().count() expected = constructor(expected_counts) tm.assert_equal(result, expected) - - -@pytest.mark.parametrize( - "df,expected,min_periods", - [ - ( - DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), - [ - ({"A": [1], "B": [4]}, [0]), - ({"A": [1, 2], "B": [4, 5]}, [0, 1]), - ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), - ], - 3, - ), - ( - DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), - [ - ({"A": [1], "B": [4]}, [0]), - ({"A": [1, 2], "B": [4, 5]}, [0, 1]), - ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), - ], - 2, - ), - ( - DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), - [ - ({"A": [1], "B": [4]}, [0]), - ({"A": [1, 2], "B": [4, 5]}, [0, 1]), - ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), - ], - 1, - ), - (DataFrame({"A": [1], "B": [4]}), [], 2), - (DataFrame(), [({}, [])], 1), - ( - DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), - [ - ({"A": [1.0], "B": [np.nan]}, [0]), - ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]), - ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), - ], - 3, - ), - ( - DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), - [ - ({"A": [1.0], "B": [np.nan]}, [0]), - ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]), - ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), - ], - 2, - ), - ( - DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), - [ - ({"A": [1.0], "B": [np.nan]}, [0]), - ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]), - ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), - ], - 1, - ), - ], -) -def test_iter_expanding_dataframe(df, expected, min_periods): - # GH 11704 - expected = [DataFrame(values, index=index) for (values, index) in expected] - - for (expected, actual) in zip(expected, df.expanding(min_periods)): - tm.assert_frame_equal(actual, expected) - - -@pytest.mark.parametrize( - "ser,expected,min_periods", - [ - (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3), - (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2), - (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 1), - (Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2), - (Series([np.nan, 2]), [([np.nan], [0]), ([np.nan, 2], [0, 1])], 2), - (Series([], dtype="int64"), [], 2), - ], -) -def test_iter_expanding_series(ser, expected, min_periods): - # GH 11704 - expected = [Series(values, index=index) for (values, index) in expected] - - for (expected, actual) in zip(expected, ser.expanding(min_periods)): - tm.assert_series_equal(actual, expected) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index f9b0e6856337b..866b7da59382d 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -7,371 +7,380 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, Series, date_range +from pandas import DataFrame, Series import pandas._testing as tm from pandas.core.window import Rolling +from pandas.tests.window.common import Base -def test_doc_string(): +class TestRolling(Base): + def setup_method(self, method): + self._create_data() - df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) - df - df.rolling(2).sum() - df.rolling(2, min_periods=1).sum() + def test_doc_string(self): + df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) + df + df.rolling(2).sum() + df.rolling(2, min_periods=1).sum() -def test_constructor(which): - # GH 12669 + @pytest.mark.parametrize("which", ["series", "frame"]) + def test_constructor(self, which): + # GH 12669 - c = which.rolling + o = getattr(self, which) + c = o.rolling - # valid - c(0) - c(window=2) - c(window=2, min_periods=1) - c(window=2, min_periods=1, center=True) - c(window=2, min_periods=1, center=False) + # valid + c(0) + c(window=2) + c(window=2, min_periods=1) + c(window=2, min_periods=1, center=True) + c(window=2, min_periods=1, center=False) - # GH 13383 + # GH 13383 - msg = "window must be non-negative" + msg = "window must be non-negative" - with pytest.raises(ValueError, match=msg): - c(-1) - - # not valid - for w in [2.0, "foo", np.array([2])]: - msg = ( - "window must be an integer|" - "passed window foo is not compatible with a datetimelike index" - ) - with pytest.raises(ValueError, match=msg): - c(window=w) - - msg = "min_periods must be an integer" with pytest.raises(ValueError, match=msg): - c(window=2, min_periods=w) + c(-1) + + # not valid + for w in [2.0, "foo", np.array([2])]: + msg = ( + "window must be an integer|" + "passed window foo is not compatible with a datetimelike index" + ) + with pytest.raises(ValueError, match=msg): + c(window=w) + + msg = "min_periods must be an integer" + with pytest.raises(ValueError, match=msg): + c(window=2, min_periods=w) + + msg = "center must be a boolean" + with pytest.raises(ValueError, match=msg): + c(window=2, min_periods=1, center=w) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("which", ["series", "frame"]) + def test_constructor_with_win_type(self, which): + # GH 13383 + o = getattr(self, which) + c = o.rolling + + msg = "window must be > 0" - msg = "center must be a boolean" with pytest.raises(ValueError, match=msg): - c(window=2, min_periods=1, center=w) - - -@td.skip_if_no_scipy -def test_constructor_with_win_type(which): - # GH 13383 - c = which.rolling - - msg = "window must be > 0" - - with pytest.raises(ValueError, match=msg): - c(-1, win_type="boxcar") - - -@pytest.mark.parametrize("window", [timedelta(days=3), pd.Timedelta(days=3)]) -def test_constructor_with_timedelta_window(window): - # GH 15440 - n = 10 - df = DataFrame( - {"value": np.arange(n)}, index=pd.date_range("2015-12-24", periods=n, freq="D"), - ) - expected_data = np.append([0.0, 1.0], np.arange(3.0, 27.0, 3)) - - result = df.rolling(window=window).sum() - expected = DataFrame( - {"value": expected_data}, - index=pd.date_range("2015-12-24", periods=n, freq="D"), - ) - tm.assert_frame_equal(result, expected) - expected = df.rolling("3D").sum() - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize("window", [timedelta(days=3), pd.Timedelta(days=3), "3D"]) -def test_constructor_timedelta_window_and_minperiods(window, raw): - # GH 15305 - n = 10 - df = DataFrame( - {"value": np.arange(n)}, index=pd.date_range("2017-08-08", periods=n, freq="D"), - ) - expected = DataFrame( - {"value": np.append([np.NaN, 1.0], np.arange(3.0, 27.0, 3))}, - index=pd.date_range("2017-08-08", periods=n, freq="D"), - ) - result_roll_sum = df.rolling(window=window, min_periods=2).sum() - result_roll_generic = df.rolling(window=window, min_periods=2).apply(sum, raw=raw) - tm.assert_frame_equal(result_roll_sum, expected) - tm.assert_frame_equal(result_roll_generic, expected) - - -@pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) -def test_numpy_compat(method): - # see gh-12811 - r = Rolling(Series([2, 4, 6]), window=2) - - msg = "numpy operations are not valid with window objects" + c(-1, win_type="boxcar") + + @pytest.mark.parametrize("window", [timedelta(days=3), pd.Timedelta(days=3)]) + def test_constructor_with_timedelta_window(self, window): + # GH 15440 + n = 10 + df = DataFrame( + {"value": np.arange(n)}, + index=pd.date_range("2015-12-24", periods=n, freq="D"), + ) + expected_data = np.append([0.0, 1.0], np.arange(3.0, 27.0, 3)) - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(r, method)(1, 2, 3) - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(r, method)(dtype=np.float64) + result = df.rolling(window=window).sum() + expected = DataFrame( + {"value": expected_data}, + index=pd.date_range("2015-12-24", periods=n, freq="D"), + ) + tm.assert_frame_equal(result, expected) + expected = df.rolling("3D").sum() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("window", [timedelta(days=3), pd.Timedelta(days=3), "3D"]) + def test_constructor_timedelta_window_and_minperiods(self, window, raw): + # GH 15305 + n = 10 + df = DataFrame( + {"value": np.arange(n)}, + index=pd.date_range("2017-08-08", periods=n, freq="D"), + ) + expected = DataFrame( + {"value": np.append([np.NaN, 1.0], np.arange(3.0, 27.0, 3))}, + index=pd.date_range("2017-08-08", periods=n, freq="D"), + ) + result_roll_sum = df.rolling(window=window, min_periods=2).sum() + result_roll_generic = df.rolling(window=window, min_periods=2).apply( + sum, raw=raw + ) + tm.assert_frame_equal(result_roll_sum, expected) + tm.assert_frame_equal(result_roll_generic, expected) + @pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) + def test_numpy_compat(self, method): + # see gh-12811 + r = Rolling(Series([2, 4, 6]), window=2) -def test_closed(): - df = DataFrame({"A": [0, 1, 2, 3, 4]}) - # closed only allowed for datetimelike + msg = "numpy operations are not valid with window objects" - msg = "closed only implemented for datetimelike and offset based windows" + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, method)(dtype=np.float64) - with pytest.raises(ValueError, match=msg): - df.rolling(window=3, closed="neither") + def test_closed(self): + df = DataFrame({"A": [0, 1, 2, 3, 4]}) + # closed only allowed for datetimelike + msg = "closed only implemented for datetimelike and offset based windows" -@pytest.mark.parametrize("closed", ["neither", "left"]) -def test_closed_empty(closed, arithmetic_win_operators): - # GH 26005 - func_name = arithmetic_win_operators - ser = pd.Series( - data=np.arange(5), index=pd.date_range("2000", periods=5, freq="2D") + with pytest.raises(ValueError, match=msg): + df.rolling(window=3, closed="neither") + + @pytest.mark.parametrize("closed", ["neither", "left"]) + def test_closed_empty(self, closed, arithmetic_win_operators): + # GH 26005 + func_name = arithmetic_win_operators + ser = pd.Series( + data=np.arange(5), index=pd.date_range("2000", periods=5, freq="2D") + ) + roll = ser.rolling("1D", closed=closed) + + result = getattr(roll, func_name)() + expected = pd.Series([np.nan] * 5, index=ser.index) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("func", ["min", "max"]) + def test_closed_one_entry(self, func): + # GH24718 + ser = pd.Series(data=[2], index=pd.date_range("2000", periods=1)) + result = getattr(ser.rolling("10D", closed="left"), func)() + tm.assert_series_equal(result, pd.Series([np.nan], index=ser.index)) + + @pytest.mark.parametrize("func", ["min", "max"]) + def test_closed_one_entry_groupby(self, func): + # GH24718 + ser = pd.DataFrame( + data={"A": [1, 1, 2], "B": [3, 2, 1]}, + index=pd.date_range("2000", periods=3), + ) + result = getattr( + ser.groupby("A", sort=False)["B"].rolling("10D", closed="left"), func + )() + exp_idx = pd.MultiIndex.from_arrays( + arrays=[[1, 1, 2], ser.index], names=("A", None) + ) + expected = pd.Series(data=[np.nan, 3, np.nan], index=exp_idx, name="B") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("input_dtype", ["int", "float"]) + @pytest.mark.parametrize( + "func,closed,expected", + [ + ("min", "right", [0.0, 0, 0, 1, 2, 3, 4, 5, 6, 7]), + ("min", "both", [0.0, 0, 0, 0, 1, 2, 3, 4, 5, 6]), + ("min", "neither", [np.nan, 0, 0, 1, 2, 3, 4, 5, 6, 7]), + ("min", "left", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, 6]), + ("max", "right", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + ("max", "both", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + ("max", "neither", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]), + ("max", "left", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]), + ], ) - roll = ser.rolling("1D", closed=closed) - - result = getattr(roll, func_name)() - expected = pd.Series([np.nan] * 5, index=ser.index) - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize("func", ["min", "max"]) -def test_closed_one_entry(func): - # GH24718 - ser = pd.Series(data=[2], index=pd.date_range("2000", periods=1)) - result = getattr(ser.rolling("10D", closed="left"), func)() - tm.assert_series_equal(result, pd.Series([np.nan], index=ser.index)) - + def test_closed_min_max_datetime(self, input_dtype, func, closed, expected): + # see gh-21704 + ser = pd.Series( + data=np.arange(10).astype(input_dtype), + index=pd.date_range("2000", periods=10), + ) -@pytest.mark.parametrize("func", ["min", "max"]) -def test_closed_one_entry_groupby(func): - # GH24718 - ser = pd.DataFrame( - data={"A": [1, 1, 2], "B": [3, 2, 1]}, index=pd.date_range("2000", periods=3), - ) - result = getattr( - ser.groupby("A", sort=False)["B"].rolling("10D", closed="left"), func - )() - exp_idx = pd.MultiIndex.from_arrays( - arrays=[[1, 1, 2], ser.index], names=("A", None) + result = getattr(ser.rolling("3D", closed=closed), func)() + expected = pd.Series(expected, index=ser.index) + tm.assert_series_equal(result, expected) + + def test_closed_uneven(self): + # see gh-21704 + ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) + + # uneven + ser = ser.drop(index=ser.index[[1, 5]]) + result = ser.rolling("3D", closed="left").min() + expected = pd.Series([np.nan, 0, 0, 2, 3, 4, 6, 6], index=ser.index) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "func,closed,expected", + [ + ("min", "right", [np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan, np.nan]), + ("min", "both", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, np.nan]), + ("min", "neither", [np.nan, np.nan, 0, 1, 2, 3, 4, 5, np.nan, np.nan]), + ("min", "left", [np.nan, np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan]), + ("max", "right", [np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan, np.nan]), + ("max", "both", [np.nan, 1, 2, 3, 4, 5, 6, 6, 6, np.nan]), + ("max", "neither", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, np.nan, np.nan]), + ("max", "left", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan]), + ], ) - expected = pd.Series(data=[np.nan, 3, np.nan], index=exp_idx, name="B") - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize("input_dtype", ["int", "float"]) -@pytest.mark.parametrize( - "func,closed,expected", - [ - ("min", "right", [0.0, 0, 0, 1, 2, 3, 4, 5, 6, 7]), - ("min", "both", [0.0, 0, 0, 0, 1, 2, 3, 4, 5, 6]), - ("min", "neither", [np.nan, 0, 0, 1, 2, 3, 4, 5, 6, 7]), - ("min", "left", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, 6]), - ("max", "right", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), - ("max", "both", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), - ("max", "neither", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]), - ("max", "left", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]), - ], -) -def test_closed_min_max_datetime(input_dtype, func, closed, expected): - # see gh-21704 - ser = pd.Series( - data=np.arange(10).astype(input_dtype), index=pd.date_range("2000", periods=10), + def test_closed_min_max_minp(self, func, closed, expected): + # see gh-21704 + ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) + ser[ser.index[-3:]] = np.nan + result = getattr(ser.rolling("3D", min_periods=2, closed=closed), func)() + expected = pd.Series(expected, index=ser.index) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "closed,expected", + [ + ("right", [0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8]), + ("both", [0, 0.5, 1, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), + ("neither", [np.nan, 0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), + ("left", [np.nan, 0, 0.5, 1, 2, 3, 4, 5, 6, 7]), + ], ) + def test_closed_median_quantile(self, closed, expected): + # GH 26005 + ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) + roll = ser.rolling("3D", closed=closed) + expected = pd.Series(expected, index=ser.index) + + result = roll.median() + tm.assert_series_equal(result, expected) + + result = roll.quantile(0.5) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("roller", ["1s", 1]) + def tests_empty_df_rolling(self, roller): + # GH 15819 Verifies that datetime and integer rolling windows can be + # applied to empty DataFrames + expected = DataFrame() + result = DataFrame().rolling(roller).sum() + tm.assert_frame_equal(result, expected) + + # Verifies that datetime and integer rolling windows can be applied to + # empty DataFrames with datetime index + expected = DataFrame(index=pd.DatetimeIndex([])) + result = DataFrame(index=pd.DatetimeIndex([])).rolling(roller).sum() + tm.assert_frame_equal(result, expected) + + def test_empty_window_median_quantile(self): + # GH 26005 + expected = pd.Series([np.nan, np.nan, np.nan]) + roll = pd.Series(np.arange(3)).rolling(0) + + result = roll.median() + tm.assert_series_equal(result, expected) + + result = roll.quantile(0.1) + tm.assert_series_equal(result, expected) + + def test_missing_minp_zero(self): + # https://github.com/pandas-dev/pandas/pull/18921 + # minp=0 + x = pd.Series([np.nan]) + result = x.rolling(1, min_periods=0).sum() + expected = pd.Series([0.0]) + tm.assert_series_equal(result, expected) + + # minp=1 + result = x.rolling(1, min_periods=1).sum() + expected = pd.Series([np.nan]) + tm.assert_series_equal(result, expected) + + def test_missing_minp_zero_variable(self): + # https://github.com/pandas-dev/pandas/pull/18921 + x = pd.Series( + [np.nan] * 4, + index=pd.DatetimeIndex( + ["2017-01-01", "2017-01-04", "2017-01-06", "2017-01-07"] + ), + ) + result = x.rolling(pd.Timedelta("2d"), min_periods=0).sum() + expected = pd.Series(0.0, index=x.index) + tm.assert_series_equal(result, expected) - result = getattr(ser.rolling("3D", closed=closed), func)() - expected = pd.Series(expected, index=ser.index) - tm.assert_series_equal(result, expected) - - -def test_closed_uneven(): - # see gh-21704 - ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) - - # uneven - ser = ser.drop(index=ser.index[[1, 5]]) - result = ser.rolling("3D", closed="left").min() - expected = pd.Series([np.nan, 0, 0, 2, 3, 4, 6, 6], index=ser.index) - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize( - "func,closed,expected", - [ - ("min", "right", [np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan, np.nan]), - ("min", "both", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, np.nan]), - ("min", "neither", [np.nan, np.nan, 0, 1, 2, 3, 4, 5, np.nan, np.nan]), - ("min", "left", [np.nan, np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan]), - ("max", "right", [np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan, np.nan]), - ("max", "both", [np.nan, 1, 2, 3, 4, 5, 6, 6, 6, np.nan]), - ("max", "neither", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, np.nan, np.nan]), - ("max", "left", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan]), - ], -) -def test_closed_min_max_minp(func, closed, expected): - # see gh-21704 - ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) - ser[ser.index[-3:]] = np.nan - result = getattr(ser.rolling("3D", min_periods=2, closed=closed), func)() - expected = pd.Series(expected, index=ser.index) - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize( - "closed,expected", - [ - ("right", [0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8]), - ("both", [0, 0.5, 1, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), - ("neither", [np.nan, 0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), - ("left", [np.nan, 0, 0.5, 1, 2, 3, 4, 5, 6, 7]), - ], -) -def test_closed_median_quantile(closed, expected): - # GH 26005 - ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) - roll = ser.rolling("3D", closed=closed) - expected = pd.Series(expected, index=ser.index) - - result = roll.median() - tm.assert_series_equal(result, expected) - - result = roll.quantile(0.5) - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize("roller", ["1s", 1]) -def tests_empty_df_rolling(roller): - # GH 15819 Verifies that datetime and integer rolling windows can be - # applied to empty DataFrames - expected = DataFrame() - result = DataFrame().rolling(roller).sum() - tm.assert_frame_equal(result, expected) - - # Verifies that datetime and integer rolling windows can be applied to - # empty DataFrames with datetime index - expected = DataFrame(index=pd.DatetimeIndex([])) - result = DataFrame(index=pd.DatetimeIndex([])).rolling(roller).sum() - tm.assert_frame_equal(result, expected) - - -def test_empty_window_median_quantile(): - # GH 26005 - expected = pd.Series([np.nan, np.nan, np.nan]) - roll = pd.Series(np.arange(3)).rolling(0) - - result = roll.median() - tm.assert_series_equal(result, expected) - - result = roll.quantile(0.1) - tm.assert_series_equal(result, expected) - - -def test_missing_minp_zero(): - # https://github.com/pandas-dev/pandas/pull/18921 - # minp=0 - x = pd.Series([np.nan]) - result = x.rolling(1, min_periods=0).sum() - expected = pd.Series([0.0]) - tm.assert_series_equal(result, expected) - - # minp=1 - result = x.rolling(1, min_periods=1).sum() - expected = pd.Series([np.nan]) - tm.assert_series_equal(result, expected) - - -def test_missing_minp_zero_variable(): - # https://github.com/pandas-dev/pandas/pull/18921 - x = pd.Series( - [np.nan] * 4, - index=pd.DatetimeIndex( - ["2017-01-01", "2017-01-04", "2017-01-06", "2017-01-07"] - ), - ) - result = x.rolling(pd.Timedelta("2d"), min_periods=0).sum() - expected = pd.Series(0.0, index=x.index) - tm.assert_series_equal(result, expected) - - -def test_multi_index_names(): - - # GH 16789, 16825 - cols = pd.MultiIndex.from_product([["A", "B"], ["C", "D", "E"]], names=["1", "2"]) - df = DataFrame(np.ones((10, 6)), columns=cols) - result = df.rolling(3).cov() - - tm.assert_index_equal(result.columns, df.columns) - assert result.index.names == [None, "1", "2"] - - -def test_rolling_axis_sum(axis_frame): - # see gh-23372. - df = DataFrame(np.ones((10, 20))) - axis = df._get_axis_number(axis_frame) - - if axis == 0: - expected = DataFrame({i: [np.nan] * 2 + [3.0] * 8 for i in range(20)}) - else: - # axis == 1 - expected = DataFrame([[np.nan] * 2 + [3.0] * 18] * 10) - - result = df.rolling(3, axis=axis_frame).sum() - tm.assert_frame_equal(result, expected) - - -def test_rolling_axis_count(axis_frame): - # see gh-26055 - df = DataFrame({"x": range(3), "y": range(3)}) - - axis = df._get_axis_number(axis_frame) - - if axis in [0, "index"]: - expected = DataFrame({"x": [1.0, 2.0, 2.0], "y": [1.0, 2.0, 2.0]}) - else: - expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]}) - - result = df.rolling(2, axis=axis_frame, min_periods=0).count() - tm.assert_frame_equal(result, expected) - - -def test_readonly_array(): - # GH-27766 - arr = np.array([1, 3, np.nan, 3, 5]) - arr.setflags(write=False) - result = pd.Series(arr).rolling(2).mean() - expected = pd.Series([np.nan, 2, np.nan, np.nan, 4]) - tm.assert_series_equal(result, expected) - + def test_multi_index_names(self): -def test_rolling_datetime(axis_frame, tz_naive_fixture): - # GH-28192 - tz = tz_naive_fixture - df = pd.DataFrame( - {i: [1] * 2 for i in pd.date_range("2019-8-01", "2019-08-03", freq="D", tz=tz)} - ) - if axis_frame in [0, "index"]: - result = df.T.rolling("2D", axis=axis_frame).sum().T - else: - result = df.rolling("2D", axis=axis_frame).sum() - expected = pd.DataFrame( - { - **{ - i: [1.0] * 2 - for i in pd.date_range("2019-8-01", periods=1, freq="D", tz=tz) - }, - **{ - i: [2.0] * 2 - for i in pd.date_range("2019-8-02", "2019-8-03", freq="D", tz=tz) - }, - } - ) - tm.assert_frame_equal(result, expected) + # GH 16789, 16825 + cols = pd.MultiIndex.from_product( + [["A", "B"], ["C", "D", "E"]], names=["1", "2"] + ) + df = DataFrame(np.ones((10, 6)), columns=cols) + result = df.rolling(3).cov() + + tm.assert_index_equal(result.columns, df.columns) + assert result.index.names == [None, "1", "2"] + + @pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame]) + def test_iter_raises(self, klass): + # https://github.com/pandas-dev/pandas/issues/11704 + # Iteration over a Window + obj = klass([1, 2, 3, 4]) + + msg = "See issue #11704 https://github.com/pandas-dev/pandas/issues/11704" + + with pytest.raises(NotImplementedError, match=msg): + iter(obj.rolling(2)) + + def test_rolling_axis_sum(self, axis_frame): + # see gh-23372. + df = DataFrame(np.ones((10, 20))) + axis = df._get_axis_number(axis_frame) + + if axis == 0: + expected = DataFrame({i: [np.nan] * 2 + [3.0] * 8 for i in range(20)}) + else: + # axis == 1 + expected = DataFrame([[np.nan] * 2 + [3.0] * 18] * 10) + + result = df.rolling(3, axis=axis_frame).sum() + tm.assert_frame_equal(result, expected) + + def test_rolling_axis_count(self, axis_frame): + # see gh-26055 + df = DataFrame({"x": range(3), "y": range(3)}) + + axis = df._get_axis_number(axis_frame) + + if axis in [0, "index"]: + expected = DataFrame({"x": [1.0, 2.0, 2.0], "y": [1.0, 2.0, 2.0]}) + else: + expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]}) + + result = df.rolling(2, axis=axis_frame, min_periods=0).count() + tm.assert_frame_equal(result, expected) + + def test_readonly_array(self): + # GH-27766 + arr = np.array([1, 3, np.nan, 3, 5]) + arr.setflags(write=False) + result = pd.Series(arr).rolling(2).mean() + expected = pd.Series([np.nan, 2, np.nan, np.nan, 4]) + tm.assert_series_equal(result, expected) + + def test_rolling_datetime(self, axis_frame, tz_naive_fixture): + # GH-28192 + tz = tz_naive_fixture + df = pd.DataFrame( + { + i: [1] * 2 + for i in pd.date_range("2019-8-01", "2019-08-03", freq="D", tz=tz) + } + ) + if axis_frame in [0, "index"]: + result = df.T.rolling("2D", axis=axis_frame).sum().T + else: + result = df.rolling("2D", axis=axis_frame).sum() + expected = pd.DataFrame( + { + **{ + i: [1.0] * 2 + for i in pd.date_range("2019-8-01", periods=1, freq="D", tz=tz) + }, + **{ + i: [2.0] * 2 + for i in pd.date_range("2019-8-02", "2019-8-03", freq="D", tz=tz) + }, + } + ) + tm.assert_frame_equal(result, expected) def test_rolling_window_as_string(): @@ -458,208 +467,3 @@ def test_rolling_count_default_min_periods_with_null_values(constructor): result = constructor(values).rolling(3).count() expected = constructor(expected_counts) tm.assert_equal(result, expected) - - -@pytest.mark.parametrize( - "df,expected,window,min_periods", - [ - ( - DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), - [ - ({"A": [1], "B": [4]}, [0]), - ({"A": [1, 2], "B": [4, 5]}, [0, 1]), - ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), - ], - 3, - None, - ), - ( - DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), - [ - ({"A": [1], "B": [4]}, [0]), - ({"A": [1, 2], "B": [4, 5]}, [0, 1]), - ({"A": [2, 3], "B": [5, 6]}, [1, 2]), - ], - 2, - 1, - ), - ( - DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), - [ - ({"A": [1], "B": [4]}, [0]), - ({"A": [1, 2], "B": [4, 5]}, [0, 1]), - ({"A": [2, 3], "B": [5, 6]}, [1, 2]), - ], - 2, - 3, - ), - ( - DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), - [ - ({"A": [1], "B": [4]}, [0]), - ({"A": [2], "B": [5]}, [1]), - ({"A": [3], "B": [6]}, [2]), - ], - 1, - 1, - ), - ( - DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), - [ - ({"A": [1], "B": [4]}, [0]), - ({"A": [2], "B": [5]}, [1]), - ({"A": [3], "B": [6]}, [2]), - ], - 1, - 2, - ), - (DataFrame({"A": [1], "B": [4]}), [], 2, None), - (DataFrame({"A": [1], "B": [4]}), [], 2, 1), - (DataFrame(), [({}, [])], 2, None), - ( - DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), - [ - ({"A": [1.0], "B": [np.nan]}, [0]), - ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]), - ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), - ], - 3, - 2, - ), - ], -) -def test_iter_rolling_dataframe(df, expected, window, min_periods): - # GH 11704 - expected = [DataFrame(values, index=index) for (values, index) in expected] - - for (expected, actual) in zip( - expected, df.rolling(window, min_periods=min_periods) - ): - tm.assert_frame_equal(actual, expected) - - -@pytest.mark.parametrize( - "expected,window", - [ - ( - [ - ({"A": [1], "B": [4]}, [0]), - ({"A": [1, 2], "B": [4, 5]}, [0, 1]), - ({"A": [2, 3], "B": [5, 6]}, [1, 2]), - ], - "2D", - ), - ( - [ - ({"A": [1], "B": [4]}, [0]), - ({"A": [1, 2], "B": [4, 5]}, [0, 1]), - ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), - ], - "3D", - ), - ( - [ - ({"A": [1], "B": [4]}, [0]), - ({"A": [2], "B": [5]}, [1]), - ({"A": [3], "B": [6]}, [2]), - ], - "1D", - ), - ], -) -def test_iter_rolling_on_dataframe(expected, window): - # GH 11704 - df = DataFrame( - { - "A": [1, 2, 3, 4, 5], - "B": [4, 5, 6, 7, 8], - "C": date_range(start="2016-01-01", periods=5, freq="D"), - } - ) - - expected = [DataFrame(values, index=index) for (values, index) in expected] - for (expected, actual) in zip(expected, df.rolling(window, on="C")): - tm.assert_frame_equal(actual, expected) - - -@pytest.mark.parametrize( - "ser,expected,window, min_periods", - [ - ( - Series([1, 2, 3]), - [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], - 3, - None, - ), - ( - Series([1, 2, 3]), - [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], - 3, - 1, - ), - (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])], 2, 1), - (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])], 2, 3), - (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1, 0), - (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1, 2), - (Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2, 0), - (Series([], dtype="int64"), [], 2, 1), - ], -) -def test_iter_rolling_series(ser, expected, window, min_periods): - # GH 11704 - expected = [Series(values, index=index) for (values, index) in expected] - - for (expected, actual) in zip( - expected, ser.rolling(window, min_periods=min_periods) - ): - tm.assert_series_equal(actual, expected) - - -@pytest.mark.parametrize( - "expected,expected_index,window", - [ - ( - [[0], [1], [2], [3], [4]], - [ - date_range("2020-01-01", periods=1, freq="D"), - date_range("2020-01-02", periods=1, freq="D"), - date_range("2020-01-03", periods=1, freq="D"), - date_range("2020-01-04", periods=1, freq="D"), - date_range("2020-01-05", periods=1, freq="D"), - ], - "1D", - ), - ( - [[0], [0, 1], [1, 2], [2, 3], [3, 4]], - [ - date_range("2020-01-01", periods=1, freq="D"), - date_range("2020-01-01", periods=2, freq="D"), - date_range("2020-01-02", periods=2, freq="D"), - date_range("2020-01-03", periods=2, freq="D"), - date_range("2020-01-04", periods=2, freq="D"), - ], - "2D", - ), - ( - [[0], [0, 1], [0, 1, 2], [1, 2, 3], [2, 3, 4]], - [ - date_range("2020-01-01", periods=1, freq="D"), - date_range("2020-01-01", periods=2, freq="D"), - date_range("2020-01-01", periods=3, freq="D"), - date_range("2020-01-02", periods=3, freq="D"), - date_range("2020-01-03", periods=3, freq="D"), - ], - "3D", - ), - ], -) -def test_iter_rolling_datetime(expected, expected_index, window): - # GH 11704 - ser = Series(range(5), index=date_range(start="2020-01-01", periods=5, freq="D")) - - expected = [ - Series(values, index=idx) for (values, idx) in zip(expected, expected_index) - ] - - for (expected, actual) in zip(expected, ser.rolling(window)): - tm.assert_series_equal(actual, expected) diff --git a/pandas/tests/window/test_window.py b/pandas/tests/window/test_window.py index a450d29797c41..c7c45f0e5e0de 100644 --- a/pandas/tests/window/test_window.py +++ b/pandas/tests/window/test_window.py @@ -7,62 +7,70 @@ import pandas as pd from pandas import Series from pandas.core.window import Window - - -@td.skip_if_no_scipy -def test_constructor(which): - # GH 12669 - c = which.rolling - - # valid - c(win_type="boxcar", window=2, min_periods=1) - c(win_type="boxcar", window=2, min_periods=1, center=True) - c(win_type="boxcar", window=2, min_periods=1, center=False) - - # not valid - for w in [2.0, "foo", np.array([2])]: - with pytest.raises(ValueError, match="min_periods must be an integer"): - c(win_type="boxcar", window=2, min_periods=w) - with pytest.raises(ValueError, match="center must be a boolean"): - c(win_type="boxcar", window=2, min_periods=1, center=w) - - for wt in ["foobar", 1]: - with pytest.raises(ValueError, match="Invalid win_type"): - c(win_type=wt, window=2) - - -@td.skip_if_no_scipy -def test_constructor_with_win_type(which, win_types): - # GH 12669 - c = which.rolling - c(win_type=win_types, window=2) - - -@pytest.mark.parametrize("method", ["sum", "mean"]) -def test_numpy_compat(method): - # see gh-12811 - w = Window(Series([2, 4, 6]), window=[0, 2]) - - msg = "numpy operations are not valid with window objects" - - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(w, method)(1, 2, 3) - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(w, method)(dtype=np.float64) - - -@td.skip_if_no_scipy -@pytest.mark.parametrize("arg", ["median", "kurt", "skew"]) -def test_agg_function_support(arg): - df = pd.DataFrame({"A": np.arange(5)}) - roll = df.rolling(2, win_type="triang") - - msg = f"'{arg}' is not a valid function for 'Window' object" - with pytest.raises(AttributeError, match=msg): - roll.agg(arg) - - with pytest.raises(AttributeError, match=msg): - roll.agg([arg]) - - with pytest.raises(AttributeError, match=msg): - roll.agg({"A": arg}) +from pandas.tests.window.common import Base + + +@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") +class TestWindow(Base): + def setup_method(self, method): + self._create_data() + + @td.skip_if_no_scipy + @pytest.mark.parametrize("which", ["series", "frame"]) + def test_constructor(self, which): + # GH 12669 + + o = getattr(self, which) + c = o.rolling + + # valid + c(win_type="boxcar", window=2, min_periods=1) + c(win_type="boxcar", window=2, min_periods=1, center=True) + c(win_type="boxcar", window=2, min_periods=1, center=False) + + # not valid + for w in [2.0, "foo", np.array([2])]: + with pytest.raises(ValueError, match="min_periods must be an integer"): + c(win_type="boxcar", window=2, min_periods=w) + with pytest.raises(ValueError, match="center must be a boolean"): + c(win_type="boxcar", window=2, min_periods=1, center=w) + + for wt in ["foobar", 1]: + with pytest.raises(ValueError, match="Invalid win_type"): + c(win_type=wt, window=2) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("which", ["series", "frame"]) + def test_constructor_with_win_type(self, which, win_types): + # GH 12669 + o = getattr(self, which) + c = o.rolling + c(win_type=win_types, window=2) + + @pytest.mark.parametrize("method", ["sum", "mean"]) + def test_numpy_compat(self, method): + # see gh-12811 + w = Window(Series([2, 4, 6]), window=[0, 2]) + + msg = "numpy operations are not valid with window objects" + + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(w, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(w, method)(dtype=np.float64) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("arg", ["median", "kurt", "skew"]) + def test_agg_function_support(self, arg): + df = pd.DataFrame({"A": np.arange(5)}) + roll = df.rolling(2, win_type="triang") + + msg = f"'{arg}' is not a valid function for 'Window' object" + with pytest.raises(AttributeError, match=msg): + roll.agg(arg) + + with pytest.raises(AttributeError, match=msg): + roll.agg([arg]) + + with pytest.raises(AttributeError, match=msg): + roll.agg({"A": arg}) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index d95ffd5b0876d..6213ea198f2cb 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -165,7 +165,7 @@ def to_offset(freq) -> Optional[DateOffset]: ) stride = int(stride) offset = _get_offset(name) - offset = offset * int(np.fabs(stride) * stride_sign) # type: ignore + offset = offset * int(np.fabs(stride) * stride_sign) if delta is None: delta = offset else: diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 88f77a8d7f054..4912dc0eb349e 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1,6 +1,6 @@ from datetime import date, datetime, timedelta import operator -from typing import Optional +from typing import Any, Optional from dateutil.easter import easter import numpy as np @@ -296,7 +296,7 @@ def is_on_offset(self, dt): return True -class SingleConstructorMixin: +class SingleConstructorOffset(BaseOffset): _params = cache_readonly(BaseOffset._params.fget) freqstr = cache_readonly(BaseOffset.freqstr.fget) @@ -308,10 +308,6 @@ def _from_name(cls, suffix=None): return cls() -class SingleConstructorOffset(SingleConstructorMixin, BaseOffset): - pass - - class BusinessDay(BusinessMixin, SingleConstructorOffset): """ DateOffset subclass representing possibly n business days. @@ -320,6 +316,10 @@ class BusinessDay(BusinessMixin, SingleConstructorOffset): _prefix = "B" _attributes = frozenset(["n", "normalize", "offset"]) + def __init__(self, n=1, normalize=False, offset=timedelta(0)): + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "_offset", offset) + def _offset_str(self) -> str: def get_str(td): off_str = "" @@ -419,15 +419,7 @@ def is_on_offset(self, dt: datetime) -> bool: return dt.weekday() < 5 -class BusinessHour(SingleConstructorMixin, liboffsets.BusinessHourMixin): - """ - DateOffset subclass representing possibly n business hours. - """ - - _prefix = "BH" - _anchor = 0 - _attributes = frozenset(["n", "normalize", "start", "end", "offset"]) - +class BusinessHourMixin(liboffsets.BusinessHourMixin): @cache_readonly def next_bday(self): """ @@ -687,6 +679,22 @@ def _is_on_offset(self, dt): return False +class BusinessHour(BusinessHourMixin, SingleConstructorOffset): + """ + DateOffset subclass representing possibly n business hours. + """ + + _prefix = "BH" + _anchor = 0 + _attributes = frozenset(["n", "normalize", "start", "end", "offset"]) + + def __init__( + self, n=1, normalize=False, start="09:00", end="17:00", offset=timedelta(0) + ): + BaseOffset.__init__(self, n, normalize) + super().__init__(start=start, end=end, offset=offset) + + class CustomBusinessDay(CustomMixin, BusinessDay): """ DateOffset subclass representing custom business days excluding holidays. @@ -719,7 +727,9 @@ def __init__( calendar=None, offset=timedelta(0), ): - BusinessDay.__init__(self, n, normalize, offset) + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "_offset", offset) + CustomMixin.__init__(self, weekmask, holidays, calendar) @apply_wraps @@ -762,7 +772,7 @@ def is_on_offset(self, dt: datetime) -> bool: return np.is_busday(day64, busdaycal=self.calendar) -class CustomBusinessHour(CustomMixin, BusinessHour): +class CustomBusinessHour(CustomMixin, BusinessHourMixin, SingleConstructorOffset): """ DateOffset subclass representing possibly n custom business days. """ @@ -784,8 +794,11 @@ def __init__( end="17:00", offset=timedelta(0), ): - BusinessHour.__init__(self, n, normalize, start=start, end=end, offset=offset) + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "_offset", offset) + CustomMixin.__init__(self, weekmask, holidays, calendar) + BusinessHourMixin.__init__(self, start=start, end=end, offset=offset) # --------------------------------------------------------------------- @@ -885,7 +898,9 @@ def __init__( calendar=None, offset=timedelta(0), ): - BusinessMixin.__init__(self, n, normalize, offset) + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "_offset", offset) + CustomMixin.__init__(self, weekmask, holidays, calendar) @cache_readonly @@ -965,9 +980,9 @@ def __init__(self, n=1, normalize=False, day_of_month=None): BaseOffset.__init__(self, n, normalize) if day_of_month is None: - day_of_month = self._default_day_of_month - - object.__setattr__(self, "day_of_month", int(day_of_month)) + object.__setattr__(self, "day_of_month", self._default_day_of_month) + else: + object.__setattr__(self, "day_of_month", int(day_of_month)) if not self._min_day_of_month <= self.day_of_month <= 27: raise ValueError( "day_of_month must be " @@ -1293,7 +1308,7 @@ def _from_name(cls, suffix=None): return cls(weekday=weekday) -class WeekOfMonth(SingleConstructorMixin, liboffsets.WeekOfMonthMixin): +class WeekOfMonth(liboffsets.WeekOfMonthMixin, SingleConstructorOffset): """ Describes monthly dates like "the Tuesday of the 2nd week of each month". @@ -1319,9 +1334,12 @@ class WeekOfMonth(SingleConstructorMixin, liboffsets.WeekOfMonthMixin): _attributes = frozenset(["n", "normalize", "week", "weekday"]) def __init__(self, n=1, normalize=False, week=0, weekday=0): - liboffsets.WeekOfMonthMixin.__init__(self, n, normalize, weekday) + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "weekday", weekday) object.__setattr__(self, "week", week) + if self.weekday < 0 or self.weekday > 6: + raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}") if self.week < 0 or self.week > 3: raise ValueError(f"Week must be 0<=week<=3, got {self.week}") @@ -1343,6 +1361,11 @@ def _get_offset_day(self, other: datetime) -> int: shift_days = (self.weekday - wday) % 7 return 1 + shift_days + self.week * 7 + @property + def rule_code(self) -> str: + weekday = ccalendar.int_to_weekday.get(self.weekday, "") + return f"{self._prefix}-{self.week + 1}{weekday}" + @classmethod def _from_name(cls, suffix=None): if not suffix: @@ -1354,7 +1377,7 @@ def _from_name(cls, suffix=None): return cls(week=week, weekday=weekday) -class LastWeekOfMonth(SingleConstructorMixin, liboffsets.WeekOfMonthMixin): +class LastWeekOfMonth(liboffsets.WeekOfMonthMixin, SingleConstructorOffset): """ Describes monthly dates in last week of month like "the last Tuesday of each month". @@ -1378,11 +1401,14 @@ class LastWeekOfMonth(SingleConstructorMixin, liboffsets.WeekOfMonthMixin): _attributes = frozenset(["n", "normalize", "weekday"]) def __init__(self, n=1, normalize=False, weekday=0): - liboffsets.WeekOfMonthMixin.__init__(self, n, normalize, weekday) + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "weekday", weekday) if self.n == 0: raise ValueError("N cannot be 0") - object.__setattr__(self, "week", -1) + + if self.weekday < 0 or self.weekday > 6: + raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}") def _get_offset_day(self, other: datetime) -> int: """ @@ -1403,6 +1429,11 @@ def _get_offset_day(self, other: datetime) -> int: shift_days = (wday - self.weekday) % 7 return dim - shift_days + @property + def rule_code(self) -> str: + weekday = ccalendar.int_to_weekday.get(self.weekday, "") + return f"{self._prefix}-{weekday}" + @classmethod def _from_name(cls, suffix=None): if not suffix: @@ -2103,7 +2134,35 @@ def is_on_offset(self, dt: datetime) -> bool: # Ticks +def _tick_comp(op): + """ + Tick comparisons should behave identically to Timedelta comparisons. + """ + + def f(self, other): + return op(self.delta, other) + + f.__name__ = f"__{op.__name__}__" + return f + + class Tick(liboffsets._Tick, SingleConstructorOffset): + _inc = Timedelta(microseconds=1000) + _prefix = "undefined" + _attributes = frozenset(["n", "normalize"]) + + def __init__(self, n=1, normalize=False): + BaseOffset.__init__(self, n, normalize) + if normalize: + raise ValueError( + "Tick offset with `normalize=True` are not allowed." + ) # GH#21427 + + __gt__ = _tick_comp(operator.gt) + __ge__ = _tick_comp(operator.ge) + __lt__ = _tick_comp(operator.lt) + __le__ = _tick_comp(operator.le) + def __add__(self, other): if isinstance(other, Tick): if type(self) == type(other): @@ -2121,11 +2180,47 @@ def __add__(self, other): f"the add operation between {self} and {other} will overflow" ) from err + def __eq__(self, other: Any) -> bool: + if isinstance(other, str): + from pandas.tseries.frequencies import to_offset + + try: + # GH#23524 if to_offset fails, we are dealing with an + # incomparable type so == is False and != is True + other = to_offset(other) + except ValueError: + # e.g. "infer" + return False + + return _tick_comp(operator.eq)(self, other) + # This is identical to DateOffset.__hash__, but has to be redefined here # for Python 3, because we've redefined __eq__. def __hash__(self) -> int: return hash(self._params) + def __ne__(self, other): + if isinstance(other, str): + from pandas.tseries.frequencies import to_offset + + try: + # GH#23524 if to_offset fails, we are dealing with an + # incomparable type so == is False and != is True + other = to_offset(other) + except ValueError: + # e.g. "infer" + return True + + return _tick_comp(operator.ne)(self, other) + + @property + def delta(self) -> Timedelta: + return self.n * self._inc + + @property + def nanos(self): + return delta_to_nanoseconds(self.delta) + def apply(self, other): # Timestamp can handle tz and nano sec, thus no need to use apply_wraps if isinstance(other, Timestamp): @@ -2145,9 +2240,6 @@ def apply(self, other): if isinstance(other, timedelta): return other + self.delta elif isinstance(other, type(self)): - # TODO: this is reached in tests that specifically call apply, - # but should not be reached "naturally" because __add__ should - # catch this case first. return type(self)(self.n + other.n) raise ApplyTypeError(f"Unhandled type: {type(other).__name__}") diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 80286d5f138ad..92bfce7ec9c83 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -329,7 +329,7 @@ def wrapper(*args, **kwargs) -> Callable[..., Any]: return decorate -def doc(*args: Union[str, Callable], **kwargs) -> Callable[[F], F]: +def doc(*args: Union[str, Callable], **kwargs: str) -> Callable[[F], F]: """ A decorator take docstring templates, concatenate them and perform string substitution on it. @@ -345,8 +345,8 @@ def doc(*args: Union[str, Callable], **kwargs) -> Callable[[F], F]: *args : str or callable The string / docstring / docstring template to be appended in order after default docstring under function. - **kwargs - The objects which would be used to format docstring template. + **kwargs : str + The string which would be used to format docstring template. """ def decorator(func: F) -> F: From 12ff5a5f2d36550c95ac344342d0ab9450c20afb Mon Sep 17 00:00:00 2001 From: jnecus Date: Mon, 18 May 2020 12:07:12 +0100 Subject: [PATCH 17/53] Revert "Revert "Merge remote-tracking branch 'upstream/master'"" This reverts commit dce777b68716b62939d521fa56abad7d224fde3e. --- .travis.yml | 3 +- README.md | 1 + doc/source/ecosystem.rst | 18 +- doc/source/reference/offset_frequency.rst | 8 + doc/source/user_guide/computation.rst | 18 + doc/source/whatsnew/v1.1.0.rst | 6 + pandas/_libs/groupby.pyx | 4 +- pandas/_libs/hashtable.pyx | 5 +- pandas/_libs/index.pyx | 9 +- pandas/_libs/internals.pyx | 3 +- pandas/_libs/interval.pyx | 11 +- pandas/_libs/lib.pyx | 3 +- pandas/_libs/missing.pyx | 2 +- pandas/_libs/parsers.pyx | 2 +- pandas/_libs/reduction.pyx | 4 +- pandas/_libs/reshape.pyx | 5 +- pandas/_libs/testing.pyx | 8 +- pandas/_libs/tslibs/frequencies.pxd | 1 - pandas/_libs/tslibs/frequencies.pyx | 19 - pandas/_libs/tslibs/np_datetime.pxd | 2 - pandas/_libs/tslibs/np_datetime.pyx | 9 - pandas/_libs/tslibs/offsets.pyx | 108 +- pandas/_libs/tslibs/strptime.pyx | 4 +- pandas/_libs/tslibs/timedeltas.pyx | 2 +- pandas/_libs/tslibs/timestamps.pxd | 2 +- pandas/_libs/tslibs/timestamps.pyx | 32 +- pandas/core/arrays/categorical.py | 4 + pandas/core/arrays/datetimelike.py | 6 + pandas/core/arrays/datetimes.py | 3 + pandas/core/arrays/integer.py | 4 + pandas/core/arrays/period.py | 3 +- pandas/core/arrays/timedeltas.py | 23 +- pandas/core/base.py | 3 +- pandas/core/groupby/groupby.py | 197 +- pandas/core/groupby/ops.py | 15 +- pandas/core/indexes/base.py | 36 +- pandas/core/indexes/category.py | 7 - pandas/core/indexes/extension.py | 46 +- pandas/core/indexes/numeric.py | 4 - pandas/core/indexes/period.py | 10 +- pandas/core/indexes/range.py | 12 +- pandas/core/internals/blocks.py | 2 +- pandas/core/ops/common.py | 24 +- pandas/core/strings.py | 17 +- pandas/core/window/rolling.py | 20 +- pandas/plotting/_matplotlib/converter.py | 8 +- pandas/plotting/_matplotlib/timeseries.py | 6 +- pandas/tests/arithmetic/test_datetime64.py | 2 +- pandas/tests/arithmetic/test_numeric.py | 2 +- pandas/tests/arithmetic/test_timedelta64.py | 16 +- pandas/tests/arrays/boolean/test_logical.py | 2 +- .../tests/arrays/integer/test_arithmetic.py | 5 +- pandas/tests/arrays/string_/test_string.py | 2 +- pandas/tests/arrays/test_datetimelike.py | 3 +- .../indexes/categorical/test_category.py | 2 +- .../tests/indexes/interval/test_interval.py | 2 +- pandas/tests/indexes/test_base.py | 10 + .../tests/scalar/timestamp/test_arithmetic.py | 2 +- .../tseries/frequencies/test_freq_code.py | 47 +- pandas/tests/tseries/offsets/test_offsets.py | 44 +- pandas/tests/tslibs/test_period_asfreq.py | 13 +- pandas/tests/window/common.py | 22 +- pandas/tests/window/conftest.py | 62 +- .../moments/test_moments_consistency_ewm.py | 11 +- .../test_moments_consistency_expanding.py | 202 +- .../test_moments_consistency_rolling.py | 81 +- .../tests/window/moments/test_moments_ewm.py | 481 +++-- .../window/moments/test_moments_rolling.py | 1691 +++++++++-------- pandas/tests/window/test_api.py | 565 +++--- pandas/tests/window/test_apply.py | 27 +- pandas/tests/window/test_ewm.py | 92 +- pandas/tests/window/test_expanding.py | 264 ++- pandas/tests/window/test_rolling.py | 888 +++++---- pandas/tests/window/test_window.py | 126 +- pandas/tseries/frequencies.py | 2 +- pandas/tseries/offsets.py | 152 +- pandas/util/_decorators.py | 6 +- 77 files changed, 2954 insertions(+), 2609 deletions(-) diff --git a/.travis.yml b/.travis.yml index 7943ca370af1a..c5dbddacc6a43 100644 --- a/.travis.yml +++ b/.travis.yml @@ -75,8 +75,7 @@ matrix: before_install: - echo "before_install" - # set non-blocking IO on travis - # https://github.com/travis-ci/travis-ci/issues/8920#issuecomment-352661024 + # Use blocking IO on travis. Ref: https://github.com/travis-ci/travis-ci/issues/8920#issuecomment-352661024 - python -c 'import os,sys,fcntl; flags = fcntl.fcntl(sys.stdout, fcntl.F_GETFL); fcntl.fcntl(sys.stdout, fcntl.F_SETFL, flags&~os.O_NONBLOCK);' - source ci/travis_process_gbq_encryption.sh - export PATH="$HOME/miniconda3/bin:$PATH" diff --git a/README.md b/README.md index 33dfbf10ff743..7edee8d3feeed 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ [![Downloads](https://anaconda.org/conda-forge/pandas/badges/downloads.svg)](https://pandas.pydata.org) [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/pydata/pandas) [![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](https://numfocus.org) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) ## What is it? diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index fd5e7c552fe0a..6c6a7f42d4b7e 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -30,7 +30,7 @@ substantial projects that you feel should be on this list, please let us know. Data cleaning and validation ---------------------------- -`pyjanitor `__ +`Pyjanitor `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Pyjanitor provides a clean API for cleaning data, using method chaining. @@ -115,7 +115,7 @@ It is very similar to the matplotlib plotting backend, but provides interactive web-based charts and maps. -`seaborn `__ +`Seaborn `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Seaborn is a Python visualization library based on @@ -136,7 +136,7 @@ provides a powerful, declarative and extremely general way to generate bespoke p Various implementations to other languages are available. A good implementation for Python users is `has2k1/plotnine `__. -`IPython Vega `__ +`IPython vega `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `IPython Vega `__ leverages `Vega @@ -147,7 +147,7 @@ A good implementation for Python users is `has2k1/plotnine `__ `Python API `__ enables interactive figures and web shareability. Maps, 2D, 3D, and live-streaming graphs are rendered with WebGL and `D3.js `__. The library supports plotting directly from a pandas DataFrame and cloud-based collaboration. Users of `matplotlib, ggplot for Python, and Seaborn `__ can convert figures into interactive web-based plots. Plots can be drawn in `IPython Notebooks `__ , edited with R or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly is free for unlimited sharing, and has `cloud `__, `offline `__, or `on-premise `__ accounts for private use. -`QtPandas `__ +`Qtpandas `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Spun off from the main pandas library, the `qtpandas `__ @@ -187,7 +187,7 @@ See :ref:`Options and Settings ` and :ref:`Available Options ` for pandas ``display.`` settings. -`quantopian/qgrid `__ +`Quantopian/qgrid `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ qgrid is "an interactive grid for sorting and filtering @@ -249,12 +249,12 @@ The following data feeds are available: * Stooq Index Data * MOEX Data -`quandl/Python `__ +`Quandl/Python `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Quandl API for Python wraps the Quandl REST API to return Pandas DataFrames with timeseries indexes. -`pydatastream `__ +`Pydatastream `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PyDatastream is a Python interface to the `Refinitiv Datastream (DWS) `__ @@ -384,7 +384,7 @@ Pandas provides an interface for defining system. The following libraries implement that interface to provide types not found in NumPy or pandas, which work well with pandas' data containers. -`cyberpandas`_ +`Cyberpandas`_ ~~~~~~~~~~~~~~ Cyberpandas provides an extension type for storing arrays of IP Addresses. These @@ -411,4 +411,4 @@ Library Accessor Classes Description .. _pdvega: https://altair-viz.github.io/pdvega/ .. _Altair: https://altair-viz.github.io/ .. _pandas_path: https://github.com/drivendataorg/pandas-path/ -.. _pathlib.Path: https://docs.python.org/3/library/pathlib.html \ No newline at end of file +.. _pathlib.Path: https://docs.python.org/3/library/pathlib.html diff --git a/doc/source/reference/offset_frequency.rst b/doc/source/reference/offset_frequency.rst index 6240181708f97..9b2753ca02495 100644 --- a/doc/source/reference/offset_frequency.rst +++ b/doc/source/reference/offset_frequency.rst @@ -1044,6 +1044,7 @@ Properties Tick.nanos Tick.normalize Tick.rule_code + Tick.n Methods ~~~~~~~ @@ -1077,6 +1078,7 @@ Properties Day.nanos Day.normalize Day.rule_code + Day.n Methods ~~~~~~~ @@ -1110,6 +1112,7 @@ Properties Hour.nanos Hour.normalize Hour.rule_code + Hour.n Methods ~~~~~~~ @@ -1143,6 +1146,7 @@ Properties Minute.nanos Minute.normalize Minute.rule_code + Minute.n Methods ~~~~~~~ @@ -1176,6 +1180,7 @@ Properties Second.nanos Second.normalize Second.rule_code + Second.n Methods ~~~~~~~ @@ -1209,6 +1214,7 @@ Properties Milli.nanos Milli.normalize Milli.rule_code + Milli.n Methods ~~~~~~~ @@ -1242,6 +1248,7 @@ Properties Micro.nanos Micro.normalize Micro.rule_code + Micro.n Methods ~~~~~~~ @@ -1275,6 +1282,7 @@ Properties Nano.nanos Nano.normalize Nano.rule_code + Nano.n Methods ~~~~~~~ diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst index d371f6d5f273c..cf630a9671013 100644 --- a/doc/source/user_guide/computation.rst +++ b/doc/source/user_guide/computation.rst @@ -648,6 +648,24 @@ from present information back to past information. This allows the rolling windo Currently, this feature is only implemented for time-based windows. For fixed windows, the closed parameter cannot be set and the rolling window will always have both endpoints closed. +.. _stats.iter_rolling_window: + +Iteration over window: +~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 1.1.0 + +``Rolling`` and ``Expanding`` objects now support iteration. Be noted that ``min_periods`` is ignored in iteration. + +.. ipython:: + + In [1]: df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + + In [2]: for i in df.rolling(2): + ...: print(i) + ...: + + .. _stats.moments.ts-versus-resampling: Time-aware rolling vs. resampling diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 1437006ee3fb8..eaf8c19b9a21b 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -235,6 +235,7 @@ Other enhancements :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`, and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`26599`). - :meth:`HDFStore.put` now accepts `track_times` parameter. Parameter is passed to ``create_table`` method of ``PyTables`` (:issue:`32682`). +- Make :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Expanding` iterable(:issue:`11704`) .. --------------------------------------------------------------------------- @@ -585,6 +586,7 @@ Deprecations - :func:`pandas.api.types.is_categorical` is deprecated and will be removed in a future version; use `:func:pandas.api.types.is_categorical_dtype` instead (:issue:`33385`) - :meth:`Index.get_value` is deprecated and will be removed in a future version (:issue:`19728`) +- :meth:`DateOffset.__call__` is deprecated and will be removed in a future version, use ``offset + other`` instead (:issue:`34171`) .. --------------------------------------------------------------------------- @@ -605,6 +607,8 @@ Performance improvements sparse values from ``scipy.sparse`` matrices using the :meth:`DataFrame.sparse.from_spmatrix` constructor (:issue:`32821`, :issue:`32825`, :issue:`32826`, :issue:`32856`, :issue:`32858`). +- Performance improvement for groupby methods :meth:`~pandas.core.groupby.groupby.Groupby.first` + and :meth:`~pandas.core.groupby.groupby.Groupby.last` (:issue:`34178`) - Performance improvement in :func:`factorize` for nullable (integer and boolean) dtypes (:issue:`33064`). - Performance improvement in reductions (sum, prod, min, max) for nullable (integer and boolean) dtypes (:issue:`30982`, :issue:`33261`, :issue:`33442`). @@ -813,6 +817,8 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.first` and :meth:`GroupBy.last` where None is not preserved in object dtype (:issue:`32800`) - Bug in :meth:`Rolling.min` and :meth:`Rolling.max`: Growing memory usage after multiple calls when using a fixed window (:issue:`30726`) - Bug in :meth:`GroupBy.agg`, :meth:`GroupBy.transform`, and :meth:`GroupBy.resample` where subclasses are not preserved (:issue:`28330`) +- Bug in :meth:`GroupBy.rolling.apply` ignores args and kwargs parameters (:issue:`33433`) + Reshaping ^^^^^^^^^ diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 53e66c4b8723d..d5d706650bb34 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -9,11 +9,9 @@ cimport numpy as cnp from numpy cimport (ndarray, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t, float32_t, float64_t, complex64_t, complex128_t) +from numpy.math cimport NAN cnp.import_array() -cdef extern from "numpy/npy_math.h": - float64_t NAN "NPY_NAN" - from pandas._libs.util cimport numeric, get_nat from pandas._libs.algos cimport (swap, TiebreakEnumType, TIEBREAK_AVERAGE, diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index e80f134290a7e..c3dcbb942d7fe 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -8,10 +8,9 @@ from libc.stdlib cimport malloc, free import numpy as np cimport numpy as cnp from numpy cimport ndarray, uint8_t, uint32_t, float64_t +from numpy.math cimport NAN cnp.import_array() -cdef extern from "numpy/npy_math.h": - float64_t NAN "NPY_NAN" from pandas._libs.khash cimport ( khiter_t, @@ -54,7 +53,7 @@ from pandas._libs.khash cimport ( ) -cimport pandas._libs.util as util +from pandas._libs cimport util from pandas._libs.missing cimport checknull diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 245c554570ce4..b4dcdaa10d0ef 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -19,11 +19,10 @@ from numpy cimport ( cnp.import_array() -cimport pandas._libs.util as util +from pandas._libs cimport util -from pandas._libs.tslibs import Period, Timedelta from pandas._libs.tslibs.nattype cimport c_NaT as NaT -from pandas._libs.tslibs.base cimport ABCTimestamp +from pandas._libs.tslibs.base cimport ABCTimestamp, ABCTimedelta, ABCPeriod from pandas._libs.hashtable cimport HashTable @@ -470,7 +469,7 @@ cdef class TimedeltaEngine(DatetimeEngine): return 'm8[ns]' cdef int64_t _unbox_scalar(self, scalar) except? -1: - if not (isinstance(scalar, Timedelta) or scalar is NaT): + if not (isinstance(scalar, ABCTimedelta) or scalar is NaT): raise TypeError(scalar) return scalar.value @@ -480,7 +479,7 @@ cdef class PeriodEngine(Int64Engine): cdef int64_t _unbox_scalar(self, scalar) except? -1: if scalar is NaT: return scalar.value - if isinstance(scalar, Period): + if isinstance(scalar, ABCPeriod): # NB: we assume that we have the correct freq here. return scalar.ordinal raise TypeError(scalar) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 1e53b789aa05c..1aa95e92b73d1 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -1,5 +1,6 @@ -import cython from collections import defaultdict + +import cython from cython import Py_ssize_t from cpython.slice cimport PySlice_GetIndicesEx diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 657a2798f7267..3850b24fdf519 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -33,7 +33,7 @@ from numpy cimport ( cnp.import_array() -cimport pandas._libs.util as util +from pandas._libs cimport util from pandas._libs.hashtable cimport Int64Vector from pandas._libs.tslibs.util cimport ( @@ -42,8 +42,7 @@ from pandas._libs.tslibs.util cimport ( is_timedelta64_object, ) -from pandas._libs.tslibs import Timestamp -from pandas._libs.tslibs.timedeltas import Timedelta +from pandas._libs.tslibs.base cimport ABCTimestamp, ABCTimedelta from pandas._libs.tslibs.timezones cimport tz_compare @@ -329,7 +328,7 @@ cdef class Interval(IntervalMixin): raise ValueError(f"invalid option for 'closed': {closed}") if not left <= right: raise ValueError("left side of interval must be <= right side") - if (isinstance(left, Timestamp) and + if (isinstance(left, ABCTimestamp) and not tz_compare(left.tzinfo, right.tzinfo)): # GH 18538 raise ValueError("left and right must have the same time zone, got " @@ -341,7 +340,7 @@ cdef class Interval(IntervalMixin): def _validate_endpoint(self, endpoint): # GH 23013 if not (is_integer_object(endpoint) or is_float_object(endpoint) or - isinstance(endpoint, (Timestamp, Timedelta))): + isinstance(endpoint, (ABCTimestamp, ABCTimedelta))): raise ValueError("Only numeric, Timestamp and Timedelta endpoints " "are allowed when constructing an Interval.") @@ -371,7 +370,7 @@ cdef class Interval(IntervalMixin): right = self.right # TODO: need more general formatting methodology here - if isinstance(left, Timestamp) and isinstance(right, Timestamp): + if isinstance(left, ABCTimestamp) and isinstance(right, ABCTimestamp): left = left._short_repr right = right._short_repr diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 658f7fb202531..bd623a39010f6 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1,6 +1,5 @@ from collections import abc from decimal import Decimal - import warnings import cython @@ -63,7 +62,7 @@ cdef extern from "numpy/arrayobject.h": cdef extern from "src/parse_helper.h": int floatify(object, float64_t *result, int *maybe_int) except -1 -cimport pandas._libs.util as util +from pandas._libs cimport util from pandas._libs.util cimport is_nan, UINT64_MAX, INT64_MAX, INT64_MIN from pandas._libs.tslib import array_to_datetime diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 6bca5e370ac89..6d4d1e95fe8c3 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -8,7 +8,7 @@ cimport numpy as cnp from numpy cimport ndarray, int64_t, uint8_t, float64_t cnp.import_array() -cimport pandas._libs.util as util +from pandas._libs cimport util from pandas._libs.tslibs.np_datetime cimport get_datetime64_value, get_timedelta64_value diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 9bb5e10348e47..461419239c730 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -34,7 +34,7 @@ cimport numpy as cnp from numpy cimport ndarray, uint8_t, uint64_t, int64_t, float64_t cnp.import_array() -cimport pandas._libs.util as util +from pandas._libs cimport util from pandas._libs.util cimport UINT64_MAX, INT64_MAX, INT64_MIN import pandas._libs.lib as lib diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index a7b2d5d5491d5..0988cd7ff0dde 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -14,7 +14,7 @@ from numpy cimport (ndarray, flatiter) cnp.import_array() -cimport pandas._libs.util as util +from pandas._libs cimport util from pandas._libs.lib import maybe_convert_objects, is_scalar @@ -603,7 +603,7 @@ cdef class BlockSlider: arr.shape[1] = 0 -def compute_reduction(arr: np.ndarray, f, axis: int = 0, dummy=None, labels=None): +def compute_reduction(arr: ndarray, f, axis: int = 0, dummy=None, labels=None): """ Parameters diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx index aed5e1d612088..da4dd00027395 100644 --- a/pandas/_libs/reshape.pyx +++ b/pandas/_libs/reshape.pyx @@ -15,11 +15,12 @@ from numpy cimport ( uint64_t, ) -cimport numpy as cnp import numpy as np -from pandas._libs.lib cimport c_is_list_like +cimport numpy as cnp cnp.import_array() +from pandas._libs.lib cimport c_is_list_like + ctypedef fused reshape_t: uint8_t uint16_t diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index 0460a69f366c4..9d3959d0a070a 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -1,4 +1,8 @@ import numpy as np +from numpy cimport import_array +import_array() + +from pandas._libs.util cimport is_array from pandas.core.dtypes.missing import isna, array_equivalent from pandas.core.dtypes.common import is_dtype_equal @@ -116,8 +120,8 @@ cpdef assert_almost_equal(a, b, assert a == b, f"{a} != {b}" return True - a_is_ndarray = isinstance(a, np.ndarray) - b_is_ndarray = isinstance(b, np.ndarray) + a_is_ndarray = is_array(a) + b_is_ndarray = is_array(b) if obj is None: if a_is_ndarray or b_is_ndarray: diff --git a/pandas/_libs/tslibs/frequencies.pxd b/pandas/_libs/tslibs/frequencies.pxd index 1b7efb8c5dfdf..d6bae78576f50 100644 --- a/pandas/_libs/tslibs/frequencies.pxd +++ b/pandas/_libs/tslibs/frequencies.pxd @@ -1,7 +1,6 @@ cpdef str get_rule_month(object source, str default=*) cpdef get_freq_code(freqstr) -cpdef object get_freq(object freq) cpdef str get_base_alias(freqstr) cpdef int get_to_timestamp_base(int base) cpdef str get_freq_str(base, mult=*) diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index d97a9fa0ba2fa..c1f10b3dda612 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -306,25 +306,6 @@ cpdef int get_to_timestamp_base(int base): return base -cpdef object get_freq(object freq): - """ - Return frequency code of given frequency str. - If input is not string, return input as it is. - - Examples - -------- - >>> get_freq('A') - 1000 - - >>> get_freq('3A') - 1000 - """ - if isinstance(freq, str): - base, mult = get_freq_code(freq) - freq = base - return freq - - # ---------------------------------------------------------------------- # Frequency comparison diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index c936d42b34db5..038632e1575c3 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -53,8 +53,6 @@ cdef extern from "src/datetime/np_datetime.h": npy_datetimestruct *result) nogil -cdef int reverse_ops[6] - cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1 cdef check_dts_bounds(npy_datetimestruct *dts) diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 9a8a8fdae6d2f..5ac0e4fa44bee 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -68,15 +68,6 @@ cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil: # ---------------------------------------------------------------------- # Comparison -cdef int reverse_ops[6] - -reverse_ops[Py_LT] = Py_GT -reverse_ops[Py_LE] = Py_GE -reverse_ops[Py_EQ] = Py_EQ -reverse_ops[Py_NE] = Py_NE -reverse_ops[Py_GT] = Py_LT -reverse_ops[Py_GE] = Py_LE - cdef inline bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1: """ diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 5efb9b3534f14..c113897e4fe82 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -19,11 +19,11 @@ cnp.import_array() from pandas._libs.tslibs cimport util -from pandas._libs.tslibs.util cimport is_integer_object +from pandas._libs.tslibs.util cimport is_integer_object, is_datetime64_object from pandas._libs.tslibs.base cimport ABCTick, ABCTimestamp, is_tick_object -from pandas._libs.tslibs.ccalendar import MONTHS, DAYS +from pandas._libs.tslibs.ccalendar import MONTHS, DAYS, weekday_to_int, int_to_weekday from pandas._libs.tslibs.ccalendar cimport get_days_in_month, dayofweek from pandas._libs.tslibs.conversion cimport ( convert_datetime_to_tsobject, @@ -35,6 +35,7 @@ from pandas._libs.tslibs.np_datetime cimport ( from pandas._libs.tslibs.timezones cimport utc_pytz as UTC from pandas._libs.tslibs.tzconversion cimport tz_convert_single +from pandas._libs.tslibs.timedeltas import Timedelta from pandas._libs.tslibs.timestamps import Timestamp # --------------------------------------------------------------------- @@ -161,7 +162,7 @@ def apply_wraps(func): elif isinstance(other, (timedelta, BaseOffset)): # timedelta path return func(self, other) - elif isinstance(other, (np.datetime64, datetime, date)): + elif isinstance(other, (datetime, date)) or is_datetime64_object(other): other = Timestamp(other) else: # This will end up returning NotImplemented back in __add__ @@ -486,6 +487,12 @@ class _BaseOffset: return NotImplemented def __call__(self, other): + warnings.warn( + "DateOffset.__call__ is deprecated and will be removed in a future " + "version. Use `offset + other` instead.", + FutureWarning, + stacklevel=1, + ) return self.apply(other) def __mul__(self, other): @@ -643,7 +650,10 @@ class _BaseOffset: # ------------------------------------------------------------------ - def _validate_n(self, n): + # Staticmethod so we can call from _Tick.__init__, will be unnecessary + # once BaseOffset is a cdef class and is inherited by _Tick + @staticmethod + def _validate_n(n): """ Require that `n` be an integer. @@ -760,6 +770,33 @@ cdef class _Tick(ABCTick): # ensure that reversed-ops with numpy scalars return NotImplemented __array_priority__ = 1000 _adjust_dst = False + _inc = Timedelta(microseconds=1000) + _prefix = "undefined" + _attributes = frozenset(["n", "normalize"]) + + cdef readonly: + int64_t n + bint normalize + dict _cache + + def __init__(self, n=1, normalize=False): + n = _BaseOffset._validate_n(n) + self.n = n + self.normalize = False + self._cache = {} + if normalize: + # GH#21427 + raise ValueError( + "Tick offset with `normalize=True` are not allowed." + ) + + @property + def delta(self) -> Timedelta: + return self.n * self._inc + + @property + def nanos(self) -> int64_t: + return self.delta.value def is_on_offset(self, dt) -> bool: return True @@ -767,6 +804,35 @@ cdef class _Tick(ABCTick): def is_anchored(self) -> bool: return False + # -------------------------------------------------------------------- + # Comparison and Arithmetic Methods + + def __eq__(self, other): + if isinstance(other, str): + try: + # GH#23524 if to_offset fails, we are dealing with an + # incomparable type so == is False and != is True + other = to_offset(other) + except ValueError: + # e.g. "infer" + return False + return self.delta == other + + def __ne__(self, other): + return not (self == other) + + def __le__(self, other): + return self.delta.__le__(other) + + def __lt__(self, other): + return self.delta.__lt__(other) + + def __ge__(self, other): + return self.delta.__ge__(other) + + def __gt__(self, other): + return self.delta.__gt__(other) + def __truediv__(self, other): if not isinstance(self, _Tick): # cython semantics mean the args are sometimes swapped @@ -775,17 +841,24 @@ cdef class _Tick(ABCTick): result = self.delta.__truediv__(other) return _wrap_timedelta_result(result) + # -------------------------------------------------------------------- + # Pickle Methods + def __reduce__(self): return (type(self), (self.n,)) def __setstate__(self, state): - object.__setattr__(self, "n", state["n"]) + self.n = state["n"] + self.normalize = False -class BusinessMixin: +class BusinessMixin(BaseOffset): """ Mixin to business types to provide related functions. """ + def __init__(self, n=1, normalize=False, offset=timedelta(0)): + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "_offset", offset) @property def offset(self): @@ -809,7 +882,11 @@ class BusinessMixin: class BusinessHourMixin(BusinessMixin): _adjust_dst = False - def __init__(self, start="09:00", end="17:00", offset=timedelta(0)): + def __init__( + self, n=1, normalize=False, start="09:00", end="17:00", offset=timedelta(0) + ): + BusinessMixin.__init__(self, n, normalize, offset) + # must be validated here to equality check if np.ndim(start) == 0: # i.e. not is_list_like @@ -853,7 +930,6 @@ class BusinessHourMixin(BusinessMixin): object.__setattr__(self, "start", start) object.__setattr__(self, "end", end) - object.__setattr__(self, "_offset", offset) def _repr_attrs(self) -> str: out = super()._repr_attrs() @@ -916,10 +992,16 @@ class CustomMixin: object.__setattr__(self, "calendar", calendar) -class WeekOfMonthMixin: +class WeekOfMonthMixin(BaseOffset): """ Mixin for methods common to WeekOfMonth and LastWeekOfMonth. """ + def __init__(self, n=1, normalize=False, weekday=0): + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "weekday", weekday) + + if weekday < 0 or weekday > 6: + raise ValueError(f"Day must be 0<=day<=6, got {weekday}") @apply_wraps def apply(self, other): @@ -940,6 +1022,14 @@ class WeekOfMonthMixin: return False return dt.day == self._get_offset_day(dt) + @property + def rule_code(self) -> str: + weekday = int_to_weekday.get(self.weekday, "") + if self.week == -1: + # LastWeekOfMonth + return f"{self._prefix}-{weekday}" + return f"{self._prefix}-{self.week + 1}{weekday}" + # ---------------------------------------------------------------------- # RelativeDelta Arithmetic diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index a209f71dd0676..884578df3e00b 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -5,6 +5,8 @@ import locale import calendar import re +from cpython cimport datetime + from _thread import allocate_lock as _thread_allocate_lock import pytz @@ -12,8 +14,6 @@ import pytz import numpy as np from numpy cimport int64_t -cimport cpython.datetime as datetime - from pandas._libs.tslibs.np_datetime cimport ( check_dts_bounds, dtstruct_to_dt64, npy_datetimestruct) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 03419a6267983..7a3af169a960e 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -26,7 +26,7 @@ from pandas._libs.tslibs.base cimport ABCTimedelta, ABCTimestamp, is_tick_object from pandas._libs.tslibs.ccalendar cimport DAY_NANOS from pandas._libs.tslibs.np_datetime cimport ( - cmp_scalar, reverse_ops, td64_to_tdstruct, pandas_timedeltastruct) + cmp_scalar, td64_to_tdstruct, pandas_timedeltastruct) from pandas._libs.tslibs.nattype cimport ( checknull_with_nat, diff --git a/pandas/_libs/tslibs/timestamps.pxd b/pandas/_libs/tslibs/timestamps.pxd index 90f50e3af503c..88d21b19e1e37 100644 --- a/pandas/_libs/tslibs/timestamps.pxd +++ b/pandas/_libs/tslibs/timestamps.pxd @@ -15,7 +15,7 @@ cdef class _Timestamp(ABCTimestamp): cdef readonly: int64_t value, nanosecond object freq - list _date_attributes + cpdef bint _get_start_end_field(self, str field) cpdef _get_date_name_field(self, object field, object locale) cdef int64_t _maybe_convert_value_to_local(self) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index ab8f9b6c30eb1..4f8b85240c79f 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -25,7 +25,7 @@ from pandas._libs.tslibs.util cimport ( is_timedelta64_object, is_array, ) -from pandas._libs.tslibs.base cimport ABCTimestamp, is_tick_object +from pandas._libs.tslibs.base cimport ABCTimedelta, ABCTimestamp, is_tick_object from pandas._libs.tslibs cimport ccalendar @@ -355,10 +355,10 @@ cdef class _Timestamp(ABCTimestamp): elif PyDelta_Check(other): # logic copied from delta_to_nanoseconds to prevent circular import - if hasattr(other, 'delta'): + if isinstance(other, ABCTimedelta): # pd.Timedelta nanos = other.value - elif PyDelta_Check(other): + else: nanos = (other.days * 24 * 60 * 60 * 1000000 + other.seconds * 1000000 + other.microseconds) * 1000 @@ -387,6 +387,10 @@ cdef class _Timestamp(ABCTimestamp): dtype=object, ) + elif not isinstance(self, _Timestamp): + # cython semantics, args have been switched and this is __radd__ + return other.__add__(self) + return NotImplemented def __sub__(self, other): @@ -1051,7 +1055,7 @@ timedelta}, default 'raise' return Period(self, freq=freq) @property - def dayofweek(self): + def dayofweek(self) -> int: """ Return day of the week. """ @@ -1092,7 +1096,7 @@ timedelta}, default 'raise' return self._get_date_name_field('month_name', locale) @property - def dayofyear(self): + def dayofyear(self) -> int: """ Return the day of the year. """ @@ -1115,7 +1119,7 @@ timedelta}, default 'raise' return ((self.month - 1) // 3) + 1 @property - def days_in_month(self): + def days_in_month(self) -> int: """ Return the number of days in the month. """ @@ -1428,16 +1432,7 @@ default 'raise' return base1 + base2 - def _has_time_component(self) -> bool: - """ - Returns if the Timestamp has a time component - in addition to the date part - """ - return (self.time() != _zero_time - or self.tzinfo is not None - or self.nanosecond != 0) - - def to_julian_date(self): + def to_julian_date(self) -> np.float64: """ Convert TimeStamp to a Julian Date. 0 Julian date is noon January 1, 4713 BC. @@ -1474,11 +1469,6 @@ default 'raise' np.array([self.value], dtype='i8'), tz=self.tz)[0] return Timestamp(normalized_value).tz_localize(self.tz) - def __radd__(self, other): - # __radd__ on cython extension types like _Timestamp is not used, so - # define it here instead - return self + other - # Add the min and max fields at the class level cdef int64_t _NS_UPPER_BOUND = np.iinfo(np.int64).max diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 737c130161246..2a01ab3802e62 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -64,6 +64,10 @@ def _cat_compare_op(op): @unpack_zerodim_and_defer(opname) def func(self, other): + if is_list_like(other) and len(other) != len(self): + # TODO: Could this fail if the categories are listlike objects? + raise ValueError("Lengths must match.") + if not self.ordered: if opname in ["__lt__", "__gt__", "__le__", "__ge__"]: raise TypeError( diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index bbaa64dae3eea..145654805cc6b 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -84,6 +84,9 @@ def _validate_comparison_value(self, other): elif not is_list_like(other): raise InvalidComparison(other) + elif len(other) != len(self): + raise ValueError("Lengths must match") + else: try: other = self._validate_listlike(other, opname, allow_object=True) @@ -1234,6 +1237,9 @@ def _add_timedelta_arraylike(self, other): """ # overridden by PeriodArray + if len(self) != len(other): + raise ValueError("cannot add indices of unequal length") + if isinstance(other, np.ndarray): # ndarray[timedelta64]; wrap in TimedeltaIndex for op from pandas.core.arrays import TimedeltaArray diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 897c53c5c75d1..90088c370697e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -654,6 +654,9 @@ def _assert_tzawareness_compat(self, other): def _sub_datetime_arraylike(self, other): """subtract DatetimeArray/Index or ndarray[datetime64]""" + if len(self) != len(other): + raise ValueError("cannot add indices of unequal length") + if isinstance(other, np.ndarray): assert is_datetime64_dtype(other) other = type(self)(other) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 3ca7e028913c6..5a90ea4a36a21 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -517,6 +517,8 @@ def cmp_method(self, other): raise NotImplementedError( "can only perform ops with 1-d structures" ) + if len(self) != len(other): + raise ValueError("Lengths must match to compare") if other is libmissing.NA: # numpy does not handle pd.NA well as "other" scalar (it returns @@ -620,6 +622,8 @@ def integer_arithmetic_method(self, other): raise NotImplementedError( "can only perform ops with 1-d structures" ) + if len(self) != len(other): + raise ValueError("Lengths must match") if not (is_float_dtype(other) or is_integer_dtype(other)): raise TypeError("can only perform ops with numeric values") diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 5c700a53ceac4..3978161829481 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -746,8 +746,7 @@ def _check_timedeltalike_freq_compat(self, other): IncompatibleFrequency """ assert isinstance(self.freq, Tick) # checked by calling function - own_offset = frequencies.to_offset(self.freq.rule_code) - base_nanos = delta_to_nanoseconds(own_offset) + base_nanos = self.freq.base.nanos if isinstance(other, (timedelta, np.timedelta64, Tick)): nanos = delta_to_nanoseconds(other) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 4b84b3ea8b46a..bc215eec4c345 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -471,6 +471,10 @@ def __mul__(self, other): if not hasattr(other, "dtype"): # list, tuple other = np.array(other) + if len(other) != len(self) and not is_timedelta64_dtype(other.dtype): + # Exclude timedelta64 here so we correctly raise TypeError + # for that instead of ValueError + raise ValueError("Cannot multiply with unequal lengths") if is_object_dtype(other.dtype): # this multiplication will succeed only if all elements of other @@ -514,7 +518,10 @@ def __truediv__(self, other): # e.g. list, tuple other = np.array(other) - if is_timedelta64_dtype(other.dtype): + if len(other) != len(self): + raise ValueError("Cannot divide vectors with unequal lengths") + + elif is_timedelta64_dtype(other.dtype): # let numpy handle it return self._data / other @@ -564,7 +571,10 @@ def __rtruediv__(self, other): # e.g. list, tuple other = np.array(other) - if is_timedelta64_dtype(other.dtype): + if len(other) != len(self): + raise ValueError("Cannot divide vectors with unequal lengths") + + elif is_timedelta64_dtype(other.dtype): # let numpy handle it return other / self._data @@ -613,8 +623,10 @@ def __floordiv__(self, other): if not hasattr(other, "dtype"): # list, tuple other = np.array(other) + if len(other) != len(self): + raise ValueError("Cannot divide with unequal lengths") - if is_timedelta64_dtype(other.dtype): + elif is_timedelta64_dtype(other.dtype): other = type(self)(other) # numpy timedelta64 does not natively support floordiv, so operate @@ -666,7 +678,10 @@ def __rfloordiv__(self, other): # list, tuple other = np.array(other) - if is_timedelta64_dtype(other.dtype): + if len(other) != len(self): + raise ValueError("Cannot divide with unequal lengths") + + elif is_timedelta64_dtype(other.dtype): other = type(self)(other) # numpy timedelta64 does not natively support floordiv, so operate diff --git a/pandas/core/base.py b/pandas/core/base.py index 309b6e0ad5e1a..a8a736b6aafdf 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1257,8 +1257,7 @@ def value_counts( def unique(self): values = self._values - if hasattr(values, "unique"): - + if not isinstance(values, np.ndarray): result = values.unique() if self.dtype.kind in ["m", "M"] and isinstance(self, ABCSeries): # GH#31182 Series._values returns EA, unpack for backward-compat diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b9b403ffdc69a..55b9c28c74cb2 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -36,7 +36,6 @@ class providing the base-class of operations. from pandas._libs import Timestamp import pandas._libs.groupby as libgroupby from pandas._typing import FrameOrSeries, Scalar -from pandas.compat import set_function_name from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, Substitution, cache_readonly, doc @@ -192,6 +191,24 @@ class providing the base-class of operations. """, ) +_groupby_agg_method_template = """ +Compute {fname} of group values. + +Parameters +---------- +numeric_only : bool, default {no} + Include only float, int, boolean columns. If None, will attempt to use + everything, then use only numeric data. +min_count : int, default {mc} + The required number of valid values to perform the operation. If fewer + than ``min_count`` non-NA values are present the result will be NA. + +Returns +------- +Series or DataFrame + Computed {fname} of values within each group. +""" + _pipe_template = """ Apply a function `func` with arguments to this %(klass)s object and return the function's result. @@ -945,6 +962,37 @@ def _wrap_transformed_output(self, output: Mapping[base.OutputKey, np.ndarray]): def _wrap_applied_output(self, keys, values, not_indexed_same: bool = False): raise AbstractMethodError(self) + def _agg_general( + self, + numeric_only: bool = True, + min_count: int = -1, + *, + alias: str, + npfunc: Callable, + ): + self._set_group_selection() + + # try a cython aggregation if we can + try: + return self._cython_agg_general( + how=alias, alt=npfunc, numeric_only=numeric_only, min_count=min_count, + ) + except DataError: + pass + except NotImplementedError as err: + if "function is not implemented for this dtype" in str( + err + ) or "category dtype not supported" in str(err): + # raised in _get_cython_function, in some cases can + # be trimmed by implementing cython funcs for more dtypes + pass + else: + raise + + # apply a non-cython aggregation + result = self.aggregate(lambda x: npfunc(x, axis=self.axis)) + return result + def _cython_agg_general( self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 ): @@ -1438,105 +1486,79 @@ def size(self): result = self._obj_1d_constructor(result) return self._reindex_output(result, fill_value=0) - @classmethod - def _add_numeric_operations(cls): - """ - Add numeric operations to the GroupBy generically. - """ + @doc(_groupby_agg_method_template, fname="sum", no=True, mc=0) + def sum(self, numeric_only: bool = True, min_count: int = 0): + return self._agg_general( + numeric_only=numeric_only, min_count=min_count, alias="add", npfunc=np.sum + ) - def groupby_function( - name: str, - alias: str, - npfunc, - numeric_only: bool = True, - min_count: int = -1, - ): + @doc(_groupby_agg_method_template, fname="prod", no=True, mc=0) + def prod(self, numeric_only: bool = True, min_count: int = 0): + return self._agg_general( + numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod + ) - _local_template = """ - Compute %(f)s of group values. - - Parameters - ---------- - numeric_only : bool, default %(no)s - Include only float, int, boolean columns. If None, will attempt to use - everything, then use only numeric data. - min_count : int, default %(mc)s - The required number of valid values to perform the operation. If fewer - than ``min_count`` non-NA values are present the result will be NA. - - Returns - ------- - Series or DataFrame - Computed %(f)s of values within each group. - """ - - @Substitution(name="groupby", f=name, no=numeric_only, mc=min_count) - @Appender(_common_see_also) - @Appender(_local_template) - def func(self, numeric_only=numeric_only, min_count=min_count): - self._set_group_selection() - - # try a cython aggregation if we can - try: - return self._cython_agg_general( - how=alias, - alt=npfunc, - numeric_only=numeric_only, - min_count=min_count, - ) - except DataError: - pass - except NotImplementedError as err: - if "function is not implemented for this dtype" in str( - err - ) or "category dtype not supported" in str(err): - # raised in _get_cython_function, in some cases can - # be trimmed by implementing cython funcs for more dtypes - pass - else: - raise - - # apply a non-cython aggregation - result = self.aggregate(lambda x: npfunc(x, axis=self.axis)) - return result - - set_function_name(func, name, cls) - - return func - - def first_compat(x, axis=0): - def first(x): - x = x.to_numpy() - - x = x[notna(x)] + @doc(_groupby_agg_method_template, fname="min", no=False, mc=-1) + def min(self, numeric_only: bool = False, min_count: int = -1): + return self._agg_general( + numeric_only=numeric_only, min_count=min_count, alias="min", npfunc=np.min + ) + + @doc(_groupby_agg_method_template, fname="max", no=False, mc=-1) + def max(self, numeric_only: bool = False, min_count: int = -1): + return self._agg_general( + numeric_only=numeric_only, min_count=min_count, alias="max", npfunc=np.max + ) + + @doc(_groupby_agg_method_template, fname="first", no=False, mc=-1) + def first(self, numeric_only: bool = False, min_count: int = -1): + def first_compat(obj: FrameOrSeries, axis: int = 0): + def first(x: Series): + """Helper function for first item that isn't NA. + """ + x = x.array[notna(x.array)] if len(x) == 0: return np.nan return x[0] - if isinstance(x, DataFrame): - return x.apply(first, axis=axis) + if isinstance(obj, DataFrame): + return obj.apply(first, axis=axis) + elif isinstance(obj, Series): + return first(obj) else: - return first(x) + raise TypeError(type(obj)) + + return self._agg_general( + numeric_only=numeric_only, + min_count=min_count, + alias="first", + npfunc=first_compat, + ) - def last_compat(x, axis=0): - def last(x): - x = x.to_numpy() - x = x[notna(x)] + @doc(_groupby_agg_method_template, fname="last", no=False, mc=-1) + def last(self, numeric_only: bool = False, min_count: int = -1): + def last_compat(obj: FrameOrSeries, axis: int = 0): + def last(x: Series): + """Helper function for last item that isn't NA. + """ + x = x.array[notna(x.array)] if len(x) == 0: return np.nan return x[-1] - if isinstance(x, DataFrame): - return x.apply(last, axis=axis) + if isinstance(obj, DataFrame): + return obj.apply(last, axis=axis) + elif isinstance(obj, Series): + return last(obj) else: - return last(x) + raise TypeError(type(obj)) - cls.sum = groupby_function("sum", "add", np.sum, min_count=0) - cls.prod = groupby_function("prod", "prod", np.prod, min_count=0) - cls.min = groupby_function("min", "min", np.min, numeric_only=False) - cls.max = groupby_function("max", "max", np.max, numeric_only=False) - cls.first = groupby_function("first", "first", first_compat, numeric_only=False) - cls.last = groupby_function("last", "last", last_compat, numeric_only=False) + return self._agg_general( + numeric_only=numeric_only, + min_count=min_count, + alias="last", + npfunc=last_compat, + ) @Substitution(name="groupby") @Appender(_common_see_also) @@ -2636,9 +2658,6 @@ def _reindex_output( return output.reset_index(drop=True) -GroupBy._add_numeric_operations() - - @doc(GroupBy) def get_groupby( obj: NDFrame, diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 597a160995eef..74db87f46c5e2 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -952,7 +952,9 @@ def _chop(self, sdata, slice_obj: slice) -> NDFrame: class SeriesSplitter(DataSplitter): def _chop(self, sdata: Series, slice_obj: slice) -> Series: - return sdata.iloc[slice_obj] + # fastpath equivalent to `sdata.iloc[slice_obj]` + mgr = sdata._mgr.get_slice(slice_obj) + return type(sdata)(mgr, name=sdata.name, fastpath=True) class FrameSplitter(DataSplitter): @@ -962,10 +964,13 @@ def fast_apply(self, f: F, sdata: FrameOrSeries, names): return libreduction.apply_frame_axis0(sdata, f, names, starts, ends) def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: - if self.axis == 0: - return sdata.iloc[slice_obj] - else: - return sdata.iloc[:, slice_obj] + # Fastpath equivalent to: + # if self.axis == 0: + # return sdata.iloc[slice_obj] + # else: + # return sdata.iloc[:, slice_obj] + mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis) + return type(sdata)(mgr) def get_splitter( diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b8a9827b5effd..d9828707b6164 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -54,7 +54,6 @@ ABCCategorical, ABCDataFrame, ABCDatetimeIndex, - ABCIntervalIndex, ABCMultiIndex, ABCPandasArray, ABCPeriodIndex, @@ -75,7 +74,6 @@ from pandas.core.indexes.frozen import FrozenList import pandas.core.missing as missing from pandas.core.ops import get_op_result_name -from pandas.core.ops.common import unpack_zerodim_and_defer from pandas.core.ops.invalid import make_invalid_op from pandas.core.sorting import ensure_key_mapped from pandas.core.strings import StringMethods @@ -109,8 +107,10 @@ def _make_comparison_op(op, cls): - @unpack_zerodim_and_defer(op.__name__) def cmp_method(self, other): + if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)): + if other.ndim > 0 and len(self) != len(other): + raise ValueError("Lengths must match to compare") if is_object_dtype(self.dtype) and isinstance(other, ABCCategorical): left = type(other)(self._values, dtype=other.dtype) @@ -4101,37 +4101,13 @@ def append(self, other): return self._concat(to_concat, name) def _concat(self, to_concat, name): - - typs = _concat.get_dtype_kinds(to_concat) - - if len(typs) == 1: - return self._concat_same_dtype(to_concat, name=name) - return Index._concat_same_dtype(self, to_concat, name=name) - - def _concat_same_dtype(self, to_concat, name): """ - Concatenate to_concat which has the same class. + Concatenate multiple Index objects. """ - # must be overridden in specific classes - klasses = ( - ABCDatetimeIndex, - ABCTimedeltaIndex, - ABCPeriodIndex, - ExtensionArray, - ABCIntervalIndex, - ) - to_concat = [ - x.astype(object) if isinstance(x, klasses) else x for x in to_concat - ] - - self = to_concat[0] - attribs = self._get_attributes_dict() - attribs["name"] = name - to_concat = [x._values if isinstance(x, Index) else x for x in to_concat] - res_values = np.concatenate(to_concat) - return Index(res_values, name=name) + result = _concat.concat_compat(to_concat) + return Index(result, name=name) def putmask(self, mask, value): """ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 25df4a0bee737..2a79c83de7ef2 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -738,13 +738,6 @@ def insert(self, loc: int, item): def _concat(self, to_concat, name): # if calling index is category, don't check dtype of others - return CategoricalIndex._concat_same_dtype(self, to_concat, name) - - def _concat_same_dtype(self, to_concat, name): - """ - Concatenate to_concat which has the same class - ValueError if other is not in the categories - """ codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat]) result = self._create_from_codes(codes, name=name) # if name is None, _create_from_codes sets self.name diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 6e965ecea7cd8..badf6502aa723 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -9,11 +9,7 @@ from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly, doc -from pandas.core.dtypes.common import ( - ensure_platform_int, - is_dtype_equal, - is_object_dtype, -) +from pandas.core.dtypes.common import is_dtype_equal, is_object_dtype from pandas.core.dtypes.generic import ABCSeries from pandas.core.arrays import ExtensionArray @@ -223,29 +219,14 @@ def __getitem__(self, key): deprecate_ndim_indexing(result) return result - def __iter__(self): - return self._data.__iter__() - # --------------------------------------------------------------------- - def __array__(self, dtype=None) -> np.ndarray: - return np.asarray(self._data, dtype=dtype) - def _get_engine_target(self) -> np.ndarray: # NB: _values_for_argsort happens to match the desired engine targets # for all of our existing EA-backed indexes, but in general # cannot be relied upon to exist. return self._data._values_for_argsort() - @doc(Index.dropna) - def dropna(self, how="any"): - if how not in ("any", "all"): - raise ValueError(f"invalid how option: {how}") - - if self.hasnans: - return self._shallow_copy(self._data[~self._isnan]) - return self._shallow_copy() - def repeat(self, repeats, axis=None): nv.validate_repeat(tuple(), dict(axis=axis)) result = self._data.repeat(repeats, axis=axis) @@ -255,31 +236,6 @@ def insert(self, loc: int, item): # ExtensionIndex subclasses must override Index.insert raise AbstractMethodError(self) - def _concat_same_dtype(self, to_concat, name): - arr = type(self._data)._concat_same_type(to_concat) - return type(self)._simple_new(arr, name=name) - - @doc(Index.take) - def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): - nv.validate_take(tuple(), kwargs) - indices = ensure_platform_int(indices) - - taken = self._assert_take_fillable( - self._data, - indices, - allow_fill=allow_fill, - fill_value=fill_value, - na_value=self._na_value, - ) - return type(self)(taken, name=self.name) - - def unique(self, level=None): - if level is not None: - self._validate_index_level(level) - - result = self._data.unique() - return self._shallow_copy(result) - def _get_unique_index(self, dropna=False): if self.is_unique and not dropna: return self diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 06040166d0f9e..5020a25c88ff4 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -147,10 +147,6 @@ def _assert_safe_casting(cls, data, subarr): """ pass - def _concat_same_dtype(self, indexes, name): - result = type(indexes[0])(np.concatenate([x._values for x in indexes])) - return result.rename(name) - @property def is_all_dates(self) -> bool: """ diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index b0b85f69396ba..be243d7014233 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -5,7 +5,7 @@ from pandas._libs import index as libindex from pandas._libs.lib import no_default -from pandas._libs.tslibs import Period, frequencies as libfrequencies, resolution +from pandas._libs.tslibs import Period, resolution from pandas._libs.tslibs.parsing import parse_time_string from pandas._typing import DtypeObj, Label from pandas.util._decorators import Appender, cache_readonly, doc @@ -44,7 +44,6 @@ from pandas.core.ops import get_op_result_name from pandas.core.tools.datetimes import DateParseError -from pandas.tseries import frequencies from pandas.tseries.offsets import DateOffset, Tick _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -278,15 +277,12 @@ def _maybe_convert_timedelta(self, other): of self.freq. Note IncompatibleFrequency subclasses ValueError. """ if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)): - offset = frequencies.to_offset(self.freq.rule_code) - if isinstance(offset, Tick): + if isinstance(self.freq, Tick): # _check_timedeltalike_freq_compat will raise if incompatible delta = self._data._check_timedeltalike_freq_compat(other) return delta elif isinstance(other, DateOffset): - freqstr = other.rule_code - base = libfrequencies.get_base_alias(freqstr) - if base == self.freq.rule_code: + if other.base == self.freq.base: return other.n raise raise_on_incompatible(self, other) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index c34b8965ca36a..49a0f0fb7ae92 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -627,14 +627,18 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False) return super().join(other, how, level, return_indexers, sort) - def _concat_same_dtype(self, indexes, name): + def _concat(self, indexes, name): """ - Concatenates multiple RangeIndex instances. All members of "indexes" must - be of type RangeIndex; result will be RangeIndex if possible, Int64Index - otherwise. E.g.: + Overriding parent method for the case of all RangeIndex instances. + + When all members of "indexes" are of type RangeIndex: result will be + RangeIndex if possible, Int64Index otherwise. E.g.: indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6) indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5]) """ + if not all(isinstance(x, RangeIndex) for x in indexes): + return super()._concat(indexes, name) + start = step = next_ = None # Filter the empty indexes diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3e2b5bdccd5d1..c052c6c9d7d1d 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -251,7 +251,7 @@ def make_block_same_class(self, values, placement=None, ndim=None): placement = self.mgr_locs if ndim is None: ndim = self.ndim - return make_block(values, placement=placement, ndim=ndim, klass=type(self)) + return type(self)(values, placement=placement, ndim=ndim) def __repr__(self) -> str: # don't want to print out all of the items here diff --git a/pandas/core/ops/common.py b/pandas/core/ops/common.py index 1fb9398083884..515a0a5198d74 100644 --- a/pandas/core/ops/common.py +++ b/pandas/core/ops/common.py @@ -1,13 +1,10 @@ """ Boilerplate functions used in defining binary operations. """ -from collections import UserDict from functools import wraps from typing import Callable -import numpy as np - -from pandas._libs.lib import is_list_like, item_from_zerodim +from pandas._libs.lib import item_from_zerodim from pandas._typing import F from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries @@ -65,25 +62,6 @@ def new_method(self, other): other = item_from_zerodim(other) - if isinstance(self, (ABCSeries, ABCDataFrame)) and isinstance( - other, (ABCSeries, ABCDataFrame) - ): - # we dont require length matches - pass - elif is_list_like(other, allow_sets=False) and not isinstance( - other, (dict, UserDict) - ): - if len(other) != len(self): - if len(other) == 1 and not hasattr(other, "dtype"): - # i.e. unpack scalar list, but leave e.g. Categorical, - # for which the scalar behavior doesnt match the - # array behavior - other = other[0] - else: - raise ValueError( - "Lengths must match", self.shape, np.shape(other), type(other) - ) - return method(self, other) return new_method diff --git a/pandas/core/strings.py b/pandas/core/strings.py index bb62cd6b34722..b27ad744dbdba 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2975,7 +2975,7 @@ def encode(self, encoding, errors="strict"): _shared_docs[ "str_strip" ] = r""" - Remove leading and trailing characters. + Remove %(position)s characters. Strip whitespaces (including newlines) or a set of specified characters from each string in the Series/Index from %(side)s. @@ -3038,20 +3038,29 @@ def encode(self, encoding, errors="strict"): """ @Appender( - _shared_docs["str_strip"] % dict(side="left and right sides", method="strip") + _shared_docs["str_strip"] + % dict( + side="left and right sides", method="strip", position="leading and trailing" + ) ) @forbid_nonstring_types(["bytes"]) def strip(self, to_strip=None): result = str_strip(self._parent, to_strip, side="both") return self._wrap_result(result) - @Appender(_shared_docs["str_strip"] % dict(side="left side", method="lstrip")) + @Appender( + _shared_docs["str_strip"] + % dict(side="left side", method="lstrip", position="leading") + ) @forbid_nonstring_types(["bytes"]) def lstrip(self, to_strip=None): result = str_strip(self._parent, to_strip, side="left") return self._wrap_result(result) - @Appender(_shared_docs["str_strip"] % dict(side="right side", method="rstrip")) + @Appender( + _shared_docs["str_strip"] + % dict(side="right side", method="rstrip", position="trailing") + ) @forbid_nonstring_types(["bytes"]) def rstrip(self, to_strip=None): result = str_strip(self._parent, to_strip, side="right") diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 166ab13344816..c615e18af68e6 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -247,8 +247,22 @@ def __repr__(self) -> str: return f"{self._window_type} [{attrs}]" def __iter__(self): - url = "https://github.com/pandas-dev/pandas/issues/11704" - raise NotImplementedError(f"See issue #11704 {url}") + window = self._get_window(win_type=None) + blocks, obj = self._create_blocks() + index = self._get_window_indexer(window=window) + + start, end = index.get_window_bounds( + num_values=len(obj), + min_periods=self.min_periods, + center=self.center, + closed=self.closed, + ) + # From get_window_bounds, those two should be equal in length of array + assert len(start) == len(end) + + for s, e in zip(start, end): + result = obj.iloc[slice(s, e)] + yield result def _prep_values(self, values: Optional[np.ndarray] = None) -> np.ndarray: """Convert input to numpy arrays for Cython routines""" @@ -1302,6 +1316,8 @@ def apply( use_numba_cache=engine == "numba", raw=raw, original_func=func, + args=args, + kwargs=kwargs, ) def _generate_cython_apply_func(self, args, kwargs, raw, offset, func): diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index c6d159d3d016b..132cbdb160bec 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -12,7 +12,7 @@ from pandas._libs import lib, tslibs from pandas._libs.tslibs import resolution -from pandas._libs.tslibs.frequencies import FreqGroup, get_freq +from pandas._libs.tslibs.frequencies import FreqGroup, get_freq_code from pandas.core.dtypes.common import ( is_datetime64_ns_dtype, @@ -887,7 +887,7 @@ def _annual_finder(vmin, vmax, freq): def get_finder(freq): if isinstance(freq, str): - freq = get_freq(freq) + freq = get_freq_code(freq)[0] fgroup = resolution.get_freq_group(freq) if fgroup == FreqGroup.FR_ANN: @@ -932,7 +932,7 @@ def __init__( plot_obj=None, ): if isinstance(freq, str): - freq = get_freq(freq) + freq = get_freq_code(freq)[0] self.freq = freq self.base = base (self.quarter, self.month, self.day) = (quarter, month, day) @@ -1011,7 +1011,7 @@ class TimeSeries_DateFormatter(Formatter): def __init__(self, freq, minor_locator=False, dynamic_mode=True, plot_obj=None): if isinstance(freq, str): - freq = get_freq(freq) + freq = get_freq_code(freq)[0] self.format = None self.freq = freq self.locs = [] diff --git a/pandas/plotting/_matplotlib/timeseries.py b/pandas/plotting/_matplotlib/timeseries.py index 3abce690cbe6b..f6e120e2f91e7 100644 --- a/pandas/plotting/_matplotlib/timeseries.py +++ b/pandas/plotting/_matplotlib/timeseries.py @@ -7,7 +7,7 @@ from pandas._libs.tslibs.frequencies import ( FreqGroup, get_base_alias, - get_freq, + get_freq_code, is_subperiod, is_superperiod, ) @@ -209,9 +209,9 @@ def _use_dynamic_x(ax, data): if freq is None: return False - # hack this for 0.10.1, creating more technical debt...sigh + # FIXME: hack this for 0.10.1, creating more technical debt...sigh if isinstance(data.index, ABCDatetimeIndex): - base = get_freq(freq) + base = get_freq_code(freq)[0] x = data.index if base <= FreqGroup.FR_DAY: return x[:1].is_normalized diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 0fb3cb1025639..8c480faa4ee81 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -2206,7 +2206,7 @@ def test_sub_dti_dti(self): # different length raises ValueError dti1 = date_range("20130101", periods=3) dti2 = date_range("20130101", periods=4) - msg = "Lengths must match" + msg = "cannot add indices of unequal length" with pytest.raises(ValueError, match=msg): dti1 - dti2 diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index b6456a2141c06..a37339c66bf6e 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -649,7 +649,7 @@ def test_mul_datelike_raises(self, numeric_idx): def test_mul_size_mismatch_raises(self, numeric_idx): idx = numeric_idx - msg = "Lengths must match" + msg = "operands could not be broadcast together" with pytest.raises(ValueError, match=msg): idx * idx[0:3] with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 180364420b021..65e3c6a07d4f3 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -451,7 +451,7 @@ def test_addition_ops(self): tm.assert_index_equal(result, expected) # unequal length - msg = "Lengths must match" + msg = "cannot add indices of unequal length" with pytest.raises(ValueError, match=msg): tdi + dti[0:1] with pytest.raises(ValueError, match=msg): @@ -1723,7 +1723,7 @@ def test_tdarr_div_length_mismatch(self, box_with_array): mismatched = [1, 2, 3, 4] rng = tm.box_expected(rng, box_with_array) - msg = "Lengths must match|Unable to coerce to Series" + msg = "Cannot divide vectors|Unable to coerce to Series" for obj in [mismatched, mismatched[:2]]: # one shorter, one longer for other in [obj, np.array(obj), pd.Index(obj)]: @@ -1905,14 +1905,12 @@ def test_td64arr_mul_tdscalar_invalid(self, box_with_array, scalar_td): def test_td64arr_mul_too_short_raises(self, box_with_array): idx = TimedeltaIndex(np.arange(5, dtype="int64")) idx = tm.box_expected(idx, box_with_array) - msg = "|".join( - [ - "Lengths must match", # <- EA, Index, Series - "cannot use operands with types dtype", # <- DataFrame - "Unable to coerce to Series", # <- Series - ] + msg = ( + "cannot use operands with types dtype|" + "Cannot multiply with unequal lengths|" + "Unable to coerce to Series" ) - with pytest.raises((ValueError, TypeError), match=msg): + with pytest.raises(TypeError, match=msg): # length check before dtype check idx * idx[:3] with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/arrays/boolean/test_logical.py b/pandas/tests/arrays/boolean/test_logical.py index a61746d46daeb..bf4775bbd7b32 100644 --- a/pandas/tests/arrays/boolean/test_logical.py +++ b/pandas/tests/arrays/boolean/test_logical.py @@ -46,7 +46,7 @@ def test_empty_ok(self, all_logical_operators): def test_logical_length_mismatch_raises(self, all_logical_operators): op_name = all_logical_operators a = pd.array([True, False, None], dtype="boolean") - msg = "Lengths must match" + msg = "Lengths must match to compare" with pytest.raises(ValueError, match=msg): getattr(a, op_name)([True, False]) diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py index b7fdd8581101b..18f1dac3c13b2 100644 --- a/pandas/tests/arrays/integer/test_arithmetic.py +++ b/pandas/tests/arrays/integer/test_arithmetic.py @@ -232,9 +232,8 @@ def test_error(self, data, all_arithmetic_operators): result = opa(pd.DataFrame({"A": s})) assert result is NotImplemented - # msg = r"can only perform ops with 1-d structures" - msg = "Lengths must match" - with pytest.raises(ValueError, match=msg): + msg = r"can only perform ops with 1-d structures" + with pytest.raises(NotImplementedError, match=msg): opa(np.arange(len(s)).reshape(-1, len(s))) @pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)]) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 2e047b5c4eb60..6f9a1a5be4c43 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -98,7 +98,7 @@ def test_add_2d(): a + b s = pd.Series(a) - with pytest.raises(ValueError, match="Lengths must match"): + with pytest.raises(ValueError, match="3 != 1"): s + b diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 61d78034f0747..d0bf5bb41bb2c 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -168,7 +168,8 @@ def test_concat_same_type(self): arr = self.array_cls(idx) result = arr._concat_same_type([arr[:-1], arr[1:], arr]) - expected = idx._concat_same_dtype([idx[:-1], idx[1:], idx], None) + arr2 = arr.astype(object) + expected = self.index_cls(np.concatenate([arr2[:-1], arr2[1:], arr2]), None) tm.assert_index_equal(self.index_cls(result), expected) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 9765c77c6b60c..8a84090ea6e94 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -136,7 +136,7 @@ def test_append(self): tm.assert_index_equal(result, expected, exact=True) def test_append_to_another(self): - # hits Index._concat_same_dtype + # hits Index._concat fst = Index(["a", "b"]) snd = CategoricalIndex(["d", "e"]) result = fst.append(snd) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index fac9eb1c34dbf..997887cc18d61 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -579,7 +579,7 @@ def test_comparison(self): with pytest.raises(TypeError, match=msg): self.index > np.arange(2) - msg = "Lengths must match" + msg = "Lengths must match to compare" with pytest.raises(ValueError, match=msg): self.index > np.arange(3) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 9f235dcdbb295..466b491eb7a2c 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2423,6 +2423,16 @@ def test_index_repr_bool_nan(self): out2 = "Index([True, False, nan], dtype='object')" assert out2 == exp2 + @pytest.mark.filterwarnings("ignore:elementwise comparison failed:FutureWarning") + def test_index_with_tuple_bool(self): + # GH34123 + # TODO: remove tupleize_cols=False once correct behaviour is restored + # TODO: also this op right now produces FutureWarning from numpy + idx = Index([("a", "b"), ("b", "c"), ("c", "a")], tupleize_cols=False) + result = idx == ("c", "a",) + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + class TestIndexUtils: @pytest.mark.parametrize( diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index b038ee1aee106..ed0045bcab989 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -52,7 +52,7 @@ def test_overflow_offset_raises(self): # used to crash, so check for proper overflow exception stamp = Timestamp("2000/1/1") - offset_overflow = to_offset("D") * 100 ** 25 + offset_overflow = to_offset("D") * 100 ** 5 with pytest.raises(OverflowError, match=msg): stamp + offset_overflow diff --git a/pandas/tests/tseries/frequencies/test_freq_code.py b/pandas/tests/tseries/frequencies/test_freq_code.py index be07f829dbae8..273e03925dd36 100644 --- a/pandas/tests/tseries/frequencies/test_freq_code.py +++ b/pandas/tests/tseries/frequencies/test_freq_code.py @@ -1,12 +1,7 @@ import pytest from pandas._libs.tslibs import frequencies as libfrequencies, resolution -from pandas._libs.tslibs.frequencies import ( - FreqGroup, - _period_code_map, - get_freq, - get_freq_code, -) +from pandas._libs.tslibs.frequencies import FreqGroup, _period_code_map, get_freq_code import pandas.tseries.offsets as offsets @@ -31,12 +26,12 @@ def period_code_item(request): ], ) def test_freq_code(freqstr, expected): - assert get_freq(freqstr) == expected + assert get_freq_code(freqstr)[0] == expected def test_freq_code_match(period_code_item): freqstr, code = period_code_item - assert get_freq(freqstr) == code + assert get_freq_code(freqstr)[0] == code @pytest.mark.parametrize( @@ -156,31 +151,31 @@ def test_cat(args): "freq_input,expected", [ # Frequency string. - ("A", (get_freq("A"), 1)), - ("3D", (get_freq("D"), 3)), - ("-2M", (get_freq("M"), -2)), + ("A", (get_freq_code("A")[0], 1)), + ("3D", (get_freq_code("D")[0], 3)), + ("-2M", (get_freq_code("M")[0], -2)), # Tuple. - (("D", 1), (get_freq("D"), 1)), - (("A", 3), (get_freq("A"), 3)), - (("M", -2), (get_freq("M"), -2)), + (("D", 1), (get_freq_code("D")[0], 1)), + (("A", 3), (get_freq_code("A")[0], 3)), + (("M", -2), (get_freq_code("M")[0], -2)), ((5, "T"), (FreqGroup.FR_MIN, 5)), # Numeric Tuple. ((1000, 1), (1000, 1)), # Offsets. - (offsets.Day(), (get_freq("D"), 1)), - (offsets.Day(3), (get_freq("D"), 3)), - (offsets.Day(-2), (get_freq("D"), -2)), - (offsets.MonthEnd(), (get_freq("M"), 1)), - (offsets.MonthEnd(3), (get_freq("M"), 3)), - (offsets.MonthEnd(-2), (get_freq("M"), -2)), - (offsets.Week(), (get_freq("W"), 1)), - (offsets.Week(3), (get_freq("W"), 3)), - (offsets.Week(-2), (get_freq("W"), -2)), + (offsets.Day(), (get_freq_code("D")[0], 1)), + (offsets.Day(3), (get_freq_code("D")[0], 3)), + (offsets.Day(-2), (get_freq_code("D")[0], -2)), + (offsets.MonthEnd(), (get_freq_code("M")[0], 1)), + (offsets.MonthEnd(3), (get_freq_code("M")[0], 3)), + (offsets.MonthEnd(-2), (get_freq_code("M")[0], -2)), + (offsets.Week(), (get_freq_code("W")[0], 1)), + (offsets.Week(3), (get_freq_code("W")[0], 3)), + (offsets.Week(-2), (get_freq_code("W")[0], -2)), (offsets.Hour(), (FreqGroup.FR_HR, 1)), # Monday is weekday=0. - (offsets.Week(weekday=1), (get_freq("W-TUE"), 1)), - (offsets.Week(3, weekday=0), (get_freq("W-MON"), 3)), - (offsets.Week(-2, weekday=4), (get_freq("W-FRI"), -2)), + (offsets.Week(weekday=1), (get_freq_code("W-TUE")[0], 1)), + (offsets.Week(3, weekday=0), (get_freq_code("W-MON")[0], 3)), + (offsets.Week(-2, weekday=4), (get_freq_code("W-FRI")[0], -2)), ], ) def test_get_freq_code(freq_input, expected): diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 0a7eaa7b7be3e..f0dcef4dbc967 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -756,7 +756,9 @@ def test_hash(self): assert hash(self.offset2) == hash(self.offset2) def test_call(self): - assert self.offset2(self.d) == datetime(2008, 1, 3) + with tm.assert_produces_warning(FutureWarning): + # GH#34171 DateOffset.__call__ is deprecated + assert self.offset2(self.d) == datetime(2008, 1, 3) def testRollback1(self): assert BDay(10).rollback(self.d) == self.d @@ -1040,13 +1042,15 @@ def test_hash(self, offset_name): assert offset == offset def test_call(self): - assert self.offset1(self.d) == datetime(2014, 7, 1, 11) - assert self.offset2(self.d) == datetime(2014, 7, 1, 13) - assert self.offset3(self.d) == datetime(2014, 6, 30, 17) - assert self.offset4(self.d) == datetime(2014, 6, 30, 14) - assert self.offset8(self.d) == datetime(2014, 7, 1, 11) - assert self.offset9(self.d) == datetime(2014, 7, 1, 22) - assert self.offset10(self.d) == datetime(2014, 7, 1, 1) + with tm.assert_produces_warning(FutureWarning): + # GH#34171 DateOffset.__call__ is deprecated + assert self.offset1(self.d) == datetime(2014, 7, 1, 11) + assert self.offset2(self.d) == datetime(2014, 7, 1, 13) + assert self.offset3(self.d) == datetime(2014, 6, 30, 17) + assert self.offset4(self.d) == datetime(2014, 6, 30, 14) + assert self.offset8(self.d) == datetime(2014, 7, 1, 11) + assert self.offset9(self.d) == datetime(2014, 7, 1, 22) + assert self.offset10(self.d) == datetime(2014, 7, 1, 1) def test_sub(self): # we have to override test_sub here because self.offset2 is not @@ -2377,8 +2381,10 @@ def test_hash(self): assert hash(self.offset2) == hash(self.offset2) def test_call(self): - assert self.offset1(self.d) == datetime(2014, 7, 1, 11) - assert self.offset2(self.d) == datetime(2014, 7, 1, 11) + with tm.assert_produces_warning(FutureWarning): + # GH#34171 DateOffset.__call__ is deprecated + assert self.offset1(self.d) == datetime(2014, 7, 1, 11) + assert self.offset2(self.d) == datetime(2014, 7, 1, 11) def testRollback1(self): assert self.offset1.rollback(self.d) == self.d @@ -2642,8 +2648,10 @@ def test_hash(self): assert hash(self.offset2) == hash(self.offset2) def test_call(self): - assert self.offset2(self.d) == datetime(2008, 1, 3) - assert self.offset2(self.nd) == datetime(2008, 1, 3) + with tm.assert_produces_warning(FutureWarning): + # GH#34171 DateOffset.__call__ is deprecated + assert self.offset2(self.d) == datetime(2008, 1, 3) + assert self.offset2(self.nd) == datetime(2008, 1, 3) def testRollback1(self): assert CDay(10).rollback(self.d) == self.d @@ -2892,8 +2900,10 @@ def test_repr(self): assert repr(self.offset) == "" assert repr(self.offset2) == "<2 * CustomBusinessMonthEnds>" - def testCall(self): - assert self.offset2(self.d) == datetime(2008, 2, 29) + def test_call(self): + with tm.assert_produces_warning(FutureWarning): + # GH#34171 DateOffset.__call__ is deprecated + assert self.offset2(self.d) == datetime(2008, 2, 29) def testRollback1(self): assert CDay(10).rollback(datetime(2007, 12, 31)) == datetime(2007, 12, 31) @@ -3041,8 +3051,10 @@ def test_repr(self): assert repr(self.offset) == "" assert repr(self.offset2) == "<2 * CustomBusinessMonthBegins>" - def testCall(self): - assert self.offset2(self.d) == datetime(2008, 3, 3) + def test_call(self): + with tm.assert_produces_warning(FutureWarning): + # GH#34171 DateOffset.__call__ is deprecated + assert self.offset2(self.d) == datetime(2008, 3, 3) def testRollback1(self): assert CDay(10).rollback(datetime(2007, 12, 31)) == datetime(2007, 12, 31) diff --git a/pandas/tests/tslibs/test_period_asfreq.py b/pandas/tests/tslibs/test_period_asfreq.py index 5497cb65c5373..7205c3cc676cf 100644 --- a/pandas/tests/tslibs/test_period_asfreq.py +++ b/pandas/tests/tslibs/test_period_asfreq.py @@ -1,6 +1,6 @@ import pytest -from pandas._libs.tslibs.frequencies import get_freq +from pandas._libs.tslibs.frequencies import get_freq_code from pandas._libs.tslibs.period import period_asfreq, period_ordinal @@ -31,7 +31,10 @@ ], ) def test_intra_day_conversion_factors(freq1, freq2, expected): - assert period_asfreq(1, get_freq(freq1), get_freq(freq2), False) == expected + assert ( + period_asfreq(1, get_freq_code(freq1)[0], get_freq_code(freq2)[0], False) + == expected + ) @pytest.mark.parametrize( @@ -39,7 +42,7 @@ def test_intra_day_conversion_factors(freq1, freq2, expected): ) def test_period_ordinal_start_values(freq, expected): # information for Jan. 1, 1970. - assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq(freq)) == expected + assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq_code(freq)[0]) == expected @pytest.mark.parametrize( @@ -52,7 +55,7 @@ def test_period_ordinal_start_values(freq, expected): ], ) def test_period_ordinal_week(dt, expected): - args = dt + (get_freq("W"),) + args = dt + (get_freq_code("W")[0],) assert period_ordinal(*args) == expected @@ -74,5 +77,5 @@ def test_period_ordinal_week(dt, expected): ], ) def test_period_ordinal_business_day(day, expected): - args = (2013, 10, day, 0, 0, 0, 0, 0, get_freq("B")) + args = (2013, 10, day, 0, 0, 0, 0, 0, get_freq_code("B")[0]) assert period_ordinal(*args) == expected diff --git a/pandas/tests/window/common.py b/pandas/tests/window/common.py index 5dca26df49930..7e0be331ec8d5 100644 --- a/pandas/tests/window/common.py +++ b/pandas/tests/window/common.py @@ -1,28 +1,8 @@ -from datetime import datetime - import numpy as np -from numpy.random import randn -from pandas import DataFrame, Series, bdate_range +from pandas import Series import pandas._testing as tm -N, K = 100, 10 - - -class Base: - - _nan_locs = np.arange(20, 40) - _inf_locs = np.array([]) - - def _create_data(self): - arr = randn(N) - arr[self._nan_locs] = np.NaN - - self.arr = arr - self.rng = bdate_range(datetime(2009, 1, 1), periods=N) - self.series = Series(arr.copy(), index=self.rng) - self.frame = DataFrame(randn(N, K), index=self.rng, columns=np.arange(K)) - def check_pairwise_moment(frame, dispatch, name, **kwargs): def get_result(obj, obj2=None): diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index 4f462a09a60a3..74f3406d30225 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -1,9 +1,12 @@ +from datetime import datetime + import numpy as np +from numpy.random import randn import pytest import pandas.util._test_decorators as td -from pandas import DataFrame, Series, notna +from pandas import DataFrame, Series, bdate_range, notna @pytest.fixture(params=[True, False]) @@ -242,3 +245,60 @@ def no_nans(x): def consistency_data(request): """Create consistency data""" return request.param + + +def _create_arr(): + """Internal function to mock an array.""" + arr = randn(100) + locs = np.arange(20, 40) + arr[locs] = np.NaN + return arr + + +def _create_rng(): + """Internal function to mock date range.""" + rng = bdate_range(datetime(2009, 1, 1), periods=100) + return rng + + +def _create_series(): + """Internal function to mock Series.""" + arr = _create_arr() + series = Series(arr.copy(), index=_create_rng()) + return series + + +def _create_frame(): + """Internal function to mock DataFrame.""" + rng = _create_rng() + return DataFrame(randn(100, 10), index=rng, columns=np.arange(10)) + + +@pytest.fixture +def nan_locs(): + """Make a range as loc fixture.""" + return np.arange(20, 40) + + +@pytest.fixture +def arr(): + """Make an array as fixture.""" + return _create_arr() + + +@pytest.fixture +def frame(): + """Make mocked frame as fixture.""" + return _create_frame() + + +@pytest.fixture +def series(): + """Make mocked series as fixture.""" + return _create_series() + + +@pytest.fixture(params=[_create_series(), _create_frame()]) +def which(request): + """Turn parametrized which as fixture for series and frame""" + return request.param diff --git a/pandas/tests/window/moments/test_moments_consistency_ewm.py b/pandas/tests/window/moments/test_moments_consistency_ewm.py index 3b3a9d59cb6e7..f143278e12ec5 100644 --- a/pandas/tests/window/moments/test_moments_consistency_ewm.py +++ b/pandas/tests/window/moments/test_moments_consistency_ewm.py @@ -4,7 +4,6 @@ from pandas import DataFrame, Series, concat from pandas.tests.window.common import ( - Base, check_binary_ew, check_binary_ew_min_periods, check_pairwise_moment, @@ -19,13 +18,9 @@ ) -class TestEwmMomentsConsistency(Base): - def setup_method(self, method): - self._create_data() - - @pytest.mark.parametrize("func", ["cov", "corr"]) - def test_ewm_pairwise_cov_corr(self, func): - check_pairwise_moment(self.frame, "ewm", func, span=10, min_periods=5) +@pytest.mark.parametrize("func", ["cov", "corr"]) +def test_ewm_pairwise_cov_corr(func, frame): + check_pairwise_moment(frame, "ewm", func, span=10, min_periods=5) @pytest.mark.parametrize("name", ["cov", "corr"]) diff --git a/pandas/tests/window/moments/test_moments_consistency_expanding.py b/pandas/tests/window/moments/test_moments_consistency_expanding.py index 09cd2ff218c2b..ee3579d76d1db 100644 --- a/pandas/tests/window/moments/test_moments_consistency_expanding.py +++ b/pandas/tests/window/moments/test_moments_consistency_expanding.py @@ -7,7 +7,6 @@ from pandas import DataFrame, Index, MultiIndex, Series, isna, notna import pandas._testing as tm from pandas.tests.window.common import ( - Base, moments_consistency_cov_data, moments_consistency_is_constant, moments_consistency_mock_mean, @@ -18,132 +17,145 @@ ) -class TestExpandingMomentsConsistency(Base): - def setup_method(self, method): - self._create_data() +def _check_expanding( + func, static_comp, preserve_nan=True, series=None, frame=None, nan_locs=None +): - def test_expanding_corr(self): - A = self.series.dropna() - B = (A + randn(len(A)))[:-5] + series_result = func(series) + assert isinstance(series_result, Series) + frame_result = func(frame) + assert isinstance(frame_result, DataFrame) - result = A.expanding().corr(B) + result = func(series) + tm.assert_almost_equal(result[10], static_comp(series[:11])) - rolling_result = A.rolling(window=len(A), min_periods=1).corr(B) + if preserve_nan: + assert result.iloc[nan_locs].isna().all() - tm.assert_almost_equal(rolling_result, result) - def test_expanding_count(self): - result = self.series.expanding(min_periods=0).count() - tm.assert_almost_equal( - result, self.series.rolling(window=len(self.series), min_periods=0).count() - ) +def _check_expanding_has_min_periods(func, static_comp, has_min_periods): + ser = Series(randn(50)) - def test_expanding_quantile(self): - result = self.series.expanding().quantile(0.5) + if has_min_periods: + result = func(ser, min_periods=30) + assert result[:29].isna().all() + tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) - rolling_result = self.series.rolling( - window=len(self.series), min_periods=1 - ).quantile(0.5) + # min_periods is working correctly + result = func(ser, min_periods=15) + assert isna(result.iloc[13]) + assert notna(result.iloc[14]) - tm.assert_almost_equal(result, rolling_result) + ser2 = Series(randn(20)) + result = func(ser2, min_periods=5) + assert isna(result[3]) + assert notna(result[4]) - def test_expanding_cov(self): - A = self.series - B = (A + randn(len(A)))[:-5] + # min_periods=0 + result0 = func(ser, min_periods=0) + result1 = func(ser, min_periods=1) + tm.assert_almost_equal(result0, result1) + else: + result = func(ser) + tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) - result = A.expanding().cov(B) - rolling_result = A.rolling(window=len(A), min_periods=1).cov(B) +def test_expanding_corr(series): + A = series.dropna() + B = (A + randn(len(A)))[:-5] - tm.assert_almost_equal(rolling_result, result) + result = A.expanding().corr(B) - def test_expanding_cov_pairwise(self): - result = self.frame.expanding().corr() + rolling_result = A.rolling(window=len(A), min_periods=1).corr(B) - rolling_result = self.frame.rolling( - window=len(self.frame), min_periods=1 - ).corr() + tm.assert_almost_equal(rolling_result, result) - tm.assert_frame_equal(result, rolling_result) - def test_expanding_corr_pairwise(self): - result = self.frame.expanding().corr() +def test_expanding_count(series): + result = series.expanding(min_periods=0).count() + tm.assert_almost_equal( + result, series.rolling(window=len(series), min_periods=0).count() + ) - rolling_result = self.frame.rolling( - window=len(self.frame), min_periods=1 - ).corr() - tm.assert_frame_equal(result, rolling_result) - @pytest.mark.parametrize("has_min_periods", [True, False]) - @pytest.mark.parametrize( - "func,static_comp", - [("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)], - ids=["sum", "mean", "max", "min"], - ) - def test_expanding_func(self, func, static_comp, has_min_periods): - def expanding_func(x, min_periods=1, center=False, axis=0): - exp = x.expanding(min_periods=min_periods, center=center, axis=axis) - return getattr(exp, func)() - - self._check_expanding(expanding_func, static_comp, preserve_nan=False) - self._check_expanding_has_min_periods( - expanding_func, static_comp, has_min_periods - ) +def test_expanding_quantile(series): + result = series.expanding().quantile(0.5) + + rolling_result = series.rolling(window=len(series), min_periods=1).quantile(0.5) + + tm.assert_almost_equal(result, rolling_result) + - @pytest.mark.parametrize("has_min_periods", [True, False]) - def test_expanding_apply(self, engine_and_raw, has_min_periods): +def test_expanding_cov(series): + A = series + B = (A + randn(len(A)))[:-5] - engine, raw = engine_and_raw + result = A.expanding().cov(B) - def expanding_mean(x, min_periods=1): + rolling_result = A.rolling(window=len(A), min_periods=1).cov(B) - exp = x.expanding(min_periods=min_periods) - result = exp.apply(lambda x: x.mean(), raw=raw, engine=engine) - return result + tm.assert_almost_equal(rolling_result, result) - # TODO(jreback), needed to add preserve_nan=False - # here to make this pass - self._check_expanding(expanding_mean, np.mean, preserve_nan=False) - self._check_expanding_has_min_periods(expanding_mean, np.mean, has_min_periods) - def _check_expanding(self, func, static_comp, preserve_nan=True): +def test_expanding_cov_pairwise(frame): + result = frame.expanding().cov() - series_result = func(self.series) - assert isinstance(series_result, Series) - frame_result = func(self.frame) - assert isinstance(frame_result, DataFrame) + rolling_result = frame.rolling(window=len(frame), min_periods=1).cov() - result = func(self.series) - tm.assert_almost_equal(result[10], static_comp(self.series[:11])) + tm.assert_frame_equal(result, rolling_result) - if preserve_nan: - assert result.iloc[self._nan_locs].isna().all() - def _check_expanding_has_min_periods(self, func, static_comp, has_min_periods): - ser = Series(randn(50)) +def test_expanding_corr_pairwise(frame): + result = frame.expanding().corr() - if has_min_periods: - result = func(ser, min_periods=30) - assert result[:29].isna().all() - tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) + rolling_result = frame.rolling(window=len(frame), min_periods=1).corr() + tm.assert_frame_equal(result, rolling_result) - # min_periods is working correctly - result = func(ser, min_periods=15) - assert isna(result.iloc[13]) - assert notna(result.iloc[14]) - ser2 = Series(randn(20)) - result = func(ser2, min_periods=5) - assert isna(result[3]) - assert notna(result[4]) +@pytest.mark.parametrize("has_min_periods", [True, False]) +@pytest.mark.parametrize( + "func,static_comp", + [("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)], + ids=["sum", "mean", "max", "min"], +) +def test_expanding_func(func, static_comp, has_min_periods, series, frame, nan_locs): + def expanding_func(x, min_periods=1, center=False, axis=0): + exp = x.expanding(min_periods=min_periods, center=center, axis=axis) + return getattr(exp, func)() + + _check_expanding( + expanding_func, + static_comp, + preserve_nan=False, + series=series, + frame=frame, + nan_locs=nan_locs, + ) + _check_expanding_has_min_periods(expanding_func, static_comp, has_min_periods) + - # min_periods=0 - result0 = func(ser, min_periods=0) - result1 = func(ser, min_periods=1) - tm.assert_almost_equal(result0, result1) - else: - result = func(ser) - tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) +@pytest.mark.parametrize("has_min_periods", [True, False]) +def test_expanding_apply(engine_and_raw, has_min_periods, series, frame, nan_locs): + + engine, raw = engine_and_raw + + def expanding_mean(x, min_periods=1): + + exp = x.expanding(min_periods=min_periods) + result = exp.apply(lambda x: x.mean(), raw=raw, engine=engine) + return result + + # TODO(jreback), needed to add preserve_nan=False + # here to make this pass + _check_expanding( + expanding_mean, + np.mean, + preserve_nan=False, + series=series, + frame=frame, + nan_locs=nan_locs, + ) + _check_expanding_has_min_periods(expanding_mean, np.mean, has_min_periods) @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) diff --git a/pandas/tests/window/moments/test_moments_consistency_rolling.py b/pandas/tests/window/moments/test_moments_consistency_rolling.py index 2c37baeae13b7..a3de8aa69f840 100644 --- a/pandas/tests/window/moments/test_moments_consistency_rolling.py +++ b/pandas/tests/window/moments/test_moments_consistency_rolling.py @@ -12,7 +12,6 @@ import pandas._testing as tm from pandas.core.window.common import _flex_binary_moment from pandas.tests.window.common import ( - Base, check_pairwise_moment, moments_consistency_cov_data, moments_consistency_is_constant, @@ -33,60 +32,56 @@ def _rolling_consistency_cases(): yield window, min_periods, center -class TestRollingMomentsConsistency(Base): - def setup_method(self, method): - self._create_data() +# binary moments +def test_rolling_cov(series): + A = series + B = A + randn(len(A)) - # binary moments - def test_rolling_cov(self): - A = self.series - B = A + randn(len(A)) + result = A.rolling(window=50, min_periods=25).cov(B) + tm.assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1]) - result = A.rolling(window=50, min_periods=25).cov(B) - tm.assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1]) - def test_rolling_corr(self): - A = self.series - B = A + randn(len(A)) +def test_rolling_corr(series): + A = series + B = A + randn(len(A)) - result = A.rolling(window=50, min_periods=25).corr(B) - tm.assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1]) + result = A.rolling(window=50, min_periods=25).corr(B) + tm.assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1]) - # test for correct bias correction - a = tm.makeTimeSeries() - b = tm.makeTimeSeries() - a[:5] = np.nan - b[:10] = np.nan + # test for correct bias correction + a = tm.makeTimeSeries() + b = tm.makeTimeSeries() + a[:5] = np.nan + b[:10] = np.nan - result = a.rolling(window=len(a), min_periods=1).corr(b) - tm.assert_almost_equal(result[-1], a.corr(b)) + result = a.rolling(window=len(a), min_periods=1).corr(b) + tm.assert_almost_equal(result[-1], a.corr(b)) - @pytest.mark.parametrize("func", ["cov", "corr"]) - def test_rolling_pairwise_cov_corr(self, func): - check_pairwise_moment(self.frame, "rolling", func, window=10, min_periods=5) - @pytest.mark.parametrize("method", ["corr", "cov"]) - def test_flex_binary_frame(self, method): - series = self.frame[1] +@pytest.mark.parametrize("func", ["cov", "corr"]) +def test_rolling_pairwise_cov_corr(func, frame): + check_pairwise_moment(frame, "rolling", func, window=10, min_periods=5) - res = getattr(series.rolling(window=10), method)(self.frame) - res2 = getattr(self.frame.rolling(window=10), method)(series) - exp = self.frame.apply(lambda x: getattr(series.rolling(window=10), method)(x)) - tm.assert_frame_equal(res, exp) - tm.assert_frame_equal(res2, exp) +@pytest.mark.parametrize("method", ["corr", "cov"]) +def test_flex_binary_frame(method, frame): + series = frame[1] - frame2 = self.frame.copy() - frame2.values[:] = np.random.randn(*frame2.shape) + res = getattr(series.rolling(window=10), method)(frame) + res2 = getattr(frame.rolling(window=10), method)(series) + exp = frame.apply(lambda x: getattr(series.rolling(window=10), method)(x)) - res3 = getattr(self.frame.rolling(window=10), method)(frame2) - exp = DataFrame( - { - k: getattr(self.frame[k].rolling(window=10), method)(frame2[k]) - for k in self.frame - } - ) - tm.assert_frame_equal(res3, exp) + tm.assert_frame_equal(res, exp) + tm.assert_frame_equal(res2, exp) + + frame2 = frame.copy() + frame2.values[:] = np.random.randn(*frame2.shape) + + res3 = getattr(frame.rolling(window=10), method)(frame2) + exp = DataFrame( + {k: getattr(frame[k].rolling(window=10), method)(frame2[k]) for k in frame} + ) + tm.assert_frame_equal(res3, exp) @pytest.mark.slow diff --git a/pandas/tests/window/moments/test_moments_ewm.py b/pandas/tests/window/moments/test_moments_ewm.py index 162917fff9363..c6a92c0ad47b6 100644 --- a/pandas/tests/window/moments/test_moments_ewm.py +++ b/pandas/tests/window/moments/test_moments_ewm.py @@ -5,257 +5,248 @@ import pandas as pd from pandas import DataFrame, Series import pandas._testing as tm -from pandas.tests.window.common import Base -@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") -class TestMoments(Base): - def setup_method(self, method): - self._create_data() +def check_ew(name=None, preserve_nan=False, series=None, frame=None, nan_locs=None): + series_result = getattr(series.ewm(com=10), name)() + assert isinstance(series_result, Series) + + frame_result = getattr(frame.ewm(com=10), name)() + assert type(frame_result) == DataFrame + + result = getattr(series.ewm(com=10), name)() + if preserve_nan: + assert result[nan_locs].isna().all() + + +def test_ewma(series, frame, nan_locs): + check_ew(name="mean", frame=frame, series=series, nan_locs=nan_locs) + + vals = pd.Series(np.zeros(1000)) + vals[5] = 1 + result = vals.ewm(span=100, adjust=False).mean().sum() + assert np.abs(result - 1) < 1e-2 + + +@pytest.mark.parametrize("adjust", [True, False]) +@pytest.mark.parametrize("ignore_na", [True, False]) +def test_ewma_cases(adjust, ignore_na): + # try adjust/ignore_na args matrix + + s = Series([1.0, 2.0, 4.0, 8.0]) + + if adjust: + expected = Series([1.0, 1.6, 2.736842, 4.923077]) + else: + expected = Series([1.0, 1.333333, 2.222222, 4.148148]) + + result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean() + tm.assert_series_equal(result, expected) + + +def test_ewma_nan_handling(): + s = Series([1.0] + [np.nan] * 5 + [1.0]) + result = s.ewm(com=5).mean() + tm.assert_series_equal(result, Series([1.0] * len(s))) + + s = Series([np.nan] * 2 + [1.0] + [np.nan] * 2 + [1.0]) + result = s.ewm(com=5).mean() + tm.assert_series_equal(result, Series([np.nan] * 2 + [1.0] * 4)) + + # GH 7603 + s0 = Series([np.nan, 1.0, 101.0]) + s1 = Series([1.0, np.nan, 101.0]) + s2 = Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]) + s3 = Series([1.0, np.nan, 101.0, 50.0]) + com = 2.0 + alpha = 1.0 / (1.0 + com) + + def simple_wma(s, w): + return (s.multiply(w).cumsum() / w.cumsum()).fillna(method="ffill") + + for (s, adjust, ignore_na, w) in [ + (s0, True, False, [np.nan, (1.0 - alpha), 1.0]), + (s0, True, True, [np.nan, (1.0 - alpha), 1.0]), + (s0, False, False, [np.nan, (1.0 - alpha), alpha]), + (s0, False, True, [np.nan, (1.0 - alpha), alpha]), + (s1, True, False, [(1.0 - alpha) ** 2, np.nan, 1.0]), + (s1, True, True, [(1.0 - alpha), np.nan, 1.0]), + (s1, False, False, [(1.0 - alpha) ** 2, np.nan, alpha]), + (s1, False, True, [(1.0 - alpha), np.nan, alpha]), + (s2, True, False, [np.nan, (1.0 - alpha) ** 3, np.nan, np.nan, 1.0, np.nan],), + (s2, True, True, [np.nan, (1.0 - alpha), np.nan, np.nan, 1.0, np.nan]), + ( + s2, + False, + False, + [np.nan, (1.0 - alpha) ** 3, np.nan, np.nan, alpha, np.nan], + ), + (s2, False, True, [np.nan, (1.0 - alpha), np.nan, np.nan, alpha, np.nan]), + (s3, True, False, [(1.0 - alpha) ** 3, np.nan, (1.0 - alpha), 1.0]), + (s3, True, True, [(1.0 - alpha) ** 2, np.nan, (1.0 - alpha), 1.0]), + ( + s3, + False, + False, + [ + (1.0 - alpha) ** 3, + np.nan, + (1.0 - alpha) * alpha, + alpha * ((1.0 - alpha) ** 2 + alpha), + ], + ), + (s3, False, True, [(1.0 - alpha) ** 2, np.nan, (1.0 - alpha) * alpha, alpha],), + ]: + expected = simple_wma(s, Series(w)) + result = s.ewm(com=com, adjust=adjust, ignore_na=ignore_na).mean() - def test_ewma(self): - self._check_ew(name="mean") - - vals = pd.Series(np.zeros(1000)) - vals[5] = 1 - result = vals.ewm(span=100, adjust=False).mean().sum() - assert np.abs(result - 1) < 1e-2 - - @pytest.mark.parametrize("adjust", [True, False]) - @pytest.mark.parametrize("ignore_na", [True, False]) - def test_ewma_cases(self, adjust, ignore_na): - # try adjust/ignore_na args matrix - - s = Series([1.0, 2.0, 4.0, 8.0]) - - if adjust: - expected = Series([1.0, 1.6, 2.736842, 4.923077]) - else: - expected = Series([1.0, 1.333333, 2.222222, 4.148148]) - - result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean() tm.assert_series_equal(result, expected) + if ignore_na is False: + # check that ignore_na defaults to False + result = s.ewm(com=com, adjust=adjust).mean() + tm.assert_series_equal(result, expected) - def test_ewma_nan_handling(self): - s = Series([1.0] + [np.nan] * 5 + [1.0]) - result = s.ewm(com=5).mean() - tm.assert_series_equal(result, Series([1.0] * len(s))) - - s = Series([np.nan] * 2 + [1.0] + [np.nan] * 2 + [1.0]) - result = s.ewm(com=5).mean() - tm.assert_series_equal(result, Series([np.nan] * 2 + [1.0] * 4)) - - # GH 7603 - s0 = Series([np.nan, 1.0, 101.0]) - s1 = Series([1.0, np.nan, 101.0]) - s2 = Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]) - s3 = Series([1.0, np.nan, 101.0, 50.0]) - com = 2.0 - alpha = 1.0 / (1.0 + com) - - def simple_wma(s, w): - return (s.multiply(w).cumsum() / w.cumsum()).fillna(method="ffill") - - for (s, adjust, ignore_na, w) in [ - (s0, True, False, [np.nan, (1.0 - alpha), 1.0]), - (s0, True, True, [np.nan, (1.0 - alpha), 1.0]), - (s0, False, False, [np.nan, (1.0 - alpha), alpha]), - (s0, False, True, [np.nan, (1.0 - alpha), alpha]), - (s1, True, False, [(1.0 - alpha) ** 2, np.nan, 1.0]), - (s1, True, True, [(1.0 - alpha), np.nan, 1.0]), - (s1, False, False, [(1.0 - alpha) ** 2, np.nan, alpha]), - (s1, False, True, [(1.0 - alpha), np.nan, alpha]), - ( - s2, - True, - False, - [np.nan, (1.0 - alpha) ** 3, np.nan, np.nan, 1.0, np.nan], - ), - (s2, True, True, [np.nan, (1.0 - alpha), np.nan, np.nan, 1.0, np.nan]), - ( - s2, - False, - False, - [np.nan, (1.0 - alpha) ** 3, np.nan, np.nan, alpha, np.nan], - ), - (s2, False, True, [np.nan, (1.0 - alpha), np.nan, np.nan, alpha, np.nan]), - (s3, True, False, [(1.0 - alpha) ** 3, np.nan, (1.0 - alpha), 1.0]), - (s3, True, True, [(1.0 - alpha) ** 2, np.nan, (1.0 - alpha), 1.0]), - ( - s3, - False, - False, - [ - (1.0 - alpha) ** 3, - np.nan, - (1.0 - alpha) * alpha, - alpha * ((1.0 - alpha) ** 2 + alpha), - ], - ), - ( - s3, - False, - True, - [(1.0 - alpha) ** 2, np.nan, (1.0 - alpha) * alpha, alpha], - ), - ]: - expected = simple_wma(s, Series(w)) - result = s.ewm(com=com, adjust=adjust, ignore_na=ignore_na).mean() - tm.assert_series_equal(result, expected) - if ignore_na is False: - # check that ignore_na defaults to False - result = s.ewm(com=com, adjust=adjust).mean() - tm.assert_series_equal(result, expected) - - def test_ewmvar(self): - self._check_ew(name="var") - - def test_ewmvol(self): - self._check_ew(name="vol") - - def test_ewma_span_com_args(self): - A = self.series.ewm(com=9.5).mean() - B = self.series.ewm(span=20).mean() - tm.assert_almost_equal(A, B) - - with pytest.raises(ValueError): - self.series.ewm(com=9.5, span=20) - with pytest.raises(ValueError): - self.series.ewm().mean() - - def test_ewma_halflife_arg(self): - A = self.series.ewm(com=13.932726172912965).mean() - B = self.series.ewm(halflife=10.0).mean() - tm.assert_almost_equal(A, B) - - with pytest.raises(ValueError): - self.series.ewm(span=20, halflife=50) - with pytest.raises(ValueError): - self.series.ewm(com=9.5, halflife=50) - with pytest.raises(ValueError): - self.series.ewm(com=9.5, span=20, halflife=50) - with pytest.raises(ValueError): - self.series.ewm() - - def test_ewm_alpha(self): - # GH 10789 - s = Series(self.arr) - a = s.ewm(alpha=0.61722699889169674).mean() - b = s.ewm(com=0.62014947789973052).mean() - c = s.ewm(span=2.240298955799461).mean() - d = s.ewm(halflife=0.721792864318).mean() - tm.assert_series_equal(a, b) - tm.assert_series_equal(a, c) - tm.assert_series_equal(a, d) - - def test_ewm_alpha_arg(self): - # GH 10789 - s = self.series - with pytest.raises(ValueError): - s.ewm() - with pytest.raises(ValueError): - s.ewm(com=10.0, alpha=0.5) - with pytest.raises(ValueError): - s.ewm(span=10.0, alpha=0.5) - with pytest.raises(ValueError): - s.ewm(halflife=10.0, alpha=0.5) - - def test_ewm_domain_checks(self): - # GH 12492 - s = Series(self.arr) - msg = "comass must satisfy: comass >= 0" - with pytest.raises(ValueError, match=msg): - s.ewm(com=-0.1) - s.ewm(com=0.0) - s.ewm(com=0.1) - - msg = "span must satisfy: span >= 1" - with pytest.raises(ValueError, match=msg): - s.ewm(span=-0.1) - with pytest.raises(ValueError, match=msg): - s.ewm(span=0.0) - with pytest.raises(ValueError, match=msg): - s.ewm(span=0.9) - s.ewm(span=1.0) - s.ewm(span=1.1) - - msg = "halflife must satisfy: halflife > 0" - with pytest.raises(ValueError, match=msg): - s.ewm(halflife=-0.1) - with pytest.raises(ValueError, match=msg): - s.ewm(halflife=0.0) - s.ewm(halflife=0.1) - - msg = "alpha must satisfy: 0 < alpha <= 1" - with pytest.raises(ValueError, match=msg): - s.ewm(alpha=-0.1) - with pytest.raises(ValueError, match=msg): - s.ewm(alpha=0.0) - s.ewm(alpha=0.1) - s.ewm(alpha=1.0) - with pytest.raises(ValueError, match=msg): - s.ewm(alpha=1.1) - - @pytest.mark.parametrize("method", ["mean", "vol", "var"]) - def test_ew_empty_series(self, method): - vals = pd.Series([], dtype=np.float64) - - ewm = vals.ewm(3) - result = getattr(ewm, method)() - tm.assert_almost_equal(result, vals) - - def _check_ew(self, name=None, preserve_nan=False): - series_result = getattr(self.series.ewm(com=10), name)() - assert isinstance(series_result, Series) - - frame_result = getattr(self.frame.ewm(com=10), name)() - assert type(frame_result) == DataFrame - - result = getattr(self.series.ewm(com=10), name)() - if preserve_nan: - assert result[self._nan_locs].isna().all() - - @pytest.mark.parametrize("min_periods", [0, 1]) - @pytest.mark.parametrize("name", ["mean", "var", "vol"]) - def test_ew_min_periods(self, min_periods, name): - # excluding NaNs correctly - arr = randn(50) - arr[:10] = np.NaN - arr[-10:] = np.NaN - s = Series(arr) - - # check min_periods - # GH 7898 - result = getattr(s.ewm(com=50, min_periods=2), name)() +@pytest.mark.parametrize("name", ["var", "vol"]) +def test_ewmvar_ewmvol(series, frame, nan_locs, name): + check_ew(name=name, frame=frame, series=series, nan_locs=nan_locs) + + +def test_ewma_span_com_args(series): + A = series.ewm(com=9.5).mean() + B = series.ewm(span=20).mean() + tm.assert_almost_equal(A, B) + + with pytest.raises(ValueError): + series.ewm(com=9.5, span=20) + with pytest.raises(ValueError): + series.ewm().mean() + + +def test_ewma_halflife_arg(series): + A = series.ewm(com=13.932726172912965).mean() + B = series.ewm(halflife=10.0).mean() + tm.assert_almost_equal(A, B) + + with pytest.raises(ValueError): + series.ewm(span=20, halflife=50) + with pytest.raises(ValueError): + series.ewm(com=9.5, halflife=50) + with pytest.raises(ValueError): + series.ewm(com=9.5, span=20, halflife=50) + with pytest.raises(ValueError): + series.ewm() + + +def test_ewm_alpha(arr): + # GH 10789 + s = Series(arr) + a = s.ewm(alpha=0.61722699889169674).mean() + b = s.ewm(com=0.62014947789973052).mean() + c = s.ewm(span=2.240298955799461).mean() + d = s.ewm(halflife=0.721792864318).mean() + tm.assert_series_equal(a, b) + tm.assert_series_equal(a, c) + tm.assert_series_equal(a, d) + + +def test_ewm_alpha_arg(series): + # GH 10789 + s = series + with pytest.raises(ValueError): + s.ewm() + with pytest.raises(ValueError): + s.ewm(com=10.0, alpha=0.5) + with pytest.raises(ValueError): + s.ewm(span=10.0, alpha=0.5) + with pytest.raises(ValueError): + s.ewm(halflife=10.0, alpha=0.5) + + +def test_ewm_domain_checks(arr): + # GH 12492 + s = Series(arr) + msg = "comass must satisfy: comass >= 0" + with pytest.raises(ValueError, match=msg): + s.ewm(com=-0.1) + s.ewm(com=0.0) + s.ewm(com=0.1) + + msg = "span must satisfy: span >= 1" + with pytest.raises(ValueError, match=msg): + s.ewm(span=-0.1) + with pytest.raises(ValueError, match=msg): + s.ewm(span=0.0) + with pytest.raises(ValueError, match=msg): + s.ewm(span=0.9) + s.ewm(span=1.0) + s.ewm(span=1.1) + + msg = "halflife must satisfy: halflife > 0" + with pytest.raises(ValueError, match=msg): + s.ewm(halflife=-0.1) + with pytest.raises(ValueError, match=msg): + s.ewm(halflife=0.0) + s.ewm(halflife=0.1) + + msg = "alpha must satisfy: 0 < alpha <= 1" + with pytest.raises(ValueError, match=msg): + s.ewm(alpha=-0.1) + with pytest.raises(ValueError, match=msg): + s.ewm(alpha=0.0) + s.ewm(alpha=0.1) + s.ewm(alpha=1.0) + with pytest.raises(ValueError, match=msg): + s.ewm(alpha=1.1) + + +@pytest.mark.parametrize("method", ["mean", "vol", "var"]) +def test_ew_empty_series(method): + vals = pd.Series([], dtype=np.float64) + + ewm = vals.ewm(3) + result = getattr(ewm, method)() + tm.assert_almost_equal(result, vals) + + +@pytest.mark.parametrize("min_periods", [0, 1]) +@pytest.mark.parametrize("name", ["mean", "var", "vol"]) +def test_ew_min_periods(min_periods, name): + # excluding NaNs correctly + arr = randn(50) + arr[:10] = np.NaN + arr[-10:] = np.NaN + s = Series(arr) + + # check min_periods + # GH 7898 + result = getattr(s.ewm(com=50, min_periods=2), name)() + assert result[:11].isna().all() + assert not result[11:].isna().any() + + result = getattr(s.ewm(com=50, min_periods=min_periods), name)() + if name == "mean": + assert result[:10].isna().all() + assert not result[10:].isna().any() + else: + # ewm.std, ewm.vol, ewm.var (with bias=False) require at least + # two values assert result[:11].isna().all() assert not result[11:].isna().any() - result = getattr(s.ewm(com=50, min_periods=min_periods), name)() - if name == "mean": - assert result[:10].isna().all() - assert not result[10:].isna().any() - else: - # ewm.std, ewm.vol, ewm.var (with bias=False) require at least - # two values - assert result[:11].isna().all() - assert not result[11:].isna().any() - - # check series of length 0 - result = getattr( - Series(dtype=object).ewm(com=50, min_periods=min_periods), name - )() - tm.assert_series_equal(result, Series(dtype="float64")) - - # check series of length 1 - result = getattr(Series([1.0]).ewm(50, min_periods=min_periods), name)() - if name == "mean": - tm.assert_series_equal(result, Series([1.0])) - else: - # ewm.std, ewm.vol, ewm.var with bias=False require at least - # two values - tm.assert_series_equal(result, Series([np.NaN])) - - # pass in ints - result2 = getattr(Series(np.arange(50)).ewm(span=10), name)() - assert result2.dtype == np.float_ + # check series of length 0 + result = getattr(Series(dtype=object).ewm(com=50, min_periods=min_periods), name)() + tm.assert_series_equal(result, Series(dtype="float64")) + + # check series of length 1 + result = getattr(Series([1.0]).ewm(50, min_periods=min_periods), name)() + if name == "mean": + tm.assert_series_equal(result, Series([1.0])) + else: + # ewm.std, ewm.vol, ewm.var with bias=False require at least + # two values + tm.assert_series_equal(result, Series([np.NaN])) + + # pass in ints + result2 = getattr(Series(np.arange(50)).ewm(span=10), name)() + assert result2.dtype == np.float_ diff --git a/pandas/tests/window/moments/test_moments_rolling.py b/pandas/tests/window/moments/test_moments_rolling.py index 399b76e92fc4f..3e5475e6b274f 100644 --- a/pandas/tests/window/moments/test_moments_rolling.py +++ b/pandas/tests/window/moments/test_moments_rolling.py @@ -10,912 +10,947 @@ import pandas as pd from pandas import DataFrame, Series, isna, notna import pandas._testing as tm -from pandas.tests.window.common import Base import pandas.tseries.offsets as offsets -@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") -class TestMoments(Base): - def setup_method(self, method): - self._create_data() +def _check_moment_func( + static_comp, + name, + raw, + has_min_periods=True, + has_center=True, + has_time_rule=True, + fill_value=None, + zero_min_periods_equal=True, + series=None, + frame=None, + **kwargs, +): + + # inject raw + if name == "apply": + kwargs = copy.copy(kwargs) + kwargs["raw"] = raw + + def get_result(obj, window, min_periods=None, center=False): + r = obj.rolling(window=window, min_periods=min_periods, center=center) + return getattr(r, name)(**kwargs) + + series_result = get_result(series, window=50) + assert isinstance(series_result, Series) + tm.assert_almost_equal(series_result.iloc[-1], static_comp(series[-50:])) + + frame_result = get_result(frame, window=50) + assert isinstance(frame_result, DataFrame) + tm.assert_series_equal( + frame_result.iloc[-1, :], + frame.iloc[-50:, :].apply(static_comp, axis=0, raw=raw), + check_names=False, + ) - def test_centered_axis_validation(self): + # check time_rule works + if has_time_rule: + win = 25 + minp = 10 + ser = series[::2].resample("B").mean() + frm = frame[::2].resample("B").mean() - # ok - Series(np.ones(10)).rolling(window=3, center=True, axis=0).mean() + if has_min_periods: + series_result = get_result(ser, window=win, min_periods=minp) + frame_result = get_result(frm, window=win, min_periods=minp) + else: + series_result = get_result(ser, window=win, min_periods=0) + frame_result = get_result(frm, window=win, min_periods=0) - # bad axis - with pytest.raises(ValueError): - Series(np.ones(10)).rolling(window=3, center=True, axis=1).mean() + last_date = series_result.index[-1] + prev_date = last_date - 24 * offsets.BDay() - # ok ok - DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=0).mean() - DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=1).mean() + trunc_series = series[::2].truncate(prev_date, last_date) + trunc_frame = frame[::2].truncate(prev_date, last_date) - # bad axis - with pytest.raises(ValueError): - (DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=2).mean()) + tm.assert_almost_equal(series_result[-1], static_comp(trunc_series)) - def test_rolling_sum(self, raw): - self._check_moment_func( - np.nansum, name="sum", zero_min_periods_equal=False, raw=raw + tm.assert_series_equal( + frame_result.xs(last_date), + trunc_frame.apply(static_comp, raw=raw), + check_names=False, ) - def test_rolling_count(self, raw): - counter = lambda x: np.isfinite(x).astype(float).sum() - self._check_moment_func( - counter, name="count", has_min_periods=False, fill_value=0, raw=raw - ) + # excluding NaNs correctly + obj = Series(randn(50)) + obj[:10] = np.NaN + obj[-10:] = np.NaN + if has_min_periods: + result = get_result(obj, 50, min_periods=30) + tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10])) + + # min_periods is working correctly + result = get_result(obj, 20, min_periods=15) + assert isna(result.iloc[23]) + assert not isna(result.iloc[24]) + + assert not isna(result.iloc[-6]) + assert isna(result.iloc[-5]) + + obj2 = Series(randn(20)) + result = get_result(obj2, 10, min_periods=5) + assert isna(result.iloc[3]) + assert notna(result.iloc[4]) + + if zero_min_periods_equal: + # min_periods=0 may be equivalent to min_periods=1 + result0 = get_result(obj, 20, min_periods=0) + result1 = get_result(obj, 20, min_periods=1) + tm.assert_almost_equal(result0, result1) + else: + result = get_result(obj, 50) + tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10])) + + # window larger than series length (#7297) + if has_min_periods: + for minp in (0, len(series) - 1, len(series)): + result = get_result(series, len(series) + 1, min_periods=minp) + expected = get_result(series, len(series), min_periods=minp) + nan_mask = isna(result) + tm.assert_series_equal(nan_mask, isna(expected)) - def test_rolling_mean(self, raw): - self._check_moment_func(np.mean, name="mean", raw=raw) + nan_mask = ~nan_mask + tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) + else: + result = get_result(series, len(series) + 1, min_periods=0) + expected = get_result(series, len(series), min_periods=0) + nan_mask = isna(result) + tm.assert_series_equal(nan_mask, isna(expected)) - @td.skip_if_no_scipy - def test_cmov_mean(self): - # GH 8238 - vals = np.array( - [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48] - ) - result = Series(vals).rolling(5, center=True).mean() - expected = Series( - [ - np.nan, - np.nan, - 9.962, - 11.27, - 11.564, - 12.516, - 12.818, - 12.952, - np.nan, - np.nan, - ] - ) - tm.assert_series_equal(expected, result) + nan_mask = ~nan_mask + tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) - @td.skip_if_no_scipy - def test_cmov_window(self): - # GH 8238 - vals = np.array( - [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48] - ) - result = Series(vals).rolling(5, win_type="boxcar", center=True).mean() - expected = Series( - [ - np.nan, - np.nan, - 9.962, - 11.27, - 11.564, - 12.516, - 12.818, - 12.952, - np.nan, - np.nan, - ] - ) - tm.assert_series_equal(expected, result) - - @td.skip_if_no_scipy - def test_cmov_window_corner(self): - # GH 8238 - # all nan - vals = pd.Series([np.nan] * 10) - result = vals.rolling(5, center=True, win_type="boxcar").mean() - assert np.isnan(result).all() - - # empty - vals = pd.Series([], dtype=object) - result = vals.rolling(5, center=True, win_type="boxcar").mean() - assert len(result) == 0 - - # shorter than window - vals = pd.Series(np.random.randn(5)) - result = vals.rolling(10, win_type="boxcar").mean() - assert np.isnan(result).all() - assert len(result) == 5 - - @td.skip_if_no_scipy - @pytest.mark.parametrize( - "f,xp", - [ - ( - "mean", - [ - [np.nan, np.nan], - [np.nan, np.nan], - [9.252, 9.392], - [8.644, 9.906], - [8.87, 10.208], - [6.81, 8.588], - [7.792, 8.644], - [9.05, 7.824], - [np.nan, np.nan], - [np.nan, np.nan], - ], - ), - ( - "std", - [ - [np.nan, np.nan], - [np.nan, np.nan], - [3.789706, 4.068313], - [3.429232, 3.237411], - [3.589269, 3.220810], - [3.405195, 2.380655], - [3.281839, 2.369869], - [3.676846, 1.801799], - [np.nan, np.nan], - [np.nan, np.nan], - ], - ), - ( - "var", - [ - [np.nan, np.nan], - [np.nan, np.nan], - [14.36187, 16.55117], - [11.75963, 10.48083], - [12.88285, 10.37362], - [11.59535, 5.66752], - [10.77047, 5.61628], - [13.51920, 3.24648], - [np.nan, np.nan], - [np.nan, np.nan], - ], - ), - ( - "sum", - [ - [np.nan, np.nan], - [np.nan, np.nan], - [46.26, 46.96], - [43.22, 49.53], - [44.35, 51.04], - [34.05, 42.94], - [38.96, 43.22], - [45.25, 39.12], - [np.nan, np.nan], - [np.nan, np.nan], - ], - ), - ], - ) - def test_cmov_window_frame(self, f, xp): - # Gh 8238 - df = DataFrame( - np.array( - [ - [12.18, 3.64], - [10.18, 9.16], - [13.24, 14.61], - [4.51, 8.11], - [6.15, 11.44], - [9.14, 6.21], - [11.31, 10.67], - [2.94, 6.51], - [9.42, 8.39], - [12.44, 7.34], - ] + # check center=True + if has_center: + if has_min_periods: + result = get_result(obj, 20, min_periods=15, center=True) + expected = get_result( + pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=15 + )[9:].reset_index(drop=True) + else: + result = get_result(obj, 20, min_periods=0, center=True) + print(result) + expected = get_result( + pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=0 + )[9:].reset_index(drop=True) + + tm.assert_series_equal(result, expected) + + # shifter index + s = [f"x{x:d}" for x in range(12)] + + if has_min_periods: + minp = 10 + + series_xp = ( + get_result( + series.reindex(list(series.index) + s), window=25, min_periods=minp, + ) + .shift(-12) + .reindex(series.index) + ) + frame_xp = ( + get_result( + frame.reindex(list(frame.index) + s), window=25, min_periods=minp, + ) + .shift(-12) + .reindex(frame.index) ) - ) - xp = DataFrame(np.array(xp)) - roll = df.rolling(5, win_type="boxcar", center=True) - rs = getattr(roll, f)() + series_rs = get_result(series, window=25, min_periods=minp, center=True) + frame_rs = get_result(frame, window=25, min_periods=minp, center=True) - tm.assert_frame_equal(xp, rs) + else: + series_xp = ( + get_result( + series.reindex(list(series.index) + s), window=25, min_periods=0, + ) + .shift(-12) + .reindex(series.index) + ) + frame_xp = ( + get_result( + frame.reindex(list(frame.index) + s), window=25, min_periods=0, + ) + .shift(-12) + .reindex(frame.index) + ) - @td.skip_if_no_scipy - def test_cmov_window_na_min_periods(self): - # min_periods - vals = Series(np.random.randn(10)) - vals[4] = np.nan - vals[8] = np.nan + series_rs = get_result(series, window=25, min_periods=0, center=True) + frame_rs = get_result(frame, window=25, min_periods=0, center=True) - xp = vals.rolling(5, min_periods=4, center=True).mean() - rs = vals.rolling(5, win_type="boxcar", min_periods=4, center=True).mean() - tm.assert_series_equal(xp, rs) + if fill_value is not None: + series_xp = series_xp.fillna(fill_value) + frame_xp = frame_xp.fillna(fill_value) + tm.assert_series_equal(series_xp, series_rs) + tm.assert_frame_equal(frame_xp, frame_rs) - @td.skip_if_no_scipy - def test_cmov_window_regular(self, win_types): - # GH 8238 - vals = np.array( - [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48] - ) - xps = { - "hamming": [ - np.nan, - np.nan, - 8.71384, - 9.56348, - 12.38009, - 14.03687, - 13.8567, - 11.81473, - np.nan, - np.nan, - ], - "triang": [ - np.nan, - np.nan, - 9.28667, - 10.34667, - 12.00556, - 13.33889, - 13.38, - 12.33667, - np.nan, - np.nan, - ], - "barthann": [ - np.nan, - np.nan, - 8.4425, - 9.1925, - 12.5575, - 14.3675, - 14.0825, - 11.5675, - np.nan, - np.nan, - ], - "bohman": [ - np.nan, - np.nan, - 7.61599, - 9.1764, - 12.83559, - 14.17267, - 14.65923, - 11.10401, - np.nan, - np.nan, - ], - "blackmanharris": [ - np.nan, - np.nan, - 6.97691, - 9.16438, - 13.05052, - 14.02156, - 15.10512, - 10.74574, - np.nan, - np.nan, - ], - "nuttall": [ - np.nan, - np.nan, - 7.04618, - 9.16786, - 13.02671, - 14.03559, - 15.05657, - 10.78514, - np.nan, - np.nan, - ], - "blackman": [ - np.nan, - np.nan, - 7.73345, - 9.17869, - 12.79607, - 14.20036, - 14.57726, - 11.16988, - np.nan, - np.nan, - ], - "bartlett": [ - np.nan, - np.nan, - 8.4425, - 9.1925, - 12.5575, - 14.3675, - 14.0825, - 11.5675, - np.nan, - np.nan, - ], - } - - xp = Series(xps[win_types]) - rs = Series(vals).rolling(5, win_type=win_types, center=True).mean() - tm.assert_series_equal(xp, rs) - - @td.skip_if_no_scipy - def test_cmov_window_regular_linear_range(self, win_types): - # GH 8238 - vals = np.array(range(10), dtype=np.float) - xp = vals.copy() - xp[:2] = np.nan - xp[-2:] = np.nan - xp = Series(xp) - - rs = Series(vals).rolling(5, win_type=win_types, center=True).mean() - tm.assert_series_equal(xp, rs) - - @td.skip_if_no_scipy - def test_cmov_window_regular_missing_data(self, win_types): - # GH 8238 - vals = np.array( - [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, np.nan, 10.63, 14.48] - ) - xps = { - "bartlett": [ - np.nan, - np.nan, - 9.70333, - 10.5225, - 8.4425, - 9.1925, - 12.5575, - 14.3675, - 15.61667, - 13.655, - ], - "blackman": [ - np.nan, - np.nan, - 9.04582, - 11.41536, - 7.73345, - 9.17869, - 12.79607, - 14.20036, - 15.8706, - 13.655, - ], - "barthann": [ - np.nan, - np.nan, - 9.70333, - 10.5225, - 8.4425, - 9.1925, - 12.5575, - 14.3675, - 15.61667, - 13.655, - ], - "bohman": [ - np.nan, - np.nan, - 8.9444, - 11.56327, - 7.61599, - 9.1764, - 12.83559, - 14.17267, - 15.90976, - 13.655, - ], - "hamming": [ - np.nan, - np.nan, - 9.59321, - 10.29694, - 8.71384, - 9.56348, - 12.38009, - 14.20565, - 15.24694, - 13.69758, + +def test_centered_axis_validation(): + + # ok + Series(np.ones(10)).rolling(window=3, center=True, axis=0).mean() + + # bad axis + with pytest.raises(ValueError): + Series(np.ones(10)).rolling(window=3, center=True, axis=1).mean() + + # ok ok + DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=0).mean() + DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=1).mean() + + # bad axis + with pytest.raises(ValueError): + (DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=2).mean()) + + +def test_rolling_sum(raw, series, frame): + _check_moment_func( + np.nansum, + name="sum", + zero_min_periods_equal=False, + raw=raw, + series=series, + frame=frame, + ) + + +def test_rolling_count(raw, series, frame): + counter = lambda x: np.isfinite(x).astype(float).sum() + _check_moment_func( + counter, + name="count", + has_min_periods=False, + fill_value=0, + raw=raw, + series=series, + frame=frame, + ) + + +def test_rolling_mean(raw, series, frame): + _check_moment_func(np.mean, name="mean", raw=raw, series=series, frame=frame) + + +@td.skip_if_no_scipy +def test_cmov_mean(): + # GH 8238 + vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) + result = Series(vals).rolling(5, center=True).mean() + expected_values = [ + np.nan, + np.nan, + 9.962, + 11.27, + 11.564, + 12.516, + 12.818, + 12.952, + np.nan, + np.nan, + ] + expected = Series(expected_values) + tm.assert_series_equal(expected, result) + + +@td.skip_if_no_scipy +def test_cmov_window(): + # GH 8238 + vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) + result = Series(vals).rolling(5, win_type="boxcar", center=True).mean() + expected_values = [ + np.nan, + np.nan, + 9.962, + 11.27, + 11.564, + 12.516, + 12.818, + 12.952, + np.nan, + np.nan, + ] + expected = Series(expected_values) + tm.assert_series_equal(expected, result) + + +@td.skip_if_no_scipy +def test_cmov_window_corner(): + # GH 8238 + # all nan + vals = pd.Series([np.nan] * 10) + result = vals.rolling(5, center=True, win_type="boxcar").mean() + assert np.isnan(result).all() + + # empty + vals = pd.Series([], dtype=object) + result = vals.rolling(5, center=True, win_type="boxcar").mean() + assert len(result) == 0 + + # shorter than window + vals = pd.Series(np.random.randn(5)) + result = vals.rolling(10, win_type="boxcar").mean() + assert np.isnan(result).all() + assert len(result) == 5 + + +@td.skip_if_no_scipy +@pytest.mark.parametrize( + "f,xp", + [ + ( + "mean", + [ + [np.nan, np.nan], + [np.nan, np.nan], + [9.252, 9.392], + [8.644, 9.906], + [8.87, 10.208], + [6.81, 8.588], + [7.792, 8.644], + [9.05, 7.824], + [np.nan, np.nan], + [np.nan, np.nan], ], - "nuttall": [ - np.nan, - np.nan, - 8.47693, - 12.2821, - 7.04618, - 9.16786, - 13.02671, - 14.03673, - 16.08759, - 13.65553, + ), + ( + "std", + [ + [np.nan, np.nan], + [np.nan, np.nan], + [3.789706, 4.068313], + [3.429232, 3.237411], + [3.589269, 3.220810], + [3.405195, 2.380655], + [3.281839, 2.369869], + [3.676846, 1.801799], + [np.nan, np.nan], + [np.nan, np.nan], ], - "triang": [ - np.nan, - np.nan, - 9.33167, - 9.76125, - 9.28667, - 10.34667, - 12.00556, - 13.82125, - 14.49429, - 13.765, + ), + ( + "var", + [ + [np.nan, np.nan], + [np.nan, np.nan], + [14.36187, 16.55117], + [11.75963, 10.48083], + [12.88285, 10.37362], + [11.59535, 5.66752], + [10.77047, 5.61628], + [13.51920, 3.24648], + [np.nan, np.nan], + [np.nan, np.nan], ], - "blackmanharris": [ - np.nan, - np.nan, - 8.42526, - 12.36824, - 6.97691, - 9.16438, - 13.05052, - 14.02175, - 16.1098, - 13.65509, + ), + ( + "sum", + [ + [np.nan, np.nan], + [np.nan, np.nan], + [46.26, 46.96], + [43.22, 49.53], + [44.35, 51.04], + [34.05, 42.94], + [38.96, 43.22], + [45.25, 39.12], + [np.nan, np.nan], + [np.nan, np.nan], ], - } - - xp = Series(xps[win_types]) - rs = Series(vals).rolling(5, win_type=win_types, min_periods=3).mean() - tm.assert_series_equal(xp, rs) - - @td.skip_if_no_scipy - def test_cmov_window_special(self, win_types_special): - # GH 8238 - kwds = { - "kaiser": {"beta": 1.0}, - "gaussian": {"std": 1.0}, - "general_gaussian": {"power": 2.0, "width": 2.0}, - "exponential": {"tau": 10}, - } - - vals = np.array( - [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48] + ), + ], +) +def test_cmov_window_frame(f, xp): + # Gh 8238 + df = DataFrame( + np.array( + [ + [12.18, 3.64], + [10.18, 9.16], + [13.24, 14.61], + [4.51, 8.11], + [6.15, 11.44], + [9.14, 6.21], + [11.31, 10.67], + [2.94, 6.51], + [9.42, 8.39], + [12.44, 7.34], + ] ) + ) + xp = DataFrame(np.array(xp)) + + roll = df.rolling(5, win_type="boxcar", center=True) + rs = getattr(roll, f)() + + tm.assert_frame_equal(xp, rs) + + +@td.skip_if_no_scipy +def test_cmov_window_na_min_periods(): + # min_periods + vals = Series(np.random.randn(10)) + vals[4] = np.nan + vals[8] = np.nan + + xp = vals.rolling(5, min_periods=4, center=True).mean() + rs = vals.rolling(5, win_type="boxcar", min_periods=4, center=True).mean() + tm.assert_series_equal(xp, rs) + + +@td.skip_if_no_scipy +def test_cmov_window_regular(win_types): + # GH 8238 + vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) + xps = { + "hamming": [ + np.nan, + np.nan, + 8.71384, + 9.56348, + 12.38009, + 14.03687, + 13.8567, + 11.81473, + np.nan, + np.nan, + ], + "triang": [ + np.nan, + np.nan, + 9.28667, + 10.34667, + 12.00556, + 13.33889, + 13.38, + 12.33667, + np.nan, + np.nan, + ], + "barthann": [ + np.nan, + np.nan, + 8.4425, + 9.1925, + 12.5575, + 14.3675, + 14.0825, + 11.5675, + np.nan, + np.nan, + ], + "bohman": [ + np.nan, + np.nan, + 7.61599, + 9.1764, + 12.83559, + 14.17267, + 14.65923, + 11.10401, + np.nan, + np.nan, + ], + "blackmanharris": [ + np.nan, + np.nan, + 6.97691, + 9.16438, + 13.05052, + 14.02156, + 15.10512, + 10.74574, + np.nan, + np.nan, + ], + "nuttall": [ + np.nan, + np.nan, + 7.04618, + 9.16786, + 13.02671, + 14.03559, + 15.05657, + 10.78514, + np.nan, + np.nan, + ], + "blackman": [ + np.nan, + np.nan, + 7.73345, + 9.17869, + 12.79607, + 14.20036, + 14.57726, + 11.16988, + np.nan, + np.nan, + ], + "bartlett": [ + np.nan, + np.nan, + 8.4425, + 9.1925, + 12.5575, + 14.3675, + 14.0825, + 11.5675, + np.nan, + np.nan, + ], + } - xps = { - "gaussian": [ - np.nan, - np.nan, - 8.97297, - 9.76077, - 12.24763, - 13.89053, - 13.65671, - 12.01002, - np.nan, - np.nan, - ], - "general_gaussian": [ - np.nan, - np.nan, - 9.85011, - 10.71589, - 11.73161, - 13.08516, - 12.95111, - 12.74577, - np.nan, - np.nan, - ], - "kaiser": [ - np.nan, - np.nan, - 9.86851, - 11.02969, - 11.65161, - 12.75129, - 12.90702, - 12.83757, - np.nan, - np.nan, - ], - "exponential": [ - np.nan, - np.nan, - 9.83364, - 11.10472, - 11.64551, - 12.66138, - 12.92379, - 12.83770, - np.nan, - np.nan, - ], - } + xp = Series(xps[win_types]) + rs = Series(vals).rolling(5, win_type=win_types, center=True).mean() + tm.assert_series_equal(xp, rs) - xp = Series(xps[win_types_special]) - rs = ( - Series(vals) - .rolling(5, win_type=win_types_special, center=True) - .mean(**kwds[win_types_special]) - ) - tm.assert_series_equal(xp, rs) - - @td.skip_if_no_scipy - def test_cmov_window_special_linear_range(self, win_types_special): - # GH 8238 - kwds = { - "kaiser": {"beta": 1.0}, - "gaussian": {"std": 1.0}, - "general_gaussian": {"power": 2.0, "width": 2.0}, - "slepian": {"width": 0.5}, - "exponential": {"tau": 10}, - } - - vals = np.array(range(10), dtype=np.float) - xp = vals.copy() - xp[:2] = np.nan - xp[-2:] = np.nan - xp = Series(xp) - - rs = ( - Series(vals) - .rolling(5, win_type=win_types_special, center=True) - .mean(**kwds[win_types_special]) - ) - tm.assert_series_equal(xp, rs) - def test_rolling_median(self, raw): - self._check_moment_func(np.median, name="median", raw=raw) +@td.skip_if_no_scipy +def test_cmov_window_regular_linear_range(win_types): + # GH 8238 + vals = np.array(range(10), dtype=np.float) + xp = vals.copy() + xp[:2] = np.nan + xp[-2:] = np.nan + xp = Series(xp) - def test_rolling_min(self, raw): - self._check_moment_func(np.min, name="min", raw=raw) + rs = Series(vals).rolling(5, win_type=win_types, center=True).mean() + tm.assert_series_equal(xp, rs) - a = pd.Series([1, 2, 3, 4, 5]) - result = a.rolling(window=100, min_periods=1).min() - expected = pd.Series(np.ones(len(a))) - tm.assert_series_equal(result, expected) - with pytest.raises(ValueError): - pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).min() +@td.skip_if_no_scipy +def test_cmov_window_regular_missing_data(win_types): + # GH 8238 + vals = np.array( + [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, np.nan, 10.63, 14.48] + ) + xps = { + "bartlett": [ + np.nan, + np.nan, + 9.70333, + 10.5225, + 8.4425, + 9.1925, + 12.5575, + 14.3675, + 15.61667, + 13.655, + ], + "blackman": [ + np.nan, + np.nan, + 9.04582, + 11.41536, + 7.73345, + 9.17869, + 12.79607, + 14.20036, + 15.8706, + 13.655, + ], + "barthann": [ + np.nan, + np.nan, + 9.70333, + 10.5225, + 8.4425, + 9.1925, + 12.5575, + 14.3675, + 15.61667, + 13.655, + ], + "bohman": [ + np.nan, + np.nan, + 8.9444, + 11.56327, + 7.61599, + 9.1764, + 12.83559, + 14.17267, + 15.90976, + 13.655, + ], + "hamming": [ + np.nan, + np.nan, + 9.59321, + 10.29694, + 8.71384, + 9.56348, + 12.38009, + 14.20565, + 15.24694, + 13.69758, + ], + "nuttall": [ + np.nan, + np.nan, + 8.47693, + 12.2821, + 7.04618, + 9.16786, + 13.02671, + 14.03673, + 16.08759, + 13.65553, + ], + "triang": [ + np.nan, + np.nan, + 9.33167, + 9.76125, + 9.28667, + 10.34667, + 12.00556, + 13.82125, + 14.49429, + 13.765, + ], + "blackmanharris": [ + np.nan, + np.nan, + 8.42526, + 12.36824, + 6.97691, + 9.16438, + 13.05052, + 14.02175, + 16.1098, + 13.65509, + ], + } + + xp = Series(xps[win_types]) + rs = Series(vals).rolling(5, win_type=win_types, min_periods=3).mean() + tm.assert_series_equal(xp, rs) + + +@td.skip_if_no_scipy +def test_cmov_window_special(win_types_special): + # GH 8238 + kwds = { + "kaiser": {"beta": 1.0}, + "gaussian": {"std": 1.0}, + "general_gaussian": {"power": 2.0, "width": 2.0}, + "exponential": {"tau": 10}, + } + + vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) + + xps = { + "gaussian": [ + np.nan, + np.nan, + 8.97297, + 9.76077, + 12.24763, + 13.89053, + 13.65671, + 12.01002, + np.nan, + np.nan, + ], + "general_gaussian": [ + np.nan, + np.nan, + 9.85011, + 10.71589, + 11.73161, + 13.08516, + 12.95111, + 12.74577, + np.nan, + np.nan, + ], + "kaiser": [ + np.nan, + np.nan, + 9.86851, + 11.02969, + 11.65161, + 12.75129, + 12.90702, + 12.83757, + np.nan, + np.nan, + ], + "exponential": [ + np.nan, + np.nan, + 9.83364, + 11.10472, + 11.64551, + 12.66138, + 12.92379, + 12.83770, + np.nan, + np.nan, + ], + } - def test_rolling_max(self, raw): - self._check_moment_func(np.max, name="max", raw=raw) + xp = Series(xps[win_types_special]) + rs = ( + Series(vals) + .rolling(5, win_type=win_types_special, center=True) + .mean(**kwds[win_types_special]) + ) + tm.assert_series_equal(xp, rs) + + +@td.skip_if_no_scipy +def test_cmov_window_special_linear_range(win_types_special): + # GH 8238 + kwds = { + "kaiser": {"beta": 1.0}, + "gaussian": {"std": 1.0}, + "general_gaussian": {"power": 2.0, "width": 2.0}, + "slepian": {"width": 0.5}, + "exponential": {"tau": 10}, + } + + vals = np.array(range(10), dtype=np.float) + xp = vals.copy() + xp[:2] = np.nan + xp[-2:] = np.nan + xp = Series(xp) + + rs = ( + Series(vals) + .rolling(5, win_type=win_types_special, center=True) + .mean(**kwds[win_types_special]) + ) + tm.assert_series_equal(xp, rs) - a = pd.Series([1, 2, 3, 4, 5], dtype=np.float64) - b = a.rolling(window=100, min_periods=1).max() - tm.assert_almost_equal(a, b) - with pytest.raises(ValueError): - pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).max() +def test_rolling_median(raw, series, frame): + _check_moment_func(np.median, name="median", raw=raw, series=series, frame=frame) - @pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) - def test_rolling_quantile(self, q, raw): - def scoreatpercentile(a, per): - values = np.sort(a, axis=0) - idx = int(per / 1.0 * (values.shape[0] - 1)) +def test_rolling_min(raw, series, frame): + _check_moment_func(np.min, name="min", raw=raw, series=series, frame=frame) - if idx == values.shape[0] - 1: - retval = values[-1] + a = pd.Series([1, 2, 3, 4, 5]) + result = a.rolling(window=100, min_periods=1).min() + expected = pd.Series(np.ones(len(a))) + tm.assert_series_equal(result, expected) - else: - qlow = float(idx) / float(values.shape[0] - 1) - qhig = float(idx + 1) / float(values.shape[0] - 1) - vlow = values[idx] - vhig = values[idx + 1] - retval = vlow + (vhig - vlow) * (per - qlow) / (qhig - qlow) + with pytest.raises(ValueError): + pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).min() - return retval - def quantile_func(x): - return scoreatpercentile(x, q) +def test_rolling_max(raw, series, frame): + _check_moment_func(np.max, name="max", raw=raw, series=series, frame=frame) - self._check_moment_func(quantile_func, name="quantile", quantile=q, raw=raw) + a = pd.Series([1, 2, 3, 4, 5], dtype=np.float64) + b = a.rolling(window=100, min_periods=1).max() + tm.assert_almost_equal(a, b) - def test_rolling_quantile_np_percentile(self): - # #9413: Tests that rolling window's quantile default behavior - # is analogous to Numpy's percentile - row = 10 - col = 5 - idx = pd.date_range("20100101", periods=row, freq="B") - df = DataFrame(np.random.rand(row * col).reshape((row, -1)), index=idx) + with pytest.raises(ValueError): + pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).max() - df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0) - np_percentile = np.percentile(df, [25, 50, 75], axis=0) - tm.assert_almost_equal(df_quantile.values, np.array(np_percentile)) +@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) +def test_rolling_quantile(q, raw, series, frame): + def scoreatpercentile(a, per): + values = np.sort(a, axis=0) + + idx = int(per / 1.0 * (values.shape[0] - 1)) + + if idx == values.shape[0] - 1: + retval = values[-1] - @pytest.mark.parametrize("quantile", [0.0, 0.1, 0.45, 0.5, 1]) - @pytest.mark.parametrize( - "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"] - ) - @pytest.mark.parametrize( - "data", - [ - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], - [8.0, 1.0, 3.0, 4.0, 5.0, 2.0, 6.0, 7.0], - [0.0, np.nan, 0.2, np.nan, 0.4], - [np.nan, np.nan, np.nan, np.nan], - [np.nan, 0.1, np.nan, 0.3, 0.4, 0.5], - [0.5], - [np.nan, 0.7, 0.6], - ], - ) - def test_rolling_quantile_interpolation_options( - self, quantile, interpolation, data - ): - # Tests that rolling window's quantile behavior is analogous to - # Series' quantile for each interpolation option - s = Series(data) - - q1 = s.quantile(quantile, interpolation) - q2 = s.expanding(min_periods=1).quantile(quantile, interpolation).iloc[-1] - - if np.isnan(q1): - assert np.isnan(q2) else: - assert q1 == q2 + qlow = float(idx) / float(values.shape[0] - 1) + qhig = float(idx + 1) / float(values.shape[0] - 1) + vlow = values[idx] + vhig = values[idx + 1] + retval = vlow + (vhig - vlow) * (per - qlow) / (qhig - qlow) - def test_invalid_quantile_value(self): - data = np.arange(5) - s = Series(data) + return retval - msg = "Interpolation 'invalid' is not supported" - with pytest.raises(ValueError, match=msg): - s.rolling(len(data), min_periods=1).quantile(0.5, interpolation="invalid") + def quantile_func(x): + return scoreatpercentile(x, q) - def test_rolling_quantile_param(self): - ser = Series([0.0, 0.1, 0.5, 0.9, 1.0]) + _check_moment_func( + quantile_func, name="quantile", quantile=q, raw=raw, series=series, frame=frame + ) - with pytest.raises(ValueError): - ser.rolling(3).quantile(-0.1) - with pytest.raises(ValueError): - ser.rolling(3).quantile(10.0) +def test_rolling_quantile_np_percentile(): + # #9413: Tests that rolling window's quantile default behavior + # is analogous to Numpy's percentile + row = 10 + col = 5 + idx = pd.date_range("20100101", periods=row, freq="B") + df = DataFrame(np.random.rand(row * col).reshape((row, -1)), index=idx) - with pytest.raises(TypeError): - ser.rolling(3).quantile("foo") + df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0) + np_percentile = np.percentile(df, [25, 50, 75], axis=0) - def test_rolling_apply(self, raw): - # suppress warnings about empty slices, as we are deliberately testing - # with a 0-length Series + tm.assert_almost_equal(df_quantile.values, np.array(np_percentile)) - def f(x): - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - message=".*(empty slice|0 for slice).*", - category=RuntimeWarning, - ) - return x[np.isfinite(x)].mean() - self._check_moment_func(np.mean, name="apply", func=f, raw=raw) +@pytest.mark.parametrize("quantile", [0.0, 0.1, 0.45, 0.5, 1]) +@pytest.mark.parametrize( + "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"] +) +@pytest.mark.parametrize( + "data", + [ + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], + [8.0, 1.0, 3.0, 4.0, 5.0, 2.0, 6.0, 7.0], + [0.0, np.nan, 0.2, np.nan, 0.4], + [np.nan, np.nan, np.nan, np.nan], + [np.nan, 0.1, np.nan, 0.3, 0.4, 0.5], + [0.5], + [np.nan, 0.7, 0.6], + ], +) +def test_rolling_quantile_interpolation_options(quantile, interpolation, data): + # Tests that rolling window's quantile behavior is analogous to + # Series' quantile for each interpolation option + s = Series(data) - def test_rolling_std(self, raw): - self._check_moment_func(lambda x: np.std(x, ddof=1), name="std", raw=raw) - self._check_moment_func( - lambda x: np.std(x, ddof=0), name="std", ddof=0, raw=raw - ) + q1 = s.quantile(quantile, interpolation) + q2 = s.expanding(min_periods=1).quantile(quantile, interpolation).iloc[-1] - def test_rolling_std_1obs(self): - vals = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0]) + if np.isnan(q1): + assert np.isnan(q2) + else: + assert q1 == q2 - result = vals.rolling(1, min_periods=1).std() - expected = pd.Series([np.nan] * 5) - tm.assert_series_equal(result, expected) - result = vals.rolling(1, min_periods=1).std(ddof=0) - expected = pd.Series([0.0] * 5) - tm.assert_series_equal(result, expected) +def test_invalid_quantile_value(): + data = np.arange(5) + s = Series(data) - result = pd.Series([np.nan, np.nan, 3, 4, 5]).rolling(3, min_periods=2).std() - assert np.isnan(result[2]) + msg = "Interpolation 'invalid' is not supported" + with pytest.raises(ValueError, match=msg): + s.rolling(len(data), min_periods=1).quantile(0.5, interpolation="invalid") - def test_rolling_std_neg_sqrt(self): - # unit test from Bottleneck - # Test move_nanstd for neg sqrt. +def test_rolling_quantile_param(): + ser = Series([0.0, 0.1, 0.5, 0.9, 1.0]) - a = pd.Series( - [ - 0.0011448196318903589, - 0.00028718669878572767, - 0.00028718669878572767, - 0.00028718669878572767, - 0.00028718669878572767, - ] - ) - b = a.rolling(window=3).std() - assert np.isfinite(b[2:]).all() + with pytest.raises(ValueError): + ser.rolling(3).quantile(-0.1) - b = a.ewm(span=3).std() - assert np.isfinite(b[2:]).all() + with pytest.raises(ValueError): + ser.rolling(3).quantile(10.0) - def test_rolling_var(self, raw): - self._check_moment_func(lambda x: np.var(x, ddof=1), name="var", raw=raw) - self._check_moment_func( - lambda x: np.var(x, ddof=0), name="var", ddof=0, raw=raw - ) + with pytest.raises(TypeError): + ser.rolling(3).quantile("foo") - @td.skip_if_no_scipy - def test_rolling_skew(self, raw): - from scipy.stats import skew - - self._check_moment_func(lambda x: skew(x, bias=False), name="skew", raw=raw) - - @td.skip_if_no_scipy - def test_rolling_kurt(self, raw): - from scipy.stats import kurtosis - - self._check_moment_func(lambda x: kurtosis(x, bias=False), name="kurt", raw=raw) - - def _check_moment_func( - self, - static_comp, - name, - raw, - has_min_periods=True, - has_center=True, - has_time_rule=True, - fill_value=None, - zero_min_periods_equal=True, - **kwargs, - ): - - # inject raw - if name == "apply": - kwargs = copy.copy(kwargs) - kwargs["raw"] = raw - - def get_result(obj, window, min_periods=None, center=False): - r = obj.rolling(window=window, min_periods=min_periods, center=center) - return getattr(r, name)(**kwargs) - - series_result = get_result(self.series, window=50) - assert isinstance(series_result, Series) - tm.assert_almost_equal(series_result.iloc[-1], static_comp(self.series[-50:])) - - frame_result = get_result(self.frame, window=50) - assert isinstance(frame_result, DataFrame) - tm.assert_series_equal( - frame_result.iloc[-1, :], - self.frame.iloc[-50:, :].apply(static_comp, axis=0, raw=raw), - check_names=False, - ) - # check time_rule works - if has_time_rule: - win = 25 - minp = 10 - series = self.series[::2].resample("B").mean() - frame = self.frame[::2].resample("B").mean() +def test_rolling_apply(raw, series, frame): + # suppress warnings about empty slices, as we are deliberately testing + # with a 0-length Series - if has_min_periods: - series_result = get_result(series, window=win, min_periods=minp) - frame_result = get_result(frame, window=win, min_periods=minp) - else: - series_result = get_result(series, window=win, min_periods=0) - frame_result = get_result(frame, window=win, min_periods=0) + def f(x): + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message=".*(empty slice|0 for slice).*", + category=RuntimeWarning, + ) + return x[np.isfinite(x)].mean() - last_date = series_result.index[-1] - prev_date = last_date - 24 * offsets.BDay() + _check_moment_func( + np.mean, name="apply", func=f, raw=raw, series=series, frame=frame + ) - trunc_series = self.series[::2].truncate(prev_date, last_date) - trunc_frame = self.frame[::2].truncate(prev_date, last_date) - tm.assert_almost_equal(series_result[-1], static_comp(trunc_series)) +def test_rolling_std(raw, series, frame): + _check_moment_func( + lambda x: np.std(x, ddof=1), name="std", raw=raw, series=series, frame=frame + ) + _check_moment_func( + lambda x: np.std(x, ddof=0), + name="std", + ddof=0, + raw=raw, + series=series, + frame=frame, + ) - tm.assert_series_equal( - frame_result.xs(last_date), - trunc_frame.apply(static_comp, raw=raw), - check_names=False, - ) - # excluding NaNs correctly - obj = Series(randn(50)) - obj[:10] = np.NaN - obj[-10:] = np.NaN - if has_min_periods: - result = get_result(obj, 50, min_periods=30) - tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10])) - - # min_periods is working correctly - result = get_result(obj, 20, min_periods=15) - assert isna(result.iloc[23]) - assert not isna(result.iloc[24]) - - assert not isna(result.iloc[-6]) - assert isna(result.iloc[-5]) - - obj2 = Series(randn(20)) - result = get_result(obj2, 10, min_periods=5) - assert isna(result.iloc[3]) - assert notna(result.iloc[4]) - - if zero_min_periods_equal: - # min_periods=0 may be equivalent to min_periods=1 - result0 = get_result(obj, 20, min_periods=0) - result1 = get_result(obj, 20, min_periods=1) - tm.assert_almost_equal(result0, result1) - else: - result = get_result(obj, 50) - tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10])) +def test_rolling_std_1obs(): + vals = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0]) - # window larger than series length (#7297) - if has_min_periods: - for minp in (0, len(self.series) - 1, len(self.series)): - result = get_result(self.series, len(self.series) + 1, min_periods=minp) - expected = get_result(self.series, len(self.series), min_periods=minp) - nan_mask = isna(result) - tm.assert_series_equal(nan_mask, isna(expected)) - - nan_mask = ~nan_mask - tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) - else: - result = get_result(self.series, len(self.series) + 1, min_periods=0) - expected = get_result(self.series, len(self.series), min_periods=0) - nan_mask = isna(result) - tm.assert_series_equal(nan_mask, isna(expected)) + result = vals.rolling(1, min_periods=1).std() + expected = pd.Series([np.nan] * 5) + tm.assert_series_equal(result, expected) - nan_mask = ~nan_mask - tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) + result = vals.rolling(1, min_periods=1).std(ddof=0) + expected = pd.Series([0.0] * 5) + tm.assert_series_equal(result, expected) - # check center=True - if has_center: - if has_min_periods: - result = get_result(obj, 20, min_periods=15, center=True) - expected = get_result( - pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=15 - )[9:].reset_index(drop=True) - else: - result = get_result(obj, 20, min_periods=0, center=True) - print(result) - expected = get_result( - pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=0 - )[9:].reset_index(drop=True) - - tm.assert_series_equal(result, expected) - - # shifter index - s = [f"x{x:d}" for x in range(12)] - - if has_min_periods: - minp = 10 - - series_xp = ( - get_result( - self.series.reindex(list(self.series.index) + s), - window=25, - min_periods=minp, - ) - .shift(-12) - .reindex(self.series.index) - ) - frame_xp = ( - get_result( - self.frame.reindex(list(self.frame.index) + s), - window=25, - min_periods=minp, - ) - .shift(-12) - .reindex(self.frame.index) - ) + result = pd.Series([np.nan, np.nan, 3, 4, 5]).rolling(3, min_periods=2).std() + assert np.isnan(result[2]) - series_rs = get_result( - self.series, window=25, min_periods=minp, center=True - ) - frame_rs = get_result( - self.frame, window=25, min_periods=minp, center=True - ) - else: - series_xp = ( - get_result( - self.series.reindex(list(self.series.index) + s), - window=25, - min_periods=0, - ) - .shift(-12) - .reindex(self.series.index) - ) - frame_xp = ( - get_result( - self.frame.reindex(list(self.frame.index) + s), - window=25, - min_periods=0, - ) - .shift(-12) - .reindex(self.frame.index) - ) +def test_rolling_std_neg_sqrt(): + # unit test from Bottleneck - series_rs = get_result( - self.series, window=25, min_periods=0, center=True - ) - frame_rs = get_result(self.frame, window=25, min_periods=0, center=True) + # Test move_nanstd for neg sqrt. + + a = pd.Series( + [ + 0.0011448196318903589, + 0.00028718669878572767, + 0.00028718669878572767, + 0.00028718669878572767, + 0.00028718669878572767, + ] + ) + b = a.rolling(window=3).std() + assert np.isfinite(b[2:]).all() + + b = a.ewm(span=3).std() + assert np.isfinite(b[2:]).all() - if fill_value is not None: - series_xp = series_xp.fillna(fill_value) - frame_xp = frame_xp.fillna(fill_value) - tm.assert_series_equal(series_xp, series_rs) - tm.assert_frame_equal(frame_xp, frame_rs) + +def test_rolling_var(raw, series, frame): + _check_moment_func( + lambda x: np.var(x, ddof=1), name="var", raw=raw, series=series, frame=frame + ) + _check_moment_func( + lambda x: np.var(x, ddof=0), + name="var", + ddof=0, + raw=raw, + series=series, + frame=frame, + ) + + +@td.skip_if_no_scipy +def test_rolling_skew(raw, series, frame): + from scipy.stats import skew + + _check_moment_func( + lambda x: skew(x, bias=False), name="skew", raw=raw, series=series, frame=frame + ) + + +@td.skip_if_no_scipy +def test_rolling_kurt(raw, series, frame): + from scipy.stats import kurtosis + + _check_moment_func( + lambda x: kurtosis(x, bias=False), + name="kurt", + raw=raw, + series=series, + frame=frame, + ) diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index 680237db0535b..33fb79d98a324 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -9,336 +9,341 @@ from pandas import DataFrame, Index, Series, Timestamp, concat import pandas._testing as tm from pandas.core.base import SpecificationError -from pandas.tests.window.common import Base -class TestApi(Base): - def setup_method(self, method): - self._create_data() +def test_getitem(frame): + r = frame.rolling(window=5) + tm.assert_index_equal(r._selected_obj.columns, frame.columns) - def test_getitem(self): + r = frame.rolling(window=5)[1] + assert r._selected_obj.name == frame.columns[1] - r = self.frame.rolling(window=5) - tm.assert_index_equal(r._selected_obj.columns, self.frame.columns) + # technically this is allowed + r = frame.rolling(window=5)[1, 3] + tm.assert_index_equal(r._selected_obj.columns, frame.columns[[1, 3]]) - r = self.frame.rolling(window=5)[1] - assert r._selected_obj.name == self.frame.columns[1] + r = frame.rolling(window=5)[[1, 3]] + tm.assert_index_equal(r._selected_obj.columns, frame.columns[[1, 3]]) - # technically this is allowed - r = self.frame.rolling(window=5)[1, 3] - tm.assert_index_equal(r._selected_obj.columns, self.frame.columns[[1, 3]]) - r = self.frame.rolling(window=5)[[1, 3]] - tm.assert_index_equal(r._selected_obj.columns, self.frame.columns[[1, 3]]) +def test_select_bad_cols(): + df = DataFrame([[1, 2]], columns=["A", "B"]) + g = df.rolling(window=5) + with pytest.raises(KeyError, match="Columns not found: 'C'"): + g[["C"]] + with pytest.raises(KeyError, match="^[^A]+$"): + # A should not be referenced as a bad column... + # will have to rethink regex if you change message! + g[["A", "C"]] - def test_select_bad_cols(self): - df = DataFrame([[1, 2]], columns=["A", "B"]) - g = df.rolling(window=5) - with pytest.raises(KeyError, match="Columns not found: 'C'"): - g[["C"]] - with pytest.raises(KeyError, match="^[^A]+$"): - # A should not be referenced as a bad column... - # will have to rethink regex if you change message! - g[["A", "C"]] - def test_attribute_access(self): +def test_attribute_access(): - df = DataFrame([[1, 2]], columns=["A", "B"]) - r = df.rolling(window=5) - tm.assert_series_equal(r.A.sum(), r["A"].sum()) - msg = "'Rolling' object has no attribute 'F'" - with pytest.raises(AttributeError, match=msg): - r.F + df = DataFrame([[1, 2]], columns=["A", "B"]) + r = df.rolling(window=5) + tm.assert_series_equal(r.A.sum(), r["A"].sum()) + msg = "'Rolling' object has no attribute 'F'" + with pytest.raises(AttributeError, match=msg): + r.F - def tests_skip_nuisance(self): - df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) - r = df.rolling(window=3) - result = r[["A", "B"]].sum() - expected = DataFrame( - {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, - columns=list("AB"), - ) - tm.assert_frame_equal(result, expected) - - def test_skip_sum_object_raises(self): - df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) - r = df.rolling(window=3) - result = r.sum() - expected = DataFrame( - {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, - columns=list("AB"), - ) - tm.assert_frame_equal(result, expected) +def tests_skip_nuisance(): - def test_agg(self): - df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) + df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) + r = df.rolling(window=3) + result = r[["A", "B"]].sum() + expected = DataFrame( + {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, + columns=list("AB"), + ) + tm.assert_frame_equal(result, expected) - r = df.rolling(window=3) - a_mean = r["A"].mean() - a_std = r["A"].std() - a_sum = r["A"].sum() - b_mean = r["B"].mean() - b_std = r["B"].std() - result = r.aggregate([np.mean, np.std]) - expected = concat([a_mean, a_std, b_mean, b_std], axis=1) - expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) - tm.assert_frame_equal(result, expected) +def test_skip_sum_object_raises(): + df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) + r = df.rolling(window=3) + result = r.sum() + expected = DataFrame( + {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, + columns=list("AB"), + ) + tm.assert_frame_equal(result, expected) - result = r.aggregate({"A": np.mean, "B": np.std}) - expected = concat([a_mean, b_std], axis=1) - tm.assert_frame_equal(result, expected, check_like=True) +def test_agg(): + df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) - result = r.aggregate({"A": ["mean", "std"]}) - expected = concat([a_mean, a_std], axis=1) - expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "std")]) - tm.assert_frame_equal(result, expected) + r = df.rolling(window=3) + a_mean = r["A"].mean() + a_std = r["A"].std() + a_sum = r["A"].sum() + b_mean = r["B"].mean() + b_std = r["B"].std() - result = r["A"].aggregate(["mean", "sum"]) - expected = concat([a_mean, a_sum], axis=1) - expected.columns = ["mean", "sum"] - tm.assert_frame_equal(result, expected) + result = r.aggregate([np.mean, np.std]) + expected = concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) + tm.assert_frame_equal(result, expected) - msg = "nested renamer is not supported" - with pytest.raises(SpecificationError, match=msg): - # using a dict with renaming - r.aggregate({"A": {"mean": "mean", "sum": "sum"}}) + result = r.aggregate({"A": np.mean, "B": np.std}) - with pytest.raises(SpecificationError, match=msg): - r.aggregate( - { - "A": {"mean": "mean", "sum": "sum"}, - "B": {"mean2": "mean", "sum2": "sum"}, - } - ) + expected = concat([a_mean, b_std], axis=1) + tm.assert_frame_equal(result, expected, check_like=True) - result = r.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]}) - expected = concat([a_mean, a_std, b_mean, b_std], axis=1) + result = r.aggregate({"A": ["mean", "std"]}) + expected = concat([a_mean, a_std], axis=1) + expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "std")]) + tm.assert_frame_equal(result, expected) - exp_cols = [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")] - expected.columns = pd.MultiIndex.from_tuples(exp_cols) - tm.assert_frame_equal(result, expected, check_like=True) + result = r["A"].aggregate(["mean", "sum"]) + expected = concat([a_mean, a_sum], axis=1) + expected.columns = ["mean", "sum"] + tm.assert_frame_equal(result, expected) - def test_agg_apply(self, raw): + msg = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + # using a dict with renaming + r.aggregate({"A": {"mean": "mean", "sum": "sum"}}) - # passed lambda - df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) + with pytest.raises(SpecificationError, match=msg): + r.aggregate( + { + "A": {"mean": "mean", "sum": "sum"}, + "B": {"mean2": "mean", "sum2": "sum"}, + } + ) - r = df.rolling(window=3) - a_sum = r["A"].sum() + result = r.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]}) + expected = concat([a_mean, a_std, b_mean, b_std], axis=1) - result = r.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)}) - rcustom = r["B"].apply(lambda x: np.std(x, ddof=1), raw=raw) - expected = concat([a_sum, rcustom], axis=1) - tm.assert_frame_equal(result, expected, check_like=True) + exp_cols = [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")] + expected.columns = pd.MultiIndex.from_tuples(exp_cols) + tm.assert_frame_equal(result, expected, check_like=True) - def test_agg_consistency(self): - df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) - r = df.rolling(window=3) +def test_agg_apply(raw): - result = r.agg([np.sum, np.mean]).columns - expected = pd.MultiIndex.from_product([list("AB"), ["sum", "mean"]]) - tm.assert_index_equal(result, expected) + # passed lambda + df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) - result = r["A"].agg([np.sum, np.mean]).columns - expected = Index(["sum", "mean"]) - tm.assert_index_equal(result, expected) + r = df.rolling(window=3) + a_sum = r["A"].sum() - result = r.agg({"A": [np.sum, np.mean]}).columns - expected = pd.MultiIndex.from_tuples([("A", "sum"), ("A", "mean")]) - tm.assert_index_equal(result, expected) + result = r.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)}) + rcustom = r["B"].apply(lambda x: np.std(x, ddof=1), raw=raw) + expected = concat([a_sum, rcustom], axis=1) + tm.assert_frame_equal(result, expected, check_like=True) - def test_agg_nested_dicts(self): - # API change for disallowing these types of nested dicts - df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) - r = df.rolling(window=3) +def test_agg_consistency(): - msg = "nested renamer is not supported" - with pytest.raises(SpecificationError, match=msg): - r.aggregate({"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}}) + df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) + r = df.rolling(window=3) - expected = concat( - [r["A"].mean(), r["A"].std(), r["B"].mean(), r["B"].std()], axis=1 - ) - expected.columns = pd.MultiIndex.from_tuples( - [("ra", "mean"), ("ra", "std"), ("rb", "mean"), ("rb", "std")] - ) - with pytest.raises(SpecificationError, match=msg): - r[["A", "B"]].agg( - {"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}} - ) - - with pytest.raises(SpecificationError, match=msg): - r.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}) - - def test_count_nonnumeric_types(self): - # GH12541 - cols = [ - "int", - "float", - "string", - "datetime", - "timedelta", - "periods", - "fl_inf", - "fl_nan", - "str_nan", - "dt_nat", - "periods_nat", - ] - - df = DataFrame( - { - "int": [1, 2, 3], - "float": [4.0, 5.0, 6.0], - "string": list("abc"), - "datetime": pd.date_range("20170101", periods=3), - "timedelta": pd.timedelta_range("1 s", periods=3, freq="s"), - "periods": [ - pd.Period("2012-01"), - pd.Period("2012-02"), - pd.Period("2012-03"), - ], - "fl_inf": [1.0, 2.0, np.Inf], - "fl_nan": [1.0, 2.0, np.NaN], - "str_nan": ["aa", "bb", np.NaN], - "dt_nat": [ - Timestamp("20170101"), - Timestamp("20170203"), - Timestamp(None), - ], - "periods_nat": [ - pd.Period("2012-01"), - pd.Period("2012-02"), - pd.Period(None), - ], - }, - columns=cols, - ) + result = r.agg([np.sum, np.mean]).columns + expected = pd.MultiIndex.from_product([list("AB"), ["sum", "mean"]]) + tm.assert_index_equal(result, expected) - expected = DataFrame( - { - "int": [1.0, 2.0, 2.0], - "float": [1.0, 2.0, 2.0], - "string": [1.0, 2.0, 2.0], - "datetime": [1.0, 2.0, 2.0], - "timedelta": [1.0, 2.0, 2.0], - "periods": [1.0, 2.0, 2.0], - "fl_inf": [1.0, 2.0, 2.0], - "fl_nan": [1.0, 2.0, 1.0], - "str_nan": [1.0, 2.0, 1.0], - "dt_nat": [1.0, 2.0, 1.0], - "periods_nat": [1.0, 2.0, 1.0], - }, - columns=cols, - ) + result = r["A"].agg([np.sum, np.mean]).columns + expected = Index(["sum", "mean"]) + tm.assert_index_equal(result, expected) - result = df.rolling(window=2, min_periods=0).count() - tm.assert_frame_equal(result, expected) + result = r.agg({"A": [np.sum, np.mean]}).columns + expected = pd.MultiIndex.from_tuples([("A", "sum"), ("A", "mean")]) + tm.assert_index_equal(result, expected) - result = df.rolling(1, min_periods=0).count() - expected = df.notna().astype(float) - tm.assert_frame_equal(result, expected) - @td.skip_if_no_scipy - @pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning") - def test_window_with_args(self): - # make sure that we are aggregating window functions correctly with arg - r = Series(np.random.randn(100)).rolling( - window=10, min_periods=1, win_type="gaussian" - ) - expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1) - expected.columns = ["", ""] - result = r.aggregate([lambda x: x.mean(std=10), lambda x: x.mean(std=0.01)]) - tm.assert_frame_equal(result, expected) +def test_agg_nested_dicts(): - def a(x): - return x.mean(std=10) + # API change for disallowing these types of nested dicts + df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) + r = df.rolling(window=3) - def b(x): - return x.mean(std=0.01) + msg = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + r.aggregate({"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}}) - expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1) - expected.columns = ["a", "b"] - result = r.aggregate([a, b]) - tm.assert_frame_equal(result, expected) + expected = concat( + [r["A"].mean(), r["A"].std(), r["B"].mean(), r["B"].std()], axis=1 + ) + expected.columns = pd.MultiIndex.from_tuples( + [("ra", "mean"), ("ra", "std"), ("rb", "mean"), ("rb", "std")] + ) + with pytest.raises(SpecificationError, match=msg): + r[["A", "B"]].agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}) + + with pytest.raises(SpecificationError, match=msg): + r.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}) + + +def test_count_nonnumeric_types(): + # GH12541 + cols = [ + "int", + "float", + "string", + "datetime", + "timedelta", + "periods", + "fl_inf", + "fl_nan", + "str_nan", + "dt_nat", + "periods_nat", + ] + dt_nat_col = [ + Timestamp("20170101"), + Timestamp("20170203"), + Timestamp(None), + ] + + df = DataFrame( + { + "int": [1, 2, 3], + "float": [4.0, 5.0, 6.0], + "string": list("abc"), + "datetime": pd.date_range("20170101", periods=3), + "timedelta": pd.timedelta_range("1 s", periods=3, freq="s"), + "periods": [ + pd.Period("2012-01"), + pd.Period("2012-02"), + pd.Period("2012-03"), + ], + "fl_inf": [1.0, 2.0, np.Inf], + "fl_nan": [1.0, 2.0, np.NaN], + "str_nan": ["aa", "bb", np.NaN], + "dt_nat": dt_nat_col, + "periods_nat": [ + pd.Period("2012-01"), + pd.Period("2012-02"), + pd.Period(None), + ], + }, + columns=cols, + ) - def test_preserve_metadata(self): - # GH 10565 - s = Series(np.arange(100), name="foo") + expected = DataFrame( + { + "int": [1.0, 2.0, 2.0], + "float": [1.0, 2.0, 2.0], + "string": [1.0, 2.0, 2.0], + "datetime": [1.0, 2.0, 2.0], + "timedelta": [1.0, 2.0, 2.0], + "periods": [1.0, 2.0, 2.0], + "fl_inf": [1.0, 2.0, 2.0], + "fl_nan": [1.0, 2.0, 1.0], + "str_nan": [1.0, 2.0, 1.0], + "dt_nat": [1.0, 2.0, 1.0], + "periods_nat": [1.0, 2.0, 1.0], + }, + columns=cols, + ) - s2 = s.rolling(30).sum() - s3 = s.rolling(20).sum() - assert s2.name == "foo" - assert s3.name == "foo" + result = df.rolling(window=2, min_periods=0).count() + tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize( - "func,window_size,expected_vals", - [ - ( - "rolling", - 2, - [ - [np.nan, np.nan, np.nan, np.nan], - [15.0, 20.0, 25.0, 20.0], - [25.0, 30.0, 35.0, 30.0], - [np.nan, np.nan, np.nan, np.nan], - [20.0, 30.0, 35.0, 30.0], - [35.0, 40.0, 60.0, 40.0], - [60.0, 80.0, 85.0, 80], - ], - ), - ( - "expanding", - None, - [ - [10.0, 10.0, 20.0, 20.0], - [15.0, 20.0, 25.0, 20.0], - [20.0, 30.0, 30.0, 20.0], - [10.0, 10.0, 30.0, 30.0], - [20.0, 30.0, 35.0, 30.0], - [26.666667, 40.0, 50.0, 30.0], - [40.0, 80.0, 60.0, 30.0], - ], - ), - ], + result = df.rolling(1, min_periods=0).count() + expected = df.notna().astype(float) + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no_scipy +@pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning") +def test_window_with_args(): + # make sure that we are aggregating window functions correctly with arg + r = Series(np.random.randn(100)).rolling( + window=10, min_periods=1, win_type="gaussian" ) - def test_multiple_agg_funcs(self, func, window_size, expected_vals): - # GH 15072 - df = pd.DataFrame( + expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1) + expected.columns = ["", ""] + result = r.aggregate([lambda x: x.mean(std=10), lambda x: x.mean(std=0.01)]) + tm.assert_frame_equal(result, expected) + + def a(x): + return x.mean(std=10) + + def b(x): + return x.mean(std=0.01) + + expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1) + expected.columns = ["a", "b"] + result = r.aggregate([a, b]) + tm.assert_frame_equal(result, expected) + + +def test_preserve_metadata(): + # GH 10565 + s = Series(np.arange(100), name="foo") + + s2 = s.rolling(30).sum() + s3 = s.rolling(20).sum() + assert s2.name == "foo" + assert s3.name == "foo" + + +@pytest.mark.parametrize( + "func,window_size,expected_vals", + [ + ( + "rolling", + 2, [ - ["A", 10, 20], - ["A", 20, 30], - ["A", 30, 40], - ["B", 10, 30], - ["B", 30, 40], - ["B", 40, 80], - ["B", 80, 90], + [np.nan, np.nan, np.nan, np.nan], + [15.0, 20.0, 25.0, 20.0], + [25.0, 30.0, 35.0, 30.0], + [np.nan, np.nan, np.nan, np.nan], + [20.0, 30.0, 35.0, 30.0], + [35.0, 40.0, 60.0, 40.0], + [60.0, 80.0, 85.0, 80], ], - columns=["stock", "low", "high"], - ) + ), + ( + "expanding", + None, + [ + [10.0, 10.0, 20.0, 20.0], + [15.0, 20.0, 25.0, 20.0], + [20.0, 30.0, 30.0, 20.0], + [10.0, 10.0, 30.0, 30.0], + [20.0, 30.0, 35.0, 30.0], + [26.666667, 40.0, 50.0, 30.0], + [40.0, 80.0, 60.0, 30.0], + ], + ), + ], +) +def test_multiple_agg_funcs(func, window_size, expected_vals): + # GH 15072 + df = pd.DataFrame( + [ + ["A", 10, 20], + ["A", 20, 30], + ["A", 30, 40], + ["B", 10, 30], + ["B", 30, 40], + ["B", 40, 80], + ["B", 80, 90], + ], + columns=["stock", "low", "high"], + ) - f = getattr(df.groupby("stock"), func) - if window_size: - window = f(window_size) - else: - window = f() + f = getattr(df.groupby("stock"), func) + if window_size: + window = f(window_size) + else: + window = f() - index = pd.MultiIndex.from_tuples( - [("A", 0), ("A", 1), ("A", 2), ("B", 3), ("B", 4), ("B", 5), ("B", 6)], - names=["stock", None], - ) - columns = pd.MultiIndex.from_tuples( - [("low", "mean"), ("low", "max"), ("high", "mean"), ("high", "min")] - ) - expected = pd.DataFrame(expected_vals, index=index, columns=columns) + index = pd.MultiIndex.from_tuples( + [("A", 0), ("A", 1), ("A", 2), ("B", 3), ("B", 4), ("B", 5), ("B", 6)], + names=["stock", None], + ) + columns = pd.MultiIndex.from_tuples( + [("low", "mean"), ("low", "max"), ("high", "mean"), ("high", "min")] + ) + expected = pd.DataFrame(expected_vals, index=index, columns=columns) - result = window.agg( - OrderedDict((("low", ["mean", "max"]), ("high", ["mean", "min"]))) - ) + result = window.agg( + OrderedDict((("low", ["mean", "max"]), ("high", ["mean", "min"]))) + ) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_apply.py b/pandas/tests/window/test_apply.py index 34cf0a3054889..bc38634da8941 100644 --- a/pandas/tests/window/test_apply.py +++ b/pandas/tests/window/test_apply.py @@ -4,7 +4,7 @@ from pandas.errors import NumbaUtilError import pandas.util._test_decorators as td -from pandas import DataFrame, Series, Timestamp, date_range +from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range import pandas._testing as tm @@ -139,3 +139,28 @@ def test_invalid_kwargs_nopython(): Series(range(1)).rolling(1).apply( lambda x: x, kwargs={"a": 1}, engine="numba", raw=True ) + + +@pytest.mark.parametrize("args_kwargs", [[None, {"par": 10}], [(10,), None]]) +def test_rolling_apply_args_kwargs(args_kwargs): + # GH 33433 + def foo(x, par): + return np.sum(x + par) + + df = DataFrame({"gr": [1, 1], "a": [1, 2]}) + + idx = Index(["gr", "a"]) + expected = DataFrame([[11.0, 11.0], [11.0, 12.0]], columns=idx) + + result = df.rolling(1).apply(foo, args=args_kwargs[0], kwargs=args_kwargs[1]) + tm.assert_frame_equal(result, expected) + + result = df.rolling(1).apply(foo, args=(10,)) + + midx = MultiIndex.from_tuples([(1, 0), (1, 1)], names=["gr", None]) + expected = Series([11.0, 12.0], index=midx, name="a") + + gb_rolling = df.groupby("gr")["a"].rolling(1) + + result = gb_rolling.apply(foo, args=args_kwargs[0], kwargs=args_kwargs[1]) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 1683fda500f85..9ba194dcf0959 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -5,66 +5,62 @@ from pandas import DataFrame, Series from pandas.core.window import EWM -from pandas.tests.window.common import Base -class TestEWM(Base): - def setup_method(self, method): - self._create_data() +def test_doc_string(): - def test_doc_string(self): + df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) + df + df.ewm(com=0.5).mean() - df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) - df - df.ewm(com=0.5).mean() - @pytest.mark.parametrize("which", ["series", "frame"]) - def test_constructor(self, which): - o = getattr(self, which) - c = o.ewm +def test_constructor(which): - # valid - c(com=0.5) - c(span=1.5) - c(alpha=0.5) - c(halflife=0.75) - c(com=0.5, span=None) - c(alpha=0.5, com=None) - c(halflife=0.75, alpha=None) + c = which.ewm - # not valid: mutually exclusive - with pytest.raises(ValueError): - c(com=0.5, alpha=0.5) - with pytest.raises(ValueError): - c(span=1.5, halflife=0.75) - with pytest.raises(ValueError): - c(alpha=0.5, span=1.5) + # valid + c(com=0.5) + c(span=1.5) + c(alpha=0.5) + c(halflife=0.75) + c(com=0.5, span=None) + c(alpha=0.5, com=None) + c(halflife=0.75, alpha=None) - # not valid: com < 0 - with pytest.raises(ValueError): - c(com=-0.5) + # not valid: mutually exclusive + with pytest.raises(ValueError): + c(com=0.5, alpha=0.5) + with pytest.raises(ValueError): + c(span=1.5, halflife=0.75) + with pytest.raises(ValueError): + c(alpha=0.5, span=1.5) - # not valid: span < 1 - with pytest.raises(ValueError): - c(span=0.5) + # not valid: com < 0 + with pytest.raises(ValueError): + c(com=-0.5) + + # not valid: span < 1 + with pytest.raises(ValueError): + c(span=0.5) + + # not valid: halflife <= 0 + with pytest.raises(ValueError): + c(halflife=0) - # not valid: halflife <= 0 + # not valid: alpha <= 0 or alpha > 1 + for alpha in (-0.5, 1.5): with pytest.raises(ValueError): - c(halflife=0) + c(alpha=alpha) - # not valid: alpha <= 0 or alpha > 1 - for alpha in (-0.5, 1.5): - with pytest.raises(ValueError): - c(alpha=alpha) - @pytest.mark.parametrize("method", ["std", "mean", "var"]) - def test_numpy_compat(self, method): - # see gh-12811 - e = EWM(Series([2, 4, 6]), alpha=0.5) +@pytest.mark.parametrize("method", ["std", "mean", "var"]) +def test_numpy_compat(method): + # see gh-12811 + e = EWM(Series([2, 4, 6]), alpha=0.5) - msg = "numpy operations are not valid with window objects" + msg = "numpy operations are not valid with window objects" - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(e, method)(1, 2, 3) - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(e, method)(dtype=np.float64) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(dtype=np.float64) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 6b6367fd80b26..b57467385d371 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -7,112 +7,102 @@ from pandas import DataFrame, Series import pandas._testing as tm from pandas.core.window import Expanding -from pandas.tests.window.common import Base -class TestExpanding(Base): - def setup_method(self, method): - self._create_data() +def test_doc_string(): - def test_doc_string(self): + df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) + df + df.expanding(2).sum() - df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) - df - df.expanding(2).sum() - @pytest.mark.parametrize("which", ["series", "frame"]) - def test_constructor(self, which): - # GH 12669 +def test_constructor(which): + # GH 12669 - o = getattr(self, which) - c = o.expanding + c = which.expanding - # valid - c(min_periods=1) - c(min_periods=1, center=True) - c(min_periods=1, center=False) + # valid + c(min_periods=1) + c(min_periods=1, center=True) + c(min_periods=1, center=False) - # not valid - for w in [2.0, "foo", np.array([2])]: - with pytest.raises(ValueError): - c(min_periods=w) - with pytest.raises(ValueError): - c(min_periods=1, center=w) + # not valid + for w in [2.0, "foo", np.array([2])]: + with pytest.raises(ValueError): + c(min_periods=w) + with pytest.raises(ValueError): + c(min_periods=1, center=w) - @pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) - def test_numpy_compat(self, method): - # see gh-12811 - e = Expanding(Series([2, 4, 6]), window=2) - msg = "numpy operations are not valid with window objects" +@pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) +def test_numpy_compat(method): + # see gh-12811 + e = Expanding(Series([2, 4, 6]), window=2) - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(e, method)(1, 2, 3) - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(e, method)(dtype=np.float64) + msg = "numpy operations are not valid with window objects" - @pytest.mark.parametrize( - "expander", - [ - 1, - pytest.param( - "ls", - marks=pytest.mark.xfail( - reason="GH#16425 expanding with offset not supported" - ), + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(dtype=np.float64) + + +@pytest.mark.parametrize( + "expander", + [ + 1, + pytest.param( + "ls", + marks=pytest.mark.xfail( + reason="GH#16425 expanding with offset not supported" ), - ], - ) - def test_empty_df_expanding(self, expander): - # GH 15819 Verifies that datetime and integer expanding windows can be - # applied to empty DataFrames - - expected = DataFrame() - result = DataFrame().expanding(expander).sum() - tm.assert_frame_equal(result, expected) - - # Verifies that datetime and integer expanding windows can be applied - # to empty DataFrames with datetime index - expected = DataFrame(index=pd.DatetimeIndex([])) - result = DataFrame(index=pd.DatetimeIndex([])).expanding(expander).sum() - tm.assert_frame_equal(result, expected) - - def test_missing_minp_zero(self): - # https://github.com/pandas-dev/pandas/pull/18921 - # minp=0 - x = pd.Series([np.nan]) - result = x.expanding(min_periods=0).sum() - expected = pd.Series([0.0]) - tm.assert_series_equal(result, expected) - - # minp=1 - result = x.expanding(min_periods=1).sum() - expected = pd.Series([np.nan]) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame]) - def test_iter_raises(self, klass): - # https://github.com/pandas-dev/pandas/issues/11704 - # Iteration over a Window - obj = klass([1, 2, 3, 4]) - with pytest.raises(NotImplementedError): - iter(obj.expanding(2)) - - def test_expanding_axis(self, axis_frame): - # see gh-23372. - df = DataFrame(np.ones((10, 20))) - axis = df._get_axis_number(axis_frame) - - if axis == 0: - expected = DataFrame( - {i: [np.nan] * 2 + [float(j) for j in range(3, 11)] for i in range(20)} - ) - else: - # axis == 1 - expected = DataFrame([[np.nan] * 2 + [float(i) for i in range(3, 21)]] * 10) - - result = df.expanding(3, axis=axis_frame).sum() - tm.assert_frame_equal(result, expected) + ), + ], +) +def test_empty_df_expanding(expander): + # GH 15819 Verifies that datetime and integer expanding windows can be + # applied to empty DataFrames + + expected = DataFrame() + result = DataFrame().expanding(expander).sum() + tm.assert_frame_equal(result, expected) + + # Verifies that datetime and integer expanding windows can be applied + # to empty DataFrames with datetime index + expected = DataFrame(index=pd.DatetimeIndex([])) + result = DataFrame(index=pd.DatetimeIndex([])).expanding(expander).sum() + tm.assert_frame_equal(result, expected) + + +def test_missing_minp_zero(): + # https://github.com/pandas-dev/pandas/pull/18921 + # minp=0 + x = pd.Series([np.nan]) + result = x.expanding(min_periods=0).sum() + expected = pd.Series([0.0]) + tm.assert_series_equal(result, expected) + + # minp=1 + result = x.expanding(min_periods=1).sum() + expected = pd.Series([np.nan]) + tm.assert_series_equal(result, expected) + + +def test_expanding_axis(axis_frame): + # see gh-23372. + df = DataFrame(np.ones((10, 20))) + axis = df._get_axis_number(axis_frame) + + if axis == 0: + expected = DataFrame( + {i: [np.nan] * 2 + [float(j) for j in range(3, 11)] for i in range(20)} + ) + else: + # axis == 1 + expected = DataFrame([[np.nan] * 2 + [float(i) for i in range(3, 21)]] * 10) + + result = df.expanding(3, axis=axis_frame).sum() + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("constructor", [Series, DataFrame]) @@ -132,3 +122,91 @@ def test_expanding_count_default_min_periods_with_null_values(constructor): result = constructor(values).expanding().count() expected = constructor(expected_counts) tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "df,expected,min_periods", + [ + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 3, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 2, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 1, + ), + (DataFrame({"A": [1], "B": [4]}), [], 2), + (DataFrame(), [({}, [])], 1), + ( + DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), + [ + ({"A": [1.0], "B": [np.nan]}, [0]), + ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]), + ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), + ], + 3, + ), + ( + DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), + [ + ({"A": [1.0], "B": [np.nan]}, [0]), + ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]), + ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), + ], + 2, + ), + ( + DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), + [ + ({"A": [1.0], "B": [np.nan]}, [0]), + ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]), + ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), + ], + 1, + ), + ], +) +def test_iter_expanding_dataframe(df, expected, min_periods): + # GH 11704 + expected = [DataFrame(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip(expected, df.expanding(min_periods)): + tm.assert_frame_equal(actual, expected) + + +@pytest.mark.parametrize( + "ser,expected,min_periods", + [ + (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3), + (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2), + (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 1), + (Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2), + (Series([np.nan, 2]), [([np.nan], [0]), ([np.nan, 2], [0, 1])], 2), + (Series([], dtype="int64"), [], 2), + ], +) +def test_iter_expanding_series(ser, expected, min_periods): + # GH 11704 + expected = [Series(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip(expected, ser.expanding(min_periods)): + tm.assert_series_equal(actual, expected) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 866b7da59382d..f9b0e6856337b 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -7,380 +7,371 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, Series +from pandas import DataFrame, Series, date_range import pandas._testing as tm from pandas.core.window import Rolling -from pandas.tests.window.common import Base -class TestRolling(Base): - def setup_method(self, method): - self._create_data() +def test_doc_string(): - def test_doc_string(self): + df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) + df + df.rolling(2).sum() + df.rolling(2, min_periods=1).sum() - df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) - df - df.rolling(2).sum() - df.rolling(2, min_periods=1).sum() - @pytest.mark.parametrize("which", ["series", "frame"]) - def test_constructor(self, which): - # GH 12669 +def test_constructor(which): + # GH 12669 - o = getattr(self, which) - c = o.rolling + c = which.rolling - # valid - c(0) - c(window=2) - c(window=2, min_periods=1) - c(window=2, min_periods=1, center=True) - c(window=2, min_periods=1, center=False) + # valid + c(0) + c(window=2) + c(window=2, min_periods=1) + c(window=2, min_periods=1, center=True) + c(window=2, min_periods=1, center=False) - # GH 13383 + # GH 13383 - msg = "window must be non-negative" + msg = "window must be non-negative" + with pytest.raises(ValueError, match=msg): + c(-1) + + # not valid + for w in [2.0, "foo", np.array([2])]: + msg = ( + "window must be an integer|" + "passed window foo is not compatible with a datetimelike index" + ) with pytest.raises(ValueError, match=msg): - c(-1) - - # not valid - for w in [2.0, "foo", np.array([2])]: - msg = ( - "window must be an integer|" - "passed window foo is not compatible with a datetimelike index" - ) - with pytest.raises(ValueError, match=msg): - c(window=w) - - msg = "min_periods must be an integer" - with pytest.raises(ValueError, match=msg): - c(window=2, min_periods=w) - - msg = "center must be a boolean" - with pytest.raises(ValueError, match=msg): - c(window=2, min_periods=1, center=w) - - @td.skip_if_no_scipy - @pytest.mark.parametrize("which", ["series", "frame"]) - def test_constructor_with_win_type(self, which): - # GH 13383 - o = getattr(self, which) - c = o.rolling - - msg = "window must be > 0" + c(window=w) + msg = "min_periods must be an integer" with pytest.raises(ValueError, match=msg): - c(-1, win_type="boxcar") - - @pytest.mark.parametrize("window", [timedelta(days=3), pd.Timedelta(days=3)]) - def test_constructor_with_timedelta_window(self, window): - # GH 15440 - n = 10 - df = DataFrame( - {"value": np.arange(n)}, - index=pd.date_range("2015-12-24", periods=n, freq="D"), - ) - expected_data = np.append([0.0, 1.0], np.arange(3.0, 27.0, 3)) + c(window=2, min_periods=w) - result = df.rolling(window=window).sum() - expected = DataFrame( - {"value": expected_data}, - index=pd.date_range("2015-12-24", periods=n, freq="D"), - ) - tm.assert_frame_equal(result, expected) - expected = df.rolling("3D").sum() - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("window", [timedelta(days=3), pd.Timedelta(days=3), "3D"]) - def test_constructor_timedelta_window_and_minperiods(self, window, raw): - # GH 15305 - n = 10 - df = DataFrame( - {"value": np.arange(n)}, - index=pd.date_range("2017-08-08", periods=n, freq="D"), - ) - expected = DataFrame( - {"value": np.append([np.NaN, 1.0], np.arange(3.0, 27.0, 3))}, - index=pd.date_range("2017-08-08", periods=n, freq="D"), - ) - result_roll_sum = df.rolling(window=window, min_periods=2).sum() - result_roll_generic = df.rolling(window=window, min_periods=2).apply( - sum, raw=raw - ) - tm.assert_frame_equal(result_roll_sum, expected) - tm.assert_frame_equal(result_roll_generic, expected) + msg = "center must be a boolean" + with pytest.raises(ValueError, match=msg): + c(window=2, min_periods=1, center=w) - @pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) - def test_numpy_compat(self, method): - # see gh-12811 - r = Rolling(Series([2, 4, 6]), window=2) - msg = "numpy operations are not valid with window objects" +@td.skip_if_no_scipy +def test_constructor_with_win_type(which): + # GH 13383 + c = which.rolling - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(r, method)(1, 2, 3) - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(r, method)(dtype=np.float64) + msg = "window must be > 0" - def test_closed(self): - df = DataFrame({"A": [0, 1, 2, 3, 4]}) - # closed only allowed for datetimelike + with pytest.raises(ValueError, match=msg): + c(-1, win_type="boxcar") - msg = "closed only implemented for datetimelike and offset based windows" - with pytest.raises(ValueError, match=msg): - df.rolling(window=3, closed="neither") - - @pytest.mark.parametrize("closed", ["neither", "left"]) - def test_closed_empty(self, closed, arithmetic_win_operators): - # GH 26005 - func_name = arithmetic_win_operators - ser = pd.Series( - data=np.arange(5), index=pd.date_range("2000", periods=5, freq="2D") - ) - roll = ser.rolling("1D", closed=closed) - - result = getattr(roll, func_name)() - expected = pd.Series([np.nan] * 5, index=ser.index) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("func", ["min", "max"]) - def test_closed_one_entry(self, func): - # GH24718 - ser = pd.Series(data=[2], index=pd.date_range("2000", periods=1)) - result = getattr(ser.rolling("10D", closed="left"), func)() - tm.assert_series_equal(result, pd.Series([np.nan], index=ser.index)) - - @pytest.mark.parametrize("func", ["min", "max"]) - def test_closed_one_entry_groupby(self, func): - # GH24718 - ser = pd.DataFrame( - data={"A": [1, 1, 2], "B": [3, 2, 1]}, - index=pd.date_range("2000", periods=3), - ) - result = getattr( - ser.groupby("A", sort=False)["B"].rolling("10D", closed="left"), func - )() - exp_idx = pd.MultiIndex.from_arrays( - arrays=[[1, 1, 2], ser.index], names=("A", None) - ) - expected = pd.Series(data=[np.nan, 3, np.nan], index=exp_idx, name="B") - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("input_dtype", ["int", "float"]) - @pytest.mark.parametrize( - "func,closed,expected", - [ - ("min", "right", [0.0, 0, 0, 1, 2, 3, 4, 5, 6, 7]), - ("min", "both", [0.0, 0, 0, 0, 1, 2, 3, 4, 5, 6]), - ("min", "neither", [np.nan, 0, 0, 1, 2, 3, 4, 5, 6, 7]), - ("min", "left", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, 6]), - ("max", "right", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), - ("max", "both", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), - ("max", "neither", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]), - ("max", "left", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]), - ], +@pytest.mark.parametrize("window", [timedelta(days=3), pd.Timedelta(days=3)]) +def test_constructor_with_timedelta_window(window): + # GH 15440 + n = 10 + df = DataFrame( + {"value": np.arange(n)}, index=pd.date_range("2015-12-24", periods=n, freq="D"), ) - def test_closed_min_max_datetime(self, input_dtype, func, closed, expected): - # see gh-21704 - ser = pd.Series( - data=np.arange(10).astype(input_dtype), - index=pd.date_range("2000", periods=10), - ) + expected_data = np.append([0.0, 1.0], np.arange(3.0, 27.0, 3)) - result = getattr(ser.rolling("3D", closed=closed), func)() - expected = pd.Series(expected, index=ser.index) - tm.assert_series_equal(result, expected) - - def test_closed_uneven(self): - # see gh-21704 - ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) - - # uneven - ser = ser.drop(index=ser.index[[1, 5]]) - result = ser.rolling("3D", closed="left").min() - expected = pd.Series([np.nan, 0, 0, 2, 3, 4, 6, 6], index=ser.index) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize( - "func,closed,expected", - [ - ("min", "right", [np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan, np.nan]), - ("min", "both", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, np.nan]), - ("min", "neither", [np.nan, np.nan, 0, 1, 2, 3, 4, 5, np.nan, np.nan]), - ("min", "left", [np.nan, np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan]), - ("max", "right", [np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan, np.nan]), - ("max", "both", [np.nan, 1, 2, 3, 4, 5, 6, 6, 6, np.nan]), - ("max", "neither", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, np.nan, np.nan]), - ("max", "left", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan]), - ], + result = df.rolling(window=window).sum() + expected = DataFrame( + {"value": expected_data}, + index=pd.date_range("2015-12-24", periods=n, freq="D"), ) - def test_closed_min_max_minp(self, func, closed, expected): - # see gh-21704 - ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) - ser[ser.index[-3:]] = np.nan - result = getattr(ser.rolling("3D", min_periods=2, closed=closed), func)() - expected = pd.Series(expected, index=ser.index) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize( - "closed,expected", - [ - ("right", [0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8]), - ("both", [0, 0.5, 1, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), - ("neither", [np.nan, 0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), - ("left", [np.nan, 0, 0.5, 1, 2, 3, 4, 5, 6, 7]), - ], + tm.assert_frame_equal(result, expected) + expected = df.rolling("3D").sum() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("window", [timedelta(days=3), pd.Timedelta(days=3), "3D"]) +def test_constructor_timedelta_window_and_minperiods(window, raw): + # GH 15305 + n = 10 + df = DataFrame( + {"value": np.arange(n)}, index=pd.date_range("2017-08-08", periods=n, freq="D"), ) - def test_closed_median_quantile(self, closed, expected): - # GH 26005 - ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) - roll = ser.rolling("3D", closed=closed) - expected = pd.Series(expected, index=ser.index) - - result = roll.median() - tm.assert_series_equal(result, expected) - - result = roll.quantile(0.5) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("roller", ["1s", 1]) - def tests_empty_df_rolling(self, roller): - # GH 15819 Verifies that datetime and integer rolling windows can be - # applied to empty DataFrames - expected = DataFrame() - result = DataFrame().rolling(roller).sum() - tm.assert_frame_equal(result, expected) - - # Verifies that datetime and integer rolling windows can be applied to - # empty DataFrames with datetime index - expected = DataFrame(index=pd.DatetimeIndex([])) - result = DataFrame(index=pd.DatetimeIndex([])).rolling(roller).sum() - tm.assert_frame_equal(result, expected) - - def test_empty_window_median_quantile(self): - # GH 26005 - expected = pd.Series([np.nan, np.nan, np.nan]) - roll = pd.Series(np.arange(3)).rolling(0) - - result = roll.median() - tm.assert_series_equal(result, expected) - - result = roll.quantile(0.1) - tm.assert_series_equal(result, expected) - - def test_missing_minp_zero(self): - # https://github.com/pandas-dev/pandas/pull/18921 - # minp=0 - x = pd.Series([np.nan]) - result = x.rolling(1, min_periods=0).sum() - expected = pd.Series([0.0]) - tm.assert_series_equal(result, expected) - - # minp=1 - result = x.rolling(1, min_periods=1).sum() - expected = pd.Series([np.nan]) - tm.assert_series_equal(result, expected) - - def test_missing_minp_zero_variable(self): - # https://github.com/pandas-dev/pandas/pull/18921 - x = pd.Series( - [np.nan] * 4, - index=pd.DatetimeIndex( - ["2017-01-01", "2017-01-04", "2017-01-06", "2017-01-07"] - ), - ) - result = x.rolling(pd.Timedelta("2d"), min_periods=0).sum() - expected = pd.Series(0.0, index=x.index) - tm.assert_series_equal(result, expected) + expected = DataFrame( + {"value": np.append([np.NaN, 1.0], np.arange(3.0, 27.0, 3))}, + index=pd.date_range("2017-08-08", periods=n, freq="D"), + ) + result_roll_sum = df.rolling(window=window, min_periods=2).sum() + result_roll_generic = df.rolling(window=window, min_periods=2).apply(sum, raw=raw) + tm.assert_frame_equal(result_roll_sum, expected) + tm.assert_frame_equal(result_roll_generic, expected) - def test_multi_index_names(self): - # GH 16789, 16825 - cols = pd.MultiIndex.from_product( - [["A", "B"], ["C", "D", "E"]], names=["1", "2"] - ) - df = DataFrame(np.ones((10, 6)), columns=cols) - result = df.rolling(3).cov() - - tm.assert_index_equal(result.columns, df.columns) - assert result.index.names == [None, "1", "2"] - - @pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame]) - def test_iter_raises(self, klass): - # https://github.com/pandas-dev/pandas/issues/11704 - # Iteration over a Window - obj = klass([1, 2, 3, 4]) - - msg = "See issue #11704 https://github.com/pandas-dev/pandas/issues/11704" - - with pytest.raises(NotImplementedError, match=msg): - iter(obj.rolling(2)) - - def test_rolling_axis_sum(self, axis_frame): - # see gh-23372. - df = DataFrame(np.ones((10, 20))) - axis = df._get_axis_number(axis_frame) - - if axis == 0: - expected = DataFrame({i: [np.nan] * 2 + [3.0] * 8 for i in range(20)}) - else: - # axis == 1 - expected = DataFrame([[np.nan] * 2 + [3.0] * 18] * 10) - - result = df.rolling(3, axis=axis_frame).sum() - tm.assert_frame_equal(result, expected) - - def test_rolling_axis_count(self, axis_frame): - # see gh-26055 - df = DataFrame({"x": range(3), "y": range(3)}) - - axis = df._get_axis_number(axis_frame) - - if axis in [0, "index"]: - expected = DataFrame({"x": [1.0, 2.0, 2.0], "y": [1.0, 2.0, 2.0]}) - else: - expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]}) - - result = df.rolling(2, axis=axis_frame, min_periods=0).count() - tm.assert_frame_equal(result, expected) - - def test_readonly_array(self): - # GH-27766 - arr = np.array([1, 3, np.nan, 3, 5]) - arr.setflags(write=False) - result = pd.Series(arr).rolling(2).mean() - expected = pd.Series([np.nan, 2, np.nan, np.nan, 4]) - tm.assert_series_equal(result, expected) - - def test_rolling_datetime(self, axis_frame, tz_naive_fixture): - # GH-28192 - tz = tz_naive_fixture - df = pd.DataFrame( - { - i: [1] * 2 - for i in pd.date_range("2019-8-01", "2019-08-03", freq="D", tz=tz) - } - ) - if axis_frame in [0, "index"]: - result = df.T.rolling("2D", axis=axis_frame).sum().T - else: - result = df.rolling("2D", axis=axis_frame).sum() - expected = pd.DataFrame( - { - **{ - i: [1.0] * 2 - for i in pd.date_range("2019-8-01", periods=1, freq="D", tz=tz) - }, - **{ - i: [2.0] * 2 - for i in pd.date_range("2019-8-02", "2019-8-03", freq="D", tz=tz) - }, - } - ) - tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) +def test_numpy_compat(method): + # see gh-12811 + r = Rolling(Series([2, 4, 6]), window=2) + + msg = "numpy operations are not valid with window objects" + + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, method)(dtype=np.float64) + + +def test_closed(): + df = DataFrame({"A": [0, 1, 2, 3, 4]}) + # closed only allowed for datetimelike + + msg = "closed only implemented for datetimelike and offset based windows" + + with pytest.raises(ValueError, match=msg): + df.rolling(window=3, closed="neither") + + +@pytest.mark.parametrize("closed", ["neither", "left"]) +def test_closed_empty(closed, arithmetic_win_operators): + # GH 26005 + func_name = arithmetic_win_operators + ser = pd.Series( + data=np.arange(5), index=pd.date_range("2000", periods=5, freq="2D") + ) + roll = ser.rolling("1D", closed=closed) + + result = getattr(roll, func_name)() + expected = pd.Series([np.nan] * 5, index=ser.index) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["min", "max"]) +def test_closed_one_entry(func): + # GH24718 + ser = pd.Series(data=[2], index=pd.date_range("2000", periods=1)) + result = getattr(ser.rolling("10D", closed="left"), func)() + tm.assert_series_equal(result, pd.Series([np.nan], index=ser.index)) + + +@pytest.mark.parametrize("func", ["min", "max"]) +def test_closed_one_entry_groupby(func): + # GH24718 + ser = pd.DataFrame( + data={"A": [1, 1, 2], "B": [3, 2, 1]}, index=pd.date_range("2000", periods=3), + ) + result = getattr( + ser.groupby("A", sort=False)["B"].rolling("10D", closed="left"), func + )() + exp_idx = pd.MultiIndex.from_arrays( + arrays=[[1, 1, 2], ser.index], names=("A", None) + ) + expected = pd.Series(data=[np.nan, 3, np.nan], index=exp_idx, name="B") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("input_dtype", ["int", "float"]) +@pytest.mark.parametrize( + "func,closed,expected", + [ + ("min", "right", [0.0, 0, 0, 1, 2, 3, 4, 5, 6, 7]), + ("min", "both", [0.0, 0, 0, 0, 1, 2, 3, 4, 5, 6]), + ("min", "neither", [np.nan, 0, 0, 1, 2, 3, 4, 5, 6, 7]), + ("min", "left", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, 6]), + ("max", "right", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + ("max", "both", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + ("max", "neither", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]), + ("max", "left", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]), + ], +) +def test_closed_min_max_datetime(input_dtype, func, closed, expected): + # see gh-21704 + ser = pd.Series( + data=np.arange(10).astype(input_dtype), index=pd.date_range("2000", periods=10), + ) + + result = getattr(ser.rolling("3D", closed=closed), func)() + expected = pd.Series(expected, index=ser.index) + tm.assert_series_equal(result, expected) + + +def test_closed_uneven(): + # see gh-21704 + ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) + + # uneven + ser = ser.drop(index=ser.index[[1, 5]]) + result = ser.rolling("3D", closed="left").min() + expected = pd.Series([np.nan, 0, 0, 2, 3, 4, 6, 6], index=ser.index) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "func,closed,expected", + [ + ("min", "right", [np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan, np.nan]), + ("min", "both", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, np.nan]), + ("min", "neither", [np.nan, np.nan, 0, 1, 2, 3, 4, 5, np.nan, np.nan]), + ("min", "left", [np.nan, np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan]), + ("max", "right", [np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan, np.nan]), + ("max", "both", [np.nan, 1, 2, 3, 4, 5, 6, 6, 6, np.nan]), + ("max", "neither", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, np.nan, np.nan]), + ("max", "left", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan]), + ], +) +def test_closed_min_max_minp(func, closed, expected): + # see gh-21704 + ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) + ser[ser.index[-3:]] = np.nan + result = getattr(ser.rolling("3D", min_periods=2, closed=closed), func)() + expected = pd.Series(expected, index=ser.index) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "closed,expected", + [ + ("right", [0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8]), + ("both", [0, 0.5, 1, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), + ("neither", [np.nan, 0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), + ("left", [np.nan, 0, 0.5, 1, 2, 3, 4, 5, 6, 7]), + ], +) +def test_closed_median_quantile(closed, expected): + # GH 26005 + ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) + roll = ser.rolling("3D", closed=closed) + expected = pd.Series(expected, index=ser.index) + + result = roll.median() + tm.assert_series_equal(result, expected) + + result = roll.quantile(0.5) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("roller", ["1s", 1]) +def tests_empty_df_rolling(roller): + # GH 15819 Verifies that datetime and integer rolling windows can be + # applied to empty DataFrames + expected = DataFrame() + result = DataFrame().rolling(roller).sum() + tm.assert_frame_equal(result, expected) + + # Verifies that datetime and integer rolling windows can be applied to + # empty DataFrames with datetime index + expected = DataFrame(index=pd.DatetimeIndex([])) + result = DataFrame(index=pd.DatetimeIndex([])).rolling(roller).sum() + tm.assert_frame_equal(result, expected) + + +def test_empty_window_median_quantile(): + # GH 26005 + expected = pd.Series([np.nan, np.nan, np.nan]) + roll = pd.Series(np.arange(3)).rolling(0) + + result = roll.median() + tm.assert_series_equal(result, expected) + + result = roll.quantile(0.1) + tm.assert_series_equal(result, expected) + + +def test_missing_minp_zero(): + # https://github.com/pandas-dev/pandas/pull/18921 + # minp=0 + x = pd.Series([np.nan]) + result = x.rolling(1, min_periods=0).sum() + expected = pd.Series([0.0]) + tm.assert_series_equal(result, expected) + + # minp=1 + result = x.rolling(1, min_periods=1).sum() + expected = pd.Series([np.nan]) + tm.assert_series_equal(result, expected) + + +def test_missing_minp_zero_variable(): + # https://github.com/pandas-dev/pandas/pull/18921 + x = pd.Series( + [np.nan] * 4, + index=pd.DatetimeIndex( + ["2017-01-01", "2017-01-04", "2017-01-06", "2017-01-07"] + ), + ) + result = x.rolling(pd.Timedelta("2d"), min_periods=0).sum() + expected = pd.Series(0.0, index=x.index) + tm.assert_series_equal(result, expected) + + +def test_multi_index_names(): + + # GH 16789, 16825 + cols = pd.MultiIndex.from_product([["A", "B"], ["C", "D", "E"]], names=["1", "2"]) + df = DataFrame(np.ones((10, 6)), columns=cols) + result = df.rolling(3).cov() + + tm.assert_index_equal(result.columns, df.columns) + assert result.index.names == [None, "1", "2"] + + +def test_rolling_axis_sum(axis_frame): + # see gh-23372. + df = DataFrame(np.ones((10, 20))) + axis = df._get_axis_number(axis_frame) + + if axis == 0: + expected = DataFrame({i: [np.nan] * 2 + [3.0] * 8 for i in range(20)}) + else: + # axis == 1 + expected = DataFrame([[np.nan] * 2 + [3.0] * 18] * 10) + + result = df.rolling(3, axis=axis_frame).sum() + tm.assert_frame_equal(result, expected) + + +def test_rolling_axis_count(axis_frame): + # see gh-26055 + df = DataFrame({"x": range(3), "y": range(3)}) + + axis = df._get_axis_number(axis_frame) + + if axis in [0, "index"]: + expected = DataFrame({"x": [1.0, 2.0, 2.0], "y": [1.0, 2.0, 2.0]}) + else: + expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]}) + + result = df.rolling(2, axis=axis_frame, min_periods=0).count() + tm.assert_frame_equal(result, expected) + + +def test_readonly_array(): + # GH-27766 + arr = np.array([1, 3, np.nan, 3, 5]) + arr.setflags(write=False) + result = pd.Series(arr).rolling(2).mean() + expected = pd.Series([np.nan, 2, np.nan, np.nan, 4]) + tm.assert_series_equal(result, expected) + + +def test_rolling_datetime(axis_frame, tz_naive_fixture): + # GH-28192 + tz = tz_naive_fixture + df = pd.DataFrame( + {i: [1] * 2 for i in pd.date_range("2019-8-01", "2019-08-03", freq="D", tz=tz)} + ) + if axis_frame in [0, "index"]: + result = df.T.rolling("2D", axis=axis_frame).sum().T + else: + result = df.rolling("2D", axis=axis_frame).sum() + expected = pd.DataFrame( + { + **{ + i: [1.0] * 2 + for i in pd.date_range("2019-8-01", periods=1, freq="D", tz=tz) + }, + **{ + i: [2.0] * 2 + for i in pd.date_range("2019-8-02", "2019-8-03", freq="D", tz=tz) + }, + } + ) + tm.assert_frame_equal(result, expected) def test_rolling_window_as_string(): @@ -467,3 +458,208 @@ def test_rolling_count_default_min_periods_with_null_values(constructor): result = constructor(values).rolling(3).count() expected = constructor(expected_counts) tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "df,expected,window,min_periods", + [ + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 3, + None, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [2, 3], "B": [5, 6]}, [1, 2]), + ], + 2, + 1, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [2, 3], "B": [5, 6]}, [1, 2]), + ], + 2, + 3, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [2], "B": [5]}, [1]), + ({"A": [3], "B": [6]}, [2]), + ], + 1, + 1, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [2], "B": [5]}, [1]), + ({"A": [3], "B": [6]}, [2]), + ], + 1, + 2, + ), + (DataFrame({"A": [1], "B": [4]}), [], 2, None), + (DataFrame({"A": [1], "B": [4]}), [], 2, 1), + (DataFrame(), [({}, [])], 2, None), + ( + DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), + [ + ({"A": [1.0], "B": [np.nan]}, [0]), + ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]), + ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), + ], + 3, + 2, + ), + ], +) +def test_iter_rolling_dataframe(df, expected, window, min_periods): + # GH 11704 + expected = [DataFrame(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip( + expected, df.rolling(window, min_periods=min_periods) + ): + tm.assert_frame_equal(actual, expected) + + +@pytest.mark.parametrize( + "expected,window", + [ + ( + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [2, 3], "B": [5, 6]}, [1, 2]), + ], + "2D", + ), + ( + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + "3D", + ), + ( + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [2], "B": [5]}, [1]), + ({"A": [3], "B": [6]}, [2]), + ], + "1D", + ), + ], +) +def test_iter_rolling_on_dataframe(expected, window): + # GH 11704 + df = DataFrame( + { + "A": [1, 2, 3, 4, 5], + "B": [4, 5, 6, 7, 8], + "C": date_range(start="2016-01-01", periods=5, freq="D"), + } + ) + + expected = [DataFrame(values, index=index) for (values, index) in expected] + for (expected, actual) in zip(expected, df.rolling(window, on="C")): + tm.assert_frame_equal(actual, expected) + + +@pytest.mark.parametrize( + "ser,expected,window, min_periods", + [ + ( + Series([1, 2, 3]), + [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], + 3, + None, + ), + ( + Series([1, 2, 3]), + [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], + 3, + 1, + ), + (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])], 2, 1), + (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])], 2, 3), + (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1, 0), + (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1, 2), + (Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2, 0), + (Series([], dtype="int64"), [], 2, 1), + ], +) +def test_iter_rolling_series(ser, expected, window, min_periods): + # GH 11704 + expected = [Series(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip( + expected, ser.rolling(window, min_periods=min_periods) + ): + tm.assert_series_equal(actual, expected) + + +@pytest.mark.parametrize( + "expected,expected_index,window", + [ + ( + [[0], [1], [2], [3], [4]], + [ + date_range("2020-01-01", periods=1, freq="D"), + date_range("2020-01-02", periods=1, freq="D"), + date_range("2020-01-03", periods=1, freq="D"), + date_range("2020-01-04", periods=1, freq="D"), + date_range("2020-01-05", periods=1, freq="D"), + ], + "1D", + ), + ( + [[0], [0, 1], [1, 2], [2, 3], [3, 4]], + [ + date_range("2020-01-01", periods=1, freq="D"), + date_range("2020-01-01", periods=2, freq="D"), + date_range("2020-01-02", periods=2, freq="D"), + date_range("2020-01-03", periods=2, freq="D"), + date_range("2020-01-04", periods=2, freq="D"), + ], + "2D", + ), + ( + [[0], [0, 1], [0, 1, 2], [1, 2, 3], [2, 3, 4]], + [ + date_range("2020-01-01", periods=1, freq="D"), + date_range("2020-01-01", periods=2, freq="D"), + date_range("2020-01-01", periods=3, freq="D"), + date_range("2020-01-02", periods=3, freq="D"), + date_range("2020-01-03", periods=3, freq="D"), + ], + "3D", + ), + ], +) +def test_iter_rolling_datetime(expected, expected_index, window): + # GH 11704 + ser = Series(range(5), index=date_range(start="2020-01-01", periods=5, freq="D")) + + expected = [ + Series(values, index=idx) for (values, idx) in zip(expected, expected_index) + ] + + for (expected, actual) in zip(expected, ser.rolling(window)): + tm.assert_series_equal(actual, expected) diff --git a/pandas/tests/window/test_window.py b/pandas/tests/window/test_window.py index c7c45f0e5e0de..a450d29797c41 100644 --- a/pandas/tests/window/test_window.py +++ b/pandas/tests/window/test_window.py @@ -7,70 +7,62 @@ import pandas as pd from pandas import Series from pandas.core.window import Window -from pandas.tests.window.common import Base - - -@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") -class TestWindow(Base): - def setup_method(self, method): - self._create_data() - - @td.skip_if_no_scipy - @pytest.mark.parametrize("which", ["series", "frame"]) - def test_constructor(self, which): - # GH 12669 - - o = getattr(self, which) - c = o.rolling - - # valid - c(win_type="boxcar", window=2, min_periods=1) - c(win_type="boxcar", window=2, min_periods=1, center=True) - c(win_type="boxcar", window=2, min_periods=1, center=False) - - # not valid - for w in [2.0, "foo", np.array([2])]: - with pytest.raises(ValueError, match="min_periods must be an integer"): - c(win_type="boxcar", window=2, min_periods=w) - with pytest.raises(ValueError, match="center must be a boolean"): - c(win_type="boxcar", window=2, min_periods=1, center=w) - - for wt in ["foobar", 1]: - with pytest.raises(ValueError, match="Invalid win_type"): - c(win_type=wt, window=2) - - @td.skip_if_no_scipy - @pytest.mark.parametrize("which", ["series", "frame"]) - def test_constructor_with_win_type(self, which, win_types): - # GH 12669 - o = getattr(self, which) - c = o.rolling - c(win_type=win_types, window=2) - - @pytest.mark.parametrize("method", ["sum", "mean"]) - def test_numpy_compat(self, method): - # see gh-12811 - w = Window(Series([2, 4, 6]), window=[0, 2]) - - msg = "numpy operations are not valid with window objects" - - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(w, method)(1, 2, 3) - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(w, method)(dtype=np.float64) - - @td.skip_if_no_scipy - @pytest.mark.parametrize("arg", ["median", "kurt", "skew"]) - def test_agg_function_support(self, arg): - df = pd.DataFrame({"A": np.arange(5)}) - roll = df.rolling(2, win_type="triang") - - msg = f"'{arg}' is not a valid function for 'Window' object" - with pytest.raises(AttributeError, match=msg): - roll.agg(arg) - - with pytest.raises(AttributeError, match=msg): - roll.agg([arg]) - - with pytest.raises(AttributeError, match=msg): - roll.agg({"A": arg}) + + +@td.skip_if_no_scipy +def test_constructor(which): + # GH 12669 + c = which.rolling + + # valid + c(win_type="boxcar", window=2, min_periods=1) + c(win_type="boxcar", window=2, min_periods=1, center=True) + c(win_type="boxcar", window=2, min_periods=1, center=False) + + # not valid + for w in [2.0, "foo", np.array([2])]: + with pytest.raises(ValueError, match="min_periods must be an integer"): + c(win_type="boxcar", window=2, min_periods=w) + with pytest.raises(ValueError, match="center must be a boolean"): + c(win_type="boxcar", window=2, min_periods=1, center=w) + + for wt in ["foobar", 1]: + with pytest.raises(ValueError, match="Invalid win_type"): + c(win_type=wt, window=2) + + +@td.skip_if_no_scipy +def test_constructor_with_win_type(which, win_types): + # GH 12669 + c = which.rolling + c(win_type=win_types, window=2) + + +@pytest.mark.parametrize("method", ["sum", "mean"]) +def test_numpy_compat(method): + # see gh-12811 + w = Window(Series([2, 4, 6]), window=[0, 2]) + + msg = "numpy operations are not valid with window objects" + + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(w, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(w, method)(dtype=np.float64) + + +@td.skip_if_no_scipy +@pytest.mark.parametrize("arg", ["median", "kurt", "skew"]) +def test_agg_function_support(arg): + df = pd.DataFrame({"A": np.arange(5)}) + roll = df.rolling(2, win_type="triang") + + msg = f"'{arg}' is not a valid function for 'Window' object" + with pytest.raises(AttributeError, match=msg): + roll.agg(arg) + + with pytest.raises(AttributeError, match=msg): + roll.agg([arg]) + + with pytest.raises(AttributeError, match=msg): + roll.agg({"A": arg}) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 6213ea198f2cb..d95ffd5b0876d 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -165,7 +165,7 @@ def to_offset(freq) -> Optional[DateOffset]: ) stride = int(stride) offset = _get_offset(name) - offset = offset * int(np.fabs(stride) * stride_sign) + offset = offset * int(np.fabs(stride) * stride_sign) # type: ignore if delta is None: delta = offset else: diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 4912dc0eb349e..88f77a8d7f054 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1,6 +1,6 @@ from datetime import date, datetime, timedelta import operator -from typing import Any, Optional +from typing import Optional from dateutil.easter import easter import numpy as np @@ -296,7 +296,7 @@ def is_on_offset(self, dt): return True -class SingleConstructorOffset(BaseOffset): +class SingleConstructorMixin: _params = cache_readonly(BaseOffset._params.fget) freqstr = cache_readonly(BaseOffset.freqstr.fget) @@ -308,6 +308,10 @@ def _from_name(cls, suffix=None): return cls() +class SingleConstructorOffset(SingleConstructorMixin, BaseOffset): + pass + + class BusinessDay(BusinessMixin, SingleConstructorOffset): """ DateOffset subclass representing possibly n business days. @@ -316,10 +320,6 @@ class BusinessDay(BusinessMixin, SingleConstructorOffset): _prefix = "B" _attributes = frozenset(["n", "normalize", "offset"]) - def __init__(self, n=1, normalize=False, offset=timedelta(0)): - BaseOffset.__init__(self, n, normalize) - object.__setattr__(self, "_offset", offset) - def _offset_str(self) -> str: def get_str(td): off_str = "" @@ -419,7 +419,15 @@ def is_on_offset(self, dt: datetime) -> bool: return dt.weekday() < 5 -class BusinessHourMixin(liboffsets.BusinessHourMixin): +class BusinessHour(SingleConstructorMixin, liboffsets.BusinessHourMixin): + """ + DateOffset subclass representing possibly n business hours. + """ + + _prefix = "BH" + _anchor = 0 + _attributes = frozenset(["n", "normalize", "start", "end", "offset"]) + @cache_readonly def next_bday(self): """ @@ -679,22 +687,6 @@ def _is_on_offset(self, dt): return False -class BusinessHour(BusinessHourMixin, SingleConstructorOffset): - """ - DateOffset subclass representing possibly n business hours. - """ - - _prefix = "BH" - _anchor = 0 - _attributes = frozenset(["n", "normalize", "start", "end", "offset"]) - - def __init__( - self, n=1, normalize=False, start="09:00", end="17:00", offset=timedelta(0) - ): - BaseOffset.__init__(self, n, normalize) - super().__init__(start=start, end=end, offset=offset) - - class CustomBusinessDay(CustomMixin, BusinessDay): """ DateOffset subclass representing custom business days excluding holidays. @@ -727,9 +719,7 @@ def __init__( calendar=None, offset=timedelta(0), ): - BaseOffset.__init__(self, n, normalize) - object.__setattr__(self, "_offset", offset) - + BusinessDay.__init__(self, n, normalize, offset) CustomMixin.__init__(self, weekmask, holidays, calendar) @apply_wraps @@ -772,7 +762,7 @@ def is_on_offset(self, dt: datetime) -> bool: return np.is_busday(day64, busdaycal=self.calendar) -class CustomBusinessHour(CustomMixin, BusinessHourMixin, SingleConstructorOffset): +class CustomBusinessHour(CustomMixin, BusinessHour): """ DateOffset subclass representing possibly n custom business days. """ @@ -794,11 +784,8 @@ def __init__( end="17:00", offset=timedelta(0), ): - BaseOffset.__init__(self, n, normalize) - object.__setattr__(self, "_offset", offset) - + BusinessHour.__init__(self, n, normalize, start=start, end=end, offset=offset) CustomMixin.__init__(self, weekmask, holidays, calendar) - BusinessHourMixin.__init__(self, start=start, end=end, offset=offset) # --------------------------------------------------------------------- @@ -898,9 +885,7 @@ def __init__( calendar=None, offset=timedelta(0), ): - BaseOffset.__init__(self, n, normalize) - object.__setattr__(self, "_offset", offset) - + BusinessMixin.__init__(self, n, normalize, offset) CustomMixin.__init__(self, weekmask, holidays, calendar) @cache_readonly @@ -980,9 +965,9 @@ def __init__(self, n=1, normalize=False, day_of_month=None): BaseOffset.__init__(self, n, normalize) if day_of_month is None: - object.__setattr__(self, "day_of_month", self._default_day_of_month) - else: - object.__setattr__(self, "day_of_month", int(day_of_month)) + day_of_month = self._default_day_of_month + + object.__setattr__(self, "day_of_month", int(day_of_month)) if not self._min_day_of_month <= self.day_of_month <= 27: raise ValueError( "day_of_month must be " @@ -1308,7 +1293,7 @@ def _from_name(cls, suffix=None): return cls(weekday=weekday) -class WeekOfMonth(liboffsets.WeekOfMonthMixin, SingleConstructorOffset): +class WeekOfMonth(SingleConstructorMixin, liboffsets.WeekOfMonthMixin): """ Describes monthly dates like "the Tuesday of the 2nd week of each month". @@ -1334,12 +1319,9 @@ class WeekOfMonth(liboffsets.WeekOfMonthMixin, SingleConstructorOffset): _attributes = frozenset(["n", "normalize", "week", "weekday"]) def __init__(self, n=1, normalize=False, week=0, weekday=0): - BaseOffset.__init__(self, n, normalize) - object.__setattr__(self, "weekday", weekday) + liboffsets.WeekOfMonthMixin.__init__(self, n, normalize, weekday) object.__setattr__(self, "week", week) - if self.weekday < 0 or self.weekday > 6: - raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}") if self.week < 0 or self.week > 3: raise ValueError(f"Week must be 0<=week<=3, got {self.week}") @@ -1361,11 +1343,6 @@ def _get_offset_day(self, other: datetime) -> int: shift_days = (self.weekday - wday) % 7 return 1 + shift_days + self.week * 7 - @property - def rule_code(self) -> str: - weekday = ccalendar.int_to_weekday.get(self.weekday, "") - return f"{self._prefix}-{self.week + 1}{weekday}" - @classmethod def _from_name(cls, suffix=None): if not suffix: @@ -1377,7 +1354,7 @@ def _from_name(cls, suffix=None): return cls(week=week, weekday=weekday) -class LastWeekOfMonth(liboffsets.WeekOfMonthMixin, SingleConstructorOffset): +class LastWeekOfMonth(SingleConstructorMixin, liboffsets.WeekOfMonthMixin): """ Describes monthly dates in last week of month like "the last Tuesday of each month". @@ -1401,14 +1378,11 @@ class LastWeekOfMonth(liboffsets.WeekOfMonthMixin, SingleConstructorOffset): _attributes = frozenset(["n", "normalize", "weekday"]) def __init__(self, n=1, normalize=False, weekday=0): - BaseOffset.__init__(self, n, normalize) - object.__setattr__(self, "weekday", weekday) + liboffsets.WeekOfMonthMixin.__init__(self, n, normalize, weekday) if self.n == 0: raise ValueError("N cannot be 0") - - if self.weekday < 0 or self.weekday > 6: - raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}") + object.__setattr__(self, "week", -1) def _get_offset_day(self, other: datetime) -> int: """ @@ -1429,11 +1403,6 @@ def _get_offset_day(self, other: datetime) -> int: shift_days = (wday - self.weekday) % 7 return dim - shift_days - @property - def rule_code(self) -> str: - weekday = ccalendar.int_to_weekday.get(self.weekday, "") - return f"{self._prefix}-{weekday}" - @classmethod def _from_name(cls, suffix=None): if not suffix: @@ -2134,35 +2103,7 @@ def is_on_offset(self, dt: datetime) -> bool: # Ticks -def _tick_comp(op): - """ - Tick comparisons should behave identically to Timedelta comparisons. - """ - - def f(self, other): - return op(self.delta, other) - - f.__name__ = f"__{op.__name__}__" - return f - - class Tick(liboffsets._Tick, SingleConstructorOffset): - _inc = Timedelta(microseconds=1000) - _prefix = "undefined" - _attributes = frozenset(["n", "normalize"]) - - def __init__(self, n=1, normalize=False): - BaseOffset.__init__(self, n, normalize) - if normalize: - raise ValueError( - "Tick offset with `normalize=True` are not allowed." - ) # GH#21427 - - __gt__ = _tick_comp(operator.gt) - __ge__ = _tick_comp(operator.ge) - __lt__ = _tick_comp(operator.lt) - __le__ = _tick_comp(operator.le) - def __add__(self, other): if isinstance(other, Tick): if type(self) == type(other): @@ -2180,47 +2121,11 @@ def __add__(self, other): f"the add operation between {self} and {other} will overflow" ) from err - def __eq__(self, other: Any) -> bool: - if isinstance(other, str): - from pandas.tseries.frequencies import to_offset - - try: - # GH#23524 if to_offset fails, we are dealing with an - # incomparable type so == is False and != is True - other = to_offset(other) - except ValueError: - # e.g. "infer" - return False - - return _tick_comp(operator.eq)(self, other) - # This is identical to DateOffset.__hash__, but has to be redefined here # for Python 3, because we've redefined __eq__. def __hash__(self) -> int: return hash(self._params) - def __ne__(self, other): - if isinstance(other, str): - from pandas.tseries.frequencies import to_offset - - try: - # GH#23524 if to_offset fails, we are dealing with an - # incomparable type so == is False and != is True - other = to_offset(other) - except ValueError: - # e.g. "infer" - return True - - return _tick_comp(operator.ne)(self, other) - - @property - def delta(self) -> Timedelta: - return self.n * self._inc - - @property - def nanos(self): - return delta_to_nanoseconds(self.delta) - def apply(self, other): # Timestamp can handle tz and nano sec, thus no need to use apply_wraps if isinstance(other, Timestamp): @@ -2240,6 +2145,9 @@ def apply(self, other): if isinstance(other, timedelta): return other + self.delta elif isinstance(other, type(self)): + # TODO: this is reached in tests that specifically call apply, + # but should not be reached "naturally" because __add__ should + # catch this case first. return type(self)(self.n + other.n) raise ApplyTypeError(f"Unhandled type: {type(other).__name__}") diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 92bfce7ec9c83..80286d5f138ad 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -329,7 +329,7 @@ def wrapper(*args, **kwargs) -> Callable[..., Any]: return decorate -def doc(*args: Union[str, Callable], **kwargs: str) -> Callable[[F], F]: +def doc(*args: Union[str, Callable], **kwargs) -> Callable[[F], F]: """ A decorator take docstring templates, concatenate them and perform string substitution on it. @@ -345,8 +345,8 @@ def doc(*args: Union[str, Callable], **kwargs: str) -> Callable[[F], F]: *args : str or callable The string / docstring / docstring template to be appended in order after default docstring under function. - **kwargs : str - The string which would be used to format docstring template. + **kwargs + The objects which would be used to format docstring template. """ def decorator(func: F) -> F: From 38aefade8abcc56ead2fd5db56a371a23bcbfb8b Mon Sep 17 00:00:00 2001 From: jnecus Date: Tue, 26 May 2020 14:50:49 +0100 Subject: [PATCH 18/53] use ensure_clean rather than explicit os.remove #34384 I have made a start at this issue #34384. Please let me know if I am along the right lines (beginner contributor). I've left a space in code_checks.sh where I expect to implement the check for instances of 'os.remove' throughout the code. --- ci/code_checks.sh | 7 ++++++- pandas/tests/io/excel/test_openpyxl.py | 3 ++- pandas/tests/io/excel/test_writers.py | 4 ++-- pandas/tests/io/pytables/common.py | 3 ++- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index f7a513ca22d53..daa7776d36d1c 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -266,7 +266,12 @@ if mods: sys.exit(len(mods)) " RET=$(($RET + $?)) ; echo $MSG "DONE" - + + MSG='Check code for instances of os.remove' ; echo $MSG + #TODO: insert check here Issue #34384 + RET=$(($RET + $?)) ; echo $MSG "DONE" + + fi ### DOCTESTS ### diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 5f8d58ea1f105..86f9ba1e90e3b 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -8,6 +8,7 @@ import pandas._testing as tm from pandas.io.excel import ExcelWriter, _OpenpyxlWriter +from pandas.testing import ensure_clean openpyxl = pytest.importorskip("openpyxl") @@ -120,4 +121,4 @@ def test_to_excel_with_openpyxl_engine(ext, tmpdir): styled.to_excel(filename, engine="openpyxl") assert filename.exists() - os.remove(filename) + ensure_clean(filename) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 1692e1a8a0dd3..d1ed1ee695db1 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -901,7 +901,7 @@ def test_to_excel_unicode_filename(self, ext, path): # assert 1 == cell_xf.border.bottom_line_style # assert 1 == cell_xf.border.left_line_style # assert 2 == cell_xf.alignment.hor_align - # os.remove(filename) + # ensure_clean(filename) # def test_to_excel_header_styling_xlsx(self, engine, ext): # import StringIO # s = StringIO( @@ -953,7 +953,7 @@ def test_to_excel_unicode_filename(self, ext, path): # mergedcells_addrs = ["C1", "E1", "G1"] # for maddr in mergedcells_addrs: # assert ws.cell(maddr).merged - # os.remove(filename) + # ensure_clean(filename) @pytest.mark.parametrize("use_headers", [True, False]) @pytest.mark.parametrize("r_idx_nlevels", [1, 2, 3]) diff --git a/pandas/tests/io/pytables/common.py b/pandas/tests/io/pytables/common.py index aad18890de3ad..dc31782df68a5 100644 --- a/pandas/tests/io/pytables/common.py +++ b/pandas/tests/io/pytables/common.py @@ -5,6 +5,7 @@ import pytest from pandas.io.pytables import HDFStore +from pandas.testing import ensure_clean tables = pytest.importorskip("tables") # set these parameters so we don't have file sharing @@ -16,7 +17,7 @@ def safe_remove(path): if path is not None: try: - os.remove(path) + ensure_clean(path) except OSError: pass From df79c64eb1cee5c0b02fd92618dacc731b9cd818 Mon Sep 17 00:00:00 2001 From: jnecus Date: Tue, 26 May 2020 15:28:31 +0100 Subject: [PATCH 19/53] use ensure_clean rather than explicit os.remove #34384 --- ci/code_checks.sh | 11 +++++----- pandas/tests/io/excel/test_openpyxl.py | 29 +++++++++++++------------- pandas/tests/io/pytables/common.py | 5 +++-- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index daa7776d36d1c..f355aba16f0e8 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -266,12 +266,11 @@ if mods: sys.exit(len(mods)) " RET=$(($RET + $?)) ; echo $MSG "DONE" - - MSG='Check code for instances of os.remove' ; echo $MSG - #TODO: insert check here Issue #34384 - RET=$(($RET + $?)) ; echo $MSG "DONE" - - + + MSG='Check code for instances of os.remove' ; echo $MSG + invgrep -R --include="*.py*" -E "os.remove" pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + fi ### DOCTESTS ### diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 86f9ba1e90e3b..64e736dcef613 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -8,7 +8,7 @@ import pandas._testing as tm from pandas.io.excel import ExcelWriter, _OpenpyxlWriter -from pandas.testing import ensure_clean +from pandas._testing import ensure_clean openpyxl = pytest.importorskip("openpyxl") @@ -109,16 +109,17 @@ def test_write_append_mode(ext, mode, expected): def test_to_excel_with_openpyxl_engine(ext, tmpdir): - # GH 29854 - df1 = DataFrame({"A": np.linspace(1, 10, 10)}) - df2 = DataFrame({"B": np.linspace(1, 20, 10)}) - df = pd.concat([df1, df2], axis=1) - styled = df.style.applymap( - lambda val: "color: %s" % ("red" if val < 0 else "black") - ).highlight_max() - - filename = tmpdir / "styled.xlsx" - styled.to_excel(filename, engine="openpyxl") - - assert filename.exists() - ensure_clean(filename) + + with ensure_clean('styled.xlsx') as filename: + # GH 29854 + df1 = DataFrame({"A": np.linspace(1, 10, 10)}) + df2 = DataFrame({"B": np.linspace(1, 20, 10)}) + df = pd.concat([df1, df2], axis=1) + styled = df.style.applymap( + lambda val: "color: %s" % ("red" if val < 0 else "black") + ).highlight_max() + + styled.to_excel(filename, engine="openpyxl") + + assert filename.exists() + diff --git a/pandas/tests/io/pytables/common.py b/pandas/tests/io/pytables/common.py index dc31782df68a5..53fd3574e3297 100644 --- a/pandas/tests/io/pytables/common.py +++ b/pandas/tests/io/pytables/common.py @@ -5,7 +5,7 @@ import pytest from pandas.io.pytables import HDFStore -from pandas.testing import ensure_clean +from pandas._testing import ensure_clean tables = pytest.importorskip("tables") # set these parameters so we don't have file sharing @@ -17,7 +17,8 @@ def safe_remove(path): if path is not None: try: - ensure_clean(path) + with ensure_clean(path) as filename: + ensure_clean(filename) except OSError: pass From a6732f9259a1180bc30070dc48b5bfda38a3673e Mon Sep 17 00:00:00 2001 From: jnecus Date: Tue, 26 May 2020 17:05:26 +0100 Subject: [PATCH 20/53] Update test_openpyxl.py --- pandas/tests/io/excel/test_openpyxl.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 64e736dcef613..f685bf793e6c2 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -109,7 +109,7 @@ def test_write_append_mode(ext, mode, expected): def test_to_excel_with_openpyxl_engine(ext, tmpdir): - + with ensure_clean('styled.xlsx') as filename: # GH 29854 df1 = DataFrame({"A": np.linspace(1, 10, 10)}) @@ -122,4 +122,3 @@ def test_to_excel_with_openpyxl_engine(ext, tmpdir): styled.to_excel(filename, engine="openpyxl") assert filename.exists() - From 1db905a6df8c78a48f556dd8a4eea4acaffc8406 Mon Sep 17 00:00:00 2001 From: jnecus Date: Mon, 1 Jun 2020 15:11:52 +0100 Subject: [PATCH 21/53] removed safe_remove entirely --- pandas/tests/io/pytables/common.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/pandas/tests/io/pytables/common.py b/pandas/tests/io/pytables/common.py index 53fd3574e3297..62ba6d4449619 100644 --- a/pandas/tests/io/pytables/common.py +++ b/pandas/tests/io/pytables/common.py @@ -5,7 +5,6 @@ import pytest from pandas.io.pytables import HDFStore -from pandas._testing import ensure_clean tables = pytest.importorskip("tables") # set these parameters so we don't have file sharing @@ -14,15 +13,6 @@ tables.parameters.MAX_THREADS = 1 -def safe_remove(path): - if path is not None: - try: - with ensure_clean(path) as filename: - ensure_clean(filename) - except OSError: - pass - - def safe_close(store): try: if store is not None: From 442d83c22d91eac291f67a68e09eb316af2697f7 Mon Sep 17 00:00:00 2001 From: jnecus Date: Mon, 1 Jun 2020 15:28:40 +0100 Subject: [PATCH 22/53] Update test_openpyxl.py --- pandas/tests/io/excel/test_openpyxl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index f685bf793e6c2..5aacc95e92d8d 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -107,11 +107,11 @@ def test_write_append_mode(ext, mode, expected): for index, cell_value in enumerate(expected): assert wb2.worksheets[index]["A1"].value == cell_value - +# GH 29854 def test_to_excel_with_openpyxl_engine(ext, tmpdir): with ensure_clean('styled.xlsx') as filename: - # GH 29854 + df1 = DataFrame({"A": np.linspace(1, 10, 10)}) df2 = DataFrame({"B": np.linspace(1, 20, 10)}) df = pd.concat([df1, df2], axis=1) From dbb3d7489042bf31249b33311e1e1735eece689a Mon Sep 17 00:00:00 2001 From: jnecus Date: Mon, 1 Jun 2020 15:44:17 +0100 Subject: [PATCH 23/53] sorting imports --- ci/code_checks.sh | 2 +- pandas/tests/io/excel/test_openpyxl.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index f355aba16f0e8..b1d51412f84fd 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -268,7 +268,7 @@ if mods: RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Check code for instances of os.remove' ; echo $MSG - invgrep -R --include="*.py*" -E "os.remove" pandas + invgrep -R --include="*.py*" -E "os.remove" pandas/tests/ RET=$(($RET + $?)) ; echo $MSG "DONE" fi diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 5aacc95e92d8d..cbdf03dc2978d 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -1,14 +1,12 @@ -import os - import numpy as np import pytest import pandas as pd from pandas import DataFrame import pandas._testing as tm +from pandas._testing import ensure_clean from pandas.io.excel import ExcelWriter, _OpenpyxlWriter -from pandas._testing import ensure_clean openpyxl = pytest.importorskip("openpyxl") From 2799d13714b8cd7b9448985fa1eaca8a075181ea Mon Sep 17 00:00:00 2001 From: jnecus Date: Mon, 1 Jun 2020 15:50:46 +0100 Subject: [PATCH 24/53] Update test_openpyxl.py --- pandas/tests/io/excel/test_openpyxl.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index cbdf03dc2978d..80fc4764c105a 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -4,7 +4,6 @@ import pandas as pd from pandas import DataFrame import pandas._testing as tm -from pandas._testing import ensure_clean from pandas.io.excel import ExcelWriter, _OpenpyxlWriter @@ -108,8 +107,8 @@ def test_write_append_mode(ext, mode, expected): # GH 29854 def test_to_excel_with_openpyxl_engine(ext, tmpdir): - with ensure_clean('styled.xlsx') as filename: - + with tm.ensure_clean('styled.xlsx') as filename: + df1 = DataFrame({"A": np.linspace(1, 10, 10)}) df2 = DataFrame({"B": np.linspace(1, 20, 10)}) df = pd.concat([df1, df2], axis=1) From 3bef5f406963d0f15ddf571f62e0019c268d20bf Mon Sep 17 00:00:00 2001 From: jnecus Date: Mon, 1 Jun 2020 16:04:44 +0100 Subject: [PATCH 25/53] replace safe_remove with ensure clean --- pandas/tests/io/pytables/test_store.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index fe59b989bab7e..f0c89fa6ebfdd 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -40,7 +40,6 @@ ensure_clean_path, ensure_clean_store, safe_close, - safe_remove, tables, ) @@ -91,7 +90,7 @@ def test_context(self, setup_path): except ValueError: pass finally: - safe_remove(path) + tm.ensure_clean(path) try: with HDFStore(path) as tbl: @@ -101,7 +100,7 @@ def test_context(self, setup_path): assert len(tbl) == 1 assert type(tbl["a"]) == DataFrame finally: - safe_remove(path) + tm.ensure_clean(path) def test_conv_read_write(self, setup_path): path = create_tempfile(setup_path) @@ -127,7 +126,7 @@ def roundtrip(key, obj, **kwargs): tm.assert_frame_equal(df[df.index > 2], result) finally: - safe_remove(path) + tm.ensure_clean(path) def test_long_strings(self, setup_path): @@ -4168,7 +4167,7 @@ def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): os.close(fd) except (OSError, ValueError): pass - safe_remove(new_f) + tm.ensure_clean(new_f) # new table df = tm.makeDataFrame() @@ -4181,7 +4180,7 @@ def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): do_copy(f=path) do_copy(f=path, propindexes=False) finally: - safe_remove(path) + tm.ensure_clean(path) def test_store_datetime_fractional_secs(self, setup_path): From bb79fa3aaf1209a8a3e9de4daa9771714ace6b78 Mon Sep 17 00:00:00 2001 From: jnecus Date: Mon, 1 Jun 2020 16:14:09 +0100 Subject: [PATCH 26/53] Update test_openpyxl.py --- pandas/tests/io/excel/test_openpyxl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 80fc4764c105a..92c4105a209f0 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -104,9 +104,9 @@ def test_write_append_mode(ext, mode, expected): for index, cell_value in enumerate(expected): assert wb2.worksheets[index]["A1"].value == cell_value -# GH 29854 -def test_to_excel_with_openpyxl_engine(ext, tmpdir): +def test_to_excel_with_openpyxl_engine(ext, tmpdir): + # GH 29854 with tm.ensure_clean('styled.xlsx') as filename: df1 = DataFrame({"A": np.linspace(1, 10, 10)}) From 6c3108e29095bd3b2e7b20ffabbaa9b51389cf87 Mon Sep 17 00:00:00 2001 From: jnecus Date: Mon, 1 Jun 2020 16:40:33 +0100 Subject: [PATCH 27/53] Update common.py --- pandas/tests/io/pytables/common.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/pytables/common.py b/pandas/tests/io/pytables/common.py index 62ba6d4449619..b1496f1f6874d 100644 --- a/pandas/tests/io/pytables/common.py +++ b/pandas/tests/io/pytables/common.py @@ -4,6 +4,8 @@ import pytest +import pandas._testing as tm + from pandas.io.pytables import HDFStore tables = pytest.importorskip("tables") @@ -43,7 +45,7 @@ def ensure_clean_store(path, mode="a", complevel=None, complib=None, fletcher32= finally: safe_close(store) if mode == "w" or mode == "a": - safe_remove(path) + tm.ensure_clean(path) @contextmanager @@ -62,7 +64,7 @@ def ensure_clean_path(path): yield filenames[0] finally: for f in filenames: - safe_remove(f) + tm.ensure_clean(f) def _maybe_remove(store, key): From 3f2af9de83e5d324c0e71ddd67a55a0314ba7cbd Mon Sep 17 00:00:00 2001 From: jnecus Date: Tue, 2 Jun 2020 08:47:42 +0100 Subject: [PATCH 28/53] linting correction, excluding os.remove in common.py --- ci/code_checks.sh | 2 +- pandas/tests/io/excel/test_openpyxl.py | 2 +- pandas/tests/io/pytables/common.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index b1d51412f84fd..d0d8ef869203a 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -268,7 +268,7 @@ if mods: RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Check code for instances of os.remove' ; echo $MSG - invgrep -R --include="*.py*" -E "os.remove" pandas/tests/ + invgrep -R --include="*.py*" --exclude "common.py" -E "os.remove" pandas/tests/ RET=$(($RET + $?)) ; echo $MSG "DONE" fi diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 92c4105a209f0..8404359fbeff1 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -107,7 +107,7 @@ def test_write_append_mode(ext, mode, expected): def test_to_excel_with_openpyxl_engine(ext, tmpdir): # GH 29854 - with tm.ensure_clean('styled.xlsx') as filename: + with tm.ensure_clean("styled.xlsx") as filename: df1 = DataFrame({"A": np.linspace(1, 10, 10)}) df2 = DataFrame({"B": np.linspace(1, 20, 10)}) diff --git a/pandas/tests/io/pytables/common.py b/pandas/tests/io/pytables/common.py index b1496f1f6874d..09dba2dc083d9 100644 --- a/pandas/tests/io/pytables/common.py +++ b/pandas/tests/io/pytables/common.py @@ -45,7 +45,7 @@ def ensure_clean_store(path, mode="a", complevel=None, complib=None, fletcher32= finally: safe_close(store) if mode == "w" or mode == "a": - tm.ensure_clean(path) + os.remove(path) @contextmanager @@ -64,7 +64,7 @@ def ensure_clean_path(path): yield filenames[0] finally: for f in filenames: - tm.ensure_clean(f) + os.remove(f) def _maybe_remove(store, key): From 6cc69f055c9f0ecc5eb7708e2c111883cb7fa850 Mon Sep 17 00:00:00 2001 From: jnecus Date: Tue, 2 Jun 2020 09:14:27 +0100 Subject: [PATCH 29/53] linting --- pandas/tests/io/pytables/common.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/io/pytables/common.py b/pandas/tests/io/pytables/common.py index 09dba2dc083d9..54703bd877a7e 100644 --- a/pandas/tests/io/pytables/common.py +++ b/pandas/tests/io/pytables/common.py @@ -4,8 +4,6 @@ import pytest -import pandas._testing as tm - from pandas.io.pytables import HDFStore tables = pytest.importorskip("tables") From 02eeab1583eff03104dec82dc178cdff5016b986 Mon Sep 17 00:00:00 2001 From: jnecus Date: Tue, 2 Jun 2020 10:08:50 +0100 Subject: [PATCH 30/53] Update test_openpyxl.py --- pandas/tests/io/excel/test_openpyxl.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 8404359fbeff1..c88a5c406f4ba 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -117,5 +117,3 @@ def test_to_excel_with_openpyxl_engine(ext, tmpdir): ).highlight_max() styled.to_excel(filename, engine="openpyxl") - - assert filename.exists() From 2fb17a3b210c62de7504088753cb3483f14c60fb Mon Sep 17 00:00:00 2001 From: jnecus Date: Wed, 12 Aug 2020 11:08:01 +0100 Subject: [PATCH 31/53] Update code_checks.sh --- ci/code_checks.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 884e9158418ef..8928f5d9f1adb 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -272,9 +272,6 @@ if mods: " RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Check code for instances of os.remove' ; echo $MSG - invgrep -R --include="*.py*" --exclude "common.py" -E "os.remove" pandas/tests/ - RET=$(($RET + $?)) ; echo $MSG "DONE" fi From 4034fd6d3eeff1b4dd0a958fc369adb13a0e3e36 Mon Sep 17 00:00:00 2001 From: jnecus Date: Wed, 12 Aug 2020 11:18:18 +0100 Subject: [PATCH 32/53] Update code_checks.sh --- ci/code_checks.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 8928f5d9f1adb..816bb23865c04 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -272,7 +272,6 @@ if mods: " RET=$(($RET + $?)) ; echo $MSG "DONE" - fi ### DOCTESTS ### From af8b575a0e66631043cff39f703eae618941e6a4 Mon Sep 17 00:00:00 2001 From: jnecus Date: Fri, 2 Oct 2020 11:37:54 +0100 Subject: [PATCH 33/53] Update test_store.py --- pandas/tests/io/pytables/test_store.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index a1135142d1bd2..f4ce21d2e0f29 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -90,7 +90,7 @@ def test_context(self, setup_path): except ValueError: pass finally: - tm.ensure_clean(path) + with tm.ensure_clean(path): try: with HDFStore(path) as tbl: @@ -100,7 +100,7 @@ def test_context(self, setup_path): assert len(tbl) == 1 assert type(tbl["a"]) == DataFrame finally: - tm.ensure_clean(path) + with tm.ensure_clean(path): def test_conv_read_write(self, setup_path): path = create_tempfile(setup_path) @@ -126,7 +126,7 @@ def roundtrip(key, obj, **kwargs): tm.assert_frame_equal(df[df.index > 2], result) finally: - tm.ensure_clean(path) + with tm.ensure_clean(path): def test_long_strings(self, setup_path): @@ -4259,7 +4259,8 @@ def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): os.close(fd) except (OSError, ValueError): pass - tm.ensure_clean(new_f) + with tm.ensure_clean(new_f): + # new table df = tm.makeDataFrame() @@ -4272,7 +4273,7 @@ def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): do_copy(f=path) do_copy(f=path, propindexes=False) finally: - tm.ensure_clean(path) + with tm.ensure_clean(path): def test_store_datetime_fractional_secs(self, setup_path): From 7a3011ebee2f7be567b367185d24888439297245 Mon Sep 17 00:00:00 2001 From: jnecus Date: Mon, 5 Oct 2020 14:32:02 +0100 Subject: [PATCH 34/53] updated ensure_clean as context manager --- pandas/tests/io/excel/test_writers.py | 4 +- pandas/tests/io/pytables/test_store.py | 54 +++++++++++--------------- 2 files changed, 25 insertions(+), 33 deletions(-) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index a3fe427927cba..e3ee53b63e102 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -929,7 +929,7 @@ def test_to_excel_unicode_filename(self, ext, path): # assert 1 == cell_xf.border.bottom_line_style # assert 1 == cell_xf.border.left_line_style # assert 2 == cell_xf.alignment.hor_align - # ensure_clean(filename) + # os.remove(filename) # def test_to_excel_header_styling_xlsx(self, engine, ext): # import StringIO # s = StringIO( @@ -981,7 +981,7 @@ def test_to_excel_unicode_filename(self, ext, path): # mergedcells_addrs = ["C1", "E1", "G1"] # for maddr in mergedcells_addrs: # assert ws.cell(maddr).merged - # ensure_clean(filename) + # os.remove(filename) @pytest.mark.parametrize("use_headers", [True, False]) @pytest.mark.parametrize("r_idx_nlevels", [1, 2, 3]) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index f4ce21d2e0f29..f01de09b17b15 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -83,29 +83,26 @@ def test_format_kwarg_in_constructor(self, setup_path): HDFStore(path, format="table") def test_context(self, setup_path): - path = create_tempfile(setup_path) - try: - with HDFStore(path) as tbl: - raise ValueError("blah") - except ValueError: - pass - finally: - with tm.ensure_clean(path): - - try: - with HDFStore(path) as tbl: - tbl["a"] = tm.makeDataFrame() - - with HDFStore(path) as tbl: - assert len(tbl) == 1 - assert type(tbl["a"]) == DataFrame - finally: - with tm.ensure_clean(path): + with tm.ensure_clean() as path: + try: + with HDFStore(path) as tbl: + raise ValueError("blah") + except ValueError: + pass + try: + with HDFStore(path) as tbl: + tbl["a"] = tm.makeDataFrame() + except ValueError: + pass + try: + with HDFStore(path) as tbl: + assert len(tbl) == 1 + assert type(tbl["a"]) == DataFrame + except ValueError: + pass def test_conv_read_write(self, setup_path): - path = create_tempfile(setup_path) - try: - + with tm.ensure_clean() as path: def roundtrip(key, obj, **kwargs): obj.to_hdf(path, key, **kwargs) return read_hdf(path, key) @@ -125,8 +122,6 @@ def roundtrip(key, obj, **kwargs): result = read_hdf(path, "table", where=["index>2"]) tm.assert_frame_equal(df[df.index > 2], result) - finally: - with tm.ensure_clean(path): def test_long_strings(self, setup_path): @@ -4259,21 +4254,18 @@ def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): os.close(fd) except (OSError, ValueError): pass - with tm.ensure_clean(new_f): + os.remove(new_f) + # new table + df = tm.makeDataFrame() - # new table - df = tm.makeDataFrame() - - try: - path = create_tempfile(setup_path) + try: + with tm.ensure_clean() as path: st = HDFStore(path) st.append("df", df, data_columns=["A"]) st.close() do_copy(f=path) do_copy(f=path, propindexes=False) - finally: - with tm.ensure_clean(path): def test_store_datetime_fractional_secs(self, setup_path): From ee95e7161013907badaa8ed5aa6009bb47ef915b Mon Sep 17 00:00:00 2001 From: jnecus Date: Mon, 5 Oct 2020 14:34:23 +0100 Subject: [PATCH 35/53] Update test_store.py --- pandas/tests/io/pytables/test_store.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index f01de09b17b15..c9ec88cde1147 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -4256,16 +4256,15 @@ def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): pass os.remove(new_f) - # new table - df = tm.makeDataFrame() - - try: - with tm.ensure_clean() as path: - st = HDFStore(path) - st.append("df", df, data_columns=["A"]) - st.close() - do_copy(f=path) - do_copy(f=path, propindexes=False) + # new table + df = tm.makeDataFrame() + try: + with tm.ensure_clean() as path: + st = HDFStore(path) + st.append("df", df, data_columns=["A"]) + st.close() + do_copy(f=path) + do_copy(f=path, propindexes=False) def test_store_datetime_fractional_secs(self, setup_path): From 7791b7d7b9a7c6656196b4bf65884bb9941c6115 Mon Sep 17 00:00:00 2001 From: jnecus Date: Mon, 5 Oct 2020 15:54:11 +0100 Subject: [PATCH 36/53] Update test_store.py --- pandas/tests/io/pytables/test_store.py | 91 +++++++++++++------------- 1 file changed, 44 insertions(+), 47 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index c9ec88cde1147..bfe8f443f5632 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -122,7 +122,6 @@ def roundtrip(key, obj, **kwargs): result = read_hdf(path, "table", where=["index>2"]) tm.assert_frame_equal(df[df.index > 2], result) - def test_long_strings(self, setup_path): # GH6166 @@ -4212,53 +4211,51 @@ def test_legacy_table_read_py2(self, datapath, setup_path): tm.assert_frame_equal(expected, result) def test_copy(self, setup_path): - - with catch_warnings(record=True): - - def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): - try: - store = HDFStore(f, "r") - - if new_f is None: - import tempfile - - fd, new_f = tempfile.mkstemp() - - tstore = store.copy( - new_f, keys=keys, propindexes=propindexes, **kwargs - ) - - # check keys - if keys is None: - keys = store.keys() - assert set(keys) == set(tstore.keys()) - - # check indices & nrows - for k in tstore.keys(): - if tstore.get_storer(k).is_table: - new_t = tstore.get_storer(k) - orig_t = store.get_storer(k) - - assert orig_t.nrows == new_t.nrows - - # check propindixes - if propindexes: - for a in orig_t.axes: - if a.is_indexed: - assert new_t[a.name].is_indexed - - finally: - safe_close(store) - safe_close(tstore) + + with catch_warnings(record=True): + + def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): try: - os.close(fd) - except (OSError, ValueError): - pass - os.remove(new_f) - - # new table - df = tm.makeDataFrame() - try: + store = HDFStore(f, "r") + + if new_f is None: + import tempfile + + fd, new_f = tempfile.mkstemp() + + tstore = store.copy( + new_f, keys=keys, propindexes=propindexes, **kwargs + ) + + # check keys + if keys is None: + keys = store.keys() + assert set(keys) == set(tstore.keys()) + + # check indices & nrows + for k in tstore.keys(): + if tstore.get_storer(k).is_table: + new_t = tstore.get_storer(k) + orig_t = store.get_storer(k) + + assert orig_t.nrows == new_t.nrows + + # check propindixes + if propindexes: + for a in orig_t.axes: + if a.is_indexed: + assert new_t[a.name].is_indexed + + finally: + safe_close(store) + safe_close(tstore) + try: + os.close(fd) + except (OSError, ValueError): + pass + os.remove(new_f) + # new table + df = tm.makeDataFrame() with tm.ensure_clean() as path: st = HDFStore(path) st.append("df", df, data_columns=["A"]) From 045d7975752684f690c39324648943b8087f5aa9 Mon Sep 17 00:00:00 2001 From: jnecus Date: Tue, 6 Oct 2020 09:25:37 +0100 Subject: [PATCH 37/53] Update test_store.py --- pandas/tests/io/pytables/test_store.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index bfe8f443f5632..ace6474954a97 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -36,7 +36,6 @@ import pandas._testing as tm from pandas.tests.io.pytables.common import ( _maybe_remove, - create_tempfile, ensure_clean_path, ensure_clean_store, safe_close, @@ -4211,41 +4210,30 @@ def test_legacy_table_read_py2(self, datapath, setup_path): tm.assert_frame_equal(expected, result) def test_copy(self, setup_path): - with catch_warnings(record=True): - def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): try: store = HDFStore(f, "r") - if new_f is None: import tempfile - fd, new_f = tempfile.mkstemp() - - tstore = store.copy( - new_f, keys=keys, propindexes=propindexes, **kwargs - ) - + tstore = store.copy( + new_f, keys=keys, propindexes=propindexes, **kwargs) # check keys if keys is None: keys = store.keys() assert set(keys) == set(tstore.keys()) - # check indices & nrows for k in tstore.keys(): if tstore.get_storer(k).is_table: new_t = tstore.get_storer(k) orig_t = store.get_storer(k) - assert orig_t.nrows == new_t.nrows - # check propindixes if propindexes: for a in orig_t.axes: if a.is_indexed: assert new_t[a.name].is_indexed - finally: safe_close(store) safe_close(tstore) From 72ec98759e0daab55fda5e56e39569a7297ac8f1 Mon Sep 17 00:00:00 2001 From: jnecus Date: Tue, 6 Oct 2020 11:11:18 +0100 Subject: [PATCH 38/53] Update test_store.py --- pandas/tests/io/pytables/test_store.py | 90 +++++++++++++++----------- 1 file changed, 51 insertions(+), 39 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index ace6474954a97..7e47c5c30a670 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -4210,46 +4210,58 @@ def test_legacy_table_read_py2(self, datapath, setup_path): tm.assert_frame_equal(expected, result) def test_copy(self, setup_path): - with catch_warnings(record=True): - def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): + + with catch_warnings(record=True): + + def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): + try: + store = HDFStore(f, "r") + + if new_f is None: + import tempfile + + fd, new_f = tempfile.mkstemp() + + tstore = store.copy( + new_f, keys=keys, propindexes=propindexes, **kwargs) + + # check keys + if keys is None: + keys = store.keys() + assert set(keys) == set(tstore.keys()) + + # check indices & nrows + for k in tstore.keys(): + if tstore.get_storer(k).is_table: + new_t = tstore.get_storer(k) + orig_t = store.get_storer(k) + + assert orig_t.nrows == new_t.nrows + + # check propindixes + if propindexes: + for a in orig_t.axes: + if a.is_indexed: + assert new_t[a.name].is_indexed + + finally: + safe_close(store) + safe_close(tstore) try: - store = HDFStore(f, "r") - if new_f is None: - import tempfile - fd, new_f = tempfile.mkstemp() - tstore = store.copy( - new_f, keys=keys, propindexes=propindexes, **kwargs) - # check keys - if keys is None: - keys = store.keys() - assert set(keys) == set(tstore.keys()) - # check indices & nrows - for k in tstore.keys(): - if tstore.get_storer(k).is_table: - new_t = tstore.get_storer(k) - orig_t = store.get_storer(k) - assert orig_t.nrows == new_t.nrows - # check propindixes - if propindexes: - for a in orig_t.axes: - if a.is_indexed: - assert new_t[a.name].is_indexed - finally: - safe_close(store) - safe_close(tstore) - try: - os.close(fd) - except (OSError, ValueError): - pass - os.remove(new_f) - # new table - df = tm.makeDataFrame() - with tm.ensure_clean() as path: - st = HDFStore(path) - st.append("df", df, data_columns=["A"]) - st.close() - do_copy(f=path) - do_copy(f=path, propindexes=False) + os.close(fd) + except (OSError, ValueError): + pass + os.remove(new_f) + + # new table + df = tm.makeDataFrame() + + with tm.ensure_clean() as path: + st = HDFStore(path) + st.append("df", df, data_columns=["A"]) + st.close() + do_copy(f=path) + do_copy(f=path, propindexes=False) def test_store_datetime_fractional_secs(self, setup_path): From f812a392014202cf0805c96c78c020da3f31ecdd Mon Sep 17 00:00:00 2001 From: jnecus Date: Tue, 6 Oct 2020 14:27:49 +0100 Subject: [PATCH 39/53] Update test_store.py --- pandas/tests/io/pytables/test_store.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 7e47c5c30a670..d2fa94b1dc5e5 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -33,6 +33,7 @@ isna, timedelta_range, ) + import pandas._testing as tm from pandas.tests.io.pytables.common import ( _maybe_remove, @@ -4223,7 +4224,7 @@ def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): fd, new_f = tempfile.mkstemp() tstore = store.copy( - new_f, keys=keys, propindexes=propindexes, **kwargs) + new_f, keys=keys, propindexes=propindexes, **kwargs) # check keys if keys is None: From 657541941a3297857feeb0587a01e70760add30f Mon Sep 17 00:00:00 2001 From: jnecus Date: Tue, 6 Oct 2020 14:35:21 +0100 Subject: [PATCH 40/53] Update test_store.py --- pandas/tests/io/pytables/test_store.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index d2fa94b1dc5e5..ee6ad3bd90815 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -33,7 +33,6 @@ isna, timedelta_range, ) - import pandas._testing as tm from pandas.tests.io.pytables.common import ( _maybe_remove, From 22258def53f91780b08741c2eb5b634a2006bd2f Mon Sep 17 00:00:00 2001 From: jnecus Date: Tue, 6 Oct 2020 14:49:36 +0100 Subject: [PATCH 41/53] Update test_store.py --- pandas/tests/io/pytables/test_store.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index ee6ad3bd90815..68780192c6823 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -102,6 +102,7 @@ def test_context(self, setup_path): def test_conv_read_write(self, setup_path): with tm.ensure_clean() as path: + def roundtrip(key, obj, **kwargs): obj.to_hdf(path, key, **kwargs) return read_hdf(path, key) @@ -4223,7 +4224,8 @@ def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): fd, new_f = tempfile.mkstemp() tstore = store.copy( - new_f, keys=keys, propindexes=propindexes, **kwargs) + new_f, keys=keys, propindexes=propindexes, **kwargs + ) # check keys if keys is None: From 3e41dc58a3c99d733eb6af4b9045b6b610182f8f Mon Sep 17 00:00:00 2001 From: jnecus Date: Wed, 7 Oct 2020 10:22:01 +0100 Subject: [PATCH 42/53] Update test_store.py --- pandas/tests/io/pytables/test_store.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 68780192c6823..517788e552615 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -77,7 +77,7 @@ def test_format_kwarg_in_constructor(self, setup_path): msg = "format is not a defined argument for HDFStore" - with ensure_clean_path(setup_path) as path: + with tm.ensure_clean(setup_path) as path: with pytest.raises(ValueError, match=msg): HDFStore(path, format="table") @@ -597,7 +597,7 @@ def test_reopen_handle(self, setup_path): def test_open_args(self, setup_path): - with ensure_clean_path(setup_path) as path: + with tm.ensure_clean(setup_path) as path: df = tm.makeDataFrame() @@ -4703,7 +4703,7 @@ def test_read_hdf_generic_buffer_errors(self): def test_invalid_complib(self, setup_path): df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) - with ensure_clean_path(setup_path) as path: + with tm.ensure_clean(setup_path) as path: with pytest.raises(ValueError): df.to_hdf(path, "df", complib="foolib") From b3e8d966d0c5f83dc2f2f122d8e1c9246f1927be Mon Sep 17 00:00:00 2001 From: jnecus Date: Wed, 7 Oct 2020 10:42:37 +0100 Subject: [PATCH 43/53] added safe_remove --- pandas/tests/io/pytables/common.py | 12 ++++++++++-- pandas/tests/io/pytables/test_store.py | 4 ++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/pytables/common.py b/pandas/tests/io/pytables/common.py index 54703bd877a7e..aad18890de3ad 100644 --- a/pandas/tests/io/pytables/common.py +++ b/pandas/tests/io/pytables/common.py @@ -13,6 +13,14 @@ tables.parameters.MAX_THREADS = 1 +def safe_remove(path): + if path is not None: + try: + os.remove(path) + except OSError: + pass + + def safe_close(store): try: if store is not None: @@ -43,7 +51,7 @@ def ensure_clean_store(path, mode="a", complevel=None, complib=None, fletcher32= finally: safe_close(store) if mode == "w" or mode == "a": - os.remove(path) + safe_remove(path) @contextmanager @@ -62,7 +70,7 @@ def ensure_clean_path(path): yield filenames[0] finally: for f in filenames: - os.remove(f) + safe_remove(f) def _maybe_remove(store, key): diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 517788e552615..33e096125431a 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -613,8 +613,8 @@ def test_open_args(self, setup_path): store.close() - # the file should not have actually been written - assert not os.path.exists(path) + # the file should not have actually been written + assert not os.path.exists(path) def test_flush(self, setup_path): From 2063623d6ddab47ba4ee5789e3565b7e2da4b233 Mon Sep 17 00:00:00 2001 From: jnecus Date: Thu, 8 Oct 2020 14:43:11 +0100 Subject: [PATCH 44/53] test_context exception --- pandas/tests/io/excel/test_openpyxl.py | 4 ++-- pandas/tests/io/pytables/test_store.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index c88a5c406f4ba..1349808277d81 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -105,9 +105,9 @@ def test_write_append_mode(ext, mode, expected): assert wb2.worksheets[index]["A1"].value == cell_value -def test_to_excel_with_openpyxl_engine(ext, tmpdir): +def test_to_excel_with_openpyxl_engine(ext): # GH 29854 - with tm.ensure_clean("styled.xlsx") as filename: + with tm.ensure_clean(ext) as filename: df1 = DataFrame({"A": np.linspace(1, 10, 10)}) df2 = DataFrame({"B": np.linspace(1, 20, 10)}) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 33e096125431a..66c81976e775e 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -82,23 +82,23 @@ def test_format_kwarg_in_constructor(self, setup_path): HDFStore(path, format="table") def test_context(self, setup_path): - with tm.ensure_clean() as path: + with tm.ensure_clean(setup_path) as path: try: with HDFStore(path) as tbl: raise ValueError("blah") except ValueError: - pass + print("Context error") try: with HDFStore(path) as tbl: tbl["a"] = tm.makeDataFrame() except ValueError: - pass + print("Context error") try: with HDFStore(path) as tbl: assert len(tbl) == 1 assert type(tbl["a"]) == DataFrame except ValueError: - pass + print("Context error") def test_conv_read_write(self, setup_path): with tm.ensure_clean() as path: From 9de691ca13fa103d836d49347e7b688a82252838 Mon Sep 17 00:00:00 2001 From: jnecus Date: Thu, 8 Oct 2020 16:59:23 +0100 Subject: [PATCH 45/53] Update test_store.py --- pandas/tests/io/pytables/test_store.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 66c81976e775e..711e6242687b6 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -87,18 +87,15 @@ def test_context(self, setup_path): with HDFStore(path) as tbl: raise ValueError("blah") except ValueError: - print("Context error") + pass try: with HDFStore(path) as tbl: tbl["a"] = tm.makeDataFrame() except ValueError: print("Context error") - try: - with HDFStore(path) as tbl: - assert len(tbl) == 1 - assert type(tbl["a"]) == DataFrame - except ValueError: - print("Context error") + with HDFStore(path) as tbl: + assert len(tbl) == 1 + assert type(tbl["a"]) == DataFrame def test_conv_read_write(self, setup_path): with tm.ensure_clean() as path: From de874af0a79d865ad04c858c0a3d448bf49c88b4 Mon Sep 17 00:00:00 2001 From: jnecus Date: Thu, 8 Oct 2020 17:01:34 +0100 Subject: [PATCH 46/53] Update test_store.py --- pandas/tests/io/pytables/test_store.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 711e6242687b6..0fa833eba1f07 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -88,11 +88,8 @@ def test_context(self, setup_path): raise ValueError("blah") except ValueError: pass - try: - with HDFStore(path) as tbl: + with HDFStore(path) as tbl: tbl["a"] = tm.makeDataFrame() - except ValueError: - print("Context error") with HDFStore(path) as tbl: assert len(tbl) == 1 assert type(tbl["a"]) == DataFrame From 330922202e4890d852230dabaf7e3d52b97c18a6 Mon Sep 17 00:00:00 2001 From: jnecus Date: Thu, 8 Oct 2020 17:08:14 +0100 Subject: [PATCH 47/53] Update test_store.py --- pandas/tests/io/pytables/test_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 0fa833eba1f07..04a579b69fe5b 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -89,7 +89,7 @@ def test_context(self, setup_path): except ValueError: pass with HDFStore(path) as tbl: - tbl["a"] = tm.makeDataFrame() + tbl["a"] = tm.makeDataFrame() with HDFStore(path) as tbl: assert len(tbl) == 1 assert type(tbl["a"]) == DataFrame From 4c8091c3550229e6bdbdbdce049f9d41b6756b5b Mon Sep 17 00:00:00 2001 From: jnecus Date: Thu, 8 Oct 2020 20:53:39 +0100 Subject: [PATCH 48/53] Update test_store.py --- pandas/tests/io/pytables/test_store.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 04a579b69fe5b..a947e94bec48d 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -4218,8 +4218,7 @@ def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): fd, new_f = tempfile.mkstemp() tstore = store.copy( - new_f, keys=keys, propindexes=propindexes, **kwargs - ) + new_f, keys=keys, propindexes=propindexes, **kwargs) # check keys if keys is None: From 9474ebc403af41cccc13cb4e16a1bd65b7f48ab6 Mon Sep 17 00:00:00 2001 From: jnecus Date: Fri, 9 Oct 2020 10:06:40 +0100 Subject: [PATCH 49/53] Update test_store.py --- pandas/tests/io/pytables/test_store.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index a947e94bec48d..3159a9ce1831c 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -4216,9 +4216,7 @@ def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): import tempfile fd, new_f = tempfile.mkstemp() - - tstore = store.copy( - new_f, keys=keys, propindexes=propindexes, **kwargs) + tstore = store.copy(new_f, keys=keys, propindexes=propindexes, **kwargs) # check keys if keys is None: From 42a6bda8552452f1e3e95eef8e5496cdec4f36fa Mon Sep 17 00:00:00 2001 From: jnecus Date: Fri, 9 Oct 2020 15:00:24 +0100 Subject: [PATCH 50/53] Update test_store.py --- pandas/tests/io/pytables/test_store.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 3159a9ce1831c..ef91f48b07fa0 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -4216,7 +4216,9 @@ def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): import tempfile fd, new_f = tempfile.mkstemp() - tstore = store.copy(new_f, keys=keys, propindexes=propindexes, **kwargs) + tstore = store.copy( + new_f, keys=keys, propindexes=propindexes, **kwargs + ) # check keys if keys is None: From 76020951ba17eaafbfde9506cc4849e5370f2b66 Mon Sep 17 00:00:00 2001 From: jnecus Date: Fri, 9 Oct 2020 15:53:40 +0100 Subject: [PATCH 51/53] Update test_store.py --- pandas/tests/io/pytables/test_store.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index ef91f48b07fa0..f9342dca4cdbc 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -4216,9 +4216,9 @@ def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): import tempfile fd, new_f = tempfile.mkstemp() - tstore = store.copy( - new_f, keys=keys, propindexes=propindexes, **kwargs - ) + tstore = store.copy( + new_f, keys=keys, propindexes=propindexes, **kwargs + ) # check keys if keys is None: From 740a1da516ad3d60dac9c708b0727cff5955d070 Mon Sep 17 00:00:00 2001 From: jnecus Date: Sun, 11 Oct 2020 10:46:15 +0100 Subject: [PATCH 52/53] Update test_store.py --- pandas/tests/io/pytables/test_store.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index f9342dca4cdbc..c6071ba38b2cc 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -88,8 +88,10 @@ def test_context(self, setup_path): raise ValueError("blah") except ValueError: pass + with tm.ensure_clean(setup_path) as path: with HDFStore(path) as tbl: tbl["a"] = tm.makeDataFrame() + with tm.ensure_clean(setup_path) as path: with HDFStore(path) as tbl: assert len(tbl) == 1 assert type(tbl["a"]) == DataFrame From b923b19195158b3a927b0f6fe9d26000515e501a Mon Sep 17 00:00:00 2001 From: jnecus Date: Sun, 11 Oct 2020 11:07:54 +0100 Subject: [PATCH 53/53] Update test_store.py --- pandas/tests/io/pytables/test_store.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index c6071ba38b2cc..4b404c050c4e9 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -91,8 +91,6 @@ def test_context(self, setup_path): with tm.ensure_clean(setup_path) as path: with HDFStore(path) as tbl: tbl["a"] = tm.makeDataFrame() - with tm.ensure_clean(setup_path) as path: - with HDFStore(path) as tbl: assert len(tbl) == 1 assert type(tbl["a"]) == DataFrame