From e7f4f4e34c441c9a66f9f76d56f1731e978395db Mon Sep 17 00:00:00 2001 From: Ming Li Date: Fri, 26 Jan 2018 12:16:46 +0000 Subject: [PATCH 01/13] reindex resulting NDFrame after percentage calculation --- pandas/core/generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6e777281b11e1..bee954aa9bba8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7315,6 +7315,7 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, rs = (data.div(data.shift(periods=periods, freq=freq, axis=axis, **kwargs)) - 1) + rs = rs.reindex_like(data) if freq is None: mask = isna(com._values_from_object(self)) np.putmask(rs.values, mask, np.nan) From d0baf8f309b6281225f3cdf764b95b747d84dea5 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Fri, 26 Jan 2018 12:17:53 +0000 Subject: [PATCH 02/13] update pct_change test case with freq shift --- pandas/tests/frame/test_timeseries.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 3af798acdede5..c87c1cc69e13d 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -108,7 +108,9 @@ def test_pct_change(self): rs = self.tsframe.pct_change(freq='5D') filled = self.tsframe.fillna(method='pad') - assert_frame_equal(rs, filled / filled.shift(freq='5D') - 1) + assert_frame_equal(rs, + (filled / filled.shift(freq='5D') - 1) + .reindex_like(filled)) def test_pct_change_shift_over_nas(self): s = Series([1., 1.5, np.nan, 2.5, 3.]) From 63701a30aa6e79cd7fa530b20893ea338e6878cc Mon Sep 17 00:00:00 2001 From: Ming Li Date: Fri, 26 Jan 2018 12:18:29 +0000 Subject: [PATCH 03/13] update pct_change test case with freq shift --- pandas/tests/series/test_timeseries.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 7be801629e387..0a9387f1af688 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -344,7 +344,9 @@ def test_pct_change(self): rs = self.ts.pct_change(freq='5D') filled = self.ts.fillna(method='pad') - assert_series_equal(rs, filled / filled.shift(freq='5D') - 1) + assert_series_equal(rs, + (filled / filled.shift(freq='5D') - 1) + .reindex_like(filled)) def test_pct_change_shift_over_nas(self): s = Series([1., 1.5, np.nan, 2.5, 3.]) From 8cfb77a9a75a880be9a8f577a72471a20307d96e Mon Sep 17 00:00:00 2001 From: Ming Li Date: Fri, 26 Jan 2018 12:45:47 +0000 Subject: [PATCH 04/13] whatsnew entry for issue:7292 --- doc/source/whatsnew/v0.23.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 4dde76dee46a5..88d42c42edf6d 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -419,6 +419,7 @@ Datetimelike - Bug in ``.astype()`` to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`) - Bug in subtracting :class:`Series` from ``NaT`` incorrectly returning ``NaT`` (:issue:`19158`) - Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`) +- Bug in :func:`NDFrame.pct_change` produces inconsistent frames using ``periods`` and ``freq`` (:issue:`7292`) Timezones ^^^^^^^^^ From e594e04fe731452133cf5c4bb31379d4173c6343 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Fri, 26 Jan 2018 14:08:35 +0000 Subject: [PATCH 05/13] add new test case for consistent pct_change output --- pandas/tests/frame/test_timeseries.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index c87c1cc69e13d..1aacc218d583f 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -122,6 +122,11 @@ def test_pct_change_shift_over_nas(self): edf = DataFrame({'a': expected, 'b': expected}) assert_frame_equal(chg, edf) + def test_pct_change_periods_freq(self): + rs_periods = self.tsframe.pct_change(5) + rs_freq = self.tsframe.pct_change(periods=1, freq='5B') + assert_frame_equal(rs_freq, rs_periods) + def test_frame_ctor_datetime64_column(self): rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') dates = np.asarray(rng) From dc9d64e68638dda8196788055c69de8a429f19c2 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Fri, 26 Jan 2018 14:08:51 +0000 Subject: [PATCH 06/13] add new test case for consistent pct_change output --- pandas/tests/series/test_timeseries.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 0a9387f1af688..fb3e4c17e5df9 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -355,6 +355,11 @@ def test_pct_change_shift_over_nas(self): expected = Series([np.nan, 0.5, np.nan, 2.5 / 1.5 - 1, .2]) assert_series_equal(chg, expected) + def test_pct_change_periods_freq(self): + rs_periods = self.ts.pct_change(5) + rs_freq = self.ts.pct_change(periods=1, freq='5B') + assert_series_equal(rs_freq, rs_periods) + def test_autocorr(self): # Just run the function corr1 = self.ts.autocorr() From 22e0c34fc9e16bb74f975f39f49f17ba783e7721 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Fri, 26 Jan 2018 16:30:41 +0000 Subject: [PATCH 07/13] rephase bugfix documentation --- doc/source/whatsnew/v0.23.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 88d42c42edf6d..09cdb48cd5a55 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -419,7 +419,7 @@ Datetimelike - Bug in ``.astype()`` to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`) - Bug in subtracting :class:`Series` from ``NaT`` incorrectly returning ``NaT`` (:issue:`19158`) - Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`) -- Bug in :func:`NDFrame.pct_change` produces inconsistent frames using ``periods`` and ``freq`` (:issue:`7292`) +- Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` produces different sizes frames/series (:issue:`7292`) Timezones ^^^^^^^^^ From a1d804fcb299c60c34249d828e1b7f5840ebfcd0 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sat, 27 Jan 2018 20:40:16 +0000 Subject: [PATCH 08/13] additional test cases test_pct_change_periods_freq --- pandas/tests/frame/test_timeseries.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 1aacc218d583f..aa3017d81085c 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -123,8 +123,32 @@ def test_pct_change_shift_over_nas(self): assert_frame_equal(chg, edf) def test_pct_change_periods_freq(self): + # see issue #7292 + rs_freq = self.tsframe.pct_change(freq='5B') rs_periods = self.tsframe.pct_change(5) - rs_freq = self.tsframe.pct_change(periods=1, freq='5B') + assert_frame_equal(rs_freq, rs_periods) + + rs_freq = self.tsframe.pct_change(freq='3B', fill_method=None) + rs_periods = self.tsframe.pct_change(3, fill_method=None) + assert_frame_equal(rs_freq, rs_periods) + + rs_freq = self.tsframe.pct_change(freq='3B', fill_method='bfill') + rs_periods = self.tsframe.pct_change(3, fill_method='bfill') + assert_frame_equal(rs_freq, rs_periods) + + rs_freq = \ + self.tsframe.pct_change(freq='7B', fill_method='pad', limit=1) + rs_periods = self.tsframe.pct_change(7, fill_method='pad', limit=1) + assert_frame_equal(rs_freq, rs_periods) + + rs_freq = \ + self.tsframe.pct_change(freq='7B', fill_method='bfill', limit=3) + rs_periods = self.tsframe.pct_change(7, fill_method='bfill', limit=3) + assert_frame_equal(rs_freq, rs_periods) + + empty_ts = self.tsframe.applymap(lambda x: np.NaN) + rs_freq = empty_ts.pct_change(freq='14B') + rs_periods = empty_ts.pct_change(14) assert_frame_equal(rs_freq, rs_periods) def test_frame_ctor_datetime64_column(self): From 106bdf4b40a65583904287478a6f9228b088cdae Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sat, 27 Jan 2018 20:40:25 +0000 Subject: [PATCH 09/13] additional test cases test_pct_change_periods_freq --- pandas/tests/series/test_timeseries.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index fb3e4c17e5df9..39134977ee091 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -356,8 +356,30 @@ def test_pct_change_shift_over_nas(self): assert_series_equal(chg, expected) def test_pct_change_periods_freq(self): + # see issue #7292 + rs_freq = self.ts.pct_change(freq='5B') rs_periods = self.ts.pct_change(5) - rs_freq = self.ts.pct_change(periods=1, freq='5B') + assert_series_equal(rs_freq, rs_periods) + + rs_freq = self.ts.pct_change(freq='3B', fill_method=None) + rs_periods = self.ts.pct_change(3, fill_method=None) + assert_series_equal(rs_freq, rs_periods) + + rs_freq = self.ts.pct_change(freq='3B', fill_method='bfill') + rs_periods = self.ts.pct_change(3, fill_method='bfill') + assert_series_equal(rs_freq, rs_periods) + + rs_freq = self.ts.pct_change(freq='7B', fill_method='pad', limit=1) + rs_periods = self.ts.pct_change(7, fill_method='pad', limit=1) + assert_series_equal(rs_freq, rs_periods) + + rs_freq = self.ts.pct_change(freq='7B', fill_method='bfill', limit=3) + rs_periods = self.ts.pct_change(7, fill_method='bfill', limit=3) + assert_series_equal(rs_freq, rs_periods) + + empty_ts = self.ts.apply(lambda x: np.NaN) + rs_freq = empty_ts.pct_change(freq='14B') + rs_periods = empty_ts.pct_change(14) assert_series_equal(rs_freq, rs_periods) def test_autocorr(self): From b6377dfc9b908123615a0b56a022658d89c106a4 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sun, 28 Jan 2018 01:01:08 +0000 Subject: [PATCH 10/13] empty containre without numpy --- pandas/tests/frame/test_timeseries.py | 3 ++- pandas/tests/series/test_timeseries.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index aa3017d81085c..a673ee6fc8618 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -146,7 +146,8 @@ def test_pct_change_periods_freq(self): rs_periods = self.tsframe.pct_change(7, fill_method='bfill', limit=3) assert_frame_equal(rs_freq, rs_periods) - empty_ts = self.tsframe.applymap(lambda x: np.NaN) + empty_ts = \ + DataFrame(index=self.tsframe.index, columns=self.tsframe.columns) rs_freq = empty_ts.pct_change(freq='14B') rs_periods = empty_ts.pct_change(14) assert_frame_equal(rs_freq, rs_periods) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 39134977ee091..d5c325ae20f77 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -377,7 +377,7 @@ def test_pct_change_periods_freq(self): rs_periods = self.ts.pct_change(7, fill_method='bfill', limit=3) assert_series_equal(rs_freq, rs_periods) - empty_ts = self.ts.apply(lambda x: np.NaN) + empty_ts = Series(index=self.ts.index) rs_freq = empty_ts.pct_change(freq='14B') rs_periods = empty_ts.pct_change(14) assert_series_equal(rs_freq, rs_periods) From b08d4c82509aac48230a4191e61e1e211eae2b0e Mon Sep 17 00:00:00 2001 From: minggli Date: Tue, 30 Jan 2018 20:03:25 +0000 Subject: [PATCH 11/13] rephase documentation whatsnew --- doc/source/whatsnew/v0.23.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 09cdb48cd5a55..b5b52c7b9c89b 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -419,7 +419,7 @@ Datetimelike - Bug in ``.astype()`` to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`) - Bug in subtracting :class:`Series` from ``NaT`` incorrectly returning ``NaT`` (:issue:`19158`) - Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`) -- Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` produces different sizes frames/series (:issue:`7292`) +- Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`) Timezones ^^^^^^^^^ From 27aee76197c766cbc0dd05519a0ba150e05bb21e Mon Sep 17 00:00:00 2001 From: minggli Date: Tue, 30 Jan 2018 20:04:50 +0000 Subject: [PATCH 12/13] remove slash for line continuation --- pandas/tests/frame/test_timeseries.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index a673ee6fc8618..e6b47fd69cb05 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -123,7 +123,7 @@ def test_pct_change_shift_over_nas(self): assert_frame_equal(chg, edf) def test_pct_change_periods_freq(self): - # see issue #7292 + # GH 7292 rs_freq = self.tsframe.pct_change(freq='5B') rs_periods = self.tsframe.pct_change(5) assert_frame_equal(rs_freq, rs_periods) @@ -136,18 +136,20 @@ def test_pct_change_periods_freq(self): rs_periods = self.tsframe.pct_change(3, fill_method='bfill') assert_frame_equal(rs_freq, rs_periods) - rs_freq = \ - self.tsframe.pct_change(freq='7B', fill_method='pad', limit=1) + rs_freq = self.tsframe.pct_change(freq='7B', + fill_method='pad', + limit=1) rs_periods = self.tsframe.pct_change(7, fill_method='pad', limit=1) assert_frame_equal(rs_freq, rs_periods) - rs_freq = \ - self.tsframe.pct_change(freq='7B', fill_method='bfill', limit=3) + rs_freq = self.tsframe.pct_change(freq='7B', + fill_method='bfill', + limit=3) rs_periods = self.tsframe.pct_change(7, fill_method='bfill', limit=3) assert_frame_equal(rs_freq, rs_periods) - empty_ts = \ - DataFrame(index=self.tsframe.index, columns=self.tsframe.columns) + empty_ts = DataFrame(index=self.tsframe.index, + columns=self.tsframe.columns) rs_freq = empty_ts.pct_change(freq='14B') rs_periods = empty_ts.pct_change(14) assert_frame_equal(rs_freq, rs_periods) From 75b8942ae2a084dfe2b603a25cff26b6a5235399 Mon Sep 17 00:00:00 2001 From: minggli Date: Tue, 30 Jan 2018 20:05:16 +0000 Subject: [PATCH 13/13] change comment to GH 7292 --- pandas/tests/series/test_timeseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index d5c325ae20f77..7a1aff1cc223c 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -356,7 +356,7 @@ def test_pct_change_shift_over_nas(self): assert_series_equal(chg, expected) def test_pct_change_periods_freq(self): - # see issue #7292 + # GH 7292 rs_freq = self.ts.pct_change(freq='5B') rs_periods = self.ts.pct_change(5) assert_series_equal(rs_freq, rs_periods)