From 0e4e1c2b7f54c1d1bdd7372ac2e1eb6c1df4ce10 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Tue, 12 Feb 2019 23:52:00 +0100 Subject: [PATCH 01/23] Add skipna to pct_change (#25006) --- pandas/core/generic.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ef629361c291a..4d40220b62599 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9948,11 +9948,24 @@ def _check_percentile(self, q): """ @Appender(_shared_docs['pct_change'] % _shared_doc_kwargs) - def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, - **kwargs): + def pct_change(self, periods=1, fill_method=None, limit=None, freq=None, + skipna=False, **kwargs): + if skipna and fill_method is not None: + raise ValueError("cannot pass both skipna and fill_method") + elif skipna and limit is not None: + raise ValueError("cannot pass both skipna and limit") + if skipna and self._typ == 'dataframe': + return self.apply( + lambda s: s.pct_change(periods=periods, + fill_method=fill_method, limit=limit, + freq=freq, skipna=skipna, **kwargs) + ) # TODO: Not sure if above is correct - need someone to confirm. axis = self._get_axis_number(kwargs.pop('axis', self._stat_axis_name)) - if fill_method is None: + if skipna: + # mask = self.isna() + data = self.dropna() + elif fill_method is None: data = self else: data = self.fillna(method=fill_method, limit=limit, axis=axis) @@ -9963,6 +9976,8 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, if freq is None: mask = isna(com.values_from_object(data)) np.putmask(rs.values, mask, np.nan) + if skipna: + rs = rs.reindex_like(self) return rs def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs): From 4be1bdca61d049abcf2a099b955c76beb8d834bb Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Tue, 12 Feb 2019 23:52:20 +0100 Subject: [PATCH 02/23] Add tests --- pandas/tests/frame/test_analytics.py | 17 +++++++++++++++++ pandas/tests/generic/test_generic.py | 14 ++++++++++++++ pandas/tests/series/test_analytics.py | 12 ++++++++++++ 3 files changed, 43 insertions(+) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 2e690ebbfa121..00d9835090c34 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -6,6 +6,7 @@ import warnings import numpy as np +from numpy import nan import pytest from pandas.compat import PY35, lrange @@ -18,6 +19,7 @@ import pandas.core.algorithms as algorithms import pandas.core.nanops as nanops import pandas.util.testing as tm +from pandas.util.testing import (assert_frame_equal) def assert_stat_op_calc(opname, alternative, frame, has_skipna=True, @@ -1364,6 +1366,21 @@ def test_pct_change(self): tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("periods, expected_vals", [ + (1, [[nan, nan], [nan, nan], [1., nan], [0.5, 1.], [nan, 0.5], + [0.33333333, nan], [nan, 0.33333333]]), + (2, [[nan, nan], [nan, nan], [nan, nan], [ 2., nan], [nan, 2.], + [ 1., nan], [nan, 1.]]) + ]) + def test_pct_change_skipna(self, periods, expected_vals): + # GH25006 + df = DataFrame([[nan, nan], [ 1., nan], [ 2., 1.], [ 3., 2.], + [nan, 3.], [ 4., nan], [nan, 4.]]) + result = df.pct_change(skipna=True, periods=periods) + expected = DataFrame(expected_vals) + assert_frame_equal(result, expected) + + # ---------------------------------------------------------------------- # Index of max / min diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index fb17b47948336..8b29b4f8dbe39 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -608,6 +608,20 @@ def test_pct_change(self, periods, fill_method, limit, exp): else: tm.assert_series_equal(res, Series(exp)) + @pytest.mark.xfail(raises=ValueError) + @pytest.mark.parametrize('fill_method, limit', [ + ('backfill', None), + ('bfill', None), + ('pad', None), + ('ffill', None), + (None, 1) + ]) + def test_pct_change_skipna_error(self, fill_method, limit): + # GH25006 + vals = [np.nan, np.nan, 1, 2, np.nan, 4, 10, np.nan] + obj = self._typ(vals) + _ = obj.pct_change(skipna=True, fill_method=fill_method, limit=limit) + assert True class TestNDFrame(object): # tests that don't fit elsewhere diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 6811e370726b2..4f65785412100 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -212,6 +212,18 @@ def test_cummax_timedelta64(self): result = s.cummax(skipna=False) tm.assert_series_equal(expected, result) + @pytest.mark.parametrize("periods, expected_vals", [ + (1, [nan, nan, 1.0, 0.5, nan, 0.333333333333333, nan]), + (2, [nan, nan, nan, 2.0, nan, 1.0, nan]) + ]) + def test_pct_change_skipna(self, periods, expected_vals): + # GH25006 + vals = [nan, 1., 2., 3., nan, 4., nan] + s = Series(vals) + result = s.pct_change(skipna=True, periods=periods) + expected = Series(expected_vals) + assert_series_equal(expected, result) + def test_npdiff(self): pytest.skip("skipping due to Series no longer being an " "ndarray") From 192bded8f8bd67d787ad182695e78bed53da0ef6 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Wed, 13 Feb 2019 00:02:00 +0100 Subject: [PATCH 03/23] Fix PEP8 issues --- pandas/tests/frame/test_analytics.py | 8 ++++---- pandas/tests/series/test_analytics.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 00d9835090c34..e4da742e81f62 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1369,13 +1369,13 @@ def test_pct_change(self): @pytest.mark.parametrize("periods, expected_vals", [ (1, [[nan, nan], [nan, nan], [1., nan], [0.5, 1.], [nan, 0.5], [0.33333333, nan], [nan, 0.33333333]]), - (2, [[nan, nan], [nan, nan], [nan, nan], [ 2., nan], [nan, 2.], - [ 1., nan], [nan, 1.]]) + (2, [[nan, nan], [nan, nan], [nan, nan], [2., nan], [nan, 2.], + [1., nan], [nan, 1.]]) ]) def test_pct_change_skipna(self, periods, expected_vals): # GH25006 - df = DataFrame([[nan, nan], [ 1., nan], [ 2., 1.], [ 3., 2.], - [nan, 3.], [ 4., nan], [nan, 4.]]) + df = DataFrame([[nan, nan], [1., nan], [2., 1.], [3., 2.],[nan, 3.], + [4., nan], [nan, 4.]]) result = df.pct_change(skipna=True, periods=periods) expected = DataFrame(expected_vals) assert_frame_equal(result, expected) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 4f65785412100..464453e1fbc04 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -218,7 +218,7 @@ def test_cummax_timedelta64(self): ]) def test_pct_change_skipna(self, periods, expected_vals): # GH25006 - vals = [nan, 1., 2., 3., nan, 4., nan] + vals = [nan, 1., 2., 3., nan, 4., nan] s = Series(vals) result = s.pct_change(skipna=True, periods=periods) expected = Series(expected_vals) From bb74285834b4bb487ee54b63cffd658f7e3bdbcc Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Wed, 13 Feb 2019 07:24:34 +0100 Subject: [PATCH 04/23] Fix PEP8 issue --- pandas/tests/frame/test_analytics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index e4da742e81f62..ca605b72e6f15 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1374,7 +1374,7 @@ def test_pct_change(self): ]) def test_pct_change_skipna(self, periods, expected_vals): # GH25006 - df = DataFrame([[nan, nan], [1., nan], [2., 1.], [3., 2.],[nan, 3.], + df = DataFrame([[nan, nan], [1., nan], [2., 1.], [3., 2.], [nan, 3.], [4., nan], [nan, 4.]]) result = df.pct_change(skipna=True, periods=periods) expected = DataFrame(expected_vals) From 4418bf1e0a5fb1dc484c0d828fcb0d43a0cbb91b Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Wed, 13 Feb 2019 07:31:06 +0100 Subject: [PATCH 05/23] Fix test --- pandas/tests/frame/test_timeseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index bc37317f72802..4055b9ff2895d 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -142,7 +142,7 @@ def test_pct_change_shift_over_nas(self): df = DataFrame({'a': s, 'b': s}) - chg = df.pct_change() + chg = df.pct_change(fill_method='ffill') expected = Series([np.nan, 0.5, 0., 2.5 / 1.5 - 1, .2]) edf = DataFrame({'a': expected, 'b': expected}) assert_frame_equal(chg, edf) From 3670ffe74197b4395611b49196ec51e97596730e Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Wed, 13 Feb 2019 07:34:35 +0100 Subject: [PATCH 06/23] Fix test --- pandas/tests/series/test_timeseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index d082b023e1f27..5722b745809e0 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -399,7 +399,7 @@ def test_pct_change(self): def test_pct_change_shift_over_nas(self): s = Series([1., 1.5, np.nan, 2.5, 3.]) - chg = s.pct_change() + chg = s.pct_change(fill_method='ffill') expected = Series([np.nan, 0.5, 0., 2.5 / 1.5 - 1, .2]) assert_series_equal(chg, expected) From 8f36c7aea348c5d50df62510cf086c8c7a28f637 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Wed, 13 Feb 2019 23:38:13 +0100 Subject: [PATCH 07/23] Fix tests --- pandas/tests/frame/test_analytics.py | 16 +++++------ pandas/tests/generic/test_generic.py | 11 +++++--- pandas/tests/series/test_analytics.py | 39 +++++++++++++-------------- 3 files changed, 32 insertions(+), 34 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index ca605b72e6f15..ea8a20bec0aa2 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -6,7 +6,6 @@ import warnings import numpy as np -from numpy import nan import pytest from pandas.compat import PY35, lrange @@ -19,7 +18,6 @@ import pandas.core.algorithms as algorithms import pandas.core.nanops as nanops import pandas.util.testing as tm -from pandas.util.testing import (assert_frame_equal) def assert_stat_op_calc(opname, alternative, frame, has_skipna=True, @@ -1367,18 +1365,18 @@ def test_pct_change(self): tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("periods, expected_vals", [ - (1, [[nan, nan], [nan, nan], [1., nan], [0.5, 1.], [nan, 0.5], - [0.33333333, nan], [nan, 0.33333333]]), - (2, [[nan, nan], [nan, nan], [nan, nan], [2., nan], [nan, 2.], - [1., nan], [nan, 1.]]) + (1, [[np.nan, np.nan], [np.nan, np.nan], [1., np.nan], [0.5, 1.], + [np.nan, 0.5], [0.33333333, np.nan], [np.nan, 0.33333333]]), + (2, [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan], + [2., np.nan], [np.nan, 2.], [1., np.nan], [np.nan, 1.]]) ]) def test_pct_change_skipna(self, periods, expected_vals): # GH25006 - df = DataFrame([[nan, nan], [1., nan], [2., 1.], [3., 2.], [nan, 3.], - [4., nan], [nan, 4.]]) + df = DataFrame([[np.nan, np.nan], [1., np.nan], [2., 1.], [3., 2.], + [np.nan, 3.], [4., np.nan], [np.nan, 4.]]) result = df.pct_change(skipna=True, periods=periods) expected = DataFrame(expected_vals) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # ---------------------------------------------------------------------- diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 8b29b4f8dbe39..bca4b2fa21de8 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -618,10 +618,13 @@ def test_pct_change(self, periods, fill_method, limit, exp): ]) def test_pct_change_skipna_error(self, fill_method, limit): # GH25006 - vals = [np.nan, np.nan, 1, 2, np.nan, 4, 10, np.nan] - obj = self._typ(vals) - _ = obj.pct_change(skipna=True, fill_method=fill_method, limit=limit) - assert True + if self._typ is DataFrame or self._typ is Series: + vals = [np.nan, np.nan, 1, 2, np.nan, 4, 10, np.nan] + obj = self._typ(vals) + _ = obj.pct_change(skipna=True, fill_method=fill_method, limit=limit) + assert False + else: + raise ValueError() class TestNDFrame(object): # tests that don't fit elsewhere diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 464453e1fbc04..fefff8bda8234 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -6,7 +6,6 @@ import operator import numpy as np -from numpy import nan import pytest from pandas.compat import PY35, lrange, range @@ -213,12 +212,12 @@ def test_cummax_timedelta64(self): tm.assert_series_equal(expected, result) @pytest.mark.parametrize("periods, expected_vals", [ - (1, [nan, nan, 1.0, 0.5, nan, 0.333333333333333, nan]), - (2, [nan, nan, nan, 2.0, nan, 1.0, nan]) + (1, [np.nan, np.nan, 1.0, 0.5, np.nan, 0.333333333333333, np.nan]), + (2, [np.nan, np.nan, np.nan, 2.0, np.nan, 1.0, np.nan]) ]) def test_pct_change_skipna(self, periods, expected_vals): # GH25006 - vals = [nan, 1., 2., 3., nan, 4., nan] + vals = [np.nan, 1., 2., 3., np.nan, 4., np.nan] s = Series(vals) result = s.pct_change(skipna=True, periods=periods) expected = Series(expected_vals) @@ -232,7 +231,7 @@ def test_npdiff(self): s = Series(np.arange(5)) r = np.diff(s) - assert_series_equal(Series([nan, 0, 0, 0, nan]), r) + assert_series_equal(Series([np.nan, 0, 0, 0, np.nan]), r) def _check_accum_op(self, name, datetime_series_, check_dtype=True): func = getattr(np, name) @@ -457,14 +456,14 @@ def test_count(self, datetime_series): assert datetime_series.count() == np.isfinite(datetime_series).sum() - mi = MultiIndex.from_arrays([list('aabbcc'), [1, 2, 2, nan, 1, 2]]) + mi = MultiIndex.from_arrays([list('aabbcc'), [1, 2, 2, np.nan, 1, 2]]) ts = Series(np.arange(len(mi)), index=mi) left = ts.count(level=1) - right = Series([2, 3, 1], index=[1, 2, nan]) + right = Series([2, 3, 1], index=[1, 2, np.nan]) assert_series_equal(left, right) - ts.iloc[[0, 3, 5]] = nan + ts.iloc[[0, 3, 5]] = np.nan assert_series_equal(ts.count(level=1), right - 1) def test_dot(self): @@ -685,11 +684,11 @@ def test_cummethods_bool(self): result = getattr(s, method)() assert_series_equal(result, expected) - e = pd.Series([False, True, nan, False]) - cse = pd.Series([0, 1, nan, 1], dtype=object) - cpe = pd.Series([False, 0, nan, 0]) - cmin = pd.Series([False, False, nan, False]) - cmax = pd.Series([False, True, nan, True]) + e = pd.Series([False, True, np.nan, False]) + cse = pd.Series([0, 1, np.nan, 1], dtype=object) + cpe = pd.Series([False, 0, np.nan, 0]) + cmin = pd.Series([False, False, np.nan, False]) + cmax = pd.Series([False, True, np.nan, True]) expecteds = {'cumsum': cse, 'cumprod': cpe, 'cummin': cmin, @@ -966,15 +965,13 @@ def test_shift_categorical(self): assert_index_equal(s.values.categories, sn2.values.categories) def test_unstack(self): - from numpy import nan - index = MultiIndex(levels=[['bar', 'foo'], ['one', 'three', 'two']], codes=[[1, 1, 0, 0], [0, 1, 0, 2]]) s = Series(np.arange(4.), index=index) unstacked = s.unstack() - expected = DataFrame([[2., nan, 3.], [0., 1., nan]], + expected = DataFrame([[2., np.nan, 3.], [0., 1., np.nan]], index=['bar', 'foo'], columns=['one', 'three', 'two']) @@ -998,17 +995,17 @@ def test_unstack(self): idx = pd.MultiIndex.from_arrays([[101, 102], [3.5, np.nan]]) ts = pd.Series([1, 2], index=idx) left = ts.unstack() - right = DataFrame([[nan, 1], [2, nan]], index=[101, 102], - columns=[nan, 3.5]) + right = DataFrame([[np.nan, 1], [2, np.nan]], index=[101, 102], + columns=[np.nan, 3.5]) assert_frame_equal(left, right) idx = pd.MultiIndex.from_arrays([['cat', 'cat', 'cat', 'dog', 'dog' ], ['a', 'a', 'b', 'a', 'b'], [1, 2, 1, 1, np.nan]]) ts = pd.Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx) - right = DataFrame([[1.0, 1.3], [1.1, nan], [nan, 1.4], [1.2, nan]], - columns=['cat', 'dog']) - tpls = [('a', 1), ('a', 2), ('b', nan), ('b', 1)] + right = DataFrame([[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], + [1.2, np.nan]], columns=['cat', 'dog']) + tpls = [('a', 1), ('a', 2), ('b', np.nan), ('b', 1)] right.index = pd.MultiIndex.from_tuples(tpls) assert_frame_equal(ts.unstack(level=0), right) From add18de0a9e285dad49c9f89664b6a10e1985ef8 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Thu, 14 Feb 2019 07:04:04 +0100 Subject: [PATCH 08/23] Fix linting --- pandas/tests/frame/test_analytics.py | 1 - pandas/tests/generic/test_generic.py | 6 ++++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index ea8a20bec0aa2..d2293b0085afa 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1378,7 +1378,6 @@ def test_pct_change_skipna(self, periods, expected_vals): expected = DataFrame(expected_vals) tm.assert_frame_equal(result, expected) - # ---------------------------------------------------------------------- # Index of max / min diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index bca4b2fa21de8..a87d38d72430b 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -621,11 +621,13 @@ def test_pct_change_skipna_error(self, fill_method, limit): if self._typ is DataFrame or self._typ is Series: vals = [np.nan, np.nan, 1, 2, np.nan, 4, 10, np.nan] obj = self._typ(vals) - _ = obj.pct_change(skipna=True, fill_method=fill_method, limit=limit) - assert False + result = obj.pct_change(skipna=True, fill_method=fill_method, + limit=limit) + assert result is False else: raise ValueError() + class TestNDFrame(object): # tests that don't fit elsewhere From 279f433c62a79b2ea57d97ef03b0267bea4a5ed4 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Wed, 20 Feb 2019 23:45:17 +0100 Subject: [PATCH 09/23] Set default skipna=True --- pandas/core/generic.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 805398f37b548..2d8c4944a9c0b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9954,21 +9954,21 @@ def _check_percentile(self, q): @Appender(_shared_docs['pct_change'] % _shared_doc_kwargs) def pct_change(self, periods=1, fill_method=None, limit=None, freq=None, - skipna=False, **kwargs): + skipna=None, **kwargs): if skipna and fill_method is not None: raise ValueError("cannot pass both skipna and fill_method") elif skipna and limit is not None: raise ValueError("cannot pass both skipna and limit") + if skipna is None and fill_method is None and limit is None: + skipna = True if skipna and self._typ == 'dataframe': return self.apply( - lambda s: s.pct_change(periods=periods, - fill_method=fill_method, limit=limit, - freq=freq, skipna=skipna, **kwargs) + lambda s: s.pct_change(periods=periods, freq=freq, + skipna=skipna, **kwargs) ) # TODO: Not sure if above is correct - need someone to confirm. axis = self._get_axis_number(kwargs.pop('axis', self._stat_axis_name)) if skipna: - # mask = self.isna() data = self.dropna() elif fill_method is None: data = self From 4072ca0cbf9875e24700fa99aabd55dafc64533f Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Wed, 20 Feb 2019 23:46:12 +0100 Subject: [PATCH 10/23] Use pytest.raises --- pandas/tests/generic/test_generic.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index aea5258297175..2407a7c7cd214 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -607,7 +607,6 @@ def test_pct_change(self, periods, fill_method, limit, exp): else: tm.assert_series_equal(res, Series(exp)) - @pytest.mark.xfail(raises=ValueError) @pytest.mark.parametrize('fill_method, limit', [ ('backfill', None), ('bfill', None), @@ -615,16 +614,14 @@ def test_pct_change(self, periods, fill_method, limit, exp): ('ffill', None), (None, 1) ]) - def test_pct_change_skipna_error(self, fill_method, limit): + def test_pct_change_skipna_raises(self, fill_method, limit): # GH25006 if self._typ is DataFrame or self._typ is Series: vals = [np.nan, np.nan, 1, 2, np.nan, 4, 10, np.nan] obj = self._typ(vals) - result = obj.pct_change(skipna=True, fill_method=fill_method, + with pytest.raises(ValueError): + obj.pct_change(skipna=True, fill_method=fill_method, limit=limit) - assert result is False - else: - raise ValueError() class TestNDFrame(object): From 80a09c9e48caf1a8a742dc6c4a41f3beb9769adf Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Sat, 2 Mar 2019 09:53:57 +0100 Subject: [PATCH 11/23] Address requested changes --- pandas/core/generic.py | 24 +++++++++++++++++++----- pandas/tests/frame/test_analytics.py | 20 +++++++++++++------- pandas/tests/generic/test_generic.py | 15 +++++++++------ 3 files changed, 41 insertions(+), 18 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b54d143e7813d..62d5003b27d84 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9845,6 +9845,8 @@ def _check_percentile(self, q): Parameters ---------- + skipna : bool, default True + Exclude NA/null values before computing percent change. periods : int, default 1 Periods to shift for forming percent change. fill_method : str, default 'pad' @@ -9892,8 +9894,9 @@ def _check_percentile(self, q): 2 -0.055556 dtype: float64 - See the percentage change in a Series where filling NAs with last - valid observation forward to next valid. + See the computing of percentage change in a Series with NAs. With + default skipped NAs, NAs are ignored before the computation and kept + afterwards. >>> s = pd.Series([90, 91, None, 85]) >>> s @@ -9903,6 +9906,17 @@ def _check_percentile(self, q): 3 85.0 dtype: float64 + >>> s.pct_change() + 0 NaN + 1 0.011111 + 2 NaN + 3 -0.065934 + dtype: float64 + + On the other hand, if a fill method is set, NAs are filled before the + computation. See forward fill method fills NAs with last valid + observation forward to next valid. + >>> s.pct_change(fill_method='ffill') 0 NaN 1 0.011111 @@ -9952,15 +9966,15 @@ def _check_percentile(self, q): """ @Appender(_shared_docs['pct_change'] % _shared_doc_kwargs) - def pct_change(self, periods=1, fill_method=None, limit=None, freq=None, - skipna=None, **kwargs): + def pct_change(self, skipna=None, periods=1, fill_method=None, limit=None, + freq=None, **kwargs): if skipna and fill_method is not None: raise ValueError("cannot pass both skipna and fill_method") elif skipna and limit is not None: raise ValueError("cannot pass both skipna and limit") if skipna is None and fill_method is None and limit is None: skipna = True - if skipna and self._typ == 'dataframe': + if skipna and isinstance(self, pd.DataFrame): return self.apply( lambda s: s.pct_change(periods=periods, freq=freq, skipna=skipna, **kwargs) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index e5aa3272e0aa5..a5d20a3d4f58e 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1364,17 +1364,23 @@ def test_pct_change(self): tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("periods, expected_vals", [ - (1, [[np.nan, np.nan], [np.nan, np.nan], [1., np.nan], [0.5, 1.], - [np.nan, 0.5], [0.33333333, np.nan], [np.nan, 0.33333333]]), - (2, [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan], - [2., np.nan], [np.nan, 2.], [1., np.nan], [np.nan, 1.]]) + @pytest.mark.parametrize("skipna, periods, expected_vals", [ + (True, 1, [[np.nan, np.nan], [np.nan, np.nan], [1., np.nan], [0.5, 1.], + [np.nan, 0.5], [0.33333333, np.nan], [np.nan, 0.33333333]]), + (True, 2, [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan], + [2., np.nan], [np.nan, 2.], [1., np.nan], [np.nan, 1.]]), + (False, 1, [[np.nan, np.nan], [np.nan, np.nan], [1., np.nan], + [0.5, 1.], [np.nan, 0.5], [np.nan, np.nan], + [np.nan, np.nan]]), + (False, 2, [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan], + [2., np.nan], [np.nan, 2.], [0.33333333, np.nan], + [np.nan, 0.33333333]]) ]) - def test_pct_change_skipna(self, periods, expected_vals): + def test_pct_change_skipna(self, skipna, periods, expected_vals): # GH25006 df = DataFrame([[np.nan, np.nan], [1., np.nan], [2., 1.], [3., 2.], [np.nan, 3.], [4., np.nan], [np.nan, 4.]]) - result = df.pct_change(skipna=True, periods=periods) + result = df.pct_change(skipna=skipna, periods=periods) expected = DataFrame(expected_vals) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 2407a7c7cd214..317ba5d4337ba 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -616,12 +616,15 @@ def test_pct_change(self, periods, fill_method, limit, exp): ]) def test_pct_change_skipna_raises(self, fill_method, limit): # GH25006 - if self._typ is DataFrame or self._typ is Series: - vals = [np.nan, np.nan, 1, 2, np.nan, 4, 10, np.nan] - obj = self._typ(vals) - with pytest.raises(ValueError): - obj.pct_change(skipna=True, fill_method=fill_method, - limit=limit) + vals = [np.nan, np.nan, 1, 2, np.nan, 4, 10, np.nan] + obj = self._typ(vals) + if fill_method: + msg = "cannot pass both skipna and fill_method" + else: + msg = "cannot pass both skipna and limit" + with pytest.raises(ValueError, match=msg): + obj.pct_change(skipna=True, fill_method=fill_method, + limit=limit) class TestNDFrame(object): From 1bf00f81466e7e1ee30ae42a8529caec9dd073f1 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Sat, 2 Mar 2019 11:22:10 +0100 Subject: [PATCH 12/23] Fix tests passing periods as kwarg --- pandas/tests/frame/test_timeseries.py | 8 ++++---- pandas/tests/series/test_timeseries.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 620ec2cb08435..ec6114d13e065 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -121,10 +121,10 @@ def test_diff_axis(self): [[np.nan, np.nan], [2., 2.]])) def test_pct_change(self): - rs = self.tsframe.pct_change(fill_method=None) + rs = self.tsframe.pct_change(skipna=False, fill_method=None) assert_frame_equal(rs, self.tsframe / self.tsframe.shift(1) - 1) - rs = self.tsframe.pct_change(2) + rs = self.tsframe.pct_change(periods=2) filled = self.tsframe.fillna(method='pad') assert_frame_equal(rs, filled / filled.shift(2) - 1) @@ -160,7 +160,7 @@ def test_pct_change_periods_freq(self, freq, periods, fill_method, limit): rs_freq = self.tsframe.pct_change(freq=freq, fill_method=fill_method, limit=limit) - rs_periods = self.tsframe.pct_change(periods, + rs_periods = self.tsframe.pct_change(periods=periods, fill_method=fill_method, limit=limit) assert_frame_equal(rs_freq, rs_periods) @@ -170,7 +170,7 @@ def test_pct_change_periods_freq(self, freq, periods, fill_method, limit): rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) - rs_periods = empty_ts.pct_change(periods, + rs_periods = empty_ts.pct_change(periods=periods, fill_method=fill_method, limit=limit) assert_frame_equal(rs_freq, rs_periods) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 5722b745809e0..ed5912bc95d5e 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -382,7 +382,7 @@ def test_pct_change(self): rs = self.ts.pct_change(fill_method=None) assert_series_equal(rs, self.ts / self.ts.shift(1) - 1) - rs = self.ts.pct_change(2) + rs = self.ts.pct_change(periods=2) filled = self.ts.fillna(method='pad') assert_series_equal(rs, filled / filled.shift(2) - 1) @@ -415,7 +415,7 @@ def test_pct_change_periods_freq(self, freq, periods, fill_method, limit): rs_freq = self.ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) - rs_periods = self.ts.pct_change(periods, + rs_periods = self.ts.pct_change(periods=periods, fill_method=fill_method, limit=limit) assert_series_equal(rs_freq, rs_periods) @@ -424,7 +424,7 @@ def test_pct_change_periods_freq(self, freq, periods, fill_method, limit): rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) - rs_periods = empty_ts.pct_change(periods, + rs_periods = empty_ts.pct_change(periods=periods, fill_method=fill_method, limit=limit) assert_series_equal(rs_freq, rs_periods) From 66cc4a423dc19891cd4a4a968717f98087def1d3 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Sun, 3 Mar 2019 18:09:38 +0100 Subject: [PATCH 13/23] Add whatsnew note --- doc/source/whatsnew/v0.25.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 124ec8f4ab92c..0ca3a7d840762 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -86,6 +86,7 @@ Other API Changes - :class:`DatetimeTZDtype` will now standardize pytz timezones to a common timezone instance (:issue:`24713`) - ``Timestamp`` and ``Timedelta`` scalars now implement the :meth:`to_numpy` method as aliases to :meth:`Timestamp.to_datetime64` and :meth:`Timedelta.to_timedelta64`, respectively. (:issue:`24653`) - :meth:`Timestamp.strptime` will now rise a ``NotImplementedError`` (:issue:`25016`) +- Default `skipna=True` for :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` will drop NAs before calculation (:issue:`25006`) - .. _whatsnew_0250.deprecations: From 14c7a05fe2ab16728f8c8c0777c10ac58a537e0b Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Sun, 3 Mar 2019 18:13:17 +0100 Subject: [PATCH 14/23] Fix for the case axis=1 --- pandas/core/generic.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 362ae210679ac..373da84b286cf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9986,13 +9986,14 @@ def pct_change(self, skipna=None, periods=1, fill_method=None, limit=None, raise ValueError("cannot pass both skipna and limit") if skipna is None and fill_method is None and limit is None: skipna = True + axis = self._get_axis_number(kwargs.pop('axis', self._stat_axis_name)) if skipna and isinstance(self, pd.DataFrame): + # If DataFrame, apply to each column/row return self.apply( lambda s: s.pct_change(periods=periods, freq=freq, - skipna=skipna, **kwargs) + skipna=skipna, **kwargs), + axis=axis ) - # TODO: Not sure if above is correct - need someone to confirm. - axis = self._get_axis_number(kwargs.pop('axis', self._stat_axis_name)) if skipna: data = self.dropna() elif fill_method is None: From 932fc6651ce7ec80c198779cdebe509bd78526d6 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Sun, 3 Mar 2019 18:14:16 +0100 Subject: [PATCH 15/23] Address requested changes --- pandas/core/generic.py | 92 +++++++++++++++++++++++----- pandas/tests/generic/test_generic.py | 4 +- 2 files changed, 79 insertions(+), 17 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 373da84b286cf..153f2a286b94c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9857,8 +9857,6 @@ def _check_percentile(self, q): Parameters ---------- - skipna : bool, default True - Exclude NA/null values before computing percent change. periods : int, default 1 Periods to shift for forming percent change. fill_method : str, default 'pad' @@ -9867,6 +9865,10 @@ def _check_percentile(self, q): The number of consecutive NAs to fill before stopping. freq : DateOffset, timedelta, or offset alias string, optional Increment to use from time series API (e.g. 'M' or BDay()). + skipna : bool, default True + Exclude NA/null values before computing percent change. + + .. versionadded:: 0.25.0 **kwargs Additional keyword arguments are passed into `DataFrame.shift` or `Series.shift`. @@ -9883,6 +9885,11 @@ def _check_percentile(self, q): Series.shift : Shift the index by some number of periods. DataFrame.shift : Shift the index by some number of periods. + Notes + ----- + The default `skipna=True` drops NAs before computing the percentage + change, and the results are reindexed like the original calling object. + Examples -------- **Series** @@ -9906,16 +9913,19 @@ def _check_percentile(self, q): 2 -0.055556 dtype: float64 - See the computing of percentage change in a Series with NAs. With - default skipped NAs, NAs are ignored before the computation and kept - afterwards. + See how the computing of percentage change is performed in a Series + with NAs. With default `skipna=True`, NAs are dropped before the + computation and eventually the results are reindexed like the original + object, thus keeping the original NAs. - >>> s = pd.Series([90, 91, None, 85]) + >>> s = pd.Series([90, 91, None, 85, None, 95]) >>> s 0 90.0 1 91.0 2 NaN 3 85.0 + 4 NaN + 5 95.0 dtype: float64 >>> s.pct_change() @@ -9923,10 +9933,13 @@ def _check_percentile(self, q): 1 0.011111 2 NaN 3 -0.065934 + 4 NaN + 5 0.117647 dtype: float64 - On the other hand, if a fill method is set, NAs are filled before the - computation. See forward fill method fills NAs with last valid + On the other hand, if a fill method is passed, NAs are filled before + the computation. For example, before the computation of percentage + change, forward fill method `ffill` first fills NAs with last valid observation forward to next valid. >>> s.pct_change(fill_method='ffill') @@ -9934,6 +9947,8 @@ def _check_percentile(self, q): 1 0.011111 2 0.000000 3 -0.065934 + 4 0.000000 + 5 0.117647 dtype: float64 **DataFrame** @@ -9975,16 +9990,63 @@ def _check_percentile(self, q): 2016 2015 2014 GOOG NaN -0.151997 -0.086016 APPL NaN 0.337604 0.012002 + + In a DataFrame with NAs, when computing the percentage change with + default `skipna=True`, NAs are first droppped on each column/row, and + the results are eventually reindexed as originally. + + >>> df = pd.DataFrame({ + ... 'a': [90, 91, None, 85, None, 95], + ... 'b': [91, None, 85, None, 95, None], + ... 'c': [None, 85, None, 95, None, None]}) + >>> df + a b c + 0 90.0 91.0 NaN + 1 91.0 NaN 85.0 + 2 NaN 85.0 NaN + 3 85.0 NaN 95.0 + 4 NaN 95.0 NaN + 5 95.0 NaN NaN + + >>> df.pct_change() + a b c + 0 NaN NaN NaN + 1 0.011111 NaN NaN + 2 NaN -0.065934 NaN + 3 -0.065934 NaN 0.117647 + 4 NaN 0.117647 NaN + 5 0.117647 NaN NaN + + >>> df.pct_change(axis=1) + a b c + 0 NaN 0.011111 NaN + 1 NaN NaN -0.065934 + 2 NaN NaN NaN + 3 NaN NaN 0.117647 + 4 NaN NaN NaN + 5 NaN NaN NaN + + Otherwise, if a fill method is passed, NAs are filled before the + computation. + + >>> df.pct_change(fill_method='ffill') + a b c + 0 NaN NaN NaN + 1 0.011111 0.000000 NaN + 2 0.000000 -0.065934 0.000000 + 3 -0.065934 0.000000 0.117647 + 4 0.000000 0.117647 0.000000 + 5 0.117647 0.000000 0.000000 """ @Appender(_shared_docs['pct_change'] % _shared_doc_kwargs) - def pct_change(self, skipna=None, periods=1, fill_method=None, limit=None, - freq=None, **kwargs): - if skipna and fill_method is not None: - raise ValueError("cannot pass both skipna and fill_method") - elif skipna and limit is not None: - raise ValueError("cannot pass both skipna and limit") - if skipna is None and fill_method is None and limit is None: + def pct_change(self, periods=1, fill_method=None, limit=None, freq=None, + skipna=None, **kwargs): + if fill_method is not None and skipna: + raise ValueError("cannot pass both fill_method and skipna") + elif limit is not None and skipna: + raise ValueError("cannot pass both limit and skipna") + if fill_method is None and limit is None and skipna is None: skipna = True axis = self._get_axis_number(kwargs.pop('axis', self._stat_axis_name)) if skipna and isinstance(self, pd.DataFrame): diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 317ba5d4337ba..9adbed76aa96b 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -619,9 +619,9 @@ def test_pct_change_skipna_raises(self, fill_method, limit): vals = [np.nan, np.nan, 1, 2, np.nan, 4, 10, np.nan] obj = self._typ(vals) if fill_method: - msg = "cannot pass both skipna and fill_method" + msg = "cannot pass both fill_method and skipna" else: - msg = "cannot pass both skipna and limit" + msg = "cannot pass both limit and skipna" with pytest.raises(ValueError, match=msg): obj.pct_change(skipna=True, fill_method=fill_method, limit=limit) From ed86a7bb53aa58d73d953c5f434a0a9ef51b0e94 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Sat, 16 Mar 2019 17:10:50 +0100 Subject: [PATCH 16/23] Replace None with np.nan --- pandas/core/generic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 153f2a286b94c..6e8b198f5d164 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9918,7 +9918,7 @@ def _check_percentile(self, q): computation and eventually the results are reindexed like the original object, thus keeping the original NAs. - >>> s = pd.Series([90, 91, None, 85, None, 95]) + >>> s = pd.Series([90, 91, np.nan, 85, np.nan, 95]) >>> s 0 90.0 1 91.0 @@ -9996,9 +9996,9 @@ def _check_percentile(self, q): the results are eventually reindexed as originally. >>> df = pd.DataFrame({ - ... 'a': [90, 91, None, 85, None, 95], - ... 'b': [91, None, 85, None, 95, None], - ... 'c': [None, 85, None, 95, None, None]}) + ... 'a': [90, 91, np.nan, 85, np.nan, 95], + ... 'b': [91, np.nan, 85, np.nan, 95, np.nan], + ... 'c': [np.nan, 85, np.nan, 95, np.nan, np.nan]}) >>> df a b c 0 90.0 91.0 NaN From a1ca0ca8e026ea47b83897fe3bce2b5477ea7f3c Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Sat, 16 Mar 2019 17:42:01 +0100 Subject: [PATCH 17/23] Replace DataFrame with ABCDataFrame --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6e8b198f5d164..4d82b7f49ebd4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10049,7 +10049,7 @@ def pct_change(self, periods=1, fill_method=None, limit=None, freq=None, if fill_method is None and limit is None and skipna is None: skipna = True axis = self._get_axis_number(kwargs.pop('axis', self._stat_axis_name)) - if skipna and isinstance(self, pd.DataFrame): + if skipna and isinstance(self, ABCDataFrame): # If DataFrame, apply to each column/row return self.apply( lambda s: s.pct_change(periods=periods, freq=freq, From 1acee7ca58cd2f121729a94da08fabd5248300c9 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 25 Aug 2019 19:39:33 -0700 Subject: [PATCH 18/23] blackify --- pandas/core/generic.py | 7 ++- pandas/tests/frame/test_analytics.py | 82 +++++++++++++++++++++----- pandas/tests/frame/test_timeseries.py | 4 +- pandas/tests/generic/test_generic.py | 20 ++++--- pandas/tests/series/test_analytics.py | 20 ++++--- pandas/tests/series/test_timeseries.py | 8 ++- 6 files changed, 105 insertions(+), 36 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 66809a2d08091..85fc4c102e955 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10456,9 +10456,10 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, **kwar if skipna and isinstance(self, ABCDataFrame): # If DataFrame, apply to each column/row return self.apply( - lambda s: s.pct_change(periods=periods, freq=freq, - skipna=skipna, **kwargs), - axis=axis + lambda s: s.pct_change( + periods=periods, freq=freq, skipna=skipna, **kwargs + ), + axis=axis, ) if skipna: data = self.dropna() diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 18d19e35e6d4d..56d310006927b 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1627,22 +1627,76 @@ def test_pct_change(self): tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("skipna, periods, expected_vals", [ - (True, 1, [[np.nan, np.nan], [np.nan, np.nan], [1., np.nan], [0.5, 1.], - [np.nan, 0.5], [0.33333333, np.nan], [np.nan, 0.33333333]]), - (True, 2, [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan], - [2., np.nan], [np.nan, 2.], [1., np.nan], [np.nan, 1.]]), - (False, 1, [[np.nan, np.nan], [np.nan, np.nan], [1., np.nan], - [0.5, 1.], [np.nan, 0.5], [np.nan, np.nan], - [np.nan, np.nan]]), - (False, 2, [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan], - [2., np.nan], [np.nan, 2.], [0.33333333, np.nan], - [np.nan, 0.33333333]]) - ]) + @pytest.mark.parametrize( + "skipna, periods, expected_vals", + [ + ( + True, + 1, + [ + [np.nan, np.nan], + [np.nan, np.nan], + [1.0, np.nan], + [0.5, 1.0], + [np.nan, 0.5], + [0.33333333, np.nan], + [np.nan, 0.33333333], + ], + ), + ( + True, + 2, + [ + [np.nan, np.nan], + [np.nan, np.nan], + [np.nan, np.nan], + [2.0, np.nan], + [np.nan, 2.0], + [1.0, np.nan], + [np.nan, 1.0], + ], + ), + ( + False, + 1, + [ + [np.nan, np.nan], + [np.nan, np.nan], + [1.0, np.nan], + [0.5, 1.0], + [np.nan, 0.5], + [np.nan, np.nan], + [np.nan, np.nan], + ], + ), + ( + False, + 2, + [ + [np.nan, np.nan], + [np.nan, np.nan], + [np.nan, np.nan], + [2.0, np.nan], + [np.nan, 2.0], + [0.33333333, np.nan], + [np.nan, 0.33333333], + ], + ), + ], + ) def test_pct_change_skipna(self, skipna, periods, expected_vals): # GH25006 - df = DataFrame([[np.nan, np.nan], [1., np.nan], [2., 1.], [3., 2.], - [np.nan, 3.], [4., np.nan], [np.nan, 4.]]) + df = DataFrame( + [ + [np.nan, np.nan], + [1.0, np.nan], + [2.0, 1.0], + [3.0, 2.0], + [np.nan, 3.0], + [4.0, np.nan], + [np.nan, 4.0], + ] + ) result = df.pct_change(skipna=skipna, periods=periods) expected = DataFrame(expected_vals) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 716706e07ceb0..8c4ce7555526b 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -193,7 +193,9 @@ def test_pct_change_periods_freq(self, freq, periods, fill_method, limit): empty_ts = DataFrame(index=self.tsframe.index, columns=self.tsframe.columns) rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) - rs_periods = empty_ts.pct_change(periods=periods, fill_method=fill_method, limit=limit) + rs_periods = empty_ts.pct_change( + periods=periods, fill_method=fill_method, limit=limit + ) assert_frame_equal(rs_freq, rs_periods) def test_frame_ctor_datetime64_column(self): diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 0b43fd4468e4b..d60b706091d96 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -585,13 +585,16 @@ def test_pct_change(self, periods, fill_method, limit, exp): else: tm.assert_series_equal(res, Series(exp)) - @pytest.mark.parametrize('fill_method, limit', [ - ('backfill', None), - ('bfill', None), - ('pad', None), - ('ffill', None), - (None, 1) - ]) + @pytest.mark.parametrize( + "fill_method, limit", + [ + ("backfill", None), + ("bfill", None), + ("pad", None), + ("ffill", None), + (None, 1), + ], + ) def test_pct_change_skipna_raises(self, fill_method, limit): # GH25006 vals = [np.nan, np.nan, 1, 2, np.nan, 4, 10, np.nan] @@ -601,8 +604,7 @@ def test_pct_change_skipna_raises(self, fill_method, limit): else: msg = "cannot pass both limit and skipna" with pytest.raises(ValueError, match=msg): - obj.pct_change(skipna=True, fill_method=fill_method, - limit=limit) + obj.pct_change(skipna=True, fill_method=fill_method, limit=limit) class TestNDFrame: diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index f3d1ffa7cbd4d..a98c54401aef5 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -227,13 +227,16 @@ def test_cummax_timedelta64(self): result = s.cummax(skipna=False) tm.assert_series_equal(expected, result) - @pytest.mark.parametrize("periods, expected_vals", [ - (1, [np.nan, np.nan, 1.0, 0.5, np.nan, 0.333333333333333, np.nan]), - (2, [np.nan, np.nan, np.nan, 2.0, np.nan, 1.0, np.nan]) - ]) + @pytest.mark.parametrize( + "periods, expected_vals", + [ + (1, [np.nan, np.nan, 1.0, 0.5, np.nan, 0.333333333333333, np.nan]), + (2, [np.nan, np.nan, np.nan, 2.0, np.nan, 1.0, np.nan]), + ], + ) def test_pct_change_skipna(self, periods, expected_vals): # GH25006 - vals = [np.nan, 1., 2., 3., np.nan, 4., np.nan] + vals = [np.nan, 1.0, 2.0, 3.0, np.nan, 4.0, np.nan] s = Series(vals) result = s.pct_change(skipna=True, periods=periods) expected = Series(expected_vals) @@ -1027,7 +1030,9 @@ def test_unstack(self): idx = pd.MultiIndex.from_arrays([[101, 102], [3.5, np.nan]]) ts = pd.Series([1, 2], index=idx) left = ts.unstack() - right = DataFrame([[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5]) + right = DataFrame( + [[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5] + ) assert_frame_equal(left, right) idx = pd.MultiIndex.from_arrays( @@ -1039,7 +1044,8 @@ def test_unstack(self): ) ts = pd.Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx) right = DataFrame( - [[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]], columns=["cat", "dog"] + [[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]], + columns=["cat", "dog"], ) tpls = [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)] right.index = pd.MultiIndex.from_tuples(tpls) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index bcadfa795b199..f353c672b6315 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -436,12 +436,16 @@ def test_pct_change_shift_over_nas(self): def test_pct_change_periods_freq(self, freq, periods, fill_method, limit): # GH 7292 rs_freq = self.ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) - rs_periods = self.ts.pct_change(periods=periods, fill_method=fill_method, limit=limit) + rs_periods = self.ts.pct_change( + periods=periods, fill_method=fill_method, limit=limit + ) assert_series_equal(rs_freq, rs_periods) empty_ts = Series(index=self.ts.index) rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) - rs_periods = empty_ts.pct_change(periods=periods, fill_method=fill_method, limit=limit) + rs_periods = empty_ts.pct_change( + periods=periods, fill_method=fill_method, limit=limit + ) assert_series_equal(rs_freq, rs_periods) def test_autocorr(self): From 764846dc6c8925d83665905dc135a9c49cad2295 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 25 Aug 2019 19:41:26 -0700 Subject: [PATCH 19/23] Changed whatsnew --- doc/source/whatsnew/v0.25.0.rst | 1 - doc/source/whatsnew/v1.0.0.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 22e2a768ad4c2..fe1e2d7826d62 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -876,7 +876,6 @@ Other API changes - :class:`DatetimeTZDtype` will now standardize pytz timezones to a common timezone instance (:issue:`24713`) - :class:`Timestamp` and :class:`Timedelta` scalars now implement the :meth:`to_numpy` method as aliases to :meth:`Timestamp.to_datetime64` and :meth:`Timedelta.to_timedelta64`, respectively. (:issue:`24653`) - :meth:`Timestamp.strptime` will now rise a ``NotImplementedError`` (:issue:`25016`) -- Default `skipna=True` for :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` will drop NAs before calculation (:issue:`25006`) - Comparing :class:`Timestamp` with unsupported objects now returns :py:obj:`NotImplemented` instead of raising ``TypeError``. This implies that unsupported rich comparisons are delegated to the other object, and are now consistent with Python 3 behavior for ``datetime`` objects (:issue:`24011`) - Bug in :meth:`DatetimeIndex.snap` which didn't preserving the ``name`` of the input :class:`Index` (:issue:`25575`) - The ``arg`` argument in :meth:`pandas.core.groupby.DataFrameGroupBy.agg` has been renamed to ``func`` (:issue:`26089`) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 8e25857e5ad69..df955ef3deb6f 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -29,7 +29,7 @@ Enhancements Other enhancements ^^^^^^^^^^^^^^^^^^ -- +- :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` now accept a ``skipna`` argument (:issue:`25006`) - .. _whatsnew_1000.api_breaking: From 71846986b25b7d6117f02edcc59b270f66d43d9c Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 25 Aug 2019 19:43:01 -0700 Subject: [PATCH 20/23] Updated versionadded --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 85fc4c102e955..1b1dbc2ce4757 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10273,7 +10273,7 @@ def _check_percentile(self, q): skipna : bool, default True Exclude NA/null values before computing percent change. - .. versionadded:: 0.25.0 + .. versionadded:: 1.0.0 **kwargs Additional keyword arguments are passed into `DataFrame.shift` or `Series.shift`. From 3821857f37492e8e881f390a46854231bf5bbef1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 26 Aug 2019 16:07:37 -0700 Subject: [PATCH 21/23] Signature fixup --- pandas/core/generic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1b1dbc2ce4757..c51315343a64f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10445,7 +10445,9 @@ def _check_percentile(self, q): """ @Appender(_shared_docs["pct_change"] % _shared_doc_kwargs) - def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, **kwargs): + def pct_change( + self, periods=1, fill_method="pad", limit=None, freq=None, skipna=None, **kwargs + ): if fill_method is not None and skipna: raise ValueError("cannot pass both fill_method and skipna") elif limit is not None and skipna: From a2be8f6c681a2ae304a2b908346b8eb58d854812 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 28 Aug 2019 09:33:45 -0700 Subject: [PATCH 22/23] test failure fixup --- pandas/core/generic.py | 2 +- pandas/tests/series/test_analytics.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c51315343a64f..3786bdd7fd9ef 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10446,7 +10446,7 @@ def _check_percentile(self, q): @Appender(_shared_docs["pct_change"] % _shared_doc_kwargs) def pct_change( - self, periods=1, fill_method="pad", limit=None, freq=None, skipna=None, **kwargs + self, periods=1, fill_method=None, limit=None, freq=None, skipna=None, **kwargs ): if fill_method is not None and skipna: raise ValueError("cannot pass both fill_method and skipna") diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index a98c54401aef5..65589b6402c60 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -490,7 +490,7 @@ def test_count(self, datetime_series): assert datetime_series.count() == np.isfinite(datetime_series).sum() - mi = MultiIndex.from_arrays([list("aabbcc"), [1, 2, 2, nan, 1, 2]]) + mi = MultiIndex.from_arrays([list("aabbcc"), [1, 2, 2, np.nan, 1, 2]]) ts = Series(np.arange(len(mi)), index=mi) left = ts.count(level=1) From fd18d04772ad4a75b2df63470a2ffac619c9ff52 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 30 Aug 2019 10:35:41 -0600 Subject: [PATCH 23/23] docstring for skipna=False --- pandas/core/generic.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 897813daec943..117d9d9a27a58 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10365,6 +10365,17 @@ def _check_percentile(self, q): 5 0.117647 dtype: float64 + By contrast, `skipna=False` will not drop NA values before + computation, instead evaluating each entry against the entry prior. + + >>> s.pct_change(skipna=False) + 0 NaN + 1 0.011111 + 2 NaN + 3 NaN + 4 NaN + 5 NaN + On the other hand, if a fill method is passed, NAs are filled before the computation. For example, before the computation of percentage change, forward fill method `ffill` first fills NAs with last valid