Skip to content

Series pct_change fill_method behavior #25291

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 31 commits into from
Closed
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
0e4e1c2
Add skipna to pct_change (#25006)
Feb 12, 2019
4be1bdc
Add tests
Feb 12, 2019
192bded
Fix PEP8 issues
Feb 12, 2019
bb74285
Fix PEP8 issue
Feb 13, 2019
4418bf1
Fix test
Feb 13, 2019
3670ffe
Fix test
Feb 13, 2019
8f36c7a
Fix tests
Feb 13, 2019
add18de
Fix linting
Feb 14, 2019
59eab18
Merge branch 'master' into fix-25006
Feb 18, 2019
279f433
Set default skipna=True
Feb 20, 2019
4072ca0
Use pytest.raises
Feb 20, 2019
a016d8a
Merge branch 'master' into fix-25006
Mar 2, 2019
80a09c9
Address requested changes
Mar 2, 2019
1bf00f8
Fix tests passing periods as kwarg
Mar 2, 2019
9208f61
Merge branch 'master' into fix-25006
Mar 2, 2019
fd2cdf8
Merge branch 'master' into fix-25006
Mar 3, 2019
66cc4a4
Add whatsnew note
Mar 3, 2019
14c7a05
Fix for the case axis=1
Mar 3, 2019
932fc66
Address requested changes
Mar 3, 2019
ed86a7b
Replace None with np.nan
Mar 16, 2019
a1ca0ca
Replace DataFrame with ABCDataFrame
Mar 16, 2019
efefaf6
Merge branch 'master' into fix-25006
Mar 16, 2019
1e854ed
Merge branch 'master' into fix-25006
May 16, 2019
84c036a
Merge remote-tracking branch 'upstream/master' into fix-25006
WillAyd Aug 26, 2019
1acee7c
blackify
WillAyd Aug 26, 2019
764846d
Changed whatsnew
WillAyd Aug 26, 2019
7184698
Updated versionadded
WillAyd Aug 26, 2019
3821857
Signature fixup
WillAyd Aug 26, 2019
a2be8f6
test failure fixup
WillAyd Aug 28, 2019
e456c6b
Merge remote-tracking branch 'upstream/master' into fix-25006
WillAyd Aug 30, 2019
fd18d04
docstring for skipna=False
WillAyd Aug 30, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9953,11 +9953,24 @@ def _check_percentile(self, q):
"""

@Appender(_shared_docs['pct_change'] % _shared_doc_kwargs)
def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
**kwargs):
def pct_change(self, periods=1, fill_method=None, limit=None, freq=None,
skipna=None, **kwargs):
if skipna and fill_method is not None:
raise ValueError("cannot pass both skipna and fill_method")
elif skipna and limit is not None:
raise ValueError("cannot pass both skipna and limit")
if skipna is None and fill_method is None and limit is None:
skipna = True
if skipna and self._typ == 'dataframe':
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use isinstance here

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm maybe add this to the frame subclass instead? Somewhat confusing to introspect here in the shared one.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the moment, I have added isinstance as required by @jreback. Tell me if you both think I should do otherwise.

return self.apply(
lambda s: s.pct_change(periods=periods, freq=freq,
skipna=skipna, **kwargs)
)
# TODO: Not sure if above is correct - need someone to confirm.
axis = self._get_axis_number(kwargs.pop('axis', self._stat_axis_name))
if fill_method is None:
if skipna:
data = self.dropna()
elif fill_method is None:
data = self
else:
data = self.fillna(method=fill_method, limit=limit, axis=axis)
Expand All @@ -9968,6 +9981,8 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
if freq is None:
mask = isna(com.values_from_object(data))
np.putmask(rs.values, mask, np.nan)
if skipna:
rs = rs.reindex_like(self)
return rs

def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs):
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1364,6 +1364,20 @@ def test_pct_change(self):

tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("periods, expected_vals", [
(1, [[np.nan, np.nan], [np.nan, np.nan], [1., np.nan], [0.5, 1.],
[np.nan, 0.5], [0.33333333, np.nan], [np.nan, 0.33333333]]),
(2, [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan],
[2., np.nan], [np.nan, 2.], [1., np.nan], [np.nan, 1.]])
])
def test_pct_change_skipna(self, periods, expected_vals):
# GH25006
df = DataFrame([[np.nan, np.nan], [1., np.nan], [2., 1.], [3., 2.],
[np.nan, 3.], [4., np.nan], [np.nan, 4.]])
result = df.pct_change(skipna=True, periods=periods)
expected = DataFrame(expected_vals)
tm.assert_frame_equal(result, expected)

# ----------------------------------------------------------------------
# Index of max / min

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def test_pct_change_shift_over_nas(self):

df = DataFrame({'a': s, 'b': s})

chg = df.pct_change()
chg = df.pct_change(fill_method='ffill')
expected = Series([np.nan, 0.5, 0., 2.5 / 1.5 - 1, .2])
edf = DataFrame({'a': expected, 'b': expected})
assert_frame_equal(chg, edf)
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/generic/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,22 @@ def test_pct_change(self, periods, fill_method, limit, exp):
else:
tm.assert_series_equal(res, Series(exp))

@pytest.mark.parametrize('fill_method, limit', [
('backfill', None),
('bfill', None),
('pad', None),
('ffill', None),
(None, 1)
])
def test_pct_change_skipna_raises(self, fill_method, limit):
# GH25006
if self._typ is DataFrame or self._typ is Series:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should not be needed any longer, Panels are gone

vals = [np.nan, np.nan, 1, 2, np.nan, 4, 10, np.nan]
obj = self._typ(vals)
with pytest.raises(ValueError):
obj.pct_change(skipna=True, fill_method=fill_method,
limit=limit)


class TestNDFrame(object):
# tests that don't fit elsewhere
Expand Down
45 changes: 27 additions & 18 deletions pandas/tests/series/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import operator

import numpy as np
from numpy import nan
import pytest

from pandas.compat import PY35, lrange, range
Expand Down Expand Up @@ -212,6 +211,18 @@ def test_cummax_timedelta64(self):
result = s.cummax(skipna=False)
tm.assert_series_equal(expected, result)

@pytest.mark.parametrize("periods, expected_vals", [
(1, [np.nan, np.nan, 1.0, 0.5, np.nan, 0.333333333333333, np.nan]),
(2, [np.nan, np.nan, np.nan, 2.0, np.nan, 1.0, np.nan])
])
def test_pct_change_skipna(self, periods, expected_vals):
# GH25006
vals = [np.nan, 1., 2., 3., np.nan, 4., np.nan]
s = Series(vals)
result = s.pct_change(skipna=True, periods=periods)
expected = Series(expected_vals)
assert_series_equal(expected, result)

def test_npdiff(self):
pytest.skip("skipping due to Series no longer being an "
"ndarray")
Expand All @@ -220,7 +231,7 @@ def test_npdiff(self):
s = Series(np.arange(5))

r = np.diff(s)
assert_series_equal(Series([nan, 0, 0, 0, nan]), r)
assert_series_equal(Series([np.nan, 0, 0, 0, np.nan]), r)

def _check_accum_op(self, name, datetime_series_, check_dtype=True):
func = getattr(np, name)
Expand Down Expand Up @@ -445,14 +456,14 @@ def test_count(self, datetime_series):

assert datetime_series.count() == np.isfinite(datetime_series).sum()

mi = MultiIndex.from_arrays([list('aabbcc'), [1, 2, 2, nan, 1, 2]])
mi = MultiIndex.from_arrays([list('aabbcc'), [1, 2, 2, np.nan, 1, 2]])
ts = Series(np.arange(len(mi)), index=mi)

left = ts.count(level=1)
right = Series([2, 3, 1], index=[1, 2, nan])
right = Series([2, 3, 1], index=[1, 2, np.nan])
assert_series_equal(left, right)

ts.iloc[[0, 3, 5]] = nan
ts.iloc[[0, 3, 5]] = np.nan
assert_series_equal(ts.count(level=1), right - 1)

def test_dot(self):
Expand Down Expand Up @@ -673,11 +684,11 @@ def test_cummethods_bool(self):
result = getattr(s, method)()
assert_series_equal(result, expected)

e = pd.Series([False, True, nan, False])
cse = pd.Series([0, 1, nan, 1], dtype=object)
cpe = pd.Series([False, 0, nan, 0])
cmin = pd.Series([False, False, nan, False])
cmax = pd.Series([False, True, nan, True])
e = pd.Series([False, True, np.nan, False])
cse = pd.Series([0, 1, np.nan, 1], dtype=object)
cpe = pd.Series([False, 0, np.nan, 0])
cmin = pd.Series([False, False, np.nan, False])
cmax = pd.Series([False, True, np.nan, True])
expecteds = {'cumsum': cse,
'cumprod': cpe,
'cummin': cmin,
Expand Down Expand Up @@ -954,15 +965,13 @@ def test_shift_categorical(self):
assert_index_equal(s.values.categories, sn2.values.categories)

def test_unstack(self):
from numpy import nan

index = MultiIndex(levels=[['bar', 'foo'], ['one', 'three', 'two']],
codes=[[1, 1, 0, 0], [0, 1, 0, 2]])

s = Series(np.arange(4.), index=index)
unstacked = s.unstack()

expected = DataFrame([[2., nan, 3.], [0., 1., nan]],
expected = DataFrame([[2., np.nan, 3.], [0., 1., np.nan]],
index=['bar', 'foo'],
columns=['one', 'three', 'two'])

Expand All @@ -986,17 +995,17 @@ def test_unstack(self):
idx = pd.MultiIndex.from_arrays([[101, 102], [3.5, np.nan]])
ts = pd.Series([1, 2], index=idx)
left = ts.unstack()
right = DataFrame([[nan, 1], [2, nan]], index=[101, 102],
columns=[nan, 3.5])
right = DataFrame([[np.nan, 1], [2, np.nan]], index=[101, 102],
columns=[np.nan, 3.5])
assert_frame_equal(left, right)

idx = pd.MultiIndex.from_arrays([['cat', 'cat', 'cat', 'dog', 'dog'
], ['a', 'a', 'b', 'a', 'b'],
[1, 2, 1, 1, np.nan]])
ts = pd.Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx)
right = DataFrame([[1.0, 1.3], [1.1, nan], [nan, 1.4], [1.2, nan]],
columns=['cat', 'dog'])
tpls = [('a', 1), ('a', 2), ('b', nan), ('b', 1)]
right = DataFrame([[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4],
[1.2, np.nan]], columns=['cat', 'dog'])
tpls = [('a', 1), ('a', 2), ('b', np.nan), ('b', 1)]
right.index = pd.MultiIndex.from_tuples(tpls)
assert_frame_equal(ts.unstack(level=0), right)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ def test_pct_change(self):
def test_pct_change_shift_over_nas(self):
s = Series([1., 1.5, np.nan, 2.5, 3.])

chg = s.pct_change()
chg = s.pct_change(fill_method='ffill')
expected = Series([np.nan, 0.5, 0., 2.5 / 1.5 - 1, .2])
assert_series_equal(chg, expected)

Expand Down