pandas-dev · albertvillanova · Feb 12, 2019 · Feb 12, 2019 · Feb 12, 2019 · Feb 13, 2019
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -29,7 +29,7 @@ Enhancements
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
 
--
+- :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` now accept a ``skipna`` argument (:issue:`25006`)
 -
 
 .. _whatsnew_1000.api_breaking:

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -10293,6 +10293,10 @@ def _check_percentile(self, q):
             The number of consecutive NAs to fill before stopping.
         freq : DateOffset, timedelta, or offset alias string, optional
             Increment to use from time series API (e.g. 'M' or BDay()).
+        skipna : bool, default True
+            Exclude NA/null values before computing percent change.
+
+            .. versionadded:: 1.0.0
         **kwargs
             Additional keyword arguments are passed into
             `DataFrame.shift` or `Series.shift`.
@@ -10309,6 +10313,11 @@ def _check_percentile(self, q):
         Series.shift : Shift the index by some number of periods.
         DataFrame.shift : Shift the index by some number of periods.
 
+        Notes
+        -----
+        ``skipna`` defaults to True unless `fill_method` or `limit` is passed.
+        When True, NAs are dropped and the result is reindexed like the caller.
+
         Examples
         --------
         **Series**
@@ -10332,22 +10341,53 @@ def _check_percentile(self, q):
         2   -0.055556
         dtype: float64
 
-        See the percentage change in a Series where filling NAs with last
-        valid observation forward to next valid.
+        See how the percentage change is computed in a Series with NAs. With
+        the default ``skipna=True``, NAs are dropped before the computation
+        and the result is finally reindexed like the original object, thus
+        keeping the original NAs.
 
-        >>> s = pd.Series([90, 91, None, 85])
+        >>> s = pd.Series([90, 91, np.nan, 85, np.nan, 95])
         >>> s
         0    90.0
         1    91.0
         2     NaN
         3    85.0
+        4     NaN
+        5    95.0
         dtype: float64
 
+        >>> s.pct_change()
+        0         NaN
+        1    0.011111
+        2         NaN
+        3   -0.065934
+        4         NaN
+        5    0.117647
+        dtype: float64
+
+        By contrast, ``skipna=False`` does not drop NA values before the
+        computation; each entry is evaluated against the entry before it.
+
+        >>> s.pct_change(skipna=False)
+        0         NaN
+        1    0.011111
+        2         NaN
+        3         NaN
+        4         NaN
+        5         NaN
+        dtype: float64
+
+        If a fill method is passed instead, NAs are filled before the
+        computation. For example, ``ffill`` propagates the last valid
+        observation forward to the next valid one.
+
         >>> s.pct_change(fill_method='ffill')
         0         NaN
         1    0.011111
         2    0.000000
         3   -0.065934
+        4    0.000000
+        5    0.117647
         dtype: float64
 
         **DataFrame**
@@ -10389,13 +10429,77 @@ def _check_percentile(self, q):
               2016      2015      2014
         GOOG   NaN -0.151997 -0.086016
         APPL   NaN  0.337604  0.012002
+
+        In a DataFrame with NAs, when computing the percentage change with the
+        default ``skipna=True``, NAs are first dropped from each column/row,
+        and the results are finally reindexed like the original object.
+
+        >>> df = pd.DataFrame({
+        ...     'a': [90, 91, np.nan, 85, np.nan, 95],
+        ...     'b': [91, np.nan, 85, np.nan, 95, np.nan],
+        ...     'c': [np.nan, 85, np.nan, 95, np.nan, np.nan]})
+        >>> df
+              a     b     c
+        0  90.0  91.0   NaN
+        1  91.0   NaN  85.0
+        2   NaN  85.0   NaN
+        3  85.0   NaN  95.0
+        4   NaN  95.0   NaN
+        5  95.0   NaN   NaN
+
+        >>> df.pct_change()
+                  a         b         c
+        0       NaN       NaN       NaN
+        1  0.011111       NaN       NaN
+        2       NaN -0.065934       NaN
+        3 -0.065934       NaN  0.117647
+        4       NaN  0.117647       NaN
+        5  0.117647       NaN       NaN
+
+        >>> df.pct_change(axis=1)
+            a         b         c
+        0 NaN  0.011111       NaN
+        1 NaN       NaN -0.065934
+        2 NaN       NaN       NaN
+        3 NaN       NaN  0.117647
+        4 NaN       NaN       NaN
+        5 NaN       NaN       NaN
+
+        Otherwise, if a fill method is passed, NAs are filled before the
+        computation.
+
+        >>> df.pct_change(fill_method='ffill')
+                  a         b         c
+        0       NaN       NaN       NaN
+        1  0.011111  0.000000       NaN
+        2  0.000000 -0.065934  0.000000
+        3 -0.065934  0.000000  0.117647
+        4  0.000000  0.117647  0.000000
+        5  0.117647  0.000000  0.000000
         """
 
     @Appender(_shared_docs["pct_change"] % _shared_doc_kwargs)
-    def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, **kwargs):
-        # TODO: Not sure if above is correct - need someone to confirm.
+    def pct_change(
+        self, periods=1, fill_method=None, limit=None, freq=None, skipna=None, **kwargs
+    ):
+        if fill_method is not None and skipna:
+            raise ValueError("cannot pass both fill_method and skipna")
+        elif limit is not None and skipna:
+            raise ValueError("cannot pass both limit and skipna")
+        if fill_method is None and limit is None and skipna is None:
+            skipna = True
         axis = self._get_axis_number(kwargs.pop("axis", self._stat_axis_name))
-        if fill_method is None:
+        if skipna and isinstance(self, ABCDataFrame):
+            # If DataFrame, apply to each column/row
+            return self.apply(
+                lambda s: s.pct_change(
+                    periods=periods, freq=freq, skipna=skipna, **kwargs
+                ),
+                axis=axis,
+            )
+        if skipna:
+            data = self.dropna()
+        elif fill_method is None:
             data = self
         else:
             data = self.fillna(method=fill_method, limit=limit, axis=axis)
@@ -10405,6 +10509,8 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, **kwar
         if freq is None:
             mask = isna(com.values_from_object(data))
             np.putmask(rs.values, mask, np.nan)
+        if skipna:
+            rs = rs.reindex_like(self)
         return rs
 
     def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs):

diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
@@ -1627,6 +1627,80 @@ def test_pct_change(self):
 
             tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.parametrize(
+        "skipna, periods, expected_vals",
+        [
+            (
+                True,
+                1,
+                [
+                    [np.nan, np.nan],
+                    [np.nan, np.nan],
+                    [1.0, np.nan],
+                    [0.5, 1.0],
+                    [np.nan, 0.5],
+                    [0.33333333, np.nan],
+                    [np.nan, 0.33333333],
+                ],
+            ),
+            (
+                True,
+                2,
+                [
+                    [np.nan, np.nan],
+                    [np.nan, np.nan],
+                    [np.nan, np.nan],
+                    [2.0, np.nan],
+                    [np.nan, 2.0],
+                    [1.0, np.nan],
+                    [np.nan, 1.0],
+                ],
+            ),
+            (
+                False,
+                1,
+                [
+                    [np.nan, np.nan],
+                    [np.nan, np.nan],
+                    [1.0, np.nan],
+                    [0.5, 1.0],
+                    [np.nan, 0.5],
+                    [np.nan, np.nan],
+                    [np.nan, np.nan],
+                ],
+            ),
+            (
+                False,
+                2,
+                [
+                    [np.nan, np.nan],
+                    [np.nan, np.nan],
+                    [np.nan, np.nan],
+                    [2.0, np.nan],
+                    [np.nan, 2.0],
+                    [0.33333333, np.nan],
+                    [np.nan, 0.33333333],
+                ],
+            ),
+        ],
+    )
+    def test_pct_change_skipna(self, skipna, periods, expected_vals):
+        # GH25006 - with skipna, NAs are dropped per column before the computation
+        df = DataFrame(
+            [
+                [np.nan, np.nan],
+                [1.0, np.nan],
+                [2.0, 1.0],
+                [3.0, 2.0],
+                [np.nan, 3.0],
+                [4.0, np.nan],
+                [np.nan, 4.0],
+            ]
+        )
+        result = df.pct_change(skipna=skipna, periods=periods)
+        expected = DataFrame(expected_vals)
+        tm.assert_frame_equal(result, expected)
+
     # ----------------------------------------------------------------------
     # Index of max / min
 

diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py
@@ -143,10 +143,10 @@ def test_diff_axis(self):
         assert_frame_equal(df.diff(axis=0), DataFrame([[np.nan, np.nan], [2.0, 2.0]]))
 
     def test_pct_change(self):
-        rs = self.tsframe.pct_change(fill_method=None)
+        rs = self.tsframe.pct_change(skipna=False, fill_method=None)
         assert_frame_equal(rs, self.tsframe / self.tsframe.shift(1) - 1)
 
-        rs = self.tsframe.pct_change(2)
+        rs = self.tsframe.pct_change(periods=2)
         filled = self.tsframe.fillna(method="pad")
         assert_frame_equal(rs, filled / filled.shift(2) - 1)
 
@@ -165,7 +165,7 @@ def test_pct_change_shift_over_nas(self):
 
         df = DataFrame({"a": s, "b": s})
 
-        chg = df.pct_change()
+        chg = df.pct_change(fill_method="ffill")
         expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2])
         edf = DataFrame({"a": expected, "b": expected})
         assert_frame_equal(chg, edf)
@@ -187,13 +187,15 @@ def test_pct_change_periods_freq(self, freq, periods, fill_method, limit):
             freq=freq, fill_method=fill_method, limit=limit
         )
         rs_periods = self.tsframe.pct_change(
-            periods, fill_method=fill_method, limit=limit
+            periods=periods, fill_method=fill_method, limit=limit
         )
         assert_frame_equal(rs_freq, rs_periods)
 
         empty_ts = DataFrame(index=self.tsframe.index, columns=self.tsframe.columns)
         rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit)
-        rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit)
+        rs_periods = empty_ts.pct_change(
+            periods=periods, fill_method=fill_method, limit=limit
+        )
         assert_frame_equal(rs_freq, rs_periods)
 
     def test_frame_ctor_datetime64_column(self):

diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py
@@ -585,6 +585,27 @@ def test_pct_change(self, periods, fill_method, limit, exp):
         else:
             tm.assert_series_equal(res, Series(exp))
 
+    @pytest.mark.parametrize(
+        "fill_method, limit",
+        [
+            ("backfill", None),
+            ("bfill", None),
+            ("pad", None),
+            ("ffill", None),
+            (None, 1),
+        ],
+    )
+    def test_pct_change_skipna_raises(self, fill_method, limit):
+        # GH25006 - skipna=True cannot be combined with fill_method or limit
+        vals = [np.nan, np.nan, 1, 2, np.nan, 4, 10, np.nan]
+        obj = self._typ(vals)
+        if fill_method:
+            msg = "cannot pass both fill_method and skipna"
+        else:
+            msg = "cannot pass both limit and skipna"
+        with pytest.raises(ValueError, match=msg):
+            obj.pct_change(skipna=True, fill_method=fill_method, limit=limit)
+
 
 class TestNDFrame:
     # tests that don't fit elsewhere