From 3d63374d90e74d256344a6730352cdeefefe3cec Mon Sep 17 00:00:00 2001 From: mproszewska Date: Sat, 14 Mar 2020 13:44:19 +0100 Subject: [PATCH 01/31] DOC: Note about dtypes in diff in Dataframe --- pandas/core/frame.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cd5d81bc70dd9..459125aa8344d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6476,6 +6476,9 @@ def diff(self, periods=1, axis=0) -> "DataFrame": ----- For boolean dtypes, this uses :meth:`operator.xor` rather than :meth:`operator.sub`. + For unsigned integer Series, the results will also be unsigned. The result + is consistent with calculating the difference directly, however dtype of + the result is always float64. Examples -------- From a53df3aeeb796b7cf5742844e833215888fa0bc8 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Sat, 14 Mar 2020 13:45:40 +0100 Subject: [PATCH 02/31] DOC: Note about dtypes in diff in Series --- pandas/core/series.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index 568e99622dd29..6500d704a02b7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2268,6 +2268,9 @@ def diff(self, periods=1) -> "Series": ----- For boolean dtypes, this uses :meth:`operator.xor` rather than :meth:`operator.sub`. + For unsigned integer Series, the results will also be unsigned. The result + is consistent with calculating the difference directly, however dtype of + the result is always float64. Examples -------- From c1d875aaea6aa20c6b305b777535c7c4adc205fe Mon Sep 17 00:00:00 2001 From: mproszewska Date: Sat, 21 Mar 2020 15:47:35 +0100 Subject: [PATCH 03/31] DOC: Change comment --- pandas/core/frame.py | 5 ++--- pandas/core/series.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 459125aa8344d..ab1c341898444 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6476,9 +6476,8 @@ def diff(self, periods=1, axis=0) -> "DataFrame": ----- For boolean dtypes, this uses :meth:`operator.xor` rather than :meth:`operator.sub`. - For unsigned integer Series, the results will also be unsigned. The result - is consistent with calculating the difference directly, however dtype of - the result is always float64. + The result is calculated according to current dtype in DataFrame, + however dtype of the result is always float64. Examples -------- diff --git a/pandas/core/series.py b/pandas/core/series.py index 6500d704a02b7..fb62991fb1490 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2268,9 +2268,8 @@ def diff(self, periods=1) -> "Series": ----- For boolean dtypes, this uses :meth:`operator.xor` rather than :meth:`operator.sub`. - For unsigned integer Series, the results will also be unsigned. The result - is consistent with calculating the difference directly, however dtype of - the result is always float64. + The result is calculated according to current dtype in Series, + however dtype of the result is always float64. Examples -------- From 2e8ccd0e144e790c97a9b77a1601a891a513dfd5 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Sat, 21 Mar 2020 17:09:40 +0100 Subject: [PATCH 04/31] DOC: Fix spaces --- pandas/core/frame.py | 2 +- pandas/core/series.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ab1c341898444..57b86df42e6f1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6476,7 +6476,7 @@ def diff(self, periods=1, axis=0) -> "DataFrame": ----- For boolean dtypes, this uses :meth:`operator.xor` rather than :meth:`operator.sub`. - The result is calculated according to current dtype in DataFrame, + The result is calculated according to current dtype in DataFrame, however dtype of the result is always float64. Examples diff --git a/pandas/core/series.py b/pandas/core/series.py index fb62991fb1490..c2871b02f19f1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2268,7 +2268,7 @@ def diff(self, periods=1) -> "Series": ----- For boolean dtypes, this uses :meth:`operator.xor` rather than :meth:`operator.sub`. - The result is calculated according to current dtype in Series, + The result is calculated according to current dtype in Series, however dtype of the result is always float64. Examples From 50d55eef8b621a88b403a7c3f2dc4b20853193d5 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Fri, 27 Mar 2020 13:57:40 +0100 Subject: [PATCH 05/31] DOC: Add doc decorator and overflow examples --- pandas/core/frame.py | 158 ++++++++++++++++++------------------------ pandas/core/series.py | 99 ++++++++++++++------------ 2 files changed, 125 insertions(+), 132 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 57b86df42e6f1..5e76d9759a27e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6445,98 +6445,78 @@ def melt( # ---------------------------------------------------------------------- # Time series-related - def diff(self, periods=1, axis=0) -> "DataFrame": + @Appender( """ - First discrete difference of element. - - Calculates the difference of a DataFrame element compared with another - element in the DataFrame (default is the element in the same column - of the previous row). - - Parameters - ---------- - periods : int, default 1 - Periods to shift for calculating difference, accepts negative - values. - axis : {0 or 'index', 1 or 'columns'}, default 0 - Take difference over rows (0) or columns (1). - - Returns - ------- - DataFrame - - See Also - -------- - Series.diff: First discrete difference for a Series. - DataFrame.pct_change: Percent change over given number of periods. - DataFrame.shift: Shift index by desired number of periods with an - optional time freq. - - Notes - ----- - For boolean dtypes, this uses :meth:`operator.xor` rather than - :meth:`operator.sub`. - The result is calculated according to current dtype in DataFrame, - however dtype of the result is always float64. - - Examples - -------- - Difference with previous row +Examples +-------- +Difference with previous row - >>> df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6], - ... 'b': [1, 1, 2, 3, 5, 8], - ... 'c': [1, 4, 9, 16, 25, 36]}) - >>> df - a b c - 0 1 1 1 - 1 2 1 4 - 2 3 2 9 - 3 4 3 16 - 4 5 5 25 - 5 6 8 36 - - >>> df.diff() - a b c - 0 NaN NaN NaN - 1 1.0 0.0 3.0 - 2 1.0 1.0 5.0 - 3 1.0 1.0 7.0 - 4 1.0 2.0 9.0 - 5 1.0 3.0 11.0 - - Difference with previous column - - >>> df.diff(axis=1) - a b c - 0 NaN 0.0 0.0 - 1 NaN -1.0 3.0 - 2 NaN -1.0 7.0 - 3 NaN -1.0 13.0 - 4 NaN 0.0 20.0 - 5 NaN 2.0 28.0 - - Difference with 3rd previous row - - >>> df.diff(periods=3) - a b c - 0 NaN NaN NaN - 1 NaN NaN NaN - 2 NaN NaN NaN - 3 3.0 2.0 15.0 - 4 3.0 4.0 21.0 - 5 3.0 6.0 27.0 - - Difference with following row - - >>> df.diff(periods=-1) - a b c - 0 -1.0 0.0 -3.0 - 1 -1.0 -1.0 -5.0 - 2 -1.0 -1.0 -7.0 - 3 -1.0 -2.0 -9.0 - 4 -1.0 -3.0 -11.0 - 5 NaN NaN NaN +>>> df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6], +... 'b': [1, 1, 2, 3, 5, 8], +... 'c': [1, 4, 9, 16, 25, 36]}) +>>> df + a b c +0 1 1 1 +1 2 1 4 +2 3 2 9 +3 4 3 16 +4 5 5 25 +5 6 8 36 + +>>> df.diff() + a b c +0 NaN NaN NaN +1 1.0 0.0 3.0 +2 1.0 1.0 5.0 +3 1.0 1.0 7.0 +4 1.0 2.0 9.0 +5 1.0 3.0 11.0 + +Difference with previous column + +>>> df.diff(axis=1) + a b c +0 NaN 0.0 0.0 +1 NaN -1.0 3.0 +2 NaN -1.0 7.0 +3 NaN -1.0 13.0 +4 NaN 0.0 20.0 +5 NaN 2.0 28.0 + +Difference with 3rd previous row + +>>> df.diff(periods=3) + a b c +0 NaN NaN NaN +1 NaN NaN NaN +2 NaN NaN NaN +3 3.0 2.0 15.0 +4 3.0 4.0 21.0 +5 3.0 6.0 27.0 + +Difference with following row + +>>> df.diff(periods=-1) + a b c +0 -1.0 0.0 -3.0 +1 -1.0 -1.0 -5.0 +2 -1.0 -1.0 -7.0 +3 -1.0 -2.0 -9.0 +4 -1.0 -3.0 -11.0 +5 NaN NaN NaN + +Overflow for input dtype + +>>> df = pd.DataFrame({'a': [1, 0]}, dtype=np.uint8) +>>> df.diff() + a +0 NaN +1 255.0 """ + ) + @doc(Series.diff, klass='Dataframe', extra_params="axis : {0 or 'index', 1 or 'columns'}, default 0\n Take difference over rows (0) or columns (1).", other_klass='Series') + def diff(self, periods=1, axis=0) -> "DataFrame": + bm_axis = self._get_block_manager_axis(axis) new_data = self._data.diff(n=periods, axis=bm_axis) return self._constructor(new_data) diff --git a/pandas/core/series.py b/pandas/core/series.py index c2871b02f19f1..413ec3e2e8c50 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2239,73 +2239,86 @@ def cov(self, other, min_periods=None) -> float: return np.nan return nanops.nancov(this.values, other.values, min_periods=min_periods) + @Appender( + """ +Examples +-------- +Difference with previous row + +>>> s = pd.Series([1, 1, 2, 3, 5, 8]) +>>> s.diff() +0 NaN +1 0.0 +2 1.0 +3 1.0 +4 2.0 +5 3.0 +dtype: float64 + +Difference with 3rd previous row + +>>> s.diff(periods=3) +0 NaN +1 NaN +2 NaN +3 2.0 +4 4.0 +5 6.0 +dtype: float64 + +Difference with following row + +>>> s.diff(periods=-1) +0 0.0 +1 -1.0 +2 -1.0 +3 -2.0 +4 -3.0 +5 NaN +dtype: float64 + +Overflow for input dtype + +>>> s = pd.Series([1, 0], dtype=np.uint8) +>>> s.diff() +0 NaN +1 255.0 +dtype: float64 + """ + ) + @doc(klass='Series', extra_params='', other_klass='Dataframe') def diff(self, periods=1) -> "Series": """ First discrete difference of element. - Calculates the difference of a Series element compared with another - element in the Series (default is element in previous row). + Calculates the difference of a {klass} element compared with another + element in the {klass} (default is element in previous row). Parameters ---------- periods : int, default 1 Periods to shift for calculating difference, accepts negative values. + {extra_params} Returns ------- - Series + {klass} First differences of the Series. See Also -------- - Series.pct_change: Percent change over given number of periods. - Series.shift: Shift index by desired number of periods with an + {klass}.pct_change: Percent change over given number of periods. + {klass}.shift: Shift index by desired number of periods with an optional time freq. - DataFrame.diff: First discrete difference of object. + {other_klass}.diff: First discrete difference of object. Notes ----- For boolean dtypes, this uses :meth:`operator.xor` rather than :meth:`operator.sub`. - The result is calculated according to current dtype in Series, + The result is calculated according to current dtype in {klass}, however dtype of the result is always float64. - - Examples - -------- - Difference with previous row - - >>> s = pd.Series([1, 1, 2, 3, 5, 8]) - >>> s.diff() - 0 NaN - 1 0.0 - 2 1.0 - 3 1.0 - 4 2.0 - 5 3.0 - dtype: float64 - - Difference with 3rd previous row - - >>> s.diff(periods=3) - 0 NaN - 1 NaN - 2 NaN - 3 2.0 - 4 4.0 - 5 6.0 - dtype: float64 - - Difference with following row - - >>> s.diff(periods=-1) - 0 0.0 - 1 -1.0 - 2 -1.0 - 3 -2.0 - 4 -3.0 - 5 NaN - dtype: float64 """ result = algorithms.diff(self.array, periods) return self._constructor(result, index=self.index).__finalize__(self) From 3efba72476ca7b16e794bb6cd5ad916f4102be1e Mon Sep 17 00:00:00 2001 From: mproszewska Date: Fri, 3 Apr 2020 21:11:46 +0200 Subject: [PATCH 06/31] DOC: Remove appender --- pandas/core/frame.py | 72 ++----------------------------------------- pandas/core/series.py | 54 ++++---------------------------- 2 files changed, 8 insertions(+), 118 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5e76d9759a27e..4edfd0d94eeb4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6445,76 +6445,7 @@ def melt( # ---------------------------------------------------------------------- # Time series-related - @Appender( - """ -Examples --------- -Difference with previous row - ->>> df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6], -... 'b': [1, 1, 2, 3, 5, 8], -... 'c': [1, 4, 9, 16, 25, 36]}) ->>> df - a b c -0 1 1 1 -1 2 1 4 -2 3 2 9 -3 4 3 16 -4 5 5 25 -5 6 8 36 - ->>> df.diff() - a b c -0 NaN NaN NaN -1 1.0 0.0 3.0 -2 1.0 1.0 5.0 -3 1.0 1.0 7.0 -4 1.0 2.0 9.0 -5 1.0 3.0 11.0 - -Difference with previous column - ->>> df.diff(axis=1) - a b c -0 NaN 0.0 0.0 -1 NaN -1.0 3.0 -2 NaN -1.0 7.0 -3 NaN -1.0 13.0 -4 NaN 0.0 20.0 -5 NaN 2.0 28.0 - -Difference with 3rd previous row - ->>> df.diff(periods=3) - a b c -0 NaN NaN NaN -1 NaN NaN NaN -2 NaN NaN NaN -3 3.0 2.0 15.0 -4 3.0 4.0 21.0 -5 3.0 6.0 27.0 - -Difference with following row - ->>> df.diff(periods=-1) - a b c -0 -1.0 0.0 -3.0 -1 -1.0 -1.0 -5.0 -2 -1.0 -1.0 -7.0 -3 -1.0 -2.0 -9.0 -4 -1.0 -3.0 -11.0 -5 NaN NaN NaN - -Overflow for input dtype - ->>> df = pd.DataFrame({'a': [1, 0]}, dtype=np.uint8) ->>> df.diff() - a -0 NaN -1 255.0 - """ - ) - @doc(Series.diff, klass='Dataframe', extra_params="axis : {0 or 'index', 1 or 'columns'}, default 0\n Take difference over rows (0) or columns (1).", other_klass='Series') + @doc(Series.diff, klass='Dataframe', extra_params="axis : {0 or 'index', 1 or 'columns'}, default 0\n Take difference over rows (0) or columns (1).", other_klass='Series', examples="Difference with previous row\n\n>>> df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6],\n... 'b': [1, 1, 2, 3, 5, 8],\n... 'c': [1, 4, 9, 16, 25, 36]})\n>>> df\n a b c\n0 1 1 1\n1 2 1 4\n2 3 2 9\n3 4 3 16\n4 5 5 25\n5 6 8 36\n\n>>> df.diff()\n a b c\n0 NaN NaN NaN\n1 1.0 0.0 3.0\n2 1.0 1.0 5.0\n3 \n1.0 1.0 7.0\n4 1.0 2.0 9.0\n5 1.0 3.0 11.0\n\nDifference with previous column\n\n>>> df.diff(axis=1)\n a b c\n0 NaN 0.0 0.0\n1 NaN -1.0 3.0\n2 NaN -1.0 7.0\n3 NaN -1.0 13.0\n4 NaN 0.0 20.0\n5 NaN 2.0 28.0\n\nDifference with 3rd previous row\n\n>>> df.diff(periods=3)\n a b c\n0 NaN NaN NaN\n1 NaN NaN NaN\n2 NaN NaN NaN\n3 \n3.0 2.0 15.0\n4 3.0 4.0 21.0\n5 3.0 6.0 27.0\n\nDifference with following row\n\n>>> df.diff(periods=-1)\n a b c\n0 -1.0 0.0 -3.0\n1 -1.0 -1.0 -5.0\n2 -1.0 -1.0 -7.0\n3 -1.0 -2.0 -9.0\n4 -1.0 -3.0 -11.0\n5 NaN NaN NaN\n\nOverflow for input dtype\n\n>>> df = pd.DataFrame({'a': [1, 0]}, dtype=np.uint8)\n>>> df.diff()\n a\n0 NaN\n1 255.0") def diff(self, periods=1, axis=0) -> "DataFrame": bm_axis = self._get_block_manager_axis(axis) @@ -6540,6 +6471,7 @@ def _gotitem( requested ndim of result subset : object, default None subset to act on + """ if subset is None: subset = self diff --git a/pandas/core/series.py b/pandas/core/series.py index 413ec3e2e8c50..3113ff27f21c2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2239,54 +2239,8 @@ def cov(self, other, min_periods=None) -> float: return np.nan return nanops.nancov(this.values, other.values, min_periods=min_periods) - @Appender( - """ -Examples --------- -Difference with previous row - ->>> s = pd.Series([1, 1, 2, 3, 5, 8]) ->>> s.diff() -0 NaN -1 0.0 -2 1.0 -3 1.0 -4 2.0 -5 3.0 -dtype: float64 - -Difference with 3rd previous row - ->>> s.diff(periods=3) -0 NaN -1 NaN -2 NaN -3 2.0 -4 4.0 -5 6.0 -dtype: float64 - -Difference with following row - ->>> s.diff(periods=-1) -0 0.0 -1 -1.0 -2 -1.0 -3 -2.0 -4 -3.0 -5 NaN -dtype: float64 - -Overflow for input dtype - ->>> s = pd.Series([1, 0], dtype=np.uint8) ->>> s.diff() -0 NaN -1 255.0 -dtype: float64 - """ - ) - @doc(klass='Series', extra_params='', other_klass='Dataframe') + + @doc(klass='Series', extra_params='', other_klass='Dataframe', examples="Difference with previous row\n\n>>> s = pd.Series([1, 1, 2, 3, 5, 8])\n>>> s.diff()\n0 NaN\n1 0.0\n2 1.0\n3 1.0\n4 2.0\n5 3.0\ndtype: float64\n\nDifference with 3rd previous row\n\n>>> s.diff(periods=3)\n0 NaN\n1 NaN\n2 NaN\n3 2.0\n4 4.0\n5 6.0\ndtype: float64\n\nDifference with following row\n\n>>> s.diff(periods=-1)\n0 0.0\n1 -1.0\n2 -1.0\n3 -2.0\n4 -3.0\n5 NaN\ndtype: float64\n\nOverflow for input dtype\n\n>>> s = pd.Series([1, 0], dtype=np.uint8)\n>>> s.diff()\n0 NaN\n1 255.0\ndtype: float64") def diff(self, periods=1) -> "Series": """ First discrete difference of element. @@ -2319,6 +2273,10 @@ def diff(self, periods=1) -> "Series": :meth:`operator.sub`. The result is calculated according to current dtype in {klass}, however dtype of the result is always float64. + + Examples + -------- + {examples} """ result = algorithms.diff(self.array, periods) return self._constructor(result, index=self.index).__finalize__(self) From b7dd328722d72f68b6030a599f2e9c06a0c7f2a5 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Fri, 3 Apr 2020 21:34:35 +0200 Subject: [PATCH 07/31] DOC: fix --- pandas/core/frame.py | 8 +++++++- pandas/core/series.py | 8 ++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4edfd0d94eeb4..97c4e759631bf 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6445,7 +6445,13 @@ def melt( # ---------------------------------------------------------------------- # Time series-related - @doc(Series.diff, klass='Dataframe', extra_params="axis : {0 or 'index', 1 or 'columns'}, default 0\n Take difference over rows (0) or columns (1).", other_klass='Series', examples="Difference with previous row\n\n>>> df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6],\n... 'b': [1, 1, 2, 3, 5, 8],\n... 'c': [1, 4, 9, 16, 25, 36]})\n>>> df\n a b c\n0 1 1 1\n1 2 1 4\n2 3 2 9\n3 4 3 16\n4 5 5 25\n5 6 8 36\n\n>>> df.diff()\n a b c\n0 NaN NaN NaN\n1 1.0 0.0 3.0\n2 1.0 1.0 5.0\n3 \n1.0 1.0 7.0\n4 1.0 2.0 9.0\n5 1.0 3.0 11.0\n\nDifference with previous column\n\n>>> df.diff(axis=1)\n a b c\n0 NaN 0.0 0.0\n1 NaN -1.0 3.0\n2 NaN -1.0 7.0\n3 NaN -1.0 13.0\n4 NaN 0.0 20.0\n5 NaN 2.0 28.0\n\nDifference with 3rd previous row\n\n>>> df.diff(periods=3)\n a b c\n0 NaN NaN NaN\n1 NaN NaN NaN\n2 NaN NaN NaN\n3 \n3.0 2.0 15.0\n4 3.0 4.0 21.0\n5 3.0 6.0 27.0\n\nDifference with following row\n\n>>> df.diff(periods=-1)\n a b c\n0 -1.0 0.0 -3.0\n1 -1.0 -1.0 -5.0\n2 -1.0 -1.0 -7.0\n3 -1.0 -2.0 -9.0\n4 -1.0 -3.0 -11.0\n5 NaN NaN NaN\n\nOverflow for input dtype\n\n>>> df = pd.DataFrame({'a': [1, 0]}, dtype=np.uint8)\n>>> df.diff()\n a\n0 NaN\n1 255.0") + @doc( + Series.diff, + klass="Dataframe", + extra_params="axis : {0 or 'index', 1 or 'columns'}, default 0\n Take difference over rows (0) or columns (1).", + other_klass="Series", + examples="Difference with previous row\n\n>>> df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6],\n... 'b': [1, 1, 2, 3, 5, 8],\n... 'c': [1, 4, 9, 16, 25, 36]})\n>>> df\n a b c\n0 1 1 1\n1 2 1 4\n2 3 2 9\n3 4 3 16\n4 5 5 25\n5 6 8 36\n\n>>> df.diff()\n a b c\n0 NaN NaN NaN\n1 1.0 0.0 3.0\n2 1.0 1.0 5.0\n3 \n1.0 1.0 7.0\n4 1.0 2.0 9.0\n5 1.0 3.0 11.0\n\nDifference with previous column\n\n>>> df.diff(axis=1)\n a b c\n0 NaN 0.0 0.0\n1 NaN -1.0 3.0\n2 NaN -1.0 7.0\n3 NaN -1.0 13.0\n4 NaN 0.0 20.0\n5 NaN 2.0 28.0\n\nDifference with 3rd previous row\n\n>>> df.diff(periods=3)\n a b c\n0 NaN NaN NaN\n1 NaN NaN NaN\n2 NaN NaN NaN\n3 \n3.0 2.0 15.0\n4 3.0 4.0 21.0\n5 3.0 6.0 27.0\n\nDifference with following row\n\n>>> df.diff(periods=-1)\n a b c\n0 -1.0 0.0 -3.0\n1 -1.0 -1.0 -5.0\n2 -1.0 -1.0 -7.0\n3 -1.0 -2.0 -9.0\n4 -1.0 -3.0 -11.0\n5 NaN NaN NaN\n\nOverflow for input dtype\n\n>>> df = pd.DataFrame({'a': [1, 0]}, dtype=np.uint8)\n>>> df.diff()\n a\n0 NaN\n1 255.0", + ) def diff(self, periods=1, axis=0) -> "DataFrame": bm_axis = self._get_block_manager_axis(axis) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3113ff27f21c2..8cbcade7571c6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2239,8 +2239,12 @@ def cov(self, other, min_periods=None) -> float: return np.nan return nanops.nancov(this.values, other.values, min_periods=min_periods) - - @doc(klass='Series', extra_params='', other_klass='Dataframe', examples="Difference with previous row\n\n>>> s = pd.Series([1, 1, 2, 3, 5, 8])\n>>> s.diff()\n0 NaN\n1 0.0\n2 1.0\n3 1.0\n4 2.0\n5 3.0\ndtype: float64\n\nDifference with 3rd previous row\n\n>>> s.diff(periods=3)\n0 NaN\n1 NaN\n2 NaN\n3 2.0\n4 4.0\n5 6.0\ndtype: float64\n\nDifference with following row\n\n>>> s.diff(periods=-1)\n0 0.0\n1 -1.0\n2 -1.0\n3 -2.0\n4 -3.0\n5 NaN\ndtype: float64\n\nOverflow for input dtype\n\n>>> s = pd.Series([1, 0], dtype=np.uint8)\n>>> s.diff()\n0 NaN\n1 255.0\ndtype: float64") + @doc( + klass="Series", + extra_params="", + other_klass="Dataframe", + examples="Difference with previous row\n\n>>> s = pd.Series([1, 1, 2, 3, 5, 8])\n>>> s.diff()\n0 NaN\n1 0.0\n2 1.0\n3 1.0\n4 2.0\n5 3.0\ndtype: float64\n\nDifference with 3rd previous row\n\n>>> s.diff(periods=3)\n0 NaN\n1 NaN\n2 NaN\n3 2.0\n4 4.0\n5 6.0\ndtype: float64\n\nDifference with following row\n\n>>> s.diff(periods=-1)\n0 0.0\n1 -1.0\n2 -1.0\n3 -2.0\n4 -3.0\n5 NaN\ndtype: float64\n\nOverflow for input dtype\n\n>>> s = pd.Series([1, 0], dtype=np.uint8)\n>>> s.diff()\n0 NaN\n1 255.0\ndtype: float64", + ) def diff(self, periods=1) -> "Series": """ First discrete difference of element. From 7fc66b92ed9b2ecaa6b74cb73ef0e819caae0279 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Fri, 3 Apr 2020 21:57:31 +0200 Subject: [PATCH 08/31] DOC: fix --- pandas/core/frame.py | 22 ++++++++++++++++++++-- pandas/core/series.py | 9 ++++++++- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 97c4e759631bf..e2ca2c90d3e02 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6448,9 +6448,27 @@ def melt( @doc( Series.diff, klass="Dataframe", - extra_params="axis : {0 or 'index', 1 or 'columns'}, default 0\n Take difference over rows (0) or columns (1).", + extra_params="axis : {0 or 'index', 1 or 'columns'}, default 0\n " + "Take difference over rows (0) or columns (1).", other_klass="Series", - examples="Difference with previous row\n\n>>> df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6],\n... 'b': [1, 1, 2, 3, 5, 8],\n... 'c': [1, 4, 9, 16, 25, 36]})\n>>> df\n a b c\n0 1 1 1\n1 2 1 4\n2 3 2 9\n3 4 3 16\n4 5 5 25\n5 6 8 36\n\n>>> df.diff()\n a b c\n0 NaN NaN NaN\n1 1.0 0.0 3.0\n2 1.0 1.0 5.0\n3 \n1.0 1.0 7.0\n4 1.0 2.0 9.0\n5 1.0 3.0 11.0\n\nDifference with previous column\n\n>>> df.diff(axis=1)\n a b c\n0 NaN 0.0 0.0\n1 NaN -1.0 3.0\n2 NaN -1.0 7.0\n3 NaN -1.0 13.0\n4 NaN 0.0 20.0\n5 NaN 2.0 28.0\n\nDifference with 3rd previous row\n\n>>> df.diff(periods=3)\n a b c\n0 NaN NaN NaN\n1 NaN NaN NaN\n2 NaN NaN NaN\n3 \n3.0 2.0 15.0\n4 3.0 4.0 21.0\n5 3.0 6.0 27.0\n\nDifference with following row\n\n>>> df.diff(periods=-1)\n a b c\n0 -1.0 0.0 -3.0\n1 -1.0 -1.0 -5.0\n2 -1.0 -1.0 -7.0\n3 -1.0 -2.0 -9.0\n4 -1.0 -3.0 -11.0\n5 NaN NaN NaN\n\nOverflow for input dtype\n\n>>> df = pd.DataFrame({'a': [1, 0]}, dtype=np.uint8)\n>>> df.diff()\n a\n0 NaN\n1 255.0", + examples="Difference with previous row\n\n>>> df = pd.DataFrame({'a'" + ": [1, 2, 3, 4, 5, 6],\n... 'b': [1, 1, 2, 3, 5, " + "8],\n... 'c': [1, 4, 9, 16, 25, 36]})\n>>> df\n " + " a b c\n0 1 1 1\n1 2 1 4\n2 3 2 9\n3 4 3 16\n4 " + "5 5 25\n5 6 8 36\n\n>>> df.diff()\n a b c\n0 NaN " + "NaN NaN\n1 1.0 0.0 3.0\n2 1.0 1.0 5.0\n3 \n1.0 1.0 7.0" + "\n4 1.0 2.0 9.0\n5 1.0 3.0 11.0\n\nDifference with previous " + "column\n\n>>> df.diff(axis=1)\n a b c\n0 NaN 0.0 0.0\n" + "1 NaN -1.0 3.0\n2 NaN -1.0 7.0\n3 NaN -1.0 13.0\n4 NaN 0.0 2" + "0.0\n5 NaN 2.0 28.0\n\nDifference with 3rd previous row\n\n>>> df" + ".diff(periods=3)\n a b c\n0 NaN NaN NaN\n1 NaN NaN" + " NaN\n2 NaN NaN NaN\n3 \n3.0 2.0 15.0\n4 3.0 4.0 21.0\n5" + " 3.0 6.0 27.0\n\nDifference with following row\n\n>>> df.diff(pe" + "riods=-1)\n a b c\n0 -1.0 0.0 -3.0\n1 -1.0 -1.0 -5.0" + "\n2 -1.0 -1.0 -7.0\n3 -1.0 -2.0 -9.0\n4 -1.0 -3.0 -11.0\n5 NaN " + "NaN NaN\n\nOverflow for input dtype\n\n>>> df = pd.DataFrame({'a'" + ": [1, 0]}, dtype=np.uint8)\n>>> df.diff()\n a\n0 NaN\n1 2" + "55.0", ) def diff(self, periods=1, axis=0) -> "DataFrame": diff --git a/pandas/core/series.py b/pandas/core/series.py index 8cbcade7571c6..b153d4374ee8e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2243,7 +2243,14 @@ def cov(self, other, min_periods=None) -> float: klass="Series", extra_params="", other_klass="Dataframe", - examples="Difference with previous row\n\n>>> s = pd.Series([1, 1, 2, 3, 5, 8])\n>>> s.diff()\n0 NaN\n1 0.0\n2 1.0\n3 1.0\n4 2.0\n5 3.0\ndtype: float64\n\nDifference with 3rd previous row\n\n>>> s.diff(periods=3)\n0 NaN\n1 NaN\n2 NaN\n3 2.0\n4 4.0\n5 6.0\ndtype: float64\n\nDifference with following row\n\n>>> s.diff(periods=-1)\n0 0.0\n1 -1.0\n2 -1.0\n3 -2.0\n4 -3.0\n5 NaN\ndtype: float64\n\nOverflow for input dtype\n\n>>> s = pd.Series([1, 0], dtype=np.uint8)\n>>> s.diff()\n0 NaN\n1 255.0\ndtype: float64", + examples="Difference with previous row\n\n>>> s = pd.Series([1, 1, 2, 3, " + "5, 8])\n>>> s.diff()\n0 NaN\n1 0.0\n2 1.0\n3 1.0\n4 2.0\n" + "5 3.0\ndtype: float64\n\nDifference with 3rd previous row\n\n>>> s.di" + "ff(periods=3)\n0 NaN\n1 NaN\n2 NaN\n3 2.0\n4 4.0\n5 6." + "0\ndtype: float64\n\nDifference with following row\n\n>>> s.diff(periods" + "=-1)\n0 0.0\n1 -1.0\n2 -1.0\n3 -2.0\n4 -3.0\n5 NaN\ndtype:" + " float64\n\nOverflow for input dtype\n\n>>> s = pd.Series([1, 0], dtype=" + "np.uint8)\n>>> s.diff()\n0 NaN\n1 255.0\ndtype: float64", ) def diff(self, periods=1) -> "Series": """ From 8d533368fef711d2746de6d587636c95d09708f3 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Fri, 3 Apr 2020 22:13:41 +0200 Subject: [PATCH 09/31] DOC: Fix --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index b153d4374ee8e..079c01d6d0a25 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2242,7 +2242,7 @@ def cov(self, other, min_periods=None) -> float: @doc( klass="Series", extra_params="", - other_klass="Dataframe", + other_klass="DataFrame", examples="Difference with previous row\n\n>>> s = pd.Series([1, 1, 2, 3, " "5, 8])\n>>> s.diff()\n0 NaN\n1 0.0\n2 1.0\n3 1.0\n4 2.0\n" "5 3.0\ndtype: float64\n\nDifference with 3rd previous row\n\n>>> s.di" From ecf74e5edc12fcfeb7e82d720f6f8c2eadb6fbba Mon Sep 17 00:00:00 2001 From: mproszewska Date: Thu, 9 Apr 2020 18:34:58 +0200 Subject: [PATCH 10/31] DOC: Fix strings --- pandas/core/frame.py | 76 ++++++++++++++++++++++++++++++++----------- pandas/core/series.py | 47 ++++++++++++++++++++------ 2 files changed, 94 insertions(+), 29 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e2ca2c90d3e02..037958c66179a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6451,25 +6451,63 @@ def melt( extra_params="axis : {0 or 'index', 1 or 'columns'}, default 0\n " "Take difference over rows (0) or columns (1).", other_klass="Series", - examples="Difference with previous row\n\n>>> df = pd.DataFrame({'a'" - ": [1, 2, 3, 4, 5, 6],\n... 'b': [1, 1, 2, 3, 5, " - "8],\n... 'c': [1, 4, 9, 16, 25, 36]})\n>>> df\n " - " a b c\n0 1 1 1\n1 2 1 4\n2 3 2 9\n3 4 3 16\n4 " - "5 5 25\n5 6 8 36\n\n>>> df.diff()\n a b c\n0 NaN " - "NaN NaN\n1 1.0 0.0 3.0\n2 1.0 1.0 5.0\n3 \n1.0 1.0 7.0" - "\n4 1.0 2.0 9.0\n5 1.0 3.0 11.0\n\nDifference with previous " - "column\n\n>>> df.diff(axis=1)\n a b c\n0 NaN 0.0 0.0\n" - "1 NaN -1.0 3.0\n2 NaN -1.0 7.0\n3 NaN -1.0 13.0\n4 NaN 0.0 2" - "0.0\n5 NaN 2.0 28.0\n\nDifference with 3rd previous row\n\n>>> df" - ".diff(periods=3)\n a b c\n0 NaN NaN NaN\n1 NaN NaN" - " NaN\n2 NaN NaN NaN\n3 \n3.0 2.0 15.0\n4 3.0 4.0 21.0\n5" - " 3.0 6.0 27.0\n\nDifference with following row\n\n>>> df.diff(pe" - "riods=-1)\n a b c\n0 -1.0 0.0 -3.0\n1 -1.0 -1.0 -5.0" - "\n2 -1.0 -1.0 -7.0\n3 -1.0 -2.0 -9.0\n4 -1.0 -3.0 -11.0\n5 NaN " - "NaN NaN\n\nOverflow for input dtype\n\n>>> df = pd.DataFrame({'a'" - ": [1, 0]}, dtype=np.uint8)\n>>> df.diff()\n a\n0 NaN\n1 2" - "55.0", - ) + examples="""Difference with previous row +>>> df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6], +... 'b': [1, 1, 2, 3, 5, 8], +... 'c': [1, 4, 9, 16, 25, 36]}) +>>> df + a b c +0 1 1 1 +1 2 1 4 +2 3 2 9 +3 4 3 16 +4 5 5 25 +5 6 8 36 +>>> df.diff() + a b c +0 NaN NaN NaN +1 1.0 0.0 3.0 +2 1.0 1.0 5.0 +3 1.0 1.0 7.0 +4 1.0 2.0 9.0 +5 1.0 3.0 11.0 + +Difference with previous column +>>> df.diff(axis=1) + a b c +0 NaN 0.0 0.0 +1 NaN -1.0 3.0 +2 NaN -1.0 7.0 +3 NaN -1.0 13.0 +4 NaN 0.0 20.0 +5 NaN 2.0 28.0 + +Difference with 3rd previous row +>>> df.diff(periods=3) + a b c +0 NaN NaN NaN +1 NaN NaN NaN +2 NaN NaN NaN +3 3.0 2.0 15.0 +4 3.0 4.0 21.0 +5 3.0 6.0 27.0 + +Difference with following row +>>> df.diff(periods=-1) + a b c +0 -1.0 0.0 -3.0 +1 -1.0 -1.0 -5.0 +2 -1.0 -1.0 -7.0 +3 -1.0 -2.0 -9.0 +4 -1.0 -3.0 -11.0 +5 NaN NaN NaN + +Overflow for input dtype +>>> df = pd.DataFrame({'a': [1, 0]}, dtype=np.uint8) +>>> df.diff() + a +0 NaN +1 255.0""") def diff(self, periods=1, axis=0) -> "DataFrame": bm_axis = self._get_block_manager_axis(axis) diff --git a/pandas/core/series.py b/pandas/core/series.py index 079c01d6d0a25..3ab16403320da 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2243,15 +2243,43 @@ def cov(self, other, min_periods=None) -> float: klass="Series", extra_params="", other_klass="DataFrame", - examples="Difference with previous row\n\n>>> s = pd.Series([1, 1, 2, 3, " - "5, 8])\n>>> s.diff()\n0 NaN\n1 0.0\n2 1.0\n3 1.0\n4 2.0\n" - "5 3.0\ndtype: float64\n\nDifference with 3rd previous row\n\n>>> s.di" - "ff(periods=3)\n0 NaN\n1 NaN\n2 NaN\n3 2.0\n4 4.0\n5 6." - "0\ndtype: float64\n\nDifference with following row\n\n>>> s.diff(periods" - "=-1)\n0 0.0\n1 -1.0\n2 -1.0\n3 -2.0\n4 -3.0\n5 NaN\ndtype:" - " float64\n\nOverflow for input dtype\n\n>>> s = pd.Series([1, 0], dtype=" - "np.uint8)\n>>> s.diff()\n0 NaN\n1 255.0\ndtype: float64", - ) + examples="""Difference with previous row +>>> s = pd.Series([1, 1, 2, 3, 5, 8]) +>>> s.diff() +0 NaN +1 0.0 +2 1.0 +3 1.0 +4 2.0 +5 3.0 +dtype: float64 + +Difference with 3rd previous row +>>> s.diff(periods=3) +0 NaN +1 NaN +2 NaN +3 2.0 +4 4.0 +5 6.0 +dtype: float64 + +Difference with following row +>>> s.diff(periods=-1) +0 0.0 +1 -1.0 +2 -1.0 +3 -2.0 +4 -3.0 +5 NaN +dtype: float64 + +Overflow for input dtype +>>> s = pd.Series([1, 0], dtype=np.uint8) +>>> s.diff() +0 NaN +1 255.0 +dtype: float64""") def diff(self, periods=1) -> "Series": """ First discrete difference of element. @@ -4499,7 +4527,6 @@ def to_period(self, freq=None, copy=True) -> "Series": new_values = new_values.copy() assert isinstance(self.index, ABCDatetimeIndex) - new_index = self.index.to_period(freq=freq) return self._constructor(new_values, index=new_index).__finalize__(self) # ---------------------------------------------------------------------- From 74fe0e410b1c5c7d2fd268a1a59a0727b321470a Mon Sep 17 00:00:00 2001 From: mproszewska Date: Thu, 9 Apr 2020 20:17:24 +0200 Subject: [PATCH 11/31] DOC: Fix --- pandas/core/frame.py | 3 +-- pandas/core/series.py | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 037958c66179a..bee55c52038e9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6500,7 +6500,7 @@ def melt( 2 -1.0 -1.0 -7.0 3 -1.0 -2.0 -9.0 4 -1.0 -3.0 -11.0 -5 NaN NaN NaN +5 NaN NaN NaND Overflow for input dtype >>> df = pd.DataFrame({'a': [1, 0]}, dtype=np.uint8) @@ -6533,7 +6533,6 @@ def _gotitem( requested ndim of result subset : object, default None subset to act on - """ if subset is None: subset = self diff --git a/pandas/core/series.py b/pandas/core/series.py index 3ab16403320da..0ffd643de7729 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4527,6 +4527,7 @@ def to_period(self, freq=None, copy=True) -> "Series": new_values = new_values.copy() assert isinstance(self.index, ABCDatetimeIndex) + new_index = self.index.to_period(freq=freq) return self._constructor(new_values, index=new_index).__finalize__(self) # ---------------------------------------------------------------------- From 45558c05aae7fc43a0174bdce084714af08bad9a Mon Sep 17 00:00:00 2001 From: mproszewska Date: Thu, 9 Apr 2020 20:47:50 +0200 Subject: [PATCH 12/31] DOC: Fix --- pandas/core/frame.py | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f95cd69513216..62eaadfbd95e9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6644,41 +6644,6 @@ def melt( 0 NaN 1 255.0""") def diff(self, periods: int = 1, axis: Axis = 0) -> "DataFrame": - """ - First discrete difference of element. - - Calculates the difference of a DataFrame element compared with another - element in the DataFrame (default is the element in the same column - of the previous row). - - Parameters - ---------- - periods : int, default 1 - Periods to shift for calculating difference, accepts negative - values. - axis : {0 or 'index', 1 or 'columns'}, default 0 - Take difference over rows (0) or columns (1). - - Returns - ------- - DataFrame - - See Also - -------- - Series.diff: First discrete difference for a Series. - DataFrame.pct_change: Percent change over given number of periods. - DataFrame.shift: Shift index by desired number of periods with an - optional time freq. - - Notes - ----- - For boolean dtypes, this uses :meth:`operator.xor` rather than - :meth:`operator.sub`. - - Examples - -------- - Difference with previous row ->>>>>>> master bm_axis = self._get_block_manager_axis(axis) new_data = self._data.diff(n=periods, axis=bm_axis) From df4e7d19faa3577f9c8f2f5b25ca879abb078404 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Thu, 9 Apr 2020 21:42:48 +0200 Subject: [PATCH 13/31] DOC: Add newlines --- pandas/core/frame.py | 11 +++++++++-- pandas/core/series.py | 7 ++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 62eaadfbd95e9..1d39829c4a358 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6587,6 +6587,7 @@ def melt( "Take difference over rows (0) or columns (1).", other_klass="Series", examples="""Difference with previous row + >>> df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6], ... 'b': [1, 1, 2, 3, 5, 8], ... 'c': [1, 4, 9, 16, 25, 36]}) @@ -6598,6 +6599,7 @@ def melt( 3 4 3 16 4 5 5 25 5 6 8 36 + >>> df.diff() a b c 0 NaN NaN NaN @@ -6608,6 +6610,7 @@ def melt( 5 1.0 3.0 11.0 Difference with previous column + >>> df.diff(axis=1) a b c 0 NaN 0.0 0.0 @@ -6618,6 +6621,7 @@ def melt( 5 NaN 2.0 28.0 Difference with 3rd previous row + >>> df.diff(periods=3) a b c 0 NaN NaN NaN @@ -6628,6 +6632,7 @@ def melt( 5 3.0 6.0 27.0 Difference with following row + >>> df.diff(periods=-1) a b c 0 -1.0 0.0 -3.0 @@ -6635,14 +6640,16 @@ def melt( 2 -1.0 -1.0 -7.0 3 -1.0 -2.0 -9.0 4 -1.0 -3.0 -11.0 -5 NaN NaN NaND +5 NaN NaN NaN Overflow for input dtype + >>> df = pd.DataFrame({'a': [1, 0]}, dtype=np.uint8) >>> df.diff() a 0 NaN -1 255.0""") +1 255.0""", + ) def diff(self, periods: int = 1, axis: Axis = 0) -> "DataFrame": bm_axis = self._get_block_manager_axis(axis) diff --git a/pandas/core/series.py b/pandas/core/series.py index 9e2d3a1f54f34..11f5d8eae1cfe 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2292,6 +2292,7 @@ def cov(self, other, min_periods=None) -> float: extra_params="", other_klass="DataFrame", examples="""Difference with previous row + >>> s = pd.Series([1, 1, 2, 3, 5, 8]) >>> s.diff() 0 NaN @@ -2303,6 +2304,7 @@ def cov(self, other, min_periods=None) -> float: dtype: float64 Difference with 3rd previous row + >>> s.diff(periods=3) 0 NaN 1 NaN @@ -2313,6 +2315,7 @@ def cov(self, other, min_periods=None) -> float: dtype: float64 Difference with following row + >>> s.diff(periods=-1) 0 0.0 1 -1.0 @@ -2323,11 +2326,13 @@ def cov(self, other, min_periods=None) -> float: dtype: float64 Overflow for input dtype + >>> s = pd.Series([1, 0], dtype=np.uint8) >>> s.diff() 0 NaN 1 255.0 -dtype: float64""") +dtype: float64""", + ) def diff(self, periods: int = 1) -> "Series": """ First discrete difference of element. From c9cd6c7a5991e2a6e3d8db4676ec03b214b0e430 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Fri, 10 Apr 2020 00:18:20 +0200 Subject: [PATCH 14/31] DOC: Fix newline --- pandas/core/frame.py | 2 +- pandas/core/series.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 036a7b4874045..9e880fbcf4947 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6681,7 +6681,7 @@ def melt( Series.diff, klass="Dataframe", extra_params="axis : {0 or 'index', 1 or 'columns'}, default 0\n " - "Take difference over rows (0) or columns (1).", + "Take difference over rows (0) or columns (1).\n", other_klass="Series", examples="""Difference with previous row diff --git a/pandas/core/series.py b/pandas/core/series.py index e97eb7577b9c1..3a843dc3dda29 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2345,7 +2345,6 @@ def diff(self, periods: int = 1) -> "Series": Periods to shift for calculating difference, accepts negative values. {extra_params} - Returns ------- {klass} From ee062bc32bf14add6dddc4be03cb53c48cde0298 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Fri, 10 Apr 2020 01:08:16 +0200 Subject: [PATCH 15/31] DOC: Add dedent --- pandas/core/frame.py | 126 +++++++++++++++++++++--------------------- pandas/core/series.py | 84 ++++++++++++++-------------- 2 files changed, 107 insertions(+), 103 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9e880fbcf4947..03e865fe401ac 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6683,69 +6683,71 @@ def melt( extra_params="axis : {0 or 'index', 1 or 'columns'}, default 0\n " "Take difference over rows (0) or columns (1).\n", other_klass="Series", - examples="""Difference with previous row + examples=dedent( + """ + Difference with previous row ->>> df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6], -... 'b': [1, 1, 2, 3, 5, 8], -... 'c': [1, 4, 9, 16, 25, 36]}) ->>> df - a b c -0 1 1 1 -1 2 1 4 -2 3 2 9 -3 4 3 16 -4 5 5 25 -5 6 8 36 - ->>> df.diff() - a b c -0 NaN NaN NaN -1 1.0 0.0 3.0 -2 1.0 1.0 5.0 -3 1.0 1.0 7.0 -4 1.0 2.0 9.0 -5 1.0 3.0 11.0 - -Difference with previous column - ->>> df.diff(axis=1) - a b c -0 NaN 0.0 0.0 -1 NaN -1.0 3.0 -2 NaN -1.0 7.0 -3 NaN -1.0 13.0 -4 NaN 0.0 20.0 -5 NaN 2.0 28.0 - -Difference with 3rd previous row - ->>> df.diff(periods=3) - a b c -0 NaN NaN NaN -1 NaN NaN NaN -2 NaN NaN NaN -3 3.0 2.0 15.0 -4 3.0 4.0 21.0 -5 3.0 6.0 27.0 - -Difference with following row - ->>> df.diff(periods=-1) - a b c -0 -1.0 0.0 -3.0 -1 -1.0 -1.0 -5.0 -2 -1.0 -1.0 -7.0 -3 -1.0 -2.0 -9.0 -4 -1.0 -3.0 -11.0 -5 NaN NaN NaN - -Overflow for input dtype - ->>> df = pd.DataFrame({'a': [1, 0]}, dtype=np.uint8) ->>> df.diff() - a -0 NaN -1 255.0""", + >>> df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6], + ... 'b': [1, 1, 2, 3, 5, 8], + ... 'c': [1, 4, 9, 16, 25, 36]}) + >>> df + a b c + 0 1 1 1 + 1 2 1 4 + 2 3 2 9 + 3 4 3 16 + 4 5 5 25 + 5 6 8 36 + + >>> df.diff() + a b c + 0 NaN NaN NaN + 1 1.0 0.0 3.0 + 2 1.0 1.0 5.0 + 3 1.0 1.0 7.0 + 4 1.0 2.0 9.0 + 5 1.0 3.0 11.0 + + Difference with previous column + + >>> df.diff(axis=1) + a b c + 0 NaN 0.0 0.0 + 1 NaN -1.0 3.0 + 2 NaN -1.0 7.0 + 3 NaN -1.0 13.0 + 4 NaN 0.0 20.0 + 5 NaN 2.0 28.0 + + Difference with 3rd previous row + + >>> df.diff(periods=3) + a b c + 0 NaN NaN NaN + 1 NaN NaN NaN + 2 NaN NaN NaN + 3 3.0 2.0 15.0 + 4 3.0 4.0 21.0 + 5 3.0 6.0 27.0 + + Difference with following row + + >>> df.diff(periods=-1) + a b c + 0 -1.0 0.0 -3.0 + 1 -1.0 -1.0 -5.0 + 2 -1.0 -1.0 -7.0 + 3 -1.0 -2.0 -9.0 + 4 -1.0 -3.0 -11.0 + 5 NaN NaN NaN + + Overflow for input dtype + + >>> df = pd.DataFrame({'a': [1, 0]}, dtype=np.uint8) + >>> df.diff() + a + 0 NaN + 1 255.0"""), ) def diff(self, periods: int = 1, axis: Axis = 0) -> "DataFrame": diff --git a/pandas/core/series.py b/pandas/core/series.py index 3a843dc3dda29..912769e8efecb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2290,47 +2290,49 @@ def cov(self, other, min_periods=None) -> float: klass="Series", extra_params="", other_klass="DataFrame", - examples="""Difference with previous row - ->>> s = pd.Series([1, 1, 2, 3, 5, 8]) ->>> s.diff() -0 NaN -1 0.0 -2 1.0 -3 1.0 -4 2.0 -5 3.0 -dtype: float64 - -Difference with 3rd previous row - ->>> s.diff(periods=3) -0 NaN -1 NaN -2 NaN -3 2.0 -4 4.0 -5 6.0 -dtype: float64 - -Difference with following row - ->>> s.diff(periods=-1) -0 0.0 -1 -1.0 -2 -1.0 -3 -2.0 -4 -3.0 -5 NaN -dtype: float64 - -Overflow for input dtype - ->>> s = pd.Series([1, 0], dtype=np.uint8) ->>> s.diff() -0 NaN -1 255.0 -dtype: float64""", + examples=dedent( + """ + Difference with previous row + + >>> s = pd.Series([1, 1, 2, 3, 5, 8]) + >>> s.diff() + 0 NaN + 1 0.0 + 2 1.0 + 3 1.0 + 4 2.0 + 5 3.0 + dtype: float64 + + Difference with 3rd previous row + + >>> s.diff(periods=3) + 0 NaN + 1 NaN + 2 NaN + 3 2.0 + 4 4.0 + 5 6.0 + dtype: float64 + + Difference with following row + + >>> s.diff(periods=-1) + 0 0.0 + 1 -1.0 + 2 -1.0 + 3 -2.0 + 4 -3.0 + 5 NaN + dtype: float64 + + Overflow for input dtype + + >>> s = pd.Series([1, 0], dtype=np.uint8) + >>> s.diff() + 0 NaN + 1 255.0 + dtype: float64"""), ) def diff(self, periods: int = 1) -> "Series": """ From 0f1890523414903949ce6e2a275942aa81dc5fa6 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Thu, 16 Apr 2020 20:47:25 +0200 Subject: [PATCH 16/31] DOC: Lint --- pandas/core/frame.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 03e865fe401ac..e2a23adde0532 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6747,7 +6747,8 @@ def melt( >>> df.diff() a 0 NaN - 1 255.0"""), + 1 255.0""" + ), ) def diff(self, periods: int = 1, axis: Axis = 0) -> "DataFrame": From e799453a7713d8c0ecce3871adb71bc57e3d8cac Mon Sep 17 00:00:00 2001 From: mproszewska Date: Thu, 16 Apr 2020 21:20:20 +0200 Subject: [PATCH 17/31] DOC: Lint --- pandas/core/series.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 912769e8efecb..296c0b5e25e8c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2332,7 +2332,8 @@ def cov(self, other, min_periods=None) -> float: >>> s.diff() 0 NaN 1 255.0 - dtype: float64"""), + dtype: float64""" + ), ) def diff(self, periods: int = 1) -> "Series": """ From b36b310380858e81c75d02a942543b705bf4395c Mon Sep 17 00:00:00 2001 From: mproszewska Date: Fri, 24 Apr 2020 14:00:09 +0200 Subject: [PATCH 18/31] Run tests --- pandas/core/frame.py | 2 +- pandas/core/series.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e2a23adde0532..06d5e63cafbe6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6741,7 +6741,7 @@ def melt( 4 -1.0 -3.0 -11.0 5 NaN NaN NaN - Overflow for input dtype + Overflow in input dtype >>> df = pd.DataFrame({'a': [1, 0]}, dtype=np.uint8) >>> df.diff() diff --git a/pandas/core/series.py b/pandas/core/series.py index 296c0b5e25e8c..c8efbd7f234e7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2326,7 +2326,7 @@ def cov(self, other, min_periods=None) -> float: 5 NaN dtype: float64 - Overflow for input dtype + Overflow in input dtype >>> s = pd.Series([1, 0], dtype=np.uint8) >>> s.diff() From b92f42b7d32b63b12ad366b5947ef74ed284bab3 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Fri, 24 Apr 2020 20:44:11 +0200 Subject: [PATCH 19/31] Change test_diff --- pandas/tests/arrays/categorical/test_algos.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py index 325fa476d70e6..92e92ee00b16d 100644 --- a/pandas/tests/arrays/categorical/test_algos.py +++ b/pandas/tests/arrays/categorical/test_algos.py @@ -94,14 +94,14 @@ def test_isin_empty(empty): def test_diff(): s = pd.Series([1, 2, 3], dtype="category") - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = s.diff() expected = pd.Series([np.nan, 1, 1]) tm.assert_series_equal(result, expected) expected = expected.to_frame(name="A") df = s.to_frame(name="A") - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df.diff() tm.assert_frame_equal(result, expected) From f03d4e9a53d3858c1244fcbca523d4e4d042779a Mon Sep 17 00:00:00 2001 From: mproszewska Date: Mon, 4 May 2020 20:32:41 +0200 Subject: [PATCH 20/31] Change stacklevel --- pandas/core/algorithms.py | 2 +- pandas/tests/arrays/categorical/test_algos.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 62a3808d36ba2..47e2d724215c2 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1857,7 +1857,7 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3): "dtype lost in 'diff()'. In the future this will raise a " "TypeError. Convert to a suitable dtype prior to calling 'diff'.", FutureWarning, - stacklevel=stacklevel, + stacklevel=stacklevel+1, ) arr = np.asarray(arr) dtype = arr.dtype diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py index 92e92ee00b16d..325fa476d70e6 100644 --- a/pandas/tests/arrays/categorical/test_algos.py +++ b/pandas/tests/arrays/categorical/test_algos.py @@ -94,14 +94,14 @@ def test_isin_empty(empty): def test_diff(): s = pd.Series([1, 2, 3], dtype="category") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): result = s.diff() expected = pd.Series([np.nan, 1, 1]) tm.assert_series_equal(result, expected) expected = expected.to_frame(name="A") df = s.to_frame(name="A") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): result = df.diff() tm.assert_frame_equal(result, expected) From 4a5b36f40a4236b8dd13e66694f61ffb1709e5f6 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Tue, 5 May 2020 03:04:38 +0200 Subject: [PATCH 21/31] Fix lint --- pandas/core/algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 47e2d724215c2..e52deb5e52309 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1857,7 +1857,7 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3): "dtype lost in 'diff()'. In the future this will raise a " "TypeError. Convert to a suitable dtype prior to calling 'diff'.", FutureWarning, - stacklevel=stacklevel+1, + stacklevel=stacklevel + 1, ) arr = np.asarray(arr) dtype = arr.dtype From 08fe1283f81accfb97d5f96752c98a9d96c2aede Mon Sep 17 00:00:00 2001 From: mproszewska <38814059+mproszewska@users.noreply.github.com> Date: Tue, 5 May 2020 11:34:24 +0200 Subject: [PATCH 22/31] Update algorithms.py --- pandas/core/algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 47e2d724215c2..e52deb5e52309 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1857,7 +1857,7 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3): "dtype lost in 'diff()'. In the future this will raise a " "TypeError. Convert to a suitable dtype prior to calling 'diff'.", FutureWarning, - stacklevel=stacklevel+1, + stacklevel=stacklevel + 1, ) arr = np.asarray(arr) dtype = arr.dtype From c94b45e1edd4494eee2a8885c25e041f6100eba6 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Fri, 15 May 2020 17:38:04 +0200 Subject: [PATCH 23/31] PERF: Remove unnecessary copies in sorting functions --- pandas/core/sorting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 25312b180dba1..da9cbe1023599 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -385,7 +385,7 @@ def ensure_key_mapped(values, key: Optional[Callable], levels=None): from pandas.core.indexes.api import Index if not key: - return values.copy() + return values if isinstance(values, ABCMultiIndex): return ensure_key_mapped_multiindex(values, key, level=levels) From 0ab450b9ea5f38582d09acbcd8f697ac62f37919 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Sat, 16 May 2020 19:06:23 +0200 Subject: [PATCH 24/31] Run tests --- pandas/core/sorting.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index da9cbe1023599..2943714a5d015 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -386,7 +386,6 @@ def ensure_key_mapped(values, key: Optional[Callable], levels=None): if not key: return values - if isinstance(values, ABCMultiIndex): return ensure_key_mapped_multiindex(values, key, level=levels) @@ -404,7 +403,7 @@ def ensure_key_mapped(values, key: Optional[Callable], levels=None): else: type_of_values = type(values) result = type_of_values(result) # try to revert to original type otherwise - except TypeError: + except TypeError:opy() raise TypeError( f"User-provided `key` function returned an invalid type {type(result)} \ which could not be converted to {type(values)}." From 54c7304d585c60dd148e3e47aa28514100289eb5 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Sat, 16 May 2020 19:07:12 +0200 Subject: [PATCH 25/31] Run tests --- pandas/core/sorting.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 2943714a5d015..da9cbe1023599 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -386,6 +386,7 @@ def ensure_key_mapped(values, key: Optional[Callable], levels=None): if not key: return values + if isinstance(values, ABCMultiIndex): return ensure_key_mapped_multiindex(values, key, level=levels) @@ -403,7 +404,7 @@ def ensure_key_mapped(values, key: Optional[Callable], levels=None): else: type_of_values = type(values) result = type_of_values(result) # try to revert to original type otherwise - except TypeError:opy() + except TypeError: raise TypeError( f"User-provided `key` function returned an invalid type {type(result)} \ which could not be converted to {type(values)}." From 6d72a346770fc93778a83e171daceec52b60e6d4 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Fri, 22 May 2020 23:19:58 +0200 Subject: [PATCH 26/31] Add asv --- asv_bench/benchmarks/algorithms.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index 65e52e03c43c7..a96d9bc924308 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -174,4 +174,13 @@ def time_argsort(self, N): self.array.argsort() +class SortIndexSeries: + def setup(self): + N = 10 ** 5 + idx = pd.date_range(start="1/1/2000", periods=N, freq="s") + self.s = pd.Series(np.random.randn(N), index=idx) + + def time_sort_index(self): + self.s.sort_index() + from .pandas_vb_common import setup # noqa: F401 isort:skip From 5ba54a6039d3981a4187b38e11b479e53f8dcdd1 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Fri, 22 May 2020 23:20:53 +0200 Subject: [PATCH 27/31] Run black --- asv_bench/benchmarks/algorithms.py | 1 + 1 file changed, 1 insertion(+) diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index a96d9bc924308..7afa97f9aa394 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -183,4 +183,5 @@ def setup(self): def time_sort_index(self): self.s.sort_index() + from .pandas_vb_common import setup # noqa: F401 isort:skip From 276627019d8000792473742c0a9036cf59b5f3cb Mon Sep 17 00:00:00 2001 From: mproszewska Date: Sat, 23 May 2020 00:28:24 +0200 Subject: [PATCH 28/31] Remove asv --- asv_bench/benchmarks/algorithms.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index 7afa97f9aa394..65e52e03c43c7 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -174,14 +174,4 @@ def time_argsort(self, N): self.array.argsort() -class SortIndexSeries: - def setup(self): - N = 10 ** 5 - idx = pd.date_range(start="1/1/2000", periods=N, freq="s") - self.s = pd.Series(np.random.randn(N), index=idx) - - def time_sort_index(self): - self.s.sort_index() - - from .pandas_vb_common import setup # noqa: F401 isort:skip From a53d937a131b1e294f5f8c98df1ce6e9c383a33a Mon Sep 17 00:00:00 2001 From: mproszewska Date: Thu, 28 May 2020 15:47:57 +0200 Subject: [PATCH 29/31] Add requested change --- pandas/core/algorithms.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index e52deb5e52309..40157b1b8f204 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1814,7 +1814,7 @@ def searchsorted(arr, value, side="left", sorter=None): _diff_special = {"float64", "float32", "int64", "int32", "int16", "int8"} -def diff(arr, n: int, axis: int = 0, stacklevel=3): +def diff(arr, n: int, axis: int = 0, stacklevel=4): """ difference of n between self, analogous to s-s.shift(n) @@ -1857,7 +1857,7 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3): "dtype lost in 'diff()'. In the future this will raise a " "TypeError. Convert to a suitable dtype prior to calling 'diff'.", FutureWarning, - stacklevel=stacklevel + 1, + stacklevel=stacklevel, ) arr = np.asarray(arr) dtype = arr.dtype From 949bcc0763bd2041eaa8c84b5bb2e063dcc78624 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Thu, 28 May 2020 15:50:12 +0200 Subject: [PATCH 30/31] Fix stacklevel --- pandas/core/algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 9980711f96cbf..d270a6431be56 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1840,7 +1840,7 @@ def searchsorted(arr, value, side="left", sorter=None): _diff_special = {"float64", "float32", "int64", "int32", "int16", "int8"} -def diff(arr, n: int, axis: int = 0, stacklevel=4): +def diff(arr, n: int, axis: int = 0, stacklevel=3): """ difference of n between self, analogous to s-s.shift(n) From 234689d482b577e817d03803b1b5591ed4178727 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Sun, 31 May 2020 15:11:37 +0200 Subject: [PATCH 31/31] Revert change --- pandas/core/sorting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index da9cbe1023599..25312b180dba1 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -385,7 +385,7 @@ def ensure_key_mapped(values, key: Optional[Callable], levels=None): from pandas.core.indexes.api import Index if not key: - return values + return values.copy() if isinstance(values, ABCMultiIndex): return ensure_key_mapped_multiindex(values, key, level=levels)