From 302803ff23bd762181a8be52b9b8aff7165b0e4a Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 10 Jan 2020 17:12:11 +0000 Subject: [PATCH 1/2] :bug: Series.diff was always setting the dtype to object --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/series.py | 7 +++++++ pandas/tests/series/methods/test_diff.py | 6 ++++++ 3 files changed, 14 insertions(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 0879189a822f8..94874648ab74b 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1146,6 +1146,7 @@ ExtensionArray - Bug in :class:`arrays.PandasArray` when setting a scalar string (:issue:`28118`, :issue:`28150`). - Bug where nullable integers could not be compared to strings (:issue:`28930`) - Bug where :class:`DataFrame` constructor raised ``ValueError`` with list-like data and ``dtype`` specified (:issue:`30280`) +- Bug in :meth:`Series.diff` was always setting the ``dtype`` to ``Object`` (:issue:`30889`) Other diff --git a/pandas/core/series.py b/pandas/core/series.py index ed338700f1011..7d9c46b1447a3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -34,6 +34,7 @@ is_bool, is_categorical_dtype, is_datetime64_dtype, + is_datetime64tz_dtype, is_dict_like, is_extension_array_dtype, is_integer, @@ -2316,6 +2317,12 @@ def diff(self, periods=1) -> "Series": dtype: float64 """ result = algorithms.diff(com.values_from_object(self), periods) + if is_extension_array_dtype(self.dtype) and not is_datetime64tz_dtype( + self.dtype + ): + return self._constructor( + result, index=self.index, dtype=self.dtype + ).__finalize__(self) return self._constructor(result, index=self.index).__finalize__(self) def autocorr(self, lag=1) -> float: diff --git a/pandas/tests/series/methods/test_diff.py b/pandas/tests/series/methods/test_diff.py index 033f75e95f11b..8f47de74b9387 100644 --- a/pandas/tests/series/methods/test_diff.py +++ b/pandas/tests/series/methods/test_diff.py @@ -75,3 +75,9 @@ 
def test_diff_object_dtype(self): result = s.diff() expected = s - s.shift(1) tm.assert_series_equal(result, expected) + + def test_nullable_integer(self, any_nullable_int_dtype): + # GH 30889 + dtype = any_nullable_int_dtype + result = Series([1, 2, 3], dtype=dtype).diff().dtype + assert result == dtype From ac7bb304f1e8a157d0cab0527f82316c5c86e0d4 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 13 Jan 2020 15:56:30 +0000 Subject: [PATCH 2/2] :art: implement diff within extensionarray --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/core/arrays/base.py | 23 ++++++++++++++++++++++- pandas/core/series.py | 6 ++---- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 94874648ab74b..6f84fb9592449 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1146,7 +1146,7 @@ ExtensionArray - Bug in :class:`arrays.PandasArray` when setting a scalar string (:issue:`28118`, :issue:`28150`). 
- Bug where nullable integers could not be compared to strings (:issue:`28930`) - Bug where :class:`DataFrame` constructor raised ``ValueError`` with list-like data and ``dtype`` specified (:issue:`30280`) -- Bug in :meth:`Series.diff` was always setting the ``dtype`` to ``Object`` (:issue:`30889`) +- Bug where :meth:`Series.diff` was always setting the ``dtype`` to ``object`` (:issue:`30889`) Other diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 9723343ea7af5..1d13bec6b4be1 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -24,7 +24,7 @@ from pandas.core.dtypes.missing import isna from pandas.core import ops -from pandas.core.algorithms import _factorize_array, unique +from pandas.core.algorithms import _factorize_array, diff, unique from pandas.core.missing import backfill_1d, pad_1d from pandas.core.sorting import nargsort @@ -516,6 +516,27 @@ def argsort( result = nargsort(self, kind=kind, ascending=ascending, na_position="last") return result + def diff(self, periods: int = 1): + """ + First discrete difference of element. + + Calculates the difference of an ExtensionArray element compared with another + element in the ExtensionArray (default is element in previous row). + + Parameters + ---------- + periods : int, default 1 + Periods to shift for calculating difference, accepts negative + values. + + Returns + ------- + ExtensionArray + First differences of the ExtensionArray. + """ + + return self._from_sequence(diff(self, periods), dtype=self.dtype) + def fillna(self, value=None, method=None, limit=None): """ Fill NA/NaN values using the specified method. 
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 7d9c46b1447a3..3b3a420ea5719 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2316,13 +2316,15 @@ def diff(self, periods=1) -> "Series":
         5    NaN
         dtype: float64
         """
-        result = algorithms.diff(com.values_from_object(self), periods)
         if is_extension_array_dtype(self.dtype) and not is_datetime64tz_dtype(
             self.dtype
         ):
+            # ExtensionArray.diff returns a bare array, not a Series; wrap it
+            # so callers still get the original index and finalized metadata.
             return self._constructor(
-                result, index=self.index, dtype=self.dtype
+                self.values.diff(periods=periods), index=self.index
             ).__finalize__(self)
+        result = algorithms.diff(com.values_from_object(self), periods)
         return self._constructor(result, index=self.index).__finalize__(self)
 
     def autocorr(self, lag=1) -> float: