pandas-dev · jorisvandenbossche · Jan 23, 2020 · Jan 15, 2020 · Jan 15, 2020 · Jan 16, 2020
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -998,6 +998,8 @@ Numeric
 - Bug in :meth:`DataFrame.round` where a :class:`DataFrame` with a :class:`CategoricalIndex` of :class:`IntervalIndex` columns would incorrectly raise a ``TypeError`` (:issue:`30063`)
 - Bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` when there are duplicated indices (:issue:`30463`)
 - Bug in :class:`DataFrame` cumulative operations (e.g. cumsum, cummax) incorrect casting to object-dtype (:issue:`19296`)
+- Bug in :meth:`DataFrame.diff` losing the dtype for extension types (:issue:`30889`)
+- Bug in :meth:`DataFrame.diff` raising an ``IndexError`` when one of the columns was a nullable integer dtype (:issue:`30967`)
 
 Conversion
 ^^^^^^^^^^

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -1834,6 +1834,9 @@ def diff(arr, n: int, axis: int = 0):
     na = np.nan
     dtype = arr.dtype
 
+    if is_extension_array_dtype(dtype):
+        return arr.diff(n)
+
     is_timedelta = False
     is_bool = False
     if needs_i8_conversion(arr):

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -578,6 +578,30 @@ def dropna(self):
         """
         return self[~self.isna()]
 
+    def diff(self, periods: int = 1):
+        """
+        Subtract a shifted copy of the array from itself.
+
+        Parameters
+        ----------
+        periods : int, default 1
+            Number of positions passed to ``shift`` before subtracting.
+
+        Returns
+        -------
+        result
+            The value of ``self - self.shift(periods)``.
+
+        Raises
+        ------
+        TypeError
+            If the array has no ``__sub__`` attribute.
+        """
+        if not hasattr(self, "__sub__"):
+            # Name the offending type so the failure is actionable.
+            raise TypeError(f"{type(self).__name__} does not support diff")
+        return self - self.shift(periods)
+
     def shift(self, periods: int = 1, fill_value: object = None) -> ABCExtensionArray:
         """
         Shift values by desired number.

diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
@@ -15,7 +15,7 @@
 from pandas.core.dtypes.missing import isna
 
 from pandas import compat
-from pandas.core import nanops
+from pandas.core import algorithms, nanops
 from pandas.core.algorithms import searchsorted, take, unique
 from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin
 import pandas.core.common as com
@@ -164,6 +164,18 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
             result = result.copy()
         return cls(result)
 
+    def diff(self, periods: int = 1):
+        """
+        Difference the wrapped ndarray and re-wrap the result.
+
+        The backing ``_ndarray`` is run through ``algorithms.diff`` with
+        ``periods`` as the step, and the output is passed to this array's
+        own class.
+        """
+        raw = com.values_from_object(self._ndarray)
+        differenced = algorithms.diff(raw, periods)
+        return type(self)(differenced)
+
     @classmethod
     def _from_factorized(cls, values, original):
         return cls(values)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -1962,6 +1962,18 @@ class ObjectValuesExtensionBlock(ExtensionBlock):
     Series[T].values is an ndarray of objects.
     """
 
+    def diff(self, n: int, axis: int = 1) -> List["Block"]:
+        """
+        Difference the block's values and return the result as a new block.
+
+        ``Block.shape`` and ``Block.values.shape`` do not match for this
+        block, so the differenced values are made at least two-dimensional
+        before being handed to ``make_block``.
+        """
+        # The op yields an object-dtype ndarray destined for an ObjectBlock.
+        differenced = algos.diff(self.values, n, axis=axis)
+        return [self.make_block(values=np.atleast_2d(differenced))]
+
     def external_values(self, dtype=None):
         return self.values.astype(object)
 

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2316,7 +2316,7 @@ def diff(self, periods=1) -> "Series":
         5    NaN
         dtype: float64
         """
-        result = algorithms.diff(com.values_from_object(self), periods)
+        result = algorithms.diff(self.array, periods)
         return self._constructor(result, index=self.index).__finalize__(self)
 
     def autocorr(self, lag=1) -> float:

diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
@@ -231,6 +231,28 @@ def test_container_shift(self, data, frame, periods, indices):
 
         compare(result, expected)
 
+    @pytest.mark.parametrize("periods", [1, -2])
+    def test_diff(self, data, periods):
+        """Series.diff and DataFrame.diff match ``data - data.shift(periods)``."""
+        data = data[:5]
+        try:
+            # Probe for subtraction support; arrays without it are skipped.
+            data - data
+        except Exception:
+            pytest.skip(f"{type(data)} does not support diff")
+
+        ser = pd.Series(data)
+        ser_result = ser.diff(periods)
+        ser_expected = pd.Series(data - data.shift(periods))
+        self.assert_series_equal(ser_result, ser_expected)
+
+        frame = pd.DataFrame({"A": data, "B": [1.0] * 5})
+        frame_result = frame.diff(periods)
+        # Expected diff of the constant column B for each parametrized period.
+        b_diff = [np.nan, 0, 0, 0, 0] if periods == 1 else [0, 0, 0, np.nan, np.nan]
+        frame_expected = pd.DataFrame({"A": ser_expected, "B": b_diff})
+        tm.assert_frame_equal(frame_result, frame_expected)
+
     @pytest.mark.parametrize(
         "periods, indices",
         [[-4, [-1, -1]], [-1, [1, -1]], [0, [0, 1]], [1, [-1, 0]], [4, [-1, -1]]],