Skip to content

Commit fcde96b

Browse files
committed
Dispatch NDFrame.diff to EAs
Closes pandas-dev#30889 Closes pandas-dev#30967
1 parent 3787133 commit fcde96b

File tree

7 files changed

+46
-2
lines changed

7 files changed

+46
-2
lines changed

doc/source/whatsnew/v1.0.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -998,6 +998,8 @@ Numeric
998998
- Bug in :meth:`DataFrame.round` where a :class:`DataFrame` with a :class:`CategoricalIndex` of :class:`IntervalIndex` columns would incorrectly raise a ``TypeError`` (:issue:`30063`)
999999
- Bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` when there are duplicated indices (:issue:`30463`)
10001000
- Bug in :class:`DataFrame` cumulative operations (e.g. cumsum, cummax) incorrect casting to object-dtype (:issue:`19296`)
1001+
- Bug in :meth:`DataFrame.diff` losing the dtype for extension types (:issue:`30889`)
1002+
- Bug in :meth:`DataFrame.diff` raising an ``IndexError`` when one of the columns was a nullable integer dtype (:issue:`30967`)
10011003

10021004
Conversion
10031005
^^^^^^^^^^

pandas/core/algorithms.py

+3
Original file line numberDiff line numberDiff line change
@@ -1834,6 +1834,9 @@ def diff(arr, n: int, axis: int = 0):
18341834
na = np.nan
18351835
dtype = arr.dtype
18361836

1837+
if is_extension_array_dtype(dtype):
1838+
return arr.diff(n)
1839+
18371840
is_timedelta = False
18381841
is_bool = False
18391842
if needs_i8_conversion(arr):

pandas/core/arrays/base.py

+5
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,11 @@ def dropna(self):
578578
"""
579579
return self[~self.isna()]
580580

581+
def diff(self, periods: int = 1):
    """
    Return the difference between this array and a shifted copy of itself.

    The result is computed as ``self - self.shift(periods)``.

    Parameters
    ----------
    periods : int, default 1
        Number of positions passed to ``shift`` before subtracting.

    Returns
    -------
    The result of ``self - self.shift(periods)``; its type is whatever
    the array's ``__sub__`` returns.

    Raises
    ------
    TypeError
        If the array does not define ``__sub__``.
    """
    # Subtraction is optional for arrays, so fail early with an explicit
    # message instead of an anonymous TypeError.
    if not hasattr(self, "__sub__"):
        raise TypeError(
            f"cannot perform diff with type {type(self).__name__}: "
            "subtraction is not supported"
        )
    return self - self.shift(periods)
585+
581586
def shift(self, periods: int = 1, fill_value: object = None) -> ABCExtensionArray:
582587
"""
583588
Shift values by desired number.

pandas/core/arrays/numpy_.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from pandas.core.dtypes.missing import isna
1616

1717
from pandas import compat
18-
from pandas.core import nanops
18+
from pandas.core import algorithms, nanops
1919
from pandas.core.algorithms import searchsorted, take, unique
2020
from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin
2121
import pandas.core.common as com
@@ -164,6 +164,10 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
164164
result = result.copy()
165165
return cls(result)
166166

167+
def diff(self, periods: int = 1):
    """
    Difference the wrapped ndarray and re-wrap the result.

    Parameters
    ----------
    periods : int, default 1
        Forwarded to ``algorithms.diff`` as the shift amount.

    Returns
    -------
    A new instance of ``type(self)`` built from the differenced values.
    """
    raw_values = com.values_from_object(self._ndarray)
    differenced = algorithms.diff(raw_values, periods)
    wrapper = type(self)
    return wrapper(differenced)
170+
167171
@classmethod
168172
def _from_factorized(cls, values, original):
169173
return cls(values)

pandas/core/internals/blocks.py

+8
Original file line numberDiff line numberDiff line change
@@ -1962,6 +1962,14 @@ class ObjectValuesExtensionBlock(ExtensionBlock):
19621962
Series[T].values is an ndarray of objects.
19631963
"""
19641964

1965+
def diff(self, n: int, axis: int = 1) -> List["Block"]:
    """
    Difference the block's values and return them wrapped in a new block.

    Block.shape and Block.values.shape do not match here, so the
    operation is run on ``self.values`` directly and the result is
    promoted to at least two dimensions before a block is built from it.

    Parameters
    ----------
    n : int
        Forwarded to ``algos.diff`` as the number of periods.
    axis : int, default 1
        Forwarded to ``algos.diff``.

    Returns
    -------
    list of Block
        A single-element list holding the block made from the result.
    """
    differenced = algos.diff(self.values, n, axis=axis)
    # Reshape so the result can be placed into a (2D) block.
    as_2d = np.atleast_2d(differenced)
    replacement = self.make_block(values=as_2d)
    return [replacement]
1972+
19651973
def external_values(self, dtype=None):
19661974
return self.values.astype(object)
19671975

pandas/core/series.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2316,7 +2316,7 @@ def diff(self, periods=1) -> "Series":
23162316
5 NaN
23172317
dtype: float64
23182318
"""
2319-
result = algorithms.diff(com.values_from_object(self), periods)
2319+
result = algorithms.diff(self.array, periods)
23202320
return self._constructor(result, index=self.index).__finalize__(self)
23212321

23222322
def autocorr(self, lag=1) -> float:

pandas/tests/extension/base/methods.py

+22
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,28 @@ def test_container_shift(self, data, frame, periods, indices):
231231

232232
compare(result, expected)
233233

234+
@pytest.mark.parametrize("periods", [1, -2])
def test_diff(self, data, periods):
    """
    Check Series.diff and DataFrame.diff against ``data - data.shift``.

    Arrays that cannot be subtracted from themselves are skipped.
    """
    data = data[:5]
    try:
        # Probe: does this array implement ops?
        data - data
    except Exception:
        pytest.skip(f"{type(data)} does not support diff")

    # Series path
    ser = pd.Series(data)
    ser_result = ser.diff(periods)
    ser_expected = pd.Series(data - data.shift(periods))
    self.assert_series_equal(ser_result, ser_expected)

    # DataFrame path: extension column "A" next to a constant float column "B"
    frame = pd.DataFrame({"A": data, "B": [1.0] * 5})
    frame_result = frame.diff(periods)
    # A constant column differences to 0, with NaN where the shift ran
    # off the end (front for periods=1, back for periods=-2).
    b_expected = (
        [np.nan, 0, 0, 0, 0] if periods == 1 else [0, 0, 0, np.nan, np.nan]
    )
    frame_expected = pd.DataFrame({"A": ser_expected, "B": b_expected})
    tm.assert_frame_equal(frame_result, frame_expected)
255+
234256
@pytest.mark.parametrize(
235257
"periods, indices",
236258
[[-4, [-1, -1]], [-1, [1, -1]], [0, [0, 1]], [1, [-1, 0]], [4, [-1, -1]]],

0 commit comments

Comments
 (0)