From 882f5937b6ad5a90afba8956e7947ebec3448edf Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 1 Sep 2022 08:12:10 -0700 Subject: [PATCH 1/2] REF: simplify Block.diff --- pandas/core/frame.py | 10 ++++++++-- pandas/core/internals/array_manager.py | 7 ++----- pandas/core/internals/blocks.py | 17 ++++++----------- pandas/core/internals/managers.py | 1 + 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e471e7efb20ae..eb265dd655e34 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9241,8 +9241,14 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame: periods = int(periods) axis = self._get_axis_number(axis) - if axis == 1 and periods != 0: - return self - self.shift(periods, axis=axis) + if axis == 1: + if periods != 0: + # in the periods == 0 case, this is equivalent to a diff of 0 periods + # along axis=0, and the Manager method may be somewhat more + # performant, so we dispatch in that case. + return self - self.shift(periods, axis=axis) + # With periods=0 this is equivalent to a diff with axis=0 + axis = 0 new_data = self._mgr.diff(n=periods, axis=axis) return self._constructor(new_data).__finalize__(self, "diff") diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index dcf69dfda1ae8..dbbf0a7dc77a3 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -363,11 +363,8 @@ def putmask(self: T, mask, new, align: bool = True) -> T: ) def diff(self: T, n: int, axis: int) -> T: - if axis == 1: - # DataFrame only calls this for n=0, in which case performing it - # with axis=0 is equivalent - assert n == 0 - axis = 0 + axis = self._normalize_axis(axis) + # Only reached with self.ndim == 2 and (normalized) axis == 0 return self.apply(algos.diff, n=n, axis=axis) def interpolate(self: T, **kwargs) -> T: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 
010358d3a21ec..d49945b2a67cc 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1271,6 +1271,7 @@ def interpolate( def diff(self, n: int, axis: int = 1) -> list[Block]: """return block for the diff of the values""" + # only reached with ndim == 2 and axis == 1 new_values = algos.diff(self.values, n, axis=axis) return [self.make_block(values=new_values)] @@ -1830,17 +1831,10 @@ def getitem_block_index(self, slicer: slice) -> ExtensionBlock: return type(self)(new_values, self._mgr_locs, ndim=self.ndim) def diff(self, n: int, axis: int = 1) -> list[Block]: - if axis == 0 and n != 0: - # n==0 case will be a no-op so let is fall through - # Since we only have one column, the result will be all-NA. - # Create this result by shifting along axis=0 past the length of - # our values. - return super().diff(len(self.values), axis=0) - if axis == 1: - # TODO(EA2D): unnecessary with 2D EAs - # we are by definition 1D. - axis = 0 - return super().diff(n, axis) + # only reached with ndim == 2 and axis == 1 + # TODO(EA2D): Can share with NDArrayBackedExtensionBlock + new_values = algos.diff(self.values, n, axis=0) + return [self.make_block(values=new_values)] def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]: """ @@ -1964,6 +1958,7 @@ def diff(self, n: int, axis: int = 0) -> list[Block]: The arguments here are mimicking shift so they are called correctly by apply. 
""" + # only reached with ndim == 2 and axis == 1 values = self.values new_values = values - values.shift(n, axis=axis) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 900bb077b6014..cfacfc2b38553 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -413,6 +413,7 @@ def putmask(self, mask, new, align: bool = True): ) def diff(self: T, n: int, axis: int) -> T: + # only reached with self.ndim == 2 and axis == 1 axis = self._normalize_axis(axis) return self.apply("diff", n=n, axis=axis) From 657708bd801d8cb0a37466e7a05db0e948eeb494 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 8 Sep 2022 15:35:09 -0700 Subject: [PATCH 2/2] fix ArrayManager.diff --- pandas/core/internals/array_manager.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index dbbf0a7dc77a3..3e0a8df79b037 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -363,8 +363,7 @@ def putmask(self: T, mask, new, align: bool = True) -> T: ) def diff(self: T, n: int, axis: int) -> T: - axis = self._normalize_axis(axis) - # Only reached with self.ndim == 2 and (normalized) axis == 0 + assert self.ndim == 2 and axis == 0 # caller ensures return self.apply(algos.diff, n=n, axis=axis) def interpolate(self: T, **kwargs) -> T: