Skip to content

Commit 4aaef2d

Browse files
authored
REF: simplify Block.diff (#48348)
* REF: simplify Block.diff * fix ArrayManager.diff
1 parent cc920b4 commit 4aaef2d

File tree

4 files changed

+16
-18
lines changed

4 files changed

+16
-18
lines changed

pandas/core/frame.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -9243,8 +9243,14 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame:
92439243
periods = int(periods)
92449244

92459245
axis = self._get_axis_number(axis)
9246-
if axis == 1 and periods != 0:
9247-
return self - self.shift(periods, axis=axis)
9246+
if axis == 1:
9247+
if periods != 0:
9248+
# in the periods == 0 case, this is equivalent to a diff of 0 periods
9249+
# along axis=0, and the Manager method may be somewhat more
9250+
# performant, so we dispatch in that case.
9251+
return self - self.shift(periods, axis=axis)
9252+
# With periods=0 this is equivalent to a diff with axis=0
9253+
axis = 0
92489254

92499255
new_data = self._mgr.diff(n=periods, axis=axis)
92509256
return self._constructor(new_data).__finalize__(self, "diff")

pandas/core/internals/array_manager.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -363,11 +363,7 @@ def putmask(self: T, mask, new, align: bool = True) -> T:
363363
)
364364

365365
def diff(self: T, n: int, axis: int) -> T:
366-
if axis == 1:
367-
# DataFrame only calls this for n=0, in which case performing it
368-
# with axis=0 is equivalent
369-
assert n == 0
370-
axis = 0
366+
assert self.ndim == 2 and axis == 0 # caller ensures
371367
return self.apply(algos.diff, n=n, axis=axis)
372368

373369
def interpolate(self: T, **kwargs) -> T:

pandas/core/internals/blocks.py

+6-11
Original file line numberDiff line numberDiff line change
@@ -1271,6 +1271,7 @@ def interpolate(
12711271

12721272
def diff(self, n: int, axis: int = 1) -> list[Block]:
12731273
"""return block for the diff of the values"""
1274+
# only reached with ndim == 2 and axis == 1
12741275
new_values = algos.diff(self.values, n, axis=axis)
12751276
return [self.make_block(values=new_values)]
12761277

@@ -1830,17 +1831,10 @@ def getitem_block_index(self, slicer: slice) -> ExtensionBlock:
18301831
return type(self)(new_values, self._mgr_locs, ndim=self.ndim)
18311832

18321833
def diff(self, n: int, axis: int = 1) -> list[Block]:
1833-
if axis == 0 and n != 0:
1834-
# n==0 case will be a no-op so let it fall through
1835-
# Since we only have one column, the result will be all-NA.
1836-
# Create this result by shifting along axis=0 past the length of
1837-
# our values.
1838-
return super().diff(len(self.values), axis=0)
1839-
if axis == 1:
1840-
# TODO(EA2D): unnecessary with 2D EAs
1841-
# we are by definition 1D.
1842-
axis = 0
1843-
return super().diff(n, axis)
1834+
# only reached with ndim == 2 and axis == 1
1835+
# TODO(EA2D): Can share with NDArrayBackedExtensionBlock
1836+
new_values = algos.diff(self.values, n, axis=0)
1837+
return [self.make_block(values=new_values)]
18441838

18451839
def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]:
18461840
"""
@@ -1964,6 +1958,7 @@ def diff(self, n: int, axis: int = 0) -> list[Block]:
19641958
The arguments here are mimicking shift so they are called correctly
19651959
by apply.
19661960
"""
1961+
# only reached with ndim == 2 and axis == 1
19671962
values = self.values
19681963

19691964
new_values = values - values.shift(n, axis=axis)

pandas/core/internals/managers.py

+1
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,7 @@ def putmask(self, mask, new, align: bool = True):
413413
)
414414

415415
def diff(self: T, n: int, axis: int) -> T:
416+
# only reached with self.ndim == 2; DataFrame.diff passes axis == 0 (block-level axis == 1 after normalizing)
416417
axis = self._normalize_axis(axis)
417418
return self.apply("diff", n=n, axis=axis)
418419

0 commit comments

Comments
 (0)