Skip to content

Commit fd26355

Browse files
changhiskhan authored and wesm committed
BUG: int dtype diff for frame. refactored diff into common
1 parent 8075f54 commit fd26355

File tree

5 files changed

+53
-18
lines changed

5 files changed

+53
-18
lines changed

pandas/core/common.py

+29
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,35 @@ def mask_out_axis(arr, mask, axis, fill_value=np.nan):
373373

374374
arr[tuple(indexer)] = fill_value
375375

376+
def diff(arr, n, indexer, axis=0):
    """
    Difference ``arr`` against a shifted copy of itself along ``axis``.

    Parameters
    ----------
    arr : ndarray
        Values to difference.
    n : int
        Number of periods the shift represents. Its sign selects which
        edge of the result is filled with NaN.
    indexer : array-like of int
        Positions handed to ``arr.take`` to build the shifted copy.
    axis : int, default 0
        Axis along which to difference. Only 0, 1 and 2 are supported.

    Returns
    -------
    out_arr : ndarray
        ``arr - arr.take(indexer, axis=axis)`` after ``_maybe_upcast``,
        with the first ``n`` positions (``n > 0``) or the last ``-n``
        positions (``n < 0``) along ``axis`` set to NaN. For ``n == 0``
        nothing is masked: every element has a counterpart, so the plain
        difference is returned.

    Raises
    ------
    NotImplementedError
        If ``axis`` is not 0, 1 or 2.
    """
    out_arr = arr - arr.take(indexer, axis=axis)
    # NOTE(review): _maybe_upcast is defined elsewhere; presumably it
    # promotes integer data to a dtype that can hold NaN -- confirm.
    out_arr = _maybe_upcast(out_arr)

    # Checked after the arithmetic so that errors raised by ``take`` for a
    # bad axis still surface first, as they did before.
    if axis not in (0, 1, 2):
        raise NotImplementedError()

    if n > 0:
        edge = slice(None, n)
    elif n < 0:
        edge = slice(n, None)
    else:
        # A zero-period difference has no missing edge to blank out.
        return out_arr

    # Full slices on every leading axis, the edge slice on the target axis.
    na_indexer = [slice(None)] * (axis + 1)
    na_indexer[axis] = edge
    out_arr[tuple(na_indexer)] = np.nan
    return out_arr
404+
376405

377406
def take_fast(arr, indexer, mask, needs_masking, axis=0, out=None,
378407
fill_value=np.nan):

pandas/core/frame.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@
2929
from pandas.core.generic import NDFrame
3030
from pandas.core.index import Index, MultiIndex, _ensure_index
3131
from pandas.core.indexing import _NDFrameIndexer, _maybe_droplevels
32-
from pandas.core.internals import BlockManager, make_block, form_blocks
32+
from pandas.core.internals import (BlockManager, make_block, form_blocks,
33+
IntBlock)
3334
from pandas.core.series import Series, _radd_compat
3435
from pandas.compat.scipy import scoreatpercentile as _quantile
3536
from pandas.util import py3compat
@@ -3679,7 +3680,10 @@ def diff(self, periods=1):
36793680
-------
36803681
diffed : DataFrame
36813682
"""
3682-
return self - self.shift(periods)
3683+
indexer = com._shift_indexer(len(self), periods)
3684+
new_blocks = [b.diff(periods, indexer) for b in self._data.blocks]
3685+
new_data = BlockManager(new_blocks, [self.columns, self.index])
3686+
return self._constructor(new_data)
36833687

36843688
def shift(self, periods=1, freq=None, **kwds):
36853689
"""

pandas/core/internals.py

+7
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,13 @@ def take(self, indexer, axis=1, fill_value=np.nan):
294294
def get_values(self, dtype):
295295
return self.values
296296

297+
def diff(self, n, indexer=None):
    """
    Return a new block holding the n-period difference of this block.

    Parameters
    ----------
    n : int
        Number of periods, forwarded to ``com.diff``.
    indexer : array-like, optional
        Shift positions to use. When omitted, they are computed with
        ``com._shift_indexer`` from the length of the block's second
        dimension and ``n``.

    Returns
    -------
    Block built by ``make_block`` from the differenced values, keeping
    this block's ``items`` and ``ref_items``.
    """
    shift_positions = indexer
    if shift_positions is None:
        shift_positions = com._shift_indexer(self.shape[1], n)

    # Block values are differenced along axis 1.
    diffed = com.diff(self.values, n, shift_positions, axis=1)
    return make_block(diffed, self.items, self.ref_items)
302+
303+
297304
def _mask_missing(array, missing_values):
298305
if not isinstance(missing_values, (list, np.ndarray)):
299306
missing_values = [missing_values]

pandas/core/series.py

+3-16
Original file line numberDiff line numberDiff line change
@@ -1511,22 +1511,9 @@ def diff(self, periods=1):
15111511
-------
15121512
diffed : Series
15131513
"""
1514-
if com.is_integer_dtype(self):
1515-
new_values = np.empty(len(self), dtype=self.dtype)
1516-
new_values = _maybe_upcast(new_values)
1517-
1518-
if periods > 0:
1519-
new_values[periods:] = (self.values[periods:] -
1520-
self.values[:-periods])
1521-
new_values[:periods] = nan
1522-
elif periods < 0:
1523-
new_values[:periods] = (self.values[:periods] -
1524-
self.values[-periods:])
1525-
new_values[periods:] = nan
1526-
1527-
return Series(new_values, index=self.index, name=self.name)
1528-
else:
1529-
return self - self.shift(periods)
1514+
indexer = com._shift_indexer(len(self), periods)
1515+
val = com.diff(self.values, periods, indexer)
1516+
return Series(val, self.index, name=self.name)
15301517

15311518
def autocorr(self):
15321519
"""

pandas/tests/test_frame.py

+8
Original file line numberDiff line numberDiff line change
@@ -5215,6 +5215,14 @@ def test_diff(self):
52155215
assert_series_equal(the_diff['A'],
52165216
self.tsframe['A'] - self.tsframe['A'].shift(1))
52175217

5218+
# int dtype
5219+
a = 10000000000000000
5220+
b = a + 1
5221+
s = Series([a, b])
5222+
5223+
rs = DataFrame({'s': s}).diff()
5224+
self.assertEqual(rs.s[1], 1)
5225+
52185226
def test_diff_mixed_dtype(self):
52195227
df = DataFrame(np.random.randn(5, 3))
52205228
df['A'] = np.array([1, 2, 3, 4, 5], dtype=object)

0 commit comments

Comments
 (0)