From 5997c7a8782ed32010e3dc6c2f40512c07b206d1 Mon Sep 17 00:00:00 2001 From: Chang She Date: Sun, 4 Nov 2012 16:26:29 -0500 Subject: [PATCH 1/2] BUG: Series.diff for integer dtypes #2087 --- pandas/core/series.py | 17 ++++++++++++++++- pandas/tests/test_series.py | 8 ++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 1b105d48f4dbe..044353f55a67c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1511,7 +1511,22 @@ def diff(self, periods=1): ------- diffed : Series """ - return (self - self.shift(periods)) + if com.is_integer_dtype(self): + new_values = np.empty(len(self), dtype=self.dtype) + new_values = _maybe_upcast(new_values) + + if periods > 0: + new_values[periods:] = (self.values[periods:] - + self.values[:-periods]) + new_values[:periods] = nan + elif periods < 0: + new_values[:periods] = (self.values[:periods] - + self.values[-periods:]) + new_values[periods:] = nan + + return Series(new_values, index=self.index, name=self.name) + else: + return self - self.shift(periods) def autocorr(self): """ diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 1f1b3285fb22d..03bfccba83e72 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2998,6 +2998,14 @@ def test_diff(self): # Just run the function self.ts.diff() + # int dtype + a = 10000000000000000 + b = a + 1 + s = Series([a, b]) + + rs = s.diff() + self.assertEqual(rs[1], 1) + def test_pct_change(self): rs = self.ts.pct_change(fill_method=None) assert_series_equal(rs, self.ts / self.ts.shift(1) - 1) From 83f1569ddf4d1c2dd66f8edb158163229869a0aa Mon Sep 17 00:00:00 2001 From: Chang She Date: Sun, 4 Nov 2012 17:46:50 -0500 Subject: [PATCH 2/2] BUG: int dtype diff for frame. refactored diff into common --- pandas/core/common.py | 29 +++++++++++++++++++++++++++++ pandas/core/frame.py | 8 ++++++-- pandas/core/internals.py | 7 +++++++ pandas/core/series.py | 19 +++---------------- pandas/tests/test_frame.py | 8 ++++++++ 5 files changed, 53 insertions(+), 18 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index e7829ab4b41d5..bcb05c9f1bd0f 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -373,6 +373,35 @@ def mask_out_axis(arr, mask, axis, fill_value=np.nan): arr[tuple(indexer)] = fill_value +def diff(arr, n, indexer, axis=0): + out_arr = arr - arr.take(indexer, axis=axis) + out_arr = _maybe_upcast(out_arr) + + if axis == 0: + if n > 0: + out_arr[:n] = np.nan + elif n < 0: + out_arr[n:] = np.nan + else: + out_arr[:] = np.nan + elif axis == 1: + if n > 0: + out_arr[:, :n] = np.nan + elif n < 0: + out_arr[:, n:] = np.nan + else: + out_arr[:, :] = np.nan + elif axis == 2: + if n > 0: + out_arr[:, :, :n] = np.nan + elif n < 0: + out_arr[:, :, n:] = np.nan + else: + out_arr[:, :, :] = np.nan + else: + raise NotImplementedError() + return out_arr + def take_fast(arr, indexer, mask, needs_masking, axis=0, out=None, fill_value=np.nan): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9a035429bbd0e..cac08bec15c0f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -29,7 +29,8 @@ from pandas.core.generic import NDFrame from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import _NDFrameIndexer, _maybe_droplevels -from pandas.core.internals import BlockManager, make_block, form_blocks +from pandas.core.internals import (BlockManager, make_block, form_blocks, + IntBlock) from pandas.core.series import Series, _radd_compat from pandas.compat.scipy import scoreatpercentile as _quantile from pandas.util import py3compat @@ -3679,7 +3680,10 @@ def diff(self, periods=1): ------- diffed : DataFrame """ - return self - self.shift(periods) + indexer = com._shift_indexer(len(self), periods) + new_blocks = [b.diff(periods, indexer) for b in self._data.blocks] + new_data = BlockManager(new_blocks, [self.columns, self.index]) + return self._constructor(new_data) def shift(self, periods=1, freq=None, **kwds): """ diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 08de0de51aeeb..ead9e47a3be82 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -294,6 +294,13 @@ def take(self, indexer, axis=1, fill_value=np.nan): def get_values(self, dtype): return self.values + def diff(self, n, indexer=None): + if indexer is None: + indexer = com._shift_indexer(self.shape[1], n) + new_values = com.diff(self.values, n, indexer, axis=1) + return make_block(new_values, self.items, self.ref_items) + + def _mask_missing(array, missing_values): if not isinstance(missing_values, (list, np.ndarray)): missing_values = [missing_values] diff --git a/pandas/core/series.py b/pandas/core/series.py index 044353f55a67c..6ca8a01a18434 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1511,22 +1511,9 @@ def diff(self, periods=1): ------- diffed : Series """ - if com.is_integer_dtype(self): - new_values = np.empty(len(self), dtype=self.dtype) - new_values = _maybe_upcast(new_values) - - if periods > 0: - new_values[periods:] = (self.values[periods:] - - self.values[:-periods]) - new_values[:periods] = nan - elif periods < 0: - new_values[:periods] = (self.values[:periods] - - self.values[-periods:]) - new_values[periods:] = nan - - return Series(new_values, index=self.index, name=self.name) - else: - return self - self.shift(periods) + indexer = com._shift_indexer(len(self), periods) + val = com.diff(self.values, periods, indexer) + return Series(val, self.index, name=self.name) def autocorr(self): """ diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index f069a65a1ab12..3d6e49b48c7db 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -5215,6 +5215,14 @@ def test_diff(self): assert_series_equal(the_diff['A'], self.tsframe['A'] - self.tsframe['A'].shift(1)) + # int dtype + a = 10000000000000000 + b = a + 1 + s = Series([a, b]) + + rs = DataFrame({'s': s}).diff() + self.assertEqual(rs.s[1], 1) + def test_diff_mixed_dtype(self): df = DataFrame(np.random.randn(5, 3)) df['A'] = np.array([1, 2, 3, 4, 5], dtype=object)