Skip to content

Commit 80cef54

Browse files
committed
Merge pull request pandas-dev#6672 from gouthambs/iss5609
PERF: Quick Shift Implementation (GH5609)
2 parents 7e78d7c + 1c5e779 commit 80cef54

File tree

5 files changed

+28
-25
lines changed

5 files changed

+28
-25
lines changed

doc/source/release.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,8 @@ Improvements to existing features
182182
- Support passing ``encoding`` with xlwt (:issue:`3710`)
183183
- Performance improvement when converting ``DatetimeIndex`` to floating ordinals
184184
using ``DatetimeConverter`` (:issue:`6636`)
185-
185+
- Performance improvement for ``DataFrame.shift`` (:issue:`5609`)
186+
186187
.. _release.bug_fixes-0.14.0:
187188

188189
Bug Fixes

pandas/core/common.py

-12
Original file line numberDiff line numberDiff line change
@@ -2003,18 +2003,6 @@ def intersection(*seqs):
20032003
return type(seqs[0])(list(result))
20042004

20052005

2006-
def _shift_indexer(N, periods):
2007-
# small reusable utility
2008-
indexer = np.zeros(N, dtype=int)
2009-
2010-
if periods > 0:
2011-
indexer[periods:] = np.arange(N - periods)
2012-
else:
2013-
indexer[:periods] = np.arange(-periods, N)
2014-
2015-
return indexer
2016-
2017-
20182006
def _asarray_tuplesafe(values, dtype=None):
20192007
from pandas.core.index import Index
20202008

pandas/core/generic.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -3222,9 +3222,7 @@ def shift(self, periods=1, freq=None, axis=0, **kwds):
32223222
return self
32233223

32243224
if freq is None and not len(kwds):
3225-
block_axis = self._get_block_manager_axis(axis)
3226-
indexer = com._shift_indexer(len(self._get_axis(axis)), periods)
3227-
new_data = self._data.shift(indexer=indexer, periods=periods, axis=block_axis)
3225+
new_data = self._data.shift(periods=periods, axis=axis)
32283226
else:
32293227
return self.tshift(periods, freq, **kwds)
32303228

pandas/core/internals.py

+12-9
Original file line numberDiff line numberDiff line change
@@ -961,23 +961,20 @@ def diff(self, n):
961961
return [make_block(new_values, self.items, self.ref_items,
962962
ndim=self.ndim, fastpath=True)]
963963

964-
def shift(self, indexer, periods, axis=0):
964+
def shift(self, periods, axis=0):
965965
""" shift the block by periods, possibly upcast """
966-
967-
new_values = self.values.take(indexer, axis=axis)
968966
# convert integer to float if necessary. need to do a lot more than
969967
# that, handle boolean etc also
970-
new_values, fill_value = com._maybe_upcast(new_values)
971-
968+
new_values, fill_value = com._maybe_upcast(self.values)
969+
new_values = np.roll(new_values.T,periods,axis=axis)
972970
axis_indexer = [ slice(None) ] * self.ndim
973971
if periods > 0:
974972
axis_indexer[axis] = slice(None,periods)
975973
else:
976-
axis_indexer = [ slice(None) ] * self.ndim
977974
axis_indexer[axis] = slice(periods,None)
978975
new_values[tuple(axis_indexer)] = fill_value
979976

980-
return [make_block(new_values, self.items, self.ref_items,
977+
return [make_block(new_values.T, self.items, self.ref_items,
981978
ndim=self.ndim, fastpath=True)]
982979

983980
def eval(self, func, other, raise_on_error=True, try_cast=False):
@@ -1910,9 +1907,15 @@ def fillna(self, value, limit=None, inplace=False, downcast=None):
19101907
values = self.values if inplace else self.values.copy()
19111908
return [self.make_block(values.get_values(value), fill_value=value)]
19121909

1913-
def shift(self, indexer, periods, axis=0):
1910+
1911+
def shift(self, periods, axis=0):
19141912
""" shift the block by periods """
1915-
1913+
N = len(self.values.T)
1914+
indexer = np.zeros(N, dtype=int)
1915+
if periods > 0:
1916+
indexer[periods:] = np.arange(N - periods)
1917+
else:
1918+
indexer[:periods] = np.arange(-periods, N)
19161919
new_values = self.values.to_dense().take(indexer)
19171920
# convert integer to float if necessary. need to do a lot more than
19181921
# that, handle boolean etc also

vb_suite/frame_methods.py

+13
Original file line numberDiff line numberDiff line change
@@ -429,3 +429,16 @@ def test_unequal(name):
429429
setup,
430430
start_date=datetime(2014, 2, 7))
431431

432+
433+
#-------------------------------------------------------------------------
434+
# frame shift speedup issue-5609
435+
436+
setup = common_setup + """
437+
df = pd.DataFrame(np.random.rand(10000,500))
438+
"""
439+
frame_shift_axis0 = Benchmark('df.shift(1,axis=0)', setup,
440+
name = 'frame_shift_axis_0',
441+
start_date=datetime(2014,1,1))
442+
frame_shift_axis1 = Benchmark('df.shift(1,axis=1)', setup,
443+
name = 'frame_shift_axis_1',
444+
start_date=datetime(2014,1,1))

0 commit comments

Comments
 (0)