Skip to content

Commit b562eb2

Browse files
committed
Squashed version of the commits below.
This is an implementation of quick shift logic. Added a vbench to reflect the quick shift implementation. This change is a working version that gives the performance improvement and passes tests; refine in next steps. Slightly modified and cleaner logic. Removed unused indexer, _shift_indexer. Fixed the failing tests for SparseDataFrame.
1 parent 97a5d1e commit b562eb2

File tree

3 files changed

+17
-21
lines changed

3 files changed

+17
-21
lines changed

pandas/core/common.py

-12
Original file line numberDiff line numberDiff line change
@@ -2003,18 +2003,6 @@ def intersection(*seqs):
20032003
return type(seqs[0])(list(result))
20042004

20052005

2006-
def _shift_indexer(N, periods):
2007-
# small reusable utility
2008-
indexer = np.zeros(N, dtype=int)
2009-
2010-
if periods > 0:
2011-
indexer[periods:] = np.arange(N - periods)
2012-
else:
2013-
indexer[:periods] = np.arange(-periods, N)
2014-
2015-
return indexer
2016-
2017-
20182006
def _asarray_tuplesafe(values, dtype=None):
20192007
from pandas.core.index import Index
20202008

pandas/core/generic.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -3197,9 +3197,7 @@ def shift(self, periods=1, freq=None, axis=0, **kwds):
31973197
return self
31983198

31993199
if freq is None and not len(kwds):
3200-
block_axis = self._get_block_manager_axis(axis)
3201-
indexer = com._shift_indexer(len(self._get_axis(axis)), periods)
3202-
new_data = self._data.shift(indexer=indexer, periods=periods, axis=block_axis)
3200+
new_data = self._data.shift(periods=periods, axis=axis)
32033201
else:
32043202
return self.tshift(periods, freq, **kwds)
32053203

pandas/core/internals.py

+16-6
Original file line numberDiff line numberDiff line change
@@ -956,13 +956,12 @@ def diff(self, n):
956956
return [make_block(new_values, self.items, self.ref_items,
957957
ndim=self.ndim, fastpath=True)]
958958

959-
def shift(self, indexer, periods, axis=0):
959+
def shift(self, periods, axis=0):
960960
""" shift the block by periods, possibly upcast """
961961
# convert integer to float if necessary. need to do a lot more than
962962
# that, handle boolean etc also
963963
new_values, fill_value = com._maybe_upcast(self.values)
964-
new_values = np.roll(self.values.T,periods,axis=axis)
965-
964+
new_values = np.roll(new_values.T,periods,axis=axis)
966965
axis_indexer = [ slice(None) ] * self.ndim
967966
if periods > 0:
968967
axis_indexer[axis] = slice(None,periods)
@@ -972,7 +971,7 @@ def shift(self, indexer, periods, axis=0):
972971

973972
return [make_block(new_values.T, self.items, self.ref_items,
974973
ndim=self.ndim, fastpath=True)]
975-
974+
976975
def eval(self, func, other, raise_on_error=True, try_cast=False):
977976
"""
978977
evaluate the block; return result block from the result
@@ -1894,9 +1893,20 @@ def fillna(self, value, inplace=False, downcast=None):
18941893
values = self.values if inplace else self.values.copy()
18951894
return [self.make_block(values.get_values(value), fill_value=value)]
18961895

1897-
def shift(self, indexer, periods, axis=0):
1896+
@classmethod
1897+
def _shift_indexer(cls,N, periods):
1898+
# small reusable utility
1899+
indexer = np.zeros(N, dtype=int)
1900+
1901+
if periods > 0:
1902+
indexer[periods:] = np.arange(N - periods)
1903+
else:
1904+
indexer[:periods] = np.arange(-periods, N)
1905+
return indexer
1906+
1907+
def shift(self, periods, axis=0):
18981908
""" shift the block by periods """
1899-
1909+
indexer = self._shift_indexer(len(self.values.T),periods)
19001910
new_values = self.values.to_dense().take(indexer)
19011911
# convert integer to float if necessary. need to do a lot more than
19021912
# that, handle boolean etc also

0 commit comments

Comments
 (0)