diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cf41737a04ba6..64a793d7be05c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3619,9 +3619,9 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, method=method, axis=axis) @Appender(_shared_docs['shift'] % _shared_doc_kwargs) - def shift(self, periods=1, freq=None, axis=0): + def shift(self, periods=1, freq=None, axis=0, fill_value=np.nan): return super(DataFrame, self).shift(periods=periods, freq=freq, - axis=axis) + axis=axis, fill_value=fill_value) def set_index(self, keys, drop=True, append=False, inplace=False, verify_integrity=False): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bd1a2371315a0..24203e021a9be 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7539,15 +7539,19 @@ def mask(self, cond, other=np.nan, inplace=False, axis=None, level=None, """) @Appender(_shared_docs['shift'] % _shared_doc_kwargs) - def shift(self, periods=1, freq=None, axis=0): + def shift(self, periods=1, freq=None, axis=0, fill_value=np.nan): if periods == 0: return self block_axis = self._get_block_manager_axis(axis) + shift_kwargs = {'periods': periods, 'axis': block_axis} + if not is_categorical_dtype(self): + shift_kwargs['fill_value'] = fill_value if freq is None: - new_data = self._data.shift(periods=periods, axis=block_axis) + new_data = self._data.shift(**shift_kwargs) else: + # tshift shifts the index; it makes no use of fill_value return self.tshift(periods, freq) return self._constructor(new_data).__finalize__(self) @@ -7587,18 +7591,20 @@ def slice_shift(self, periods=1, axis=0): return new_obj.__finalize__(self) - def tshift(self, periods=1, freq=None, axis=0): + def tshift(self, periods=1, freq=None, axis=0, fill_value=np.nan): """ Shift the time index, using the index's frequency if available. 
Parameters ---------- periods : int - Number of periods to move, can be positive or negative + Number of periods to move, can be positive or negative. freq : DateOffset, timedelta, or time rule string, default None - Increment to use from the tseries module or time rule (e.g. 'EOM') + Increment to use from the tseries module or time rule (e.g. 'EOM'). axis : int or basestring - Corresponds to the axis that contains the Index + Corresponds to the axis that contains the Index. + fill_value : scalar, default np.nan + Value for missing entries. NOTE(review): confirm tshift uses it. Notes ----- diff --git a/pandas/core/internals.py b/pandas/core/internals.py index a0e122d390240..59be34ad16133 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1279,12 +1279,12 @@ def diff(self, n, axis=1, mgr=None): new_values = algos.diff(self.values, n, axis=axis) return [self.make_block(values=new_values)] - def shift(self, periods, axis=0, mgr=None): + def shift(self, periods, axis=0, mgr=None, fill_value=np.nan): """ shift the block by periods, possibly upcast """ # convert integer to float if necessary. need to do a lot more than # that, handle boolean etc also - new_values, fill_value = maybe_upcast(self.values) + new_values, fill_value = maybe_upcast(self.values, fill_value) # make sure array sent to np.roll is c_contiguous f_ordered = new_values.flags.f_contiguous @@ -2541,7 +2541,7 @@ def _try_coerce_result(self, result): return result - def shift(self, periods, axis=0, mgr=None): + def shift(self, periods, axis=0, mgr=None, fill_value=np.nan): return self.make_block_same_class(values=self.values.shift(periods), placement=self.mgr_locs) @@ -2879,7 +2879,7 @@ def _try_coerce_result(self, result): def _box_func(self): return lambda x: tslib.Timestamp(x, tz=self.dtype.tz) - def shift(self, periods, axis=0, mgr=None): + def shift(self, periods, axis=0, mgr=None, fill_value=np.nan): """ shift the block by periods """ # think about moving this to the DatetimeIndex. 
This is a non-freq @@ -3072,7 +3072,7 @@ def fillna(self, value, limit=None, inplace=False, downcast=None, return [self.make_block_same_class(values=values, placement=self.mgr_locs)] - def shift(self, periods, axis=0, mgr=None): + def shift(self, periods, axis=0, mgr=None, fill_value=np.nan): """ shift the block by periods """ N = len(self.values.T) indexer = np.zeros(N, dtype=int) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3e3600898ba7f..d8de1d8963f53 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3358,8 +3358,9 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, axis=axis) @Appender(generic._shared_docs['shift'] % _shared_doc_kwargs) - def shift(self, periods=1, freq=None, axis=0): - return super(Series, self).shift(periods=periods, freq=freq, axis=axis) + def shift(self, periods=1, freq=None, axis=0, fill_value=np.nan): + return super(Series, self).shift(periods=periods, freq=freq, axis=axis, + fill_value=fill_value) def reindex_axis(self, labels, axis=0, **kwargs): """Conform Series to new index with optional filling logic. 
diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index ceb6c942c81b1..c8ffde727aa94 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -308,6 +308,14 @@ def test_shift_bool(self): columns=['high', 'low']) assert_frame_equal(rs, xp) + def test_shift_bool_fillna(self): + df = DataFrame({'high': [True, False], + 'low': [False, False]}) + rs = df.shift(1, fill_value=True) + xp = DataFrame({'high': [True, True], + 'low': [True, False]}) + assert_frame_equal(rs, xp) + def test_shift_categorical(self): # GH 9416 s1 = pd.Series(['a', 'b', 'c'], dtype='category') diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 0e6e44e839464..c4c47ec9e7698 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1607,6 +1607,17 @@ def test_shift_int(self): expected = ts.astype(float).shift(1) assert_series_equal(shifted, expected) + def test_shift_fillna(self): + # GH 15486 (ENH): shift with an explicit fill_value + ts = self.ts.astype(int) + fillval = 0 + shifted = ts.shift(1, fill_value=fillval) + # default shift fills with NaN, upcasting the int Series to float + default = ts.shift(1) + default.iloc[0] = fillval + expected = default.astype(int) + assert_series_equal(shifted, expected) + def test_shift_categorical(self): # GH 9416 s = pd.Series(['a', 'b', 'c', 'd'], dtype='category')