Skip to content

Commit 935362f

Browse files
author
David Cottrell
committed
Add test and fix for categorical series .shift #10495.
1 parent 5455aca commit 935362f

File tree

3 files changed

+27
-1
lines changed

3 files changed

+27
-1
lines changed

doc/source/whatsnew/v0.17.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -132,3 +132,4 @@ Bug Fixes
132132

133133
- Bug in ``pandas.concat`` with ``axis=0`` when column is of dtype ``category`` (:issue:`10177`)
134134
- Bug in ``read_msgpack`` where input type is not always checked (:issue:`10369`)
135+
- Bug in ``Series.shift`` when the ``Series`` is of dtype ``category`` (:issue:`10495`)

pandas/core/internals.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -885,7 +885,13 @@ def shift(self, periods, axis=0):
885885
""" shift the block by periods, possibly upcast """
886886
# convert integer to float if necessary. need to do a lot more than
887887
# that, handle boolean etc also
888-
new_values, fill_value = com._maybe_upcast(self.values)
888+
if isinstance(self.values, Categorical):
889+
# hack toward fixing issue 10495: shift the Categorical's integer codes rather than the Categorical itself
890+
values = self.values._codes
891+
else:
892+
values = self.values
893+
new_values, fill_value = com._maybe_upcast(values)
894+
889895
# make sure array sent to np.roll is c_contiguous
890896
f_ordered = new_values.flags.f_contiguous
891897
if f_ordered:
@@ -906,6 +912,13 @@ def shift(self, periods, axis=0):
906912
if f_ordered:
907913
new_values = new_values.T
908914

915+
if isinstance(self.values, Categorical):
916+
# hack toward fixing issue 10495: turn NaN entries into code -1, then rebuild the Categorical from the shifted codes
917+
new_values[np.isnan(new_values)] = -1
918+
new_values = Categorical.from_codes(new_values,
919+
categories=self.values.categories)
920+
921+
909922
return [make_block(new_values,
910923
ndim=self.ndim, fastpath=True,
911924
placement=self.mgr_locs)]

pandas/tests/test_categorical.py

+12
Original file line numberDiff line numberDiff line change
@@ -1175,6 +1175,18 @@ def test_comparison_with_unknown_scalars(self):
11751175
self.assert_numpy_array_equal(cat == 4 , [False, False, False])
11761176
self.assert_numpy_array_equal(cat != 4 , [True, True, True])
11771177

1178+
def test_shift(self):
1179+
# GH10495
1180+
# Series.shift should not depend on the dtype being categorical or not
1181+
values = ['a', 'b', 'c']
1182+
shifts = [-1, 0, 1]
1183+
results = [['b', 'c', np.nan], ['a', 'b', 'c'], [np.nan, 'a', 'b']]
1184+
1185+
for shift, result in zip(shifts, results):
1186+
b = pd.Series(pd.Categorical(result, categories=values))
1187+
a = pd.Series(values, dtype='category').shift(shift)
1188+
self.assert_series_equal(a, b)
1189+
11781190

11791191
class TestCategoricalAsBlock(tm.TestCase):
11801192
_multiprocess_can_split_ = True

0 commit comments

Comments
 (0)