Skip to content

Fixing shift() for ExtensionArray #23947

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Dec 9, 2018
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -914,6 +914,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your
- Added ``ExtensionDtype._is_numeric`` for controlling whether an extension dtype is considered numeric (:issue:`22290`).
- The ``ExtensionArray`` constructor, ``_from_sequence``, now takes the keyword arg ``copy=False`` (:issue:`21185`)
- Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`)
- :meth:`pandas.api.extensions.ExtensionArray.shift` added as part of the basic ``ExtensionArray`` interface (:issue:`22387`).
- :meth:`~Series.shift` now dispatches to :meth:`ExtensionArray.shift` (:issue:`22386`)
- :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`)
- :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`)
Expand Down
25 changes: 19 additions & 6 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,13 +418,13 @@ def fillna(self, value=None, method=None, limit=None):
return new_values

def dropna(self):
""" Return ExtensionArray without NA values
"""
Return ExtensionArray without NA values

Returns
-------
valid : ExtensionArray
"""

return self[~self.isna()]

def shift(self, periods=1):
Expand All @@ -446,13 +446,25 @@ def shift(self, periods=1):
Returns
-------
shifted : ExtensionArray

Notes
-----
If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is
returned.

If ``periods > len(self)``, then an array of size
len(self) is returned, with all values filled with
``self.dtype.na_value``.
"""
# Note: this implementation assumes that `self.dtype.na_value` can be
# stored in an instance of your ExtensionArray with `self.dtype`.
if periods == 0:
if not len(self) or periods == 0:
return self.copy()
empty = self._from_sequence([self.dtype.na_value] * abs(periods),
dtype=self.dtype)

empty = self._from_sequence(
[self.dtype.na_value] * min(abs(periods), len(self)),
dtype=self.dtype
)
if periods > 0:
a = empty
b = self[:-periods]
Expand All @@ -462,7 +474,8 @@ def shift(self, periods=1):
return self._concat_same_type([a, b])

def unique(self):
"""Compute the ExtensionArray of unique values.
"""
Compute the ExtensionArray of unique values.

Returns
-------
Expand Down
9 changes: 6 additions & 3 deletions pandas/core/arrays/sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -882,7 +882,7 @@ def fillna(self, value=None, method=None, limit=None):

def shift(self, periods=1):

if periods == 0:
if not len(self) or periods == 0:
return self.copy()

subtype = np.result_type(np.nan, self.dtype.subtype)
Expand All @@ -893,8 +893,11 @@ def shift(self, periods=1):
else:
arr = self

empty = self._from_sequence([self.dtype.na_value] * abs(periods),
dtype=arr.dtype)
empty = self._from_sequence(
[self.dtype.na_value] * min(abs(periods), len(self)),
dtype=arr.dtype
)

if periods > 0:
a = empty
b = arr[:-periods]
Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/extension/base/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,30 @@ def test_container_shift(self, data, frame, periods, indices):

compare(result, expected)

@pytest.mark.parametrize('periods, indices', [
    (-4, [-1, -1]),
    (-1, [1, -1]),
    (0, [0, 1]),
    (1, [-1, 0]),
    (4, [-1, -1]),
])
def test_shift_non_empty_array(self, data, periods, indices):
    """
    Check ``shift`` on a two-element array against an equivalent ``take``.

    Each case pairs a ``periods`` value (including ones larger in
    magnitude than the array length) with the ``indices`` that should
    reproduce the shifted result via ``take(..., allow_fill=True)``.
    """
    # https://github.com/pandas-dev/pandas/issues/23911
    two_items = data[:2]
    shifted = two_items.shift(periods)
    # NOTE(review): -1 entries are presumably fill positions under
    # allow_fill=True -- confirm against the ExtensionArray.take contract.
    wanted = two_items.take(indices, allow_fill=True)
    self.assert_extension_array_equal(shifted, wanted)

@pytest.mark.parametrize('periods', [-4, -1, 0, 1, 4])
def test_shift_empty_array(self, data, periods):
    """
    Check that shifting a zero-length array gives back an equal array.

    Runs for negative, zero and positive ``periods``.
    """
    # https://github.com/pandas-dev/pandas/issues/23911
    no_items = data[:0]
    shifted = no_items.shift(periods)
    self.assert_extension_array_equal(shifted, no_items)

@pytest.mark.parametrize("as_frame", [True, False])
def test_hash_pandas_object_works(self, data, as_frame):
# https://github.com/pandas-dev/pandas/issues/23066
Expand Down