From 7ff05e7665deec1df98ff996797abcd59739613a Mon Sep 17 00:00:00 2001 From: fwx Date: Tue, 27 Nov 2018 15:26:04 +0530 Subject: [PATCH 01/12] Fixing shift() for ExtensionArray #23911 --- pandas/core/arrays/base.py | 8 +++++--- pandas/tests/extension/base/interface.py | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index eb2fef482ff17..9f7f4c2b81645 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -449,10 +449,12 @@ def shift(self, periods=1): """ # Note: this implementation assumes that `self.dtype.na_value` can be # stored in an instance of your ExtensionArray with `self.dtype`. - if periods == 0: + if len == 0 or periods == 0: return self.copy() - empty = self._from_sequence([self.dtype.na_value] * abs(periods), - dtype=self.dtype) + empty = self._from_sequence( + [self.dtype.na_value] * min(abs(periods), len(self)), + dtype=self.dtype + ) if periods > 0: a = empty b = self[:-periods] diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 00a480d311b58..ef242f79f7e39 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from pandas.compat import StringIO @@ -86,3 +87,16 @@ def test_isna_extension_array(self, data_missing): assert not na.all() assert na.dtype._is_boolean + + @pytest.mark.parametrize('periods, indices', [ + [-4, [-1, -1]], + [-1, [1, -1]], + [0, [0, 1]], + [1, [-1, 0]], + [4, [-1, -1]] + ]) + def test_shift(self, data, periods, indices): + subset = data[:2] + result = subset.shift(periods) + expected = subset.take(indices, allow_fill=True) + self.assert_extension_array_equal(result, expected) From 532436ec87a8c8de4cc6495280b6713abad196ff Mon Sep 17 00:00:00 2001 From: fwx Date: Tue, 27 Nov 2018 18:10:43 +0530 Subject: [PATCH 02/12] Edited the docstring of shift() to reflect changes. --- pandas/core/arrays/base.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 9f7f4c2b81645..ad8af1d968893 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -418,13 +418,13 @@ def fillna(self, value=None, method=None, limit=None): return new_values def dropna(self): - """ Return ExtensionArray without NA values + """ + Return ExtensionArray without NA values Returns ------- valid : ExtensionArray """ - return self[~self.isna()] def shift(self, periods=1): @@ -446,6 +446,15 @@ def shift(self, periods=1): Returns ------- shifted : ExtensionArray + + Notes + ----- + If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is + returned. + + If ``periods`` > ``len(self)``, then an ExtensionArray of size + len(self) is returned, with all values filled with + ``self.dtype.na_value``. """ # Note: this implementation assumes that `self.dtype.na_value` can be # stored in an instance of your ExtensionArray with `self.dtype`. @@ -464,7 +473,8 @@ def shift(self, periods=1): return self._concat_same_type([a, b]) def unique(self): - """Compute the ExtensionArray of unique values. + """ + Compute the ExtensionArray of unique values. Returns ------- From 6584dcab609bd56107c6eff38a632741315a397c Mon Sep 17 00:00:00 2001 From: fwx Date: Wed, 28 Nov 2018 00:16:34 +0530 Subject: [PATCH 03/12] Adding whatsnew entry for 0.24.0 --- doc/source/whatsnew/v0.24.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 0a066399e27ca..a95674d20241f 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -914,6 +914,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your - Added ``ExtensionDtype._is_numeric`` for controlling whether an extension dtype is considered numeric (:issue:`22290`). - The ``ExtensionArray`` constructor, ``_from_sequence`` now take the keyword arg ``copy=False`` (:issue:`21185`) - Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`) +- :meth:`ExtensionArray.shift` added as part of the basic ``ExtensionArray`` interface. (:issue:`22387`) - :meth:`~Series.shift` now dispatches to :meth:`ExtensionArray.shift` (:issue:`22386`) - :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`) - :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`) From abba682b6fcee76b2817276c1ab29333d29a9e17 Mon Sep 17 00:00:00 2001 From: fwx Date: Wed, 28 Nov 2018 01:27:00 +0530 Subject: [PATCH 04/12] Fixing SparseArray.shift() --- pandas/core/arrays/sparse.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 9a5ef3b3a7dd0..9bc6b966fe484 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -882,7 +882,7 @@ def fillna(self, value=None, method=None, limit=None): def shift(self, periods=1): - if periods == 0: + if len == 0 or periods == 0: return self.copy() subtype = np.result_type(np.nan, self.dtype.subtype) @@ -893,8 +893,11 @@ def shift(self, periods=1): else: arr = self - empty = self._from_sequence([self.dtype.na_value] * abs(periods), - dtype=arr.dtype) + empty = self._from_sequence( + [self.dtype.na_value] * min(abs(periods), len(self)), + dtype=arr.dtype + ) + if periods > 0: a = empty b = arr[:-periods] From 172545a136ee2fbd7da46610f55eb4dbbfb49054 Mon Sep 17 00:00:00 2001 From: fwx Date: Wed, 28 Nov 2018 01:33:56 +0530 Subject: [PATCH 05/12] PEP8 fix --- pandas/core/arrays/sparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 9bc6b966fe484..458ef231948d3 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -897,7 +897,7 @@ def shift(self, periods=1): [self.dtype.na_value] * min(abs(periods), len(self)), dtype=arr.dtype ) - + if periods > 0: a = empty b = arr[:-periods] From 67c268d4b86e3909e173d7a3ded05f4e2c34465e Mon Sep 17 00:00:00 2001 From: fwx Date: Sat, 1 Dec 2018 09:54:24 +0530 Subject: [PATCH 06/12] Adding a skip for shift for because the implementation is buggy --- pandas/tests/extension/arrow/test_bool.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index f259e66e6cc76..c68ab7cfa2c4f 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -39,6 +39,10 @@ class TestInterface(BaseArrowTests, base.BaseInterfaceTests): def test_repr(self, data): raise pytest.skip("TODO") + def test_shift(self, data): + raise pytest.skip("Skipping because the implementation of" + "`ArrowBoolArray` is quite buggy.") + class TestConstructors(BaseArrowTests, base.BaseConstructorsTests): def test_from_dtype(self, data): From b0c13e474f4a28f123fcab78fb399b2464ee245b Mon Sep 17 00:00:00 2001 From: fwx Date: Sun, 2 Dec 2018 00:18:38 +0530 Subject: [PATCH 07/12] Adding in changes requested by @TomAugspurger --- doc/source/whatsnew/v0.24.0.rst | 2 +- pandas/core/arrays/base.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index a95674d20241f..74940e8985ef3 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -914,7 +914,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your - Added ``ExtensionDtype._is_numeric`` for controlling whether an extension dtype is considered numeric (:issue:`22290`). - The ``ExtensionArray`` constructor, ``_from_sequence`` now take the keyword arg ``copy=False`` (:issue:`21185`) - Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`) -- :meth:`ExtensionArray.shift` added as part of the basic ``ExtensionArray`` interface. (:issue:`22387`) +- :meth:`pandas.api.extensions.ExtensionArray.shift` added as part of the basic ``ExtensionArray`` interface (:issue:`22387`). - :meth:`~Series.shift` now dispatches to :meth:`ExtensionArray.shift` (:issue:`22386`) - :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`) - :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index ad8af1d968893..c1340e6da9773 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -452,7 +452,7 @@ def shift(self, periods=1): If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is returned. - If ``periods`` > ``len(self)``, then an ExtensionArray of size + If ``periods > len(self)``, then an array of size len(self) is returned, with all values filled with ``self.dtype.na_value``. """ From acecc2a0ea4462d8f1a090dfcbc92aa325d2e5c2 Mon Sep 17 00:00:00 2001 From: fwx Date: Sun, 2 Dec 2018 00:56:33 +0530 Subject: [PATCH 08/12] Bugfix: changing len to len(self) --- pandas/core/arrays/base.py | 2 +- pandas/core/arrays/sparse.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index c1340e6da9773..fc905aa27e400 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -458,7 +458,7 @@ def shift(self, periods=1): """ # Note: this implementation assumes that `self.dtype.na_value` can be # stored in an instance of your ExtensionArray with `self.dtype`. - if len == 0 or periods == 0: + if len(self) == 0 or periods == 0: return self.copy() empty = self._from_sequence( [self.dtype.na_value] * min(abs(periods), len(self)), diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 458ef231948d3..5f0a1b0581cf8 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -882,7 +882,7 @@ def fillna(self, value=None, method=None, limit=None): def shift(self, periods=1): - if len == 0 or periods == 0: + if len(self) == 0 or periods == 0: return self.copy() subtype = np.result_type(np.nan, self.dtype.subtype) From e5b7ebc5cf84418c5ae74dcbbcf564b824470971 Mon Sep 17 00:00:00 2001 From: fwx Date: Sun, 2 Dec 2018 10:03:31 +0530 Subject: [PATCH 09/12] Adding tests for empty array --- pandas/tests/extension/base/interface.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index ef242f79f7e39..caf88854cb5d2 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -95,8 +95,17 @@ def test_isna_extension_array(self, data_missing): [1, [-1, 0]], [4, [-1, -1]] ]) - def test_shift(self, data, periods, indices): + def test_shift_non_empty_array(self, data, periods, indices): subset = data[:2] result = subset.shift(periods) expected = subset.take(indices, allow_fill=True) self.assert_extension_array_equal(result, expected) + + @pytest.mark.parametrize('periods', [ + -4, -1, 0, 1, 4 + ]) + def test_shift_empty_array(self, data, periods): + empty = data[:0] + result = empty.shift(periods) + expected = empty + self.assert_extension_array_equal(result, expected) From eb4eea9d63fde0c737f4ab3a5a002331da42505c Mon Sep 17 00:00:00 2001 From: fwx Date: Sun, 2 Dec 2018 10:03:54 +0530 Subject: [PATCH 10/12] Adding relevant skips for ArrowBoolArray --- pandas/tests/extension/arrow/test_bool.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index c68ab7cfa2c4f..e9756ad5db009 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -39,7 +39,11 @@ class TestInterface(BaseArrowTests, base.BaseInterfaceTests): def test_repr(self, data): raise pytest.skip("TODO") - def test_shift(self, data): + def test_shift_non_empty_array(self, data): + raise pytest.skip("Skipping because the implementation of" + "`ArrowBoolArray` is quite buggy.") + + def test_shift_empty_array(self, data): raise pytest.skip("Skipping because the implementation of" "`ArrowBoolArray` is quite buggy.") From d6594047d761c034012b4c6b23e210106022bdb2 Mon Sep 17 00:00:00 2001 From: fwx Date: Thu, 6 Dec 2018 01:22:42 +0530 Subject: [PATCH 11/12] Changes requested by @jreback --- pandas/core/arrays/base.py | 2 +- pandas/core/arrays/sparse.py | 2 +- pandas/tests/extension/arrow/test_bool.py | 8 -------- pandas/tests/extension/base/interface.py | 23 ---------------------- pandas/tests/extension/base/methods.py | 24 +++++++++++++++++++++++ 5 files changed, 26 insertions(+), 33 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index fc905aa27e400..599c42ff1944e 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -458,7 +458,7 @@ def shift(self, periods=1): """ # Note: this implementation assumes that `self.dtype.na_value` can be # stored in an instance of your ExtensionArray with `self.dtype`. - if len(self) == 0 or periods == 0: + if not len(self) or periods == 0: return self.copy() empty = self._from_sequence( [self.dtype.na_value] * min(abs(periods), len(self)), diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 5f0a1b0581cf8..b4b511cec3601 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -882,7 +882,7 @@ def fillna(self, value=None, method=None, limit=None): def shift(self, periods=1): - if len(self) == 0 or periods == 0: + if not len(self) or periods == 0: return self.copy() subtype = np.result_type(np.nan, self.dtype.subtype) diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index e9756ad5db009..f259e66e6cc76 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -39,14 +39,6 @@ class TestInterface(BaseArrowTests, base.BaseInterfaceTests): def test_repr(self, data): raise pytest.skip("TODO") - def test_shift_non_empty_array(self, data): - raise pytest.skip("Skipping because the implementation of" - "`ArrowBoolArray` is quite buggy.") - - def test_shift_empty_array(self, data): - raise pytest.skip("Skipping because the implementation of" - "`ArrowBoolArray` is quite buggy.") - class TestConstructors(BaseArrowTests, base.BaseConstructorsTests): def test_from_dtype(self, data): diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index caf88854cb5d2..00a480d311b58 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -1,5 +1,4 @@ import numpy as np -import pytest from pandas.compat import StringIO @@ -87,25 +86,3 @@ def test_isna_extension_array(self, data_missing): assert not na.all() assert na.dtype._is_boolean - - @pytest.mark.parametrize('periods, indices', [ - [-4, [-1, -1]], - [-1, [1, -1]], - [0, [0, 1]], - [1, [-1, 0]], - [4, [-1, -1]] - ]) - def test_shift_non_empty_array(self, data, periods, indices): - subset = data[:2] - result = subset.shift(periods) - expected = subset.take(indices, allow_fill=True) - self.assert_extension_array_equal(result, expected) - - @pytest.mark.parametrize('periods', [ - -4, -1, 0, 1, 4 - ]) - def test_shift_empty_array(self, data, periods): - empty = data[:0] - result = empty.shift(periods) - expected = empty - self.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index e9a89c1af2f22..ace5c5346bf5a 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -189,6 +189,30 @@ def test_container_shift(self, data, frame, periods, indices): compare(result, expected) + @pytest.mark.parametrize('periods, indices', [ + [-4, [-1, -1]], + [-1, [1, -1]], + [0, [0, 1]], + [1, [-1, 0]], + [4, [-1, -1]] + ]) + def test_shift_non_empty_array(self, data, periods, indices): + # https://github.com/pandas-dev/pandas/issues/23911 + subset = data[:2] + result = subset.shift(periods) + expected = subset.take(indices, allow_fill=True) + self.assert_extension_array_equal(result, expected) + + @pytest.mark.parametrize('periods', [ + -4, -1, 0, 1, 4 + ]) + def test_shift_empty_array(self, data, periods): + # https://github.com/pandas-dev/pandas/issues/23911 + empty = data[:0] + result = empty.shift(periods) + expected = empty + self.assert_extension_array_equal(result, expected) + @pytest.mark.parametrize("as_frame", [True, False]) def test_hash_pandas_object_works(self, data, as_frame): # https://github.com/pandas-dev/pandas/issues/23066 From 31a9cc853894e4d5feee02b4c036086dd0ec9968 Mon Sep 17 00:00:00 2001 From: fwx Date: Sun, 9 Dec 2018 01:04:03 +0530 Subject: [PATCH 12/12] PEP8 fix --- pandas/core/arrays/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 599c42ff1944e..b22bdf3a3f19b 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -460,6 +460,7 @@ def shift(self, periods=1): # stored in an instance of your ExtensionArray with `self.dtype`. if not len(self) or periods == 0: return self.copy() + empty = self._from_sequence( [self.dtype.na_value] * min(abs(periods), len(self)), dtype=self.dtype