From 567d4fa2b629670b9fc7e150cd9f21d29d91982c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Dec 2018 06:46:22 -0600 Subject: [PATCH 1/3] ENH: ExtensionArray.repeat --- doc/source/whatsnew/v0.24.0.rst | 3 ++- pandas/core/arrays/base.py | 27 ++++++++++++++++++++++++++ pandas/tests/extension/base/methods.py | 23 ++++++++++++++++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index fe5e4a57c557a..298e4588ac654 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -987,7 +987,8 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your **Other changes** -- ``ExtensionArray`` has gained the abstract methods ``.dropna()`` (:issue:`21185`) +- :meth:`~pandas.api.types.ExtensionArray.dropna` has been added (:issue:`21185`) +- :meth:`~pandas.api.types.ExtensionArray.repeat` has been added (:issue:`24024`) - ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype``; furthermore the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`) - An ``ExtensionArray`` with a boolean dtype now works correctly as a boolean indexer. :meth:`pandas.api.types.is_bool_dtype` now properly considers them boolean (:issue:`22326`) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index cf145064fd7b1..c7582c926e87c 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -580,6 +580,33 @@ def factorize(self, na_sentinel=-1): uniques = self._from_factorized(uniques, self) return labels, uniques + def repeat(self, repeats, axis=None): + """ + Repeat elements of an array. + + Parameters + ---------- + repeats : int + This should be a non-negative integer. Repeating 0 times + will return an empty array. + + Returns + ------- + repeated_array : ExtensionArray + Same type as the input, with elements repeated `repeats` times. + + See Also + -------- + numpy.repeat : Similar method for :class:`numpy.ndarray`. + ExtensionArray.take : Take arbitrary positions. + """ + if axis is not None: + raise ValueError("'axis' must be None.") + if repeats < 0: + raise ValueError("negative repeats are not allowed.") + ind = np.arange(len(self)).repeat(repeats) + return self.take(ind) + # ------------------------------------------------------------------------ # Indexing methods # ------------------------------------------------------------------------ diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 4a409a84f3db4..6db15a3d33710 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -264,3 +264,26 @@ def test_where_series(self, data, na_value, as_frame): if as_frame: expected = expected.to_frame(name='a') self.assert_equal(result, expected) + + @pytest.mark.parametrize("repeats", [0, 1, 2]) + def test_repeat(self, data, repeats): + a, b, c = data[:3] + data = type(data)._from_sequence([a, b, c], dtype=data.dtype) + result = data.repeat(repeats) + + if repeats == 0: + expected = [] + elif repeats == 1: + expected = [a, b, c] + else: + expected = [a, a, b, b, c, c] + expected = type(data)._from_sequence(expected, dtype=data.dtype) + self.assert_equal(result, expected) + + def test_repeat_raises(self, data): + with pytest.raises(ValueError, match="'axis'"): + data.repeat(2, axis=1) + + with pytest.raises(ValueError, + match="negative"): + data.repeat(-1) From 757b49bfeba1685e681a542df2a756507bce5a30 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Dec 2018 06:50:17 -0600 Subject: [PATCH 2/3] PR number --- doc/source/whatsnew/v0.24.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 298e4588ac654..a71c3018aae4e 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -988,7 +988,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your **Other changes** - :meth:`~pandas.api.types.ExtensionArray.dropna` has been added (:issue:`21185`) -- :meth:`~pandas.api.types.ExtensionArray.repeat` has been added (:issue:`24024`) +- :meth:`~pandas.api.types.ExtensionArray.repeat` has been added (:issue:`24349`) - ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype``; furthermore the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`) - An ``ExtensionArray`` with a boolean dtype now works correctly as a boolean indexer. :meth:`pandas.api.types.is_bool_dtype` now properly considers them boolean (:issue:`22326`) From a16c253de660e645f5855ca9ec423feb09bafdac Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Dec 2018 08:35:39 -0600 Subject: [PATCH 3/3] updates --- pandas/core/arrays/base.py | 2 ++ pandas/tests/extension/base/methods.py | 14 +++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index c7582c926e87c..a848dafbb06ef 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -584,6 +584,8 @@ def repeat(self, repeats, axis=None): """ Repeat elements of an array. + .. versionadded:: 0.24.0 + Parameters ---------- repeats : int diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 6db15a3d33710..3403d0e9e02f1 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -265,11 +265,16 @@ def test_where_series(self, data, na_value, as_frame): expected = expected.to_frame(name='a') self.assert_equal(result, expected) + @pytest.mark.parametrize("as_series", [True, False]) @pytest.mark.parametrize("repeats", [0, 1, 2]) - def test_repeat(self, data, repeats): + def test_repeat(self, data, repeats, as_series): a, b, c = data[:3] - data = type(data)._from_sequence([a, b, c], dtype=data.dtype) - result = data.repeat(repeats) + arr = type(data)._from_sequence([a, b, c], dtype=data.dtype) + + if as_series: + arr = pd.Series(arr) + + result = arr.repeat(repeats) if repeats == 0: expected = [] @@ -278,6 +283,9 @@ def test_repeat(self, data, repeats): else: expected = [a, a, b, b, c, c] expected = type(data)._from_sequence(expected, dtype=data.dtype) + if as_series: + index = pd.Series(np.arange(len(arr))).repeat(repeats).index + expected = pd.Series(expected, index=index) self.assert_equal(result, expected) def test_repeat_raises(self, data):