diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index a848dafbb06ef..5311d6b8d9d90 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -12,6 +12,7 @@ from pandas.compat import PY3, set_function_name from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError +from pandas.util._decorators import Appender, Substitution from pandas.core.dtypes.common import is_list_like from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries @@ -20,6 +21,8 @@ _not_implemented_message = "{} does not implement {}." +_extension_array_shared_docs = dict() + class ExtensionArray(object): """ @@ -580,32 +583,55 @@ def factorize(self, na_sentinel=-1): uniques = self._from_factorized(uniques, self) return labels, uniques - def repeat(self, repeats, axis=None): - """ - Repeat elements of an array. + _extension_array_shared_docs['repeat'] = """ + Repeat elements of a %(klass)s. - .. versionadded:: 0.24.0 + Returns a new %(klass)s where each element of the current %(klass)s + is repeated consecutively a given number of times. Parameters ---------- - repeats : int - This should be a non-negative integer. Repeating 0 times - will return an empty array. + repeats : int or array of ints + The number of repetitions for each element. This should be a + non-negative integer. Repeating 0 times will return an empty + %(klass)s. + *args + Additional arguments have no effect but might be accepted for + compatibility with numpy. + **kwargs + Additional keywords have no effect but might be accepted for + compatibility with numpy. Returns ------- - repeated_array : ExtensionArray - Same type as the input, with elements repeated `repeats` times. + repeated_array : %(klass)s + Newly created %(klass)s with repeated elements. See Also -------- + Series.repeat : Equivalent function for Series. + Index.repeat : Equivalent function for Index. numpy.repeat : Similar method for :class:`numpy.ndarray`. ExtensionArray.take : Take arbitrary positions. + + Examples + -------- + >>> cat = pd.Categorical(['a', 'b', 'c']) + >>> cat + [a, b, c] + Categories (3, object): [a, b, c] + >>> cat.repeat(2) + [a, a, b, b, c, c] + Categories (3, object): [a, b, c] + >>> cat.repeat([1, 2, 3]) + [a, b, b, c, c, c] + Categories (3, object): [a, b, c] """ - if axis is not None: - raise ValueError("'axis' must be None.") - if repeats < 0: - raise ValueError("negative repeats are not allowed.") + + @Substitution(klass='ExtensionArray') + @Appender(_extension_array_shared_docs['repeat']) + def repeat(self, repeats, *args, **kwargs): + nv.validate_repeat(args, kwargs) ind = np.arange(len(self)).repeat(repeats) return self.take(ind) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 9a8b345cea1b3..62362e643b9ae 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -39,7 +39,7 @@ from pandas.io.formats import console from pandas.io.formats.terminal import get_terminal_size -from .base import ExtensionArray +from .base import ExtensionArray, _extension_array_shared_docs _take_msg = textwrap.dedent("""\ Interpreting negative values in 'indexer' as missing values. @@ -2394,15 +2394,9 @@ def describe(self): return result + @Substitution(klass='Categorical') + @Appender(_extension_array_shared_docs['repeat']) def repeat(self, repeats, *args, **kwargs): - """ - Repeat elements of a Categorical. - - See Also - -------- - numpy.ndarray.repeat - - """ nv.validate_repeat(args, kwargs) codes = self._codes.repeat(repeats) return self._constructor(values=codes, dtype=self.dtype, fastpath=True) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 1a1648a3b8480..d67645c8b4451 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -20,12 +20,13 @@ ABCDatetimeIndex, ABCInterval, ABCIntervalIndex, ABCPeriodIndex, ABCSeries) from pandas.core.dtypes.missing import isna, notna +from pandas.core.arrays.base import ( + ExtensionArray, _extension_array_shared_docs) +from pandas.core.arrays.categorical import Categorical import pandas.core.common as com from pandas.core.config import get_option from pandas.core.indexes.base import Index, ensure_index -from . import Categorical, ExtensionArray - _VALID_CLOSED = {'left', 'right', 'both', 'neither'} _interval_shared_docs = {} @@ -1000,35 +1001,11 @@ def to_tuples(self, na_tuple=True): tuples = np.where(~self.isna(), tuples, np.nan) return tuples - def repeat(self, repeats, **kwargs): - """ - Repeat elements of an IntervalArray. - - Returns a new IntervalArray where each element of the current - IntervalArray is repeated consecutively a given number of times. - - Parameters - ---------- - repeats : int - The number of repetitions for each element. - - **kwargs - Additional keywords have no effect but might be accepted for - compatibility with numpy. - - Returns - ------- - IntervalArray - Newly created IntervalArray with repeated elements. - - See Also - -------- - Index.repeat : Equivalent function for Index. - Series.repeat : Equivalent function for Series. - numpy.repeat : Underlying implementation. - """ - left_repeat = self.left.repeat(repeats, **kwargs) - right_repeat = self.right.repeat(repeats, **kwargs) + @Appender(_extension_array_shared_docs['repeat'] % _shared_docs_kwargs) + def repeat(self, repeats, *args, **kwargs): + nv.validate_repeat(args, kwargs) + left_repeat = self.left.repeat(repeats) + right_repeat = self.right.repeat(repeats) return self._shallow_copy(left=left_repeat, right=right_repeat) _interval_shared_docs['overlaps'] = """ diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 60febc5f5636d..a2fd0effd68c5 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -11,7 +11,6 @@ period_asfreq_arr) from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds import pandas.compat as compat -from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, cache_readonly from pandas.util._validators import validate_fillna_kwargs @@ -593,19 +592,6 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): def strftime(self, date_format): return self._format_native_types(date_format=date_format) - def repeat(self, repeats, *args, **kwargs): - """ - Repeat elements of a PeriodArray. - - See Also - -------- - numpy.ndarray.repeat - """ - # TODO(DatetimeArray): remove - nv.validate_repeat(args, kwargs) - values = self._data.repeat(repeats) - return type(self)(values, self.freq) - def astype(self, dtype, copy=True): # TODO: Figure out something better here... # We have DatetimeLikeArrayMixin -> diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a2cf88fa9cb1a..9f7660a164977 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -832,41 +832,48 @@ def _assert_take_fillable(self, values, indices, allow_fill=True, taken = values.take(indices) return taken - def repeat(self, repeats, *args, **kwargs): - """ - Repeat elements of an Index. + _index_shared_docs['repeat'] = """ + Repeat elements of a %(klass)s. - Returns a new index where each element of the current index + Returns a new %(klass)s where each element of the current %(klass)s is repeated consecutively a given number of times. Parameters ---------- - repeats : int - The number of repetitions for each element. + repeats : int or array of ints + The number of repetitions for each element. This should be a + non-negative integer. Repeating 0 times will return an empty + %(klass)s. + *args + Additional arguments have no effect but might be accepted for + compatibility with numpy. **kwargs Additional keywords have no effect but might be accepted for compatibility with numpy. Returns ------- - pandas.Index - Newly created Index with repeated elements. + repeated_index : %(klass)s + Newly created %(klass)s with repeated elements. See Also -------- Series.repeat : Equivalent function for Series. - numpy.repeat : Underlying implementation. + numpy.repeat : Similar method for :class:`numpy.ndarray`. Examples -------- - >>> idx = pd.Index([1, 2, 3]) + >>> idx = pd.Index(['a', 'b', 'c']) >>> idx - Int64Index([1, 2, 3], dtype='int64') + Index(['a', 'b', 'c'], dtype='object') >>> idx.repeat(2) - Int64Index([1, 1, 2, 2, 3, 3], dtype='int64') - >>> idx.repeat(3) - Int64Index([1, 1, 1, 2, 2, 2, 3, 3, 3], dtype='int64') + Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object') + >>> idx.repeat([1, 2, 3]) + Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object') """ + + @Appender(_index_shared_docs['repeat'] % _index_doc_kwargs) + def repeat(self, repeats, *args, **kwargs): nv.validate_repeat(args, kwargs) return self._shallow_copy(self._values.repeat(repeats)) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index db0cb88b06b2b..64adfe15c04bf 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -445,17 +445,11 @@ def isin(self, values): return algorithms.isin(self.asi8, values.asi8) + @Appender(_index_shared_docs['repeat'] % _index_doc_kwargs) def repeat(self, repeats, *args, **kwargs): - """ - Analogous to ndarray.repeat. - """ nv.validate_repeat(args, kwargs) - if is_period_dtype(self): - freq = self.freq - else: - freq = None - return self._shallow_copy(self.asi8.repeat(repeats), - freq=freq) + freq = self.freq if is_period_dtype(self) else None + return self._shallow_copy(self.asi8.repeat(repeats), freq=freq) @Appender(_index_shared_docs['where'] % _index_doc_kwargs) def where(self, cond, other=None): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 86ef3695ee292..ef4a85e964cad 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1857,6 +1857,7 @@ def append(self, other): def argsort(self, *args, **kwargs): return self.values.argsort(*args, **kwargs) + @Appender(_index_shared_docs['repeat'] % _index_doc_kwargs) def repeat(self, repeats, *args, **kwargs): nv.validate_repeat(args, kwargs) return MultiIndex(levels=self.levels, diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 17666cd651a50..b15604a57fb81 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -924,10 +924,6 @@ def wrapper(self, other): wrapper.__name__ = '__{}__'.format(op.__name__) return wrapper - def repeat(self, repeats, *args, **kwargs): - # TODO(DatetimeArray): Just use Index.repeat - return Index.repeat(self, repeats, *args, **kwargs) - def view(self, dtype=None, type=None): # TODO(DatetimeArray): remove if dtype is None or dtype is __builtins__['type'](self): diff --git a/pandas/core/series.py b/pandas/core/series.py index 773f2d17cf0fc..0c6022cfd2472 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1038,12 +1038,58 @@ def _set_values(self, key, value): def repeat(self, repeats, *args, **kwargs): """ - Repeat elements of an Series. Refer to `numpy.ndarray.repeat` - for more information about the `repeats` argument. + Repeat elements of a Series. + + Returns a new Series where each element of the current Series + is repeated consecutively a given number of times. + + Parameters + ---------- + repeats : int or array of ints + The number of repetitions for each element. This should be a + non-negative integer. Repeating 0 times will return an empty + Series. + *args + Additional arguments have no effect but might be accepted for + compatibility with numpy. + **kwargs + Additional keywords have no effect but might be accepted for + compatibility with numpy. + + Returns + ------- + repeated_series : Series + Newly created Series with repeated elements. See Also -------- - numpy.ndarray.repeat + Index.repeat : Equivalent function for Index. + numpy.repeat : Similar method for :class:`numpy.ndarray`. + + Examples + -------- + >>> s = pd.Series(['a', 'b', 'c']) + >>> s + 0 a + 1 b + 2 c + dtype: object + >>> s.repeat(2) + 0 a + 0 a + 1 b + 1 b + 2 c + 2 c + dtype: object + >>> s.repeat([1, 2, 3]) + 0 a + 1 b + 1 b + 2 c + 2 c + 2 c + dtype: object """ nv.validate_repeat(args, kwargs) new_index = self.index.repeat(repeats) diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index 4251273e424dd..8f7fd87e4315f 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -292,22 +292,6 @@ def test_validate_inplace(self): with pytest.raises(ValueError): cat.sort_values(inplace=value) - def test_repeat(self): - # GH10183 - cat = Categorical(["a", "b"], categories=["a", "b"]) - exp = Categorical(["a", "a", "b", "b"], categories=["a", "b"]) - res = cat.repeat(2) - tm.assert_categorical_equal(res, exp) - - def test_numpy_repeat(self): - cat = Categorical(["a", "b"], categories=["a", "b"]) - exp = Categorical(["a", "a", "b", "b"], categories=["a", "b"]) - tm.assert_categorical_equal(np.repeat(cat, 2), exp) - - msg = "the 'axis' parameter is not supported" - with pytest.raises(ValueError, match=msg): - np.repeat(cat, 2, axis=1) - def test_isna(self): exp = np.array([False, False, True]) c = Categorical(["a", "b", np.nan]) diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index 9604010571294..e81e64d90ff5f 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -26,22 +26,6 @@ def left_right_dtypes(request): class TestMethods(object): - @pytest.mark.parametrize('repeats', [0, 1, 5]) - def test_repeat(self, left_right_dtypes, repeats): - left, right = left_right_dtypes - result = IntervalArray.from_arrays(left, right).repeat(repeats) - expected = IntervalArray.from_arrays( - left.repeat(repeats), right.repeat(repeats)) - tm.assert_extension_array_equal(result, expected) - - @pytest.mark.parametrize('bad_repeats, msg', [ - (-1, 'negative dimensions are not allowed'), - ('foo', r'invalid literal for (int|long)\(\) with base 10')]) - def test_repeat_errors(self, bad_repeats, msg): - array = IntervalArray.from_breaks(range(4)) - with pytest.raises(ValueError, match=msg): - array.repeat(bad_repeats) - @pytest.mark.parametrize('new_closed', [ 'left', 'right', 'both', 'neither']) def test_set_closed(self, closed, new_closed): diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 3403d0e9e02f1..bd59a9d3c4b16 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -265,33 +265,33 @@ def test_where_series(self, data, na_value, as_frame): expected = expected.to_frame(name='a') self.assert_equal(result, expected) + @pytest.mark.parametrize("use_numpy", [True, False]) @pytest.mark.parametrize("as_series", [True, False]) - @pytest.mark.parametrize("repeats", [0, 1, 2]) - def test_repeat(self, data, repeats, as_series): - a, b, c = data[:3] - arr = type(data)._from_sequence([a, b, c], dtype=data.dtype) - + @pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]]) + def test_repeat(self, data, repeats, as_series, use_numpy): + arr = type(data)._from_sequence(data[:3], dtype=data.dtype) if as_series: arr = pd.Series(arr) - result = arr.repeat(repeats) + result = np.repeat(arr, repeats) if use_numpy else arr.repeat(repeats) - if repeats == 0: - expected = [] - elif repeats == 1: - expected = [a, b, c] - else: - expected = [a, a, b, b, c, c] + repeats = [repeats] * 3 if isinstance(repeats, int) else repeats + expected = [x for x, n in zip(arr, repeats) for _ in range(n)] expected = type(data)._from_sequence(expected, dtype=data.dtype) if as_series: - index = pd.Series(np.arange(len(arr))).repeat(repeats).index - expected = pd.Series(expected, index=index) - self.assert_equal(result, expected) + expected = pd.Series(expected, index=arr.index.repeat(repeats)) - def test_repeat_raises(self, data): - with pytest.raises(ValueError, match="'axis'"): - data.repeat(2, axis=1) + self.assert_equal(result, expected) - with pytest.raises(ValueError, - match="negative"): - data.repeat(-1) + @pytest.mark.parametrize("use_numpy", [True, False]) + @pytest.mark.parametrize('repeats, kwargs, error, msg', [ + (2, dict(axis=1), ValueError, "'axis"), + (-1, dict(), ValueError, "negative"), + ([1, 2], dict(), ValueError, "shape"), + (2, dict(foo='bar'), TypeError, "'foo'")]) + def test_repeat_raises(self, data, repeats, kwargs, error, msg, use_numpy): + with pytest.raises(error, match=msg): + if use_numpy: + np.repeat(data, repeats, **kwargs) + else: + data.repeat(repeats, **kwargs) diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 6648be5d2818a..eebff39fdf46f 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -317,26 +317,6 @@ def test_shift(self): # This is tested in test_arithmetic pass - def test_repeat(self): - index = pd.period_range('2001-01-01', periods=2, freq='D') - exp = pd.PeriodIndex(['2001-01-01', '2001-01-01', - '2001-01-02', '2001-01-02'], freq='D') - for res in [index.repeat(2), np.repeat(index, 2)]: - tm.assert_index_equal(res, exp) - - index = pd.period_range('2001-01-01', periods=2, freq='2D') - exp = pd.PeriodIndex(['2001-01-01', '2001-01-01', - '2001-01-03', '2001-01-03'], freq='2D') - for res in [index.repeat(2), np.repeat(index, 2)]: - tm.assert_index_equal(res, exp) - - index = pd.PeriodIndex(['2001-01', 'NaT', '2003-01'], freq='M') - exp = pd.PeriodIndex(['2001-01', '2001-01', '2001-01', - 'NaT', 'NaT', 'NaT', - '2003-01', '2003-01', '2003-01'], freq='M') - for res in [index.repeat(3), np.repeat(index, 3)]: - tm.assert_index_equal(res, exp) - def test_nat(self): assert pd.PeriodIndex._na_value is NaT assert pd.PeriodIndex([], freq='M')._na_value is NaT diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 5d78333016f74..a3fe2e5eff576 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -37,13 +37,19 @@ def test_where(self): # This is handled in test_indexing pass - def test_repeat(self): + @pytest.mark.parametrize('use_numpy', [True, False]) + @pytest.mark.parametrize('index', [ + pd.period_range('2000-01-01', periods=3, freq='D'), + pytest.param( + pd.period_range('2001-01-01', periods=3, freq='2D'), + marks=pytest.mark.xfail(reason='GH 24391')), + pd.PeriodIndex(['2001-01', 'NaT', '2003-01'], freq='M')]) + def test_repeat_freqstr(self, index, use_numpy): # GH10183 - idx = pd.period_range('2000-01-01', periods=3, freq='D') - res = idx.repeat(3) - exp = PeriodIndex(idx.values.repeat(3), freq='D') - tm.assert_index_equal(res, exp) - assert res.freqstr == 'D' + expected = PeriodIndex([p for p in index for _ in range(3)]) + result = np.repeat(index, 3) if use_numpy else index.repeat(3) + tm.assert_index_equal(result, expected) + assert result.freqstr == index.freqstr def test_fillna_period(self): # GH 11343 @@ -445,17 +451,6 @@ def test_pindex_qaccess(self): # Todo: fix these accessors! assert s['05Q4'] == s[2] - def test_numpy_repeat(self): - index = period_range('20010101', periods=2) - expected = PeriodIndex([Period('2001-01-01'), Period('2001-01-01'), - Period('2001-01-02'), Period('2001-01-02')]) - - tm.assert_index_equal(np.repeat(index, 2), expected) - - msg = "the 'axis' parameter is not supported" - with pytest.raises(ValueError, match=msg): - np.repeat(index, 2, axis=1) - def test_pindex_multiples(self): pi = PeriodIndex(start='1/1/11', end='12/31/11', freq='2M') expected = PeriodIndex(['2011-01', '2011-03', '2011-05', '2011-07',