From 55424375993292456a726b82e40ec90f76cf5282 Mon Sep 17 00:00:00 2001 From: Pedro Pazzini Date: Sat, 10 Mar 2018 16:39:03 -0300 Subject: [PATCH 1/2] DOC: Improved the docstring of Series.repeat --- pandas/core/series.py | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 069f0372ab6e1..7c091d5a40071 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -918,12 +918,49 @@ def _set_values(self, key, value): @deprecate_kwarg(old_arg_name='reps', new_arg_name='repeats') def repeat(self, repeats, *args, **kwargs): """ - Repeat elements of an Series. Refer to `numpy.ndarray.repeat` - for more information about the `repeats` argument. + Repeat elements of a Series. + + Each element of the Series is repeated 'repeats' times. + + Parameters + ---------- + repeats : int + The number of repetitions for each element. + *args + These parameters will be passed to a validation function. + **kwargs + These parameters will be passed to a validation function. See also -------- - numpy.ndarray.repeat + Series.append : Concatenate two or more Series. + pandas.concat : Concatenate two or more DataFrames. + numpy.repeat : Repeat elements of an array. + + Returns + ------- + Series + The repeated version of the Series. 
+ + Examples + -------- + >>> df = pd.DataFrame({'col1' : ['A','B','C'],'col2' : [ 0 , 1 , 2 ]}) + >>> df + col1 col2 + 0 A 0 + 1 B 1 + 2 C 2 + >>> df.col1.repeat(3) + 0 A + 0 A + 0 A + 1 B + 1 B + 1 B + 2 C + 2 C + 2 C + Name: col1, dtype: object """ nv.validate_repeat(args, kwargs) new_index = self.index.repeat(repeats) From 6607f79b6137310c8afaeabbc3bf11e9b2a19113 Mon Sep 17 00:00:00 2001 From: Pedro Pazzini Date: Sat, 10 Mar 2018 19:02:47 -0300 Subject: [PATCH 2/2] Spacing, variables referencing, *args and **kwargs --- pandas/core/series.py | 180 ++++++++++-------------------------------- 1 file changed, 41 insertions(+), 139 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index a7b25bc5f8d65..f121c6a12c6bf 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -920,21 +920,21 @@ def repeat(self, repeats, *args, **kwargs): """ Repeat elements of a Series. - Each element of the Series is repeated 'repeats' times. + Each element of the Series is repeated `repeats` times. Parameters ---------- repeats : int The number of repetitions for each element. - *args - These parameters will be passed to a validation function. - **kwargs - These parameters will be passed to a validation function. + *args, **kwargs + Additional keywords have no effect but might be accepted for + compatibility with numpy. See also -------- Series.append : Concatenate two or more Series. pandas.concat : Concatenate two or more DataFrames. + Index.repeat : Equivalent function for Index. numpy.repeat : Repeat elements of an array. Returns @@ -942,11 +942,15 @@ def repeat(self, repeats, *args, **kwargs): Series The repeated version of the Series. + Notes + ----- + Unlike numpy.repeat, `axis` is not a valid argument. 
+ Examples -------- - >>> df = pd.DataFrame({'col1' : ['A','B','C'],'col2' : [ 0 , 1 , 2 ]}) + >>> df = pd.DataFrame({'col1': ['A', 'B', 'C'], 'col2': [0, 1, 2]}) >>> df - col1 col2 + col1 col2 0 A 0 1 B 1 2 C 2 @@ -1353,77 +1357,8 @@ def unique(self): return result + @Appender(base._shared_docs['drop_duplicates'] % _shared_doc_kwargs) def drop_duplicates(self, keep='first', inplace=False): - """ - Return Series with duplicate values removed. - - Parameters - ---------- - keep : {'first', 'last', ``False``}, default 'first' - - 'first' : Drop duplicates except for the first occurrence. - - 'last' : Drop duplicates except for the last occurrence. - - ``False`` : Drop all duplicates. - inplace : boolean, default ``False`` - If ``True``, performs operation inplace and returns None. - - Returns - ------- - deduplicated : Series - - See Also - -------- - Index.drop_duplicates : equivalent method on Index - DataFrame.drop_duplicates : equivalent method on DataFrame - Series.duplicated : related method on Series, indicating duplicate - Series values. - - Examples - -------- - Generate an Series with duplicated entries. - - >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'], - ... name='animal') - >>> s - 0 lama - 1 cow - 2 lama - 3 beetle - 4 lama - 5 hippo - Name: animal, dtype: object - - With the 'keep' parameter, the selection behaviour of duplicated values - can be changed. The value 'first' keeps the first occurrence for each - set of duplicated entries. The default value of keep is 'first'. - - >>> s.drop_duplicates() - 0 lama - 1 cow - 3 beetle - 5 hippo - Name: animal, dtype: object - - The value 'last' for parameter 'keep' keeps the last occurrence for - each set of duplicated entries. - - >>> s.drop_duplicates(keep='last') - 1 cow - 3 beetle - 4 lama - 5 hippo - Name: animal, dtype: object - - The value ``False`` for parameter 'keep' discards all sets of - duplicated entries. 
Setting the value of 'inplace' to ``True`` performs - the operation inplace and returns ``None``. - - >>> s.drop_duplicates(keep=False, inplace=True) - >>> s - 1 cow - 3 beetle - 5 hippo - Name: animal, dtype: object - """ return super(Series, self).drop_duplicates(keep=keep, inplace=inplace) @Appender(base._shared_docs['duplicated'] % _shared_doc_kwargs) @@ -2802,54 +2737,28 @@ def reindex_axis(self, labels, axis=0, **kwargs): return self.reindex(index=labels, **kwargs) def memory_usage(self, index=True, deep=False): - """ - Return the memory usage of the Series. - - The memory usage can optionally include the contribution of - the index and of elements of `object` dtype. + """Memory usage of the Series Parameters ---------- - index : bool, default True - Specifies whether to include the memory usage of the Series index. - deep : bool, default False - If True, introspect the data deeply by interrogating - `object` dtypes for system-level memory consumption, and include - it in the returned value. + index : bool + Specifies whether to include memory usage of Series index + deep : bool + Introspect the data deeply, interrogate + `object` dtypes for system-level memory consumption Returns ------- - int - Bytes of memory consumed. + scalar bytes of memory consumed - See Also - -------- - numpy.ndarray.nbytes : Total bytes consumed by the elements of the - array. - DataFrame.memory_usage : Bytes consumed by a DataFrame. 
+ Notes + ----- + Memory usage does not include memory consumed by elements that + are not components of the array if deep=False - Examples + See Also -------- - - >>> s = pd.Series(range(3)) - >>> s.memory_usage() - 104 - - Not including the index gives the size of the rest of the data, which - is necessarily smaller: - - >>> s.memory_usage(index=False) - 24 - - The memory footprint of `object` values is ignored by default: - - >>> s = pd.Series(["a", "b"]) - >>> s.values - array(['a', 'b'], dtype=object) - >>> s.memory_usage() - 96 - >>> s.memory_usage(deep=True) - 212 + numpy.ndarray.nbytes """ v = super(Series, self).memory_usage(deep=deep) if index: @@ -2877,21 +2786,20 @@ def _take(self, indices, axis=0, convert=True, is_copy=False): def isin(self, values): """ - Check whether `values` are contained in Series. - - Return a boolean Series showing whether each element in the Series - matches an element in the passed sequence of `values` exactly. + Return a boolean :class:`~pandas.Series` showing whether each element + in the :class:`~pandas.Series` is exactly contained in the passed + sequence of ``values``. Parameters ---------- values : set or list-like The sequence of values to test. Passing in a single string will raise a ``TypeError``. Instead, turn a single string into a - list of one element. + ``list`` of one element. .. versionadded:: 0.18.1 - Support for values as a set. + Support for values as a set Returns ------- @@ -2900,37 +2808,31 @@ def isin(self, values): Raises ------ TypeError - * If `values` is a string + * If ``values`` is a string See Also -------- - pandas.DataFrame.isin : equivalent method on DataFrame + pandas.DataFrame.isin Examples -------- - >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', - ... 
'hippo'], name='animal') - >>> s.isin(['cow', 'lama']) + >>> s = pd.Series(list('abc')) + >>> s.isin(['a', 'c', 'e']) 0 True - 1 True + 1 False 2 True - 3 False - 4 True - 5 False - Name: animal, dtype: bool + dtype: bool - Passing a single string as ``s.isin('lama')`` will raise an error. Use + Passing a single string as ``s.isin('a')`` will raise an error. Use a list of one element instead: - >>> s.isin(['lama']) + >>> s.isin(['a']) 0 True 1 False - 2 True - 3 False - 4 True - 5 False - Name: animal, dtype: bool + 2 False + dtype: bool + """ result = algorithms.isin(com._values_from_object(self), values) return self._constructor(result, index=self.index).__finalize__(self)