From 3e968a5ebc2aecbcf9a6931c8431124e2b2354b6 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 19 Oct 2018 16:38:14 +0200 Subject: [PATCH 01/12] ENH: add set_index to Series --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/core/frame.py | 114 ++++++-------------- pandas/core/generic.py | 144 ++++++++++++++++++++++++- pandas/core/series.py | 84 +++++++++++++++ pandas/tests/series/test_alter_axes.py | 133 +++++++++++++++++++++-- 5 files changed, 385 insertions(+), 91 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 007f5b7feb060..57aafefbb2c8c 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -272,6 +272,7 @@ Other Enhancements - :meth:`DataFrame.to_sql` now supports writing ``TIMESTAMP WITH TIME ZONE`` types for supported databases. For databases that don't support timezones, datetime data will be stored as timezone unaware local timestamps. See the :ref:`io.sql_datetime_data` for implications (:issue:`9086`). - :func:`to_timedelta` now supports iso-formated timedelta strings (:issue:`21877`) - :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`) +- :class:`Series` has gained the method :meth:`Series.set_index`, which works like its :class:`DataFrame` counterpart :meth:`DataFrame.set_index` (:issue:`21684`) - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) - :meth:`round`, :meth:`ceil`, and meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support an ``ambiguous`` argument for handling datetimes that are rounded to ambiguous times (:issue:`18946`) - :meth:`round`, :meth:`ceil`, and meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support a ``nonexistent`` argument for handling datetimes that are rounded to nonexistent times. 
See :ref:`timeseries.timezone_nonexistent` (:issue:`22647`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e313e0f37a445..057c8d6bbe394 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -82,6 +82,7 @@ from pandas.core.accessor import CachedAccessor from pandas.core.arrays import Categorical, ExtensionArray from pandas.core.config import get_option + from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import (Index, MultiIndex, ensure_index, ensure_index_from_sequences) @@ -3914,22 +3915,32 @@ def shift(self, periods=1, freq=None, axis=0): def set_index(self, keys, drop=True, append=False, inplace=False, verify_integrity=False): """ - Set the DataFrame index (row labels) using one or more existing - columns. By default yields a new object. + Set the DataFrame index (row labels) using one or more columns. Parameters ---------- keys : column label or list of column labels / arrays + Either a column label, Series, Index, MultiIndex, list, + np.ndarray or a list containing only column labels, Series, Index, + MultiIndex, list, np.ndarray. drop : boolean, default True - Delete columns to be used as the new index + Delete columns to be used as the new index. append : boolean, default False - Whether to append columns to existing index + Whether to append columns to existing index. inplace : boolean, default False - Modify the DataFrame in place (do not create a new object) + Modify the DataFrame in place (do not create a new object). verify_integrity : boolean, default False Check the new index for duplicates. Otherwise defer the check until necessary. Setting to False will improve the performance of this - method + method. 
+ + Returns + ------- + reindexed : DataFrame if inplace is False, else None + + See Also + -------- + Series.set_index: Corresponding method for Series Returns ------- @@ -3939,24 +3950,25 @@ def set_index(self, keys, drop=True, append=False, inplace=False, -------- >>> df = pd.DataFrame({'month': [1, 4, 7, 10], ... 'year': [2012, 2014, 2013, 2014], - ... 'sale':[55, 40, 84, 31]}) - month sale year - 0 1 55 2012 - 1 4 40 2014 - 2 7 84 2013 - 3 10 31 2014 + ... 'sale': [55, 40, 84, 31]}) + >>> df + month year sale + 0 1 2012 55 + 1 4 2014 40 + 2 7 2013 84 + 3 10 2014 31 Set the index to become the 'month' column: >>> df.set_index('month') - sale year + year sale month - 1 55 2012 - 4 40 2014 - 7 84 2013 - 10 31 2014 + 1 2012 55 + 4 2014 40 + 7 2013 84 + 10 2014 31 - Create a multi-index using columns 'year' and 'month': + Create a MultiIndex using columns 'year' and 'month': >>> df.set_index(['year', 'month']) sale @@ -3966,7 +3978,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False, 2013 7 84 2014 10 31 - Create a multi-index using a set of values and a column: + Create a MultiIndex using a set of values and a column: >>> df.set_index([[1, 2, 3, 4], 'year']) month sale @@ -3976,7 +3988,6 @@ def set_index(self, keys, drop=True, append=False, inplace=False, 3 2013 7 84 4 2014 10 31 """ - inplace = validate_bool_kwarg(inplace, 'inplace') if not isinstance(keys, list): keys = [keys] @@ -3999,65 +4010,10 @@ def set_index(self, keys, drop=True, append=False, inplace=False, if missing: raise KeyError('{}'.format(missing)) - if inplace: - frame = self - else: - frame = self.copy() - - arrays = [] - names = [] - if append: - names = [x for x in self.index.names] - if isinstance(self.index, ABCMultiIndex): - for i in range(self.index.nlevels): - arrays.append(self.index._get_level_values(i)) - else: - arrays.append(self.index) - - to_remove = [] - for col in keys: - if isinstance(col, ABCMultiIndex): - for n in range(col.nlevels): - 
arrays.append(col._get_level_values(n)) - names.extend(col.names) - elif isinstance(col, (ABCIndexClass, ABCSeries)): - # if Index then not MultiIndex (treated above) - arrays.append(col) - names.append(col.name) - elif isinstance(col, (list, np.ndarray)): - arrays.append(col) - names.append(None) - elif (is_list_like(col) - and not (isinstance(col, tuple) and col in self)): - # all other list-likes (but avoid valid column keys) - col = list(col) # ensure iterator do not get read twice etc. - arrays.append(col) - names.append(None) - # from here, col can only be a column label - else: - arrays.append(frame[col]._values) - names.append(col) - if drop: - to_remove.append(col) - - index = ensure_index_from_sequences(arrays, names) - - if verify_integrity and not index.is_unique: - duplicates = index[index.duplicated()].unique() - raise ValueError('Index has duplicate keys: {dup}'.format( - dup=duplicates)) - - # use set to handle duplicate column names gracefully in case of drop - for c in set(to_remove): - del frame[c] - - # clear up memory usage - index._cleanup() - - frame.index = index - - if not inplace: - return frame + vi = verify_integrity + return super(DataFrame, self).set_index(keys=keys, drop=drop, + append=append, inplace=inplace, + verify_integrity=vi) def reset_index(self, level=None, drop=False, inplace=False, col_level=0, col_fill=''): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9f56433c6868e..ea83ce80fdde1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -34,11 +34,13 @@ from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask from pandas.core.dtypes.inference import is_hashable from pandas.core.dtypes.missing import isna, notna -from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame +from pandas.core.dtypes.generic import (ABCIndexClass, ABCMultiIndex, ABCPanel, + ABCSeries, ABCDataFrame) from pandas.core.base import PandasObject, SelectionMixin -from pandas.core.index import 
(Index, MultiIndex, ensure_index, - InvalidIndexError, RangeIndex) +from pandas.core.index import (Index, MultiIndex, + InvalidIndexError, RangeIndex, + ensure_index, ensure_index_from_sequences) import pandas.core.indexing as indexing from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.period import PeriodIndex, Period @@ -648,6 +650,142 @@ def _set_axis(self, axis, labels): self._data.set_axis(axis, labels) self._clear_item_cache() + def set_index(self, keys, drop=True, append=False, inplace=False, + verify_integrity=False): + """ + Set the index (row labels) using one or more given arrays (or labels). + + Parameters + ---------- + keys : column label or list of column labels / arrays + Either a Series, Index, MultiIndex, list, np.ndarray or a list + containing only Series, Index, MultiIndex, list, np.ndarray. + + For DataFrame, additionally column labels may be used. + drop : boolean, default True + Delete columns to be used as the new index (only for DataFrame). + append : boolean, default False + Whether to append columns to existing index. + inplace : boolean, default False + Modify the Series/DataFrame in place (do not create a new object). + verify_integrity : boolean, default False + Check the new index for duplicates. Otherwise defer the check until + necessary. Setting to False will improve the performance of this + method. + + Returns + ------- + reindexed : Series/DataFrame if inplace is False, else None + + See Also + -------- + DataFrame.set_index: method adapted for DataFrame + Series.set_index: method adapted for Series + + Examples + -------- + >>> df = pd.DataFrame({'month': [1, 4, 7, 10], + ... 'year': [2012, 2014, 2013, 2014], + ... 
'sale': [55, 40, 84, 31]}) + >>> df + month year sale + 0 1 2012 55 + 1 4 2014 40 + 2 7 2013 84 + 3 10 2014 31 + + Set the index to become the 'month' column: + + >>> df.set_index('month') + year sale + month + 1 2012 55 + 4 2014 40 + 7 2013 84 + 10 2014 31 + + Create a MultiIndex using columns 'year' and 'month': + + >>> df.set_index(['year', 'month']) + sale + year month + 2012 1 55 + 2014 4 40 + 2013 7 84 + 2014 10 31 + + Create a MultiIndex using a set of values and a column: + + >>> df.set_index([[1, 2, 3, 4], 'year']) + month sale + year + 1 2012 1 55 + 2 2014 4 40 + 3 2013 7 84 + 4 2014 10 31 + """ + # parameter keys is checked in Series.set_index / DataFrame.set_index! + inplace = validate_bool_kwarg(inplace, 'inplace') + if inplace: + obj = self + else: + obj = self.copy() + + arrays = [] + names = [] + if append: + names = [x for x in self.index.names] + if isinstance(self.index, ABCMultiIndex): + for i in range(self.index.nlevels): + arrays.append(self.index._get_level_values(i)) + else: + arrays.append(self.index) + + to_remove = [] + for col in keys: + if isinstance(col, ABCMultiIndex): + for n in range(col.nlevels): + arrays.append(col._get_level_values(n)) + names.extend(col.names) + elif isinstance(col, (ABCIndexClass, ABCSeries)): + # if Index then not MultiIndex (treated above) + arrays.append(col) + names.append(col.name) + elif isinstance(col, (list, np.ndarray)): + arrays.append(col) + names.append(None) + elif (is_list_like(col) + and not (isinstance(col, tuple) and col in self)): + # all other list-likes (but avoid valid column keys) + col = list(col) # ensure iterator do not get read twice etc. 
+ arrays.append(col) + names.append(None) + # from here, col can only be a column label + else: + arrays.append(obj[col]._values) + names.append(col) + if drop: + to_remove.append(col) + + index = ensure_index_from_sequences(arrays, names) + + if verify_integrity and not index.is_unique: + duplicates = list(index[index.duplicated()]) + raise ValueError('Index has duplicate keys: {dup}'.format( + dup=duplicates)) + + # use set to handle duplicate column names gracefully in case of drop + for c in set(to_remove): + del obj[c] + + # clear up memory usage + index._cleanup() + + obj.index = index + + if not inplace: + return obj + def transpose(self, *args, **kwargs): """ Permute the dimensions of the %(klass)s diff --git a/pandas/core/series.py b/pandas/core/series.py index 8fba3030be9d4..4c3825767dd92 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1069,6 +1069,90 @@ def _set_value(self, label, value, takeable=False): return self _set_value.__doc__ = set_value.__doc__ + def set_index(self, arrays, append=False, inplace=False, + verify_integrity=False): + """ + Set the Series index (row labels) using one or more columns. + + Parameters + ---------- + arrays : array or list of arrays + Either a Series, Index, MultiIndex, list, np.ndarray or a list + containing only Series, Index, MultiIndex, list, np.ndarray. + append : boolean, default False + Whether to append columns to existing index. + inplace : boolean, default False + Modify the Series in place (do not create a new object). + verify_integrity : boolean, default False + Check the new index for duplicates. Otherwise defer the check until + necessary. Setting to False will improve the performance of this. + method. 
+ + Returns + ------- + reindexed : Series if inplace is False, else None + + See Also + -------- + DataFrame.set_index: Corresponding method for DataFrame + + Examples + -------- + >>> s = pd.Series(range(10, 13)) + >>> s + 0 10 + 1 11 + 2 12 + dtype: int64 + + Set the index to become `['a', 'b', 'c']`: + + >>> s.set_index(['a', 'b', 'c']) + a 10 + b 11 + c 12 + dtype: int64 + + Create a MultiIndex by appending to the existing index: + + >>> s.set_index(['a', 'b', 'c'], append=True) + 0 a 10 + 1 b 11 + 2 c 12 + dtype: int64 + + Create a MultiIndex by passing a list of arrays: + + >>> t = (s ** 2).set_index([['a', 'b', 'c'], ['I', 'II', 'III']]) + >>> t + a I 100 + b II 121 + c III 144 + dtype: int64 + + Apply index from another object (of the same length!): + + >>> s.set_index(t.index) + a I 10 + b II 11 + c III 12 + dtype: int64 + """ + if not isinstance(arrays, list): + arrays = [arrays] + elif all(is_scalar(x) for x in arrays): + arrays = [arrays] + + if any(not is_list_like(x, allow_sets=False) + or getattr(x, 'ndim', 1) > 1 for x in arrays): + raise TypeError('The parameter "arrays" may only contain a ' + 'combination of valid column keys and ' + 'one-dimensional list-likes') + + return super(Series, self).set_index(keys=arrays, drop=False, + append=append, inplace=inplace, + verify_integrity=verify_integrity) + def reset_index(self, level=None, drop=False, name=None, inplace=False): """ Generate a new DataFrame or Series with the index reset. 
diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 79de3dc3be19f..7ec72434789c1 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -14,17 +14,132 @@ class TestSeriesAlterAxes(object): - def test_setindex(self, string_series): - # wrong type - pytest.raises(TypeError, setattr, string_series, 'index', None) + def test_set_index_directly(self, string_series): + idx = Index(np.arange(len(string_series))[::-1]) + + string_series.index = idx + tm.assert_index_equal(string_series.index, idx) + with tm.assert_raises_regex(ValueError, 'Length mismatch'): + string_series.index = idx[::2] + + # MultiIndex constructor does not work directly on Series -> lambda + @pytest.mark.parametrize('box', [Series, Index, np.array, + lambda x: MultiIndex.from_arrays([x])]) + @pytest.mark.parametrize('inplace', [True, False]) + def test_set_index(self, string_series, inplace, box): + idx = box(string_series.index[::-1]) + + expected = Index(string_series.index[::-1]) + + if inplace: + result = string_series.copy() + result.set_index(idx, inplace=True) + else: + result = string_series.set_index(idx) + + tm.assert_index_equal(result.index, expected) + with tm.assert_raises_regex(ValueError, 'Length mismatch'): + string_series.set_index(idx[::2], inplace=inplace) + + def test_set_index_cast(self): + # issue casting an index then set_index + s = Series([1.1, 2.2, 3.3], index=[2010, 2011, 2012]) + s2 = s.set_index(s.index.astype(np.int32)) + tm.assert_series_equal(s, s2) + + # MultiIndex constructor does not work directly on Series -> lambda + # also test index name if append=True (name is duplicate here for B) + @pytest.mark.parametrize('box', [Series, Index, np.array, + lambda x: MultiIndex.from_arrays([x])]) + @pytest.mark.parametrize('index_name', [None, 'B', 'test']) + def test_set_index_append(self, string_series, index_name, box): + string_series.index.name = index_name + + arrays = 
box(string_series.index[::-1]) + # np.array and list "forget" the name of series.index + names = [index_name, None if box in [np.array, list] else index_name] + + idx = MultiIndex.from_arrays([string_series.index, + string_series.index[::-1]], + names=names) + expected = string_series.copy() + expected.index = idx + + result = string_series.set_index(arrays, append=True) + + tm.assert_series_equal(result, expected) + + def test_set_index_append_to_multiindex(self, string_series): + s = string_series.set_index(string_series.index[::-1], append=True) + + idx = np.random.randn(len(s)) + expected = string_series.set_index([string_series.index[::-1], idx], + append=True) + + result = s.set_index(idx, append=True) + + tm.assert_series_equal(result, expected) + + # MultiIndex constructor does not work directly on Series -> lambda + # also test index name if append=True (name is duplicate here for A & B) + @pytest.mark.parametrize('box', [Series, Index, np.array, list, + lambda x: MultiIndex.from_arrays([x])]) + @pytest.mark.parametrize('append, index_name', [(True, None), (True, 'B'), + (True, 'test'), (False, None)]) + def test_set_index_pass_arrays(self, string_series, append, + index_name, box): + string_series.index.name = index_name + + idx = string_series.index[::-1] + idx.name = 'B' + arrays = [box(idx), np.random.randn(len(string_series))] - # wrong length - pytest.raises(Exception, setattr, string_series, 'index', - np.arange(len(string_series) - 1)) + result = string_series.set_index(arrays, append=append) + + # to test against already-tested behavior, we add sequentially, + # hence second append always True + expected = string_series.set_index(arrays[0], append=append) + expected = expected.set_index(arrays[1], append=True) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('append', [True, False]) + def test_set_index_pass_multiindex(self, string_series, append): + arrays = MultiIndex.from_arrays([string_series.values, + 
string_series.index[::-1]], + names=['A', 'B']) + + result = string_series.set_index(arrays, append=append) + + expected = string_series.set_index([string_series.values, + string_series.index[::-1]], + append=append) + expected.index.names = [None, 'A', 'B'] if append else ['A', 'B'] + + tm.assert_series_equal(result, expected) - # works - string_series.index = np.arange(len(string_series)) - assert isinstance(string_series.index, Index) + def test_set_index_verify_integrity(self, string_series): + idx = np.zeros(len(string_series)) + + with tm.assert_raises_regex(ValueError, + 'Index has duplicate keys'): + string_series.set_index(idx, verify_integrity=True) + # with MultiIndex + with tm.assert_raises_regex(ValueError, + 'Index has duplicate keys'): + string_series.set_index([idx, idx], verify_integrity=True) + + def test_set_index_raise(self, string_series): + msg = 'The parameter "arrays" may only contain a combination.*' + # forbidden type, e.g. set + with tm.assert_raises_regex(TypeError, msg): + string_series.set_index(set(string_series.index), + verify_integrity=True) + + # wrong type in list with arrays + with tm.assert_raises_regex(TypeError, msg): + string_series.set_index([string_series.index, 'X'], + verify_integrity=True) # Renaming From 26723ac294bab1c373a4bd71fc3326b5cfa37084 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Thu, 15 Nov 2018 19:42:12 +0100 Subject: [PATCH 02/12] ENH: add set_index to Series --- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 057c8d6bbe394..d4a8658a1a158 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -82,7 +82,6 @@ from pandas.core.accessor import CachedAccessor from pandas.core.arrays import Categorical, ExtensionArray from pandas.core.config import get_option - from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import (Index, MultiIndex, ensure_index, ensure_index_from_sequences) From d9d9f45a2adb20735898b386a8bcd2bbe04acacd Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 28 Oct 2018 23:46:26 +0100 Subject: [PATCH 03/12] Fix some oversights --- pandas/core/series.py | 7 ++---- pandas/tests/series/test_alter_axes.py | 35 +++++++++++++++----------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 4c3825767dd92..d35a36c130ad5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1138,15 +1138,12 @@ def set_index(self, arrays, append=False, inplace=False, c III 12 dtype: int64 """ - if not isinstance(arrays, list): - arrays = [arrays] - elif all(is_scalar(x) for x in arrays): + if not isinstance(arrays, list) or all(is_scalar(x) for x in arrays): arrays = [arrays] if any(not is_list_like(x, allow_sets=False) or getattr(x, 'ndim', 1) > 1 for x in arrays): - raise TypeError('The parameter "arrays" may only contain a ' - 'combination of valid column keys and ' + raise TypeError('The parameter "arrays" may only contain ' 'one-dimensional list-likes') return super(Series, self).set_index(keys=arrays, drop=False, diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 7ec72434789c1..6e9d2114d92ce 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -24,6 
+24,7 @@ def test_set_index_directly(self, string_series): # MultiIndex constructor does not work directly on Series -> lambda @pytest.mark.parametrize('box', [Series, Index, np.array, + list, tuple, iter, lambda x: MultiIndex.from_arrays([x])]) @pytest.mark.parametrize('inplace', [True, False]) def test_set_index(self, string_series, inplace, box): @@ -39,7 +40,7 @@ def test_set_index(self, string_series, inplace, box): tm.assert_index_equal(result.index, expected) with tm.assert_raises_regex(ValueError, 'Length mismatch'): - string_series.set_index(idx[::2], inplace=inplace) + string_series.set_index(string_series.index[::2], inplace=inplace) def test_set_index_cast(self): # issue casting an index then set_index @@ -48,16 +49,18 @@ def test_set_index_cast(self): tm.assert_series_equal(s, s2) # MultiIndex constructor does not work directly on Series -> lambda - # also test index name if append=True (name is duplicate here for B) + # also test index name if append=True (name is duplicate here for 'B') @pytest.mark.parametrize('box', [Series, Index, np.array, + list, tuple, iter, lambda x: MultiIndex.from_arrays([x])]) @pytest.mark.parametrize('index_name', [None, 'B', 'test']) def test_set_index_append(self, string_series, index_name, box): string_series.index.name = index_name arrays = box(string_series.index[::-1]) - # np.array and list "forget" the name of series.index - names = [index_name, None if box in [np.array, list] else index_name] + # np.array/list/tuple/iter "forget" the name of series.index + names = [index_name, + None if box in [np.array, list, tuple, iter] else index_name] idx = MultiIndex.from_arrays([string_series.index, string_series.index[::-1]], @@ -81,8 +84,9 @@ def test_set_index_append_to_multiindex(self, string_series): tm.assert_series_equal(result, expected) # MultiIndex constructor does not work directly on Series -> lambda - # also test index name if append=True (name is duplicate here for A & B) - @pytest.mark.parametrize('box', 
[Series, Index, np.array, list, + # also test index name if append=True (name is duplicate here for 'B') + @pytest.mark.parametrize('box', [Series, Index, np.array, + list, tuple, iter, lambda x: MultiIndex.from_arrays([x])]) @pytest.mark.parametrize('append, index_name', [(True, None), (True, 'B'), (True, 'test'), (False, None)]) @@ -96,10 +100,15 @@ def test_set_index_pass_arrays(self, string_series, append, result = string_series.set_index(arrays, append=append) + if box == iter: + # content was consumed -> re-read + arrays[0] = box(idx) + # to test against already-tested behavior, we add sequentially, - # hence second append always True - expected = string_series.set_index(arrays[0], append=append) - expected = expected.set_index(arrays[1], append=True) + # hence second append always True; must wrap keys in list, otherwise + # box = list would be illegal + expected = string_series.set_index([arrays[0]], append=append) + expected = expected.set_index([arrays[1]], append=True) tm.assert_series_equal(result, expected) @@ -121,16 +130,14 @@ def test_set_index_pass_multiindex(self, string_series, append): def test_set_index_verify_integrity(self, string_series): idx = np.zeros(len(string_series)) - with tm.assert_raises_regex(ValueError, - 'Index has duplicate keys'): + with tm.assert_raises_regex(ValueError, 'Index has duplicate keys'): string_series.set_index(idx, verify_integrity=True) # with MultiIndex - with tm.assert_raises_regex(ValueError, - 'Index has duplicate keys'): + with tm.assert_raises_regex(ValueError, 'Index has duplicate keys'): string_series.set_index([idx, idx], verify_integrity=True) def test_set_index_raise(self, string_series): - msg = 'The parameter "arrays" may only contain a combination.*' + msg = 'The parameter "arrays" may only contain one-dimensional.*' # forbidden type, e.g. 
set with tm.assert_raises_regex(TypeError, msg): string_series.set_index(set(string_series.index), From 8f5ed33c897c04c0ef46c0944570cf5b4142f064 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 30 Oct 2018 18:28:40 +0100 Subject: [PATCH 04/12] Remove abbreviation --- pandas/core/frame.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d4a8658a1a158..8e27f8f1b338e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4009,10 +4009,9 @@ def set_index(self, keys, drop=True, append=False, inplace=False, if missing: raise KeyError('{}'.format(missing)) - vi = verify_integrity - return super(DataFrame, self).set_index(keys=keys, drop=drop, - append=append, inplace=inplace, - verify_integrity=vi) + return super(DataFrame, self).set_index( + keys=keys, drop=drop, append=append, inplace=inplace, + verify_integrity=verify_integrity) def reset_index(self, level=None, drop=False, inplace=False, col_level=0, col_fill=''): From 6cf3ac1a3540a8d15a2c8eda034dddd2a5b64500 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Thu, 15 Nov 2018 19:44:26 +0100 Subject: [PATCH 05/12] Template docstrings --- pandas/core/frame.py | 126 +++++++++++++++++------------------------ pandas/core/generic.py | 55 ++---------------- pandas/core/series.py | 116 ++++++++++++++++--------------------- 3 files changed, 108 insertions(+), 189 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8e27f8f1b338e..5299e9b73e347 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3911,82 +3911,62 @@ def shift(self, periods=1, freq=None, axis=0): return super(DataFrame, self).shift(periods=periods, freq=freq, axis=axis) + @Substitution( + klass='DataFrame', other_klass='Series', + params=""" +keys : column label or list of column labels / arrays + Either a column label, Series, Index, MultiIndex, list, np.ndarray + or a list containing only column labels, Series, Index, MultiIndex, + list np.ndarray. +drop : boolean, default True + Delete columns to be used as the new index. +""".strip(), # remove linebreaks at beginning/end + examples=""" +>>> df = pd.DataFrame({'month': [1, 4, 7, 10], +... 'year': [2012, 2014, 2013, 2014], +... 'sale': [55, 40, 84, 31]}) +>>> df + month year sale +0 1 2012 55 +1 4 2014 40 +2 7 2013 84 +3 10 2014 31 + +Set the index to become the 'month' column: + +>>> df.set_index('month') + year sale +month +1 2012 55 +4 2014 40 +7 2013 84 +10 2014 31 + +Create a MultiIndex using columns 'year' and 'month': + +>>> df.set_index(['year', 'month']) + sale +year month +2012 1 55 +2014 4 40 +2013 7 84 +2014 10 31 + +Create a MultiIndex using a set of values and a column: + +>>> df.set_index([[1, 2, 3, 4], 'year']) + month sale + year +1 2012 1 55 +2 2014 4 40 +3 2013 7 84 +4 2014 10 31 +""".strip() # remove linebreaks at beginning/end + ) + @Appender(NDFrame.set_index.__doc__) def set_index(self, keys, drop=True, append=False, inplace=False, verify_integrity=False): - """ - Set the DataFrame index (row labels) using one or more columns. 
- - Parameters - ---------- - keys : column label or list of column labels / arrays - Either a column label, Series, Index, MultiIndex, list, - np.ndarray or a list containing only column labels, Series, Index, - MultiIndex, list, np.ndarray. - drop : boolean, default True - Delete columns to be used as the new index. - append : boolean, default False - Whether to append columns to existing index. - inplace : boolean, default False - Modify the DataFrame in place (do not create a new object). - verify_integrity : boolean, default False - Check the new index for duplicates. Otherwise defer the check until - necessary. Setting to False will improve the performance of this - method. - - Returns - ------- - reindexed : DataFrame if inplace is False, else None - - See Also - -------- - Series.set_index: Corresponding method for Series - - Returns - ------- - DataFrame - Examples - -------- - >>> df = pd.DataFrame({'month': [1, 4, 7, 10], - ... 'year': [2012, 2014, 2013, 2014], - ... 'sale': [55, 40, 84, 31]}) - >>> df - month year sale - 0 1 2012 55 - 1 4 2014 40 - 2 7 2013 84 - 3 10 2014 31 - - Set the index to become the 'month' column: - - >>> df.set_index('month') - year sale - month - 1 2012 55 - 4 2014 40 - 7 2013 84 - 10 2014 31 - - Create a MultiIndex using columns 'year' and 'month': - - >>> df.set_index(['year', 'month']) - sale - year month - 2012 1 55 - 2014 4 40 - 2013 7 84 - 2014 10 31 - - Create a MultiIndex using a set of values and a column: - - >>> df.set_index([[1, 2, 3, 4], 'year']) - month sale - year - 1 2012 1 55 - 2 2014 4 40 - 3 2013 7 84 - 4 2014 10 31 - """ if not isinstance(keys, list): keys = [keys] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ea83ce80fdde1..c33c35befe8a4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -657,17 +657,11 @@ def set_index(self, keys, drop=True, append=False, inplace=False, Parameters ---------- - keys : column label or list of column labels / arrays - Either a Series, 
Index, MultiIndex, list, np.ndarray or a list - containing only Series, Index, MultiIndex, list, np.ndarray. - - For DataFrame, additionally column labels may be used. - drop : boolean, default True - Delete columns to be used as the new index (only for DataFrame). + %(params)s append : boolean, default False Whether to append columns to existing index. inplace : boolean, default False - Modify the Series/DataFrame in place (do not create a new object). + Modify the %(klass)s in place (do not create a new object). verify_integrity : boolean, default False Check the new index for duplicates. Otherwise defer the check until necessary. Setting to False will improve the performance of this @@ -675,54 +669,15 @@ def set_index(self, keys, drop=True, append=False, inplace=False, Returns ------- - reindexed : Series/DataFrame if inplace is False, else None + reindexed : %(klass)s if inplace is False, else None See Also -------- - DataFrame.set_index: method adapted for DataFrame - Series.set_index: method adapted for Series + %(other_klass)s.set_index: method adapted for %(other_klass)s Examples -------- - >>> df = pd.DataFrame({'month': [1, 4, 7, 10], - ... 'year': [2012, 2014, 2013, 2014], - ... 'sale': [55, 40, 84, 31]}) - >>> df - month year sale - 0 1 2012 55 - 1 4 2014 40 - 2 7 2013 84 - 3 10 2014 31 - - Set the index to become the 'month' column: - - >>> df.set_index('month') - year sale - month - 1 2012 55 - 4 2014 40 - 7 2013 84 - 10 2014 31 - - Create a MultiIndex using columns 'year' and 'month': - - >>> df.set_index(['year', 'month']) - sale - year month - 2012 1 55 - 2014 4 40 - 2013 7 84 - 2014 10 31 - - Create a MultiIndex using a set of values and a column: - - >>> df.set_index([[1, 2, 3, 4], 'year']) - month sale - year - 1 2012 1 55 - 2 2014 4 40 - 3 2013 7 84 - 4 2014 10 31 + %(examples)s """ # parameter keys is checked in Series.set_index / DataFrame.set_index! 
inplace = validate_bool_kwarg(inplace, 'inplace') diff --git a/pandas/core/series.py b/pandas/core/series.py index d35a36c130ad5..915a4b65545f6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1069,75 +1069,59 @@ def _set_value(self, label, value, takeable=False): return self _set_value.__doc__ = set_value.__doc__ + @Substitution( + klass='Series', other_klass='DataFrame', + params=""" +arrays : array or list of arrays + Either a Series, Index, MultiIndex, list, np.ndarray or a list + containing only Series, Index, MultiIndex, list, np.ndarray. +""".strip(), # remove linebreaks at beginning/end + examples=""" +>>> s = pd.Series(range(10, 13)) +>>> s +0 10 +1 11 +2 12 +dtype: int64 + +Set the index to become `['a', 'b', 'c']`: + +>>> s.set_index(['a', 'b', 'c']) +a 10 +b 11 +c 12 +dtype: int64 + +Create a MultiIndex by appending to the existing index: + +>>> s.set_index(['a', 'b', 'c'], append=True) +0 a 10 +1 b 11 +2 c 12 +dtype: int64 + +Create a MultiIndex by passing a list of arrays: + +>>> t = (s ** 2).set_index([['a', 'b', 'c'], ['I', 'II', 'III']]) +>>> t +a I 100 +b II 121 +c III 144 +dtype: int64 + +Apply index from another object (of the same length!): + +>>> s.set_index(t.index) +a I 10 +b II 11 +c III 12 +dtype: int64 +""".strip() # remove linebreaks at beginning/end + ) + @Appender(generic.NDFrame.set_index.__doc__) def set_index(self, arrays, append=False, inplace=False, verify_integrity=False): - """ - Set the Series index (row labels) using one or more columns. - - Parameters - ---------- - arrays : array or list of arrays - Either a Series, Index, MultiIndex, list, np.ndarray or a list - containing only Series, Index, MultiIndex, list, np.ndarray. - append : boolean, default False - Whether to append columns to existing index. - inplace : boolean, default False - Modify the Series in place (do not create a new object). - verify_integrity : boolean, default False - Check the new index for duplicates. 
Otherwise defer the check until - necessary. Setting to False will improve the performance of this. - method. - Returns - ------- - reindexed : Series if inplace is False, else None - - See Also - -------- - DataFrame.set_index: Corresponding method for DataFrame - - Examples - -------- - >>> s = pd.Series(range(10, 13)) - >>> s - 0 10 - 1 11 - 2 12 - dtype: int64 - - Set the index to become `['a', 'b', 'c']`: - - >>> s.set_index(['a', 'b', 'c']) - a 10 - b 11 - c 12 - dtype: int64 - - Create a MultiIndex by appending to the existing index: - - >>> s.set_index(['a', 'b', 'c'], append=True) - 0 a 10 - 1 b 11 - 2 c 12 - dtype: int64 - - Create a MultiIndex by passing a list of arrays: - - >>> t = (s ** 2).set_index([['a', 'b', 'c'], ['I', 'II', 'III']]) - >>> t - a I 100 - b II 121 - c III 144 - dtype: int64 - - Apply index from another object (of the same length!): - - >>> s.set_index(t.index) - a I 10 - b II 11 - c III 12 - dtype: int64 - """ if not isinstance(arrays, list) or all(is_scalar(x) for x in arrays): arrays = [arrays] From 4358d52ebec2d212013dcf7ca4f2212e4549ae0f Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 1 Nov 2018 08:33:49 +0100 Subject: [PATCH 06/12] Review (TomAugspurger) --- pandas/core/frame.py | 98 +++++++++++++++++++++---------------------- pandas/core/series.py | 92 ++++++++++++++++++++-------------------- 2 files changed, 95 insertions(+), 95 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5299e9b73e347..fe050e9d09f61 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3913,55 +3913,55 @@ def shift(self, periods=1, freq=None, axis=0): @Substitution( klass='DataFrame', other_klass='Series', - params=""" -keys : column label or list of column labels / arrays - Either a column label, Series, Index, MultiIndex, list, np.ndarray - or a list containing only column labels, Series, Index, MultiIndex, - list np.ndarray. -drop : boolean, default True - Delete columns to be used as the new index. 
-""".strip(), # remove linebreaks at beginning/end - examples=""" ->>> df = pd.DataFrame({'month': [1, 4, 7, 10], -... 'year': [2012, 2014, 2013, 2014], -... 'sale': [55, 40, 84, 31]}) ->>> df - month year sale -0 1 2012 55 -1 4 2014 40 -2 7 2013 84 -3 10 2014 31 - -Set the index to become the 'month' column: - ->>> df.set_index('month') - year sale -month -1 2012 55 -4 2014 40 -7 2013 84 -10 2014 31 - -Create a MultiIndex using columns 'year' and 'month': - ->>> df.set_index(['year', 'month']) - sale -year month -2012 1 55 -2014 4 40 -2013 7 84 -2014 10 31 - -Create a MultiIndex using a set of values and a column: - ->>> df.set_index([[1, 2, 3, 4], 'year']) - month sale - year -1 2012 1 55 -2 2014 4 40 -3 2013 7 84 -4 2014 10 31 -""".strip() # remove linebreaks at beginning/end + params=dedent("""\ + keys : column label or list of column labels / arrays + Either a column label, Series, Index, MultiIndex, list, np.ndarray + or a list containing only column labels, Series, Index, MultiIndex, + list np.ndarray. + drop : boolean, default True + Delete columns to be used as the new index.\ + """), + examples=dedent("""\ + >>> df = pd.DataFrame({'month': [1, 4, 7, 10], + ... 'year': [2012, 2014, 2013, 2014], + ... 
'sale': [55, 40, 84, 31]}) + >>> df + month year sale + 0 1 2012 55 + 1 4 2014 40 + 2 7 2013 84 + 3 10 2014 31 + + Set the index to become the 'month' column: + + >>> df.set_index('month') + year sale + month + 1 2012 55 + 4 2014 40 + 7 2013 84 + 10 2014 31 + + Create a MultiIndex using columns 'year' and 'month': + + >>> df.set_index(['year', 'month']) + sale + year month + 2012 1 55 + 2014 4 40 + 2013 7 84 + 2014 10 31 + + Create a MultiIndex using a set of values and a column: + + >>> df.set_index([[1, 2, 3, 4], 'year']) + month sale + year + 1 2012 1 55 + 2 2014 4 40 + 3 2013 7 84 + 4 2014 10 31 + """) ) @Appender(NDFrame.set_index.__doc__) def set_index(self, keys, drop=True, append=False, inplace=False, diff --git a/pandas/core/series.py b/pandas/core/series.py index 915a4b65545f6..1f5710b548fe1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1071,52 +1071,52 @@ def _set_value(self, label, value, takeable=False): @Substitution( klass='Series', other_klass='DataFrame', - params=""" -arrays : array or list of arrays - Either a Series, Index, MultiIndex, list, np.ndarray or a list - containing only Series, Index, MultiIndex, list, np.ndarray. 
-""".strip(), # remove linebreaks at beginning/end - examples=""" ->>> s = pd.Series(range(10, 13)) ->>> s -0 10 -1 11 -2 12 -dtype: int64 - -Set the index to become `['a', 'b', 'c']`: - ->>> s.set_index(['a', 'b', 'c']) -a 10 -b 11 -c 12 -dtype: int64 - -Create a MultiIndex by appending to the existing index: - ->>> s.set_index(['a', 'b', 'c'], append=True) -0 a 10 -1 b 11 -2 c 12 -dtype: int64 - -Create a MultiIndex by passing a list of arrays: - ->>> t = (s ** 2).set_index([['a', 'b', 'c'], ['I', 'II', 'III']]) ->>> t -a I 100 -b II 121 -c III 144 -dtype: int64 - -Apply index from another object (of the same length!): - ->>> s.set_index(t.index) -a I 10 -b II 11 -c III 12 -dtype: int64 -""".strip() # remove linebreaks at beginning/end + params=dedent("""\ + arrays : array or list of arrays + Either a Series, Index, MultiIndex, list, np.ndarray or a list + containing only Series, Index, MultiIndex, list, np.ndarray.\ + """), + examples=dedent("""\ + >>> s = pd.Series(range(10, 13)) + >>> s + 0 10 + 1 11 + 2 12 + dtype: int64 + + Set the index to become `['a', 'b', 'c']`: + + >>> s.set_index(['a', 'b', 'c']) + a 10 + b 11 + c 12 + dtype: int64 + + Create a MultiIndex by appending to the existing index: + + >>> s.set_index(['a', 'b', 'c'], append=True) + 0 a 10 + 1 b 11 + 2 c 12 + dtype: int64 + + Create a MultiIndex by passing a list of arrays: + + >>> t = (s ** 2).set_index([['a', 'b', 'c'], ['I', 'II', 'III']]) + >>> t + a I 100 + b II 121 + c III 144 + dtype: int64 + + Apply index from another object (of the same length!): + + >>> s.set_index(t.index) + a I 10 + b II 11 + c III 12 + dtype: int64 + """) ) @Appender(generic.NDFrame.set_index.__doc__) def set_index(self, arrays, append=False, inplace=False, From b89af709aced27bcf7b0dd4df407548988b2e363 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Thu, 15 Nov 2018 19:50:51 +0100 Subject: [PATCH 07/12] Lint; change to pytest.raises --- pandas/core/frame.py | 2 +- pandas/tests/series/test_alter_axes.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fe050e9d09f61..76e8528517ead 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -72,7 +72,7 @@ is_iterator, is_sequence, is_named_tuple) -from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass, ABCMultiIndex +from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass from pandas.core.dtypes.missing import isna, notna from pandas.core import algorithms diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 6e9d2114d92ce..c832e29b9f391 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -19,7 +19,7 @@ def test_set_index_directly(self, string_series): string_series.index = idx tm.assert_index_equal(string_series.index, idx) - with tm.assert_raises_regex(ValueError, 'Length mismatch'): + with pytest.raises(ValueError, match='Length mismatch'): string_series.index = idx[::2] # MultiIndex constructor does not work directly on Series -> lambda @@ -39,7 +39,7 @@ def test_set_index(self, string_series, inplace, box): result = string_series.set_index(idx) tm.assert_index_equal(result.index, expected) - with tm.assert_raises_regex(ValueError, 'Length mismatch'): + with pytest.raises(ValueError, match='Length mismatch'): string_series.set_index(string_series.index[::2], inplace=inplace) def test_set_index_cast(self): @@ -130,21 +130,21 @@ def test_set_index_pass_multiindex(self, string_series, append): def test_set_index_verify_integrity(self, string_series): idx = np.zeros(len(string_series)) - with tm.assert_raises_regex(ValueError, 'Index has duplicate keys'): + with pytest.raises(ValueError, match='Index has duplicate keys'): string_series.set_index(idx, 
verify_integrity=True) # with MultiIndex - with tm.assert_raises_regex(ValueError, 'Index has duplicate keys'): + with pytest.raises(ValueError, match='Index has duplicate keys'): string_series.set_index([idx, idx], verify_integrity=True) def test_set_index_raise(self, string_series): msg = 'The parameter "arrays" may only contain one-dimensional.*' # forbidden type, e.g. set - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): string_series.set_index(set(string_series.index), verify_integrity=True) # wrong type in list with arrays - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): string_series.set_index([string_series.index, 'X'], verify_integrity=True) From e4d36988cfbcaa02c6a55018a4646045c84cc7c3 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 27 Nov 2018 20:32:23 +0100 Subject: [PATCH 08/12] Fix docstring template --- pandas/core/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 100e02e78e7cf..1b40f2e91bcb2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -662,9 +662,9 @@ def set_index(self, keys, drop=True, append=False, inplace=False, See Also -------- %(other_klass)s.set_index: method adapted for %(other_klass)s - %(klass).reset_index : Opposite of set_index. - %(klass).reindex : Change to new indices or expand indices. - %(klass).reindex_like : Change to same indices as other %(klass). + %(klass)s.reset_index : Opposite of set_index. + %(klass)s.reindex : Change to new indices or expand indices. + %(klass)s.reindex_like : Change to same indices as other %(klass)s. Examples -------- From 31b59902b2ebf8810b03d231d33d47a72d912b7a Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Fri, 4 Jan 2019 23:44:22 +0100 Subject: [PATCH 09/12] Fix docstrings --- pandas/core/frame.py | 8 +++----- pandas/core/generic.py | 11 ++++++----- pandas/core/series.py | 6 ++---- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c31e31db25c88..5d2c3eda073f3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4042,9 +4042,8 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): Either a column label, Series, Index, MultiIndex, list, np.ndarray or a list containing only column labels, Series, Index, MultiIndex, list np.ndarray. - drop : boolean, default True - Delete columns to be used as the new index.\ - """), + drop : bool, default True + Delete columns to be used as the new index."""), examples=dedent("""\ >>> df = pd.DataFrame({'month': [1, 4, 7, 10], ... 'year': [2012, 2014, 2013, 2014], @@ -4084,8 +4083,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): 1 2012 1 55 2 2014 4 40 3 2013 7 84 - 4 2014 10 31 - """) + 4 2014 10 31""") ) @Appender(NDFrame.set_index.__doc__) def set_index(self, keys, drop=True, append=False, inplace=False, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 077564cda7d6a..8d3d9c6191a7f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -639,22 +639,23 @@ def set_index(self, keys, drop=True, append=False, inplace=False, Parameters ---------- %(params)s - append : boolean, default False + append : bool, default False Whether to append columns to existing index. - inplace : boolean, default False + inplace : bool, default False Modify the %(klass)s in place (do not create a new object). - verify_integrity : boolean, default False + verify_integrity : bool, default False Check the new index for duplicates. Otherwise defer the check until necessary. Setting to False will improve the performance of this method. 
Returns ------- - reindexed : %(klass)s if inplace is False, else None + %(klass)s + The reindexed %(klass)s. Will be None if inplace is True. See Also -------- - %(other_klass)s.set_index: method adapted for %(other_klass)s + %(other_klass)s.set_index: Method adapted for %(other_klass)s. %(klass)s.reset_index : Opposite of set_index. %(klass)s.reindex : Change to new indices or expand indices. %(klass)s.reindex_like : Change to same indices as other %(klass)s. diff --git a/pandas/core/series.py b/pandas/core/series.py index 0f2cd3784b714..06647494cad1c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1176,8 +1176,7 @@ def _set_value(self, label, value, takeable=False): params=dedent("""\ arrays : array or list of arrays Either a Series, Index, MultiIndex, list, np.ndarray or a list - containing only Series, Index, MultiIndex, list, np.ndarray.\ - """), + containing only Series, Index, MultiIndex, list, np.ndarray."""), examples=dedent("""\ >>> s = pd.Series(range(10, 13)) >>> s @@ -1217,8 +1216,7 @@ def _set_value(self, label, value, takeable=False): a I 10 b II 11 c III 12 - dtype: int64 - """) + dtype: int64""") ) @Appender(generic.NDFrame.set_index.__doc__) def set_index(self, arrays, append=False, inplace=False, From aaa8644dc51cfbf5c6efe6fb1aee2e5e59a2bb55 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sat, 5 Jan 2019 00:12:43 +0100 Subject: [PATCH 10/12] Review (TomAugspurger) --- pandas/core/generic.py | 4 +++- pandas/core/series.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8d3d9c6191a7f..17e45f04c1c14 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -664,7 +664,9 @@ def set_index(self, keys, drop=True, append=False, inplace=False, -------- %(examples)s """ - # parameter keys is checked in Series.set_index / DataFrame.set_index! 
+ # parameter keys is checked in Series.set_index / DataFrame.set_index, + # will always be passed as a list of list-likes! + inplace = validate_bool_kwarg(inplace, 'inplace') if inplace: obj = self diff --git a/pandas/core/series.py b/pandas/core/series.py index 06647494cad1c..716ccfc48ed82 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1222,6 +1222,8 @@ def _set_value(self, label, value, takeable=False): def set_index(self, arrays, append=False, inplace=False, verify_integrity=False): + # NDFrame.set_index expects a list of list-likes. Lists of scalars + # must be wrapped in another list to avoid being interpreted as keys. if not isinstance(arrays, list) or all(is_scalar(x) for x in arrays): arrays = [arrays] From 81cfd82b8a13e4cf181f890130ea46502363e9c9 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sat, 5 Jan 2019 00:57:43 +0100 Subject: [PATCH 11/12] Review (WillAyd) --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 17e45f04c1c14..8d9851aa3eb79 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -655,7 +655,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False, See Also -------- - %(other_klass)s.set_index: Method adapted for %(other_klass)s. + %(other_klass)s.set_index : Method adapted for %(other_klass)s. %(klass)s.reset_index : Opposite of set_index. %(klass)s.reindex : Change to new indices or expand indices. %(klass)s.reindex_like : Change to same indices as other %(klass)s. From 7f35d2c6b2a0ac9b5a888f8619e9a406e85856d3 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Sun, 6 Jan 2019 15:39:08 +0100 Subject: [PATCH 12/12] Refactor docstring into _shared_docs --- pandas/core/frame.py | 50 ------------------- pandas/core/generic.py | 110 ++++++++++++++++++++++++++++++----------- pandas/core/series.py | 2 +- 3 files changed, 82 insertions(+), 80 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5d2c3eda073f3..26f3c1c3948d5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4035,56 +4035,6 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): return super(DataFrame, self).shift(periods=periods, freq=freq, axis=axis, fill_value=fill_value) - @Substitution( - klass='DataFrame', other_klass='Series', - params=dedent("""\ - keys : column label or list of column labels / arrays - Either a column label, Series, Index, MultiIndex, list, np.ndarray - or a list containing only column labels, Series, Index, MultiIndex, - list np.ndarray. - drop : bool, default True - Delete columns to be used as the new index."""), - examples=dedent("""\ - >>> df = pd.DataFrame({'month': [1, 4, 7, 10], - ... 'year': [2012, 2014, 2013, 2014], - ... 
'sale': [55, 40, 84, 31]}) - >>> df - month year sale - 0 1 2012 55 - 1 4 2014 40 - 2 7 2013 84 - 3 10 2014 31 - - Set the index to become the 'month' column: - - >>> df.set_index('month') - year sale - month - 1 2012 55 - 4 2014 40 - 7 2013 84 - 10 2014 31 - - Create a MultiIndex using columns 'year' and 'month': - - >>> df.set_index(['year', 'month']) - sale - year month - 2012 1 55 - 2014 4 40 - 2013 7 84 - 2014 10 31 - - Create a MultiIndex using a set of values and a column: - - >>> df.set_index([[1, 2, 3, 4], 'year']) - month sale - year - 1 2012 1 55 - 2 2014 4 40 - 3 2013 7 84 - 4 2014 10 31""") - ) @Appender(NDFrame.set_index.__doc__) def set_index(self, keys, drop=True, append=False, inplace=False, verify_integrity=False): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8d9851aa3eb79..00193249fe828 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -631,39 +631,91 @@ def _set_axis(self, axis, labels): self._data.set_axis(axis, labels) self._clear_item_cache() - def set_index(self, keys, drop=True, append=False, inplace=False, - verify_integrity=False): - """ - Set the index (row labels) using one or more given arrays (or labels). + _shared_docs['set_index'] = """ + Set the index (row labels) using one or more given arrays (or labels). - Parameters - ---------- - %(params)s - append : bool, default False - Whether to append columns to existing index. - inplace : bool, default False - Modify the %(klass)s in place (do not create a new object). - verify_integrity : bool, default False - Check the new index for duplicates. Otherwise defer the check until - necessary. Setting to False will improve the performance of this - method. + Parameters + ---------- + %(params)s + append : bool, default False + Whether to append columns to existing index. + inplace : bool, default False + Modify the %(klass)s in place (do not create a new object). + verify_integrity : bool, default False + Check the new index for duplicates. 
Otherwise defer the check until + necessary. Setting to False will improve the performance of this + method. - Returns - ------- - %(klass)s - The reindexed %(klass)s. Will be None if inplace is True. + Returns + ------- + %(klass)s + The reindexed %(klass)s. Will be None if inplace is True. - See Also - -------- - %(other_klass)s.set_index : Method adapted for %(other_klass)s. - %(klass)s.reset_index : Opposite of set_index. - %(klass)s.reindex : Change to new indices or expand indices. - %(klass)s.reindex_like : Change to same indices as other %(klass)s. + See Also + -------- + %(other_klass)s.set_index : Method adapted for %(other_klass)s. + %(klass)s.reset_index : Opposite of set_index. + %(klass)s.reindex : Change to new indices or expand indices. + %(klass)s.reindex_like : Change to same indices as other %(klass)s. - Examples - -------- - %(examples)s - """ + Examples + -------- + %(examples)s + """ + + @Substitution( + klass='DataFrame', other_klass='Series', + params=dedent("""\ + keys : column label or list of column labels / arrays + Either a column label, Series, Index, MultiIndex, list, np.ndarray + or a list containing only column labels, Series, Index, MultiIndex, + list, np.ndarray. + drop : bool, default True + Delete columns to be used as the new index."""), + examples=dedent("""\ + >>> df = pd.DataFrame({'month': [1, 4, 7, 10], + ... 'year': [2012, 2014, 2013, 2014], + ...
'sale': [55, 40, 84, 31]}) + >>> df + month year sale + 0 1 2012 55 + 1 4 2014 40 + 2 7 2013 84 + 3 10 2014 31 + + Set the index to become the 'month' column: + + >>> df.set_index('month') + year sale + month + 1 2012 55 + 4 2014 40 + 7 2013 84 + 10 2014 31 + + Create a MultiIndex using columns 'year' and 'month': + + >>> df.set_index(['year', 'month']) + sale + year month + 2012 1 55 + 2014 4 40 + 2013 7 84 + 2014 10 31 + + Create a MultiIndex using a set of values and a column: + + >>> df.set_index([[1, 2, 3, 4], 'year']) + month sale + year + 1 2012 1 55 + 2 2014 4 40 + 3 2013 7 84 + 4 2014 10 31""") + ) + @Appender(_shared_docs["set_index"]) + def set_index(self, keys, drop=True, append=False, inplace=False, + verify_integrity=False): # parameter keys is checked in Series.set_index / DataFrame.set_index, # will always be passed as a list of list-likes! diff --git a/pandas/core/series.py b/pandas/core/series.py index 716ccfc48ed82..e4df55c9afcb4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1218,7 +1218,7 @@ def _set_value(self, label, value, takeable=False): c III 12 dtype: int64""") ) - @Appender(generic.NDFrame.set_index.__doc__) + @Appender(generic._shared_docs['set_index']) def set_index(self, arrays, append=False, inplace=False, verify_integrity=False):