Skip to content

Commit 949e699

Browse files
committed
ENH: add set_index to Series
1 parent 145c227 commit 949e699

File tree

5 files changed

+385
-95
lines changed

5 files changed

+385
-95
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ Other Enhancements
194194
- The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`).
195195
- :func:`to_timedelta` now supports iso-formatted timedelta strings (:issue:`21877`)
196196
- :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`)
197+
- :class:`Series` has gained the method :meth:`Series.set_index`, which works like its :class:`DataFrame` counterpart :meth:`DataFrame.set_index` (:issue:`21684`)
197198
- :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`)
198199
- :meth:`round`, :meth:`ceil`, and :meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support an ``ambiguous`` argument for handling datetimes that are rounded to ambiguous times (:issue:`18946`)
199200
- :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`).

pandas/core/frame.py

+35-83
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
from pandas.core.accessor import CachedAccessor
8484
from pandas.core.arrays import Categorical, ExtensionArray
8585
from pandas.core.config import get_option
86+
8687
from pandas.core.generic import NDFrame, _shared_docs
8788
from pandas.core.index import (Index, MultiIndex, ensure_index,
8889
ensure_index_from_sequences)
@@ -3923,45 +3924,56 @@ def shift(self, periods=1, freq=None, axis=0):
39233924
def set_index(self, keys, drop=True, append=False, inplace=False,
39243925
verify_integrity=False):
39253926
"""
3926-
Set the DataFrame index (row labels) using one or more existing
3927-
columns. By default yields a new object.
3927+
Set the DataFrame index (row labels) using one or more columns.
39283928
39293929
Parameters
39303930
----------
39313931
keys : column label or list of column labels / arrays
3932+
Either a column label, Series, Index, MultiIndex, list,
3933+
np.ndarray or a list containing only column labels, Series, Index,
3934+
MultiIndex, list, np.ndarray.
39323935
drop : boolean, default True
3933-
Delete columns to be used as the new index
3936+
Delete columns to be used as the new index.
39343937
append : boolean, default False
3935-
Whether to append columns to existing index
3938+
Whether to append columns to existing index.
39363939
inplace : boolean, default False
3937-
Modify the DataFrame in place (do not create a new object)
3940+
Modify the DataFrame in place (do not create a new object).
39383941
verify_integrity : boolean, default False
39393942
Check the new index for duplicates. Otherwise defer the check until
39403943
necessary. Setting to False will improve the performance of this
3941-
method
3944+
method.
3945+
3946+
Returns
3947+
-------
3948+
reindexed : DataFrame if inplace is False, else None
3949+
3950+
See Also
3951+
--------
3952+
Series.set_index: Corresponding method for Series
39423953
39433954
Examples
39443955
--------
39453956
>>> df = pd.DataFrame({'month': [1, 4, 7, 10],
39463957
... 'year': [2012, 2014, 2013, 2014],
3947-
... 'sale':[55, 40, 84, 31]})
3948-
month sale year
3949-
0 1 55 2012
3950-
1 4 40 2014
3951-
2 7 84 2013
3952-
3 10 31 2014
3958+
... 'sale': [55, 40, 84, 31]})
3959+
>>> df
3960+
month year sale
3961+
0 1 2012 55
3962+
1 4 2014 40
3963+
2 7 2013 84
3964+
3 10 2014 31
39533965
39543966
Set the index to become the 'month' column:
39553967
39563968
>>> df.set_index('month')
3957-
sale year
3969+
year sale
39583970
month
3959-
1 55 2012
3960-
4 40 2014
3961-
7 84 2013
3962-
10 31 2014
3971+
1 2012 55
3972+
4 2014 40
3973+
7 2013 84
3974+
10 2014 31
39633975
3964-
Create a multi-index using columns 'year' and 'month':
3976+
Create a MultiIndex using columns 'year' and 'month':
39653977
39663978
>>> df.set_index(['year', 'month'])
39673979
sale
@@ -3971,7 +3983,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
39713983
2013 7 84
39723984
2014 10 31
39733985
3974-
Create a multi-index using a set of values and a column:
3986+
Create a MultiIndex using a set of values and a column:
39753987
39763988
>>> df.set_index([[1, 2, 3, 4], 'year'])
39773989
month sale
@@ -3980,12 +3992,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
39803992
2 2014 4 40
39813993
3 2013 7 84
39823994
4 2014 10 31
3983-
3984-
Returns
3985-
-------
3986-
dataframe : DataFrame
39873995
"""
3988-
inplace = validate_bool_kwarg(inplace, 'inplace')
39893996
if not isinstance(keys, list):
39903997
keys = [keys]
39913998

@@ -4008,65 +4015,10 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
40084015
if missing:
40094016
raise KeyError('{}'.format(missing))
40104017

4011-
if inplace:
4012-
frame = self
4013-
else:
4014-
frame = self.copy()
4015-
4016-
arrays = []
4017-
names = []
4018-
if append:
4019-
names = [x for x in self.index.names]
4020-
if isinstance(self.index, ABCMultiIndex):
4021-
for i in range(self.index.nlevels):
4022-
arrays.append(self.index._get_level_values(i))
4023-
else:
4024-
arrays.append(self.index)
4025-
4026-
to_remove = []
4027-
for col in keys:
4028-
if isinstance(col, ABCMultiIndex):
4029-
for n in range(col.nlevels):
4030-
arrays.append(col._get_level_values(n))
4031-
names.extend(col.names)
4032-
elif isinstance(col, (ABCIndexClass, ABCSeries)):
4033-
# if Index then not MultiIndex (treated above)
4034-
arrays.append(col)
4035-
names.append(col.name)
4036-
elif isinstance(col, (list, np.ndarray)):
4037-
arrays.append(col)
4038-
names.append(None)
4039-
elif (is_list_like(col)
4040-
and not (isinstance(col, tuple) and col in self)):
4041-
# all other list-likes (but avoid valid column keys)
4042-
col = list(col) # ensure iterator do not get read twice etc.
4043-
arrays.append(col)
4044-
names.append(None)
4045-
# from here, col can only be a column label
4046-
else:
4047-
arrays.append(frame[col]._values)
4048-
names.append(col)
4049-
if drop:
4050-
to_remove.append(col)
4051-
4052-
index = ensure_index_from_sequences(arrays, names)
4053-
4054-
if verify_integrity and not index.is_unique:
4055-
duplicates = index[index.duplicated()].unique()
4056-
raise ValueError('Index has duplicate keys: {dup}'.format(
4057-
dup=duplicates))
4058-
4059-
# use set to handle duplicate column names gracefully in case of drop
4060-
for c in set(to_remove):
4061-
del frame[c]
4062-
4063-
# clear up memory usage
4064-
index._cleanup()
4065-
4066-
frame.index = index
4067-
4068-
if not inplace:
4069-
return frame
4018+
vi = verify_integrity
4019+
return super(DataFrame, self).set_index(keys=keys, drop=drop,
4020+
append=append, inplace=inplace,
4021+
verify_integrity=vi)
40704022

40714023
def reset_index(self, level=None, drop=False, inplace=False, col_level=0,
40724024
col_fill=''):

pandas/core/generic.py

+141-3
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,13 @@
3232
from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
3333
from pandas.core.dtypes.inference import is_hashable
3434
from pandas.core.dtypes.missing import isna, notna
35-
from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame
35+
from pandas.core.dtypes.generic import (ABCIndexClass, ABCMultiIndex, ABCPanel,
36+
ABCSeries, ABCDataFrame)
3637

3738
from pandas.core.base import PandasObject, SelectionMixin
38-
from pandas.core.index import (Index, MultiIndex, ensure_index,
39-
InvalidIndexError, RangeIndex)
39+
from pandas.core.index import (Index, MultiIndex,
40+
InvalidIndexError, RangeIndex,
41+
ensure_index, ensure_index_from_sequences)
4042
import pandas.core.indexing as indexing
4143
from pandas.core.indexes.datetimes import DatetimeIndex
4244
from pandas.core.indexes.period import PeriodIndex, Period
@@ -643,6 +645,142 @@ def _set_axis(self, axis, labels):
643645
self._data.set_axis(axis, labels)
644646
self._clear_item_cache()
645647

648+
def set_index(self, keys, drop=True, append=False, inplace=False,
649+
verify_integrity=False):
650+
"""
651+
Set the index (row labels) using one or more given arrays (or labels).
652+
653+
Parameters
654+
----------
655+
keys : column label or list of column labels / arrays
656+
Either a Series, Index, MultiIndex, list, np.ndarray or a list
657+
containing only Series, Index, MultiIndex, list, np.ndarray.
658+
659+
For DataFrame, additionally column labels may be used.
660+
drop : boolean, default True
661+
Delete columns to be used as the new index (only for DataFrame).
662+
append : boolean, default False
663+
Whether to append columns to existing index.
664+
inplace : boolean, default False
665+
Modify the Series/DataFrame in place (do not create a new object).
666+
verify_integrity : boolean, default False
667+
Check the new index for duplicates. Otherwise defer the check until
668+
necessary. Setting to False will improve the performance of this
669+
method.
670+
671+
Returns
672+
-------
673+
reindexed : Series/DataFrame if inplace is False, else None
674+
675+
See Also
676+
--------
677+
DataFrame.set_index: method adapted for DataFrame
678+
Series.set_index: method adapted for Series
679+
680+
Examples
681+
--------
682+
>>> df = pd.DataFrame({'month': [1, 4, 7, 10],
683+
... 'year': [2012, 2014, 2013, 2014],
684+
... 'sale': [55, 40, 84, 31]})
685+
>>> df
686+
month year sale
687+
0 1 2012 55
688+
1 4 2014 40
689+
2 7 2013 84
690+
3 10 2014 31
691+
692+
Set the index to become the 'month' column:
693+
694+
>>> df.set_index('month')
695+
year sale
696+
month
697+
1 2012 55
698+
4 2014 40
699+
7 2013 84
700+
10 2014 31
701+
702+
Create a MultiIndex using columns 'year' and 'month':
703+
704+
>>> df.set_index(['year', 'month'])
705+
sale
706+
year month
707+
2012 1 55
708+
2014 4 40
709+
2013 7 84
710+
2014 10 31
711+
712+
Create a MultiIndex using a set of values and a column:
713+
714+
>>> df.set_index([[1, 2, 3, 4], 'year'])
715+
month sale
716+
year
717+
1 2012 1 55
718+
2 2014 4 40
719+
3 2013 7 84
720+
4 2014 10 31
721+
"""
722+
# parameter keys is checked in Series.set_index / DataFrame.set_index!
723+
inplace = validate_bool_kwarg(inplace, 'inplace')
724+
if inplace:
725+
obj = self
726+
else:
727+
obj = self.copy()
728+
729+
arrays = []
730+
names = []
731+
if append:
732+
names = [x for x in self.index.names]
733+
if isinstance(self.index, ABCMultiIndex):
734+
for i in range(self.index.nlevels):
735+
arrays.append(self.index._get_level_values(i))
736+
else:
737+
arrays.append(self.index)
738+
739+
to_remove = []
740+
for col in keys:
741+
if isinstance(col, ABCMultiIndex):
742+
for n in range(col.nlevels):
743+
arrays.append(col._get_level_values(n))
744+
names.extend(col.names)
745+
elif isinstance(col, (ABCIndexClass, ABCSeries)):
746+
# if Index then not MultiIndex (treated above)
747+
arrays.append(col)
748+
names.append(col.name)
749+
elif isinstance(col, (list, np.ndarray)):
750+
arrays.append(col)
751+
names.append(None)
752+
elif (is_list_like(col)
753+
and not (isinstance(col, tuple) and col in self)):
754+
# all other list-likes (but avoid valid column keys)
755+
col = list(col) # ensure iterator do not get read twice etc.
756+
arrays.append(col)
757+
names.append(None)
758+
# from here, col can only be a column label
759+
else:
760+
arrays.append(obj[col]._values)
761+
names.append(col)
762+
if drop:
763+
to_remove.append(col)
764+
765+
index = ensure_index_from_sequences(arrays, names)
766+
767+
if verify_integrity and not index.is_unique:
768+
duplicates = list(index[index.duplicated()])
769+
raise ValueError('Index has duplicate keys: {dup}'.format(
770+
dup=duplicates))
771+
772+
# use set to handle duplicate column names gracefully in case of drop
773+
for c in set(to_remove):
774+
del obj[c]
775+
776+
# clear up memory usage
777+
index._cleanup()
778+
779+
obj.index = index
780+
781+
if not inplace:
782+
return obj
783+
646784
def transpose(self, *args, **kwargs):
647785
"""
648786
Permute the dimensions of the %(klass)s

0 commit comments

Comments
 (0)