Skip to content

Commit aadf50b

Browse files
committed
ENH: Add set_index to Series
1 parent 8a1c8ad commit aadf50b

File tree

5 files changed

+344
-77
lines changed

5 files changed

+344
-77
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ Other Enhancements
181181
The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`).
182182
- :func:`to_timedelta` now supports ISO-formatted timedelta strings (:issue:`21877`)
183183
- :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`)
184+
- :class:`Series` has gained the method :meth:`Series.set_index`, which works like its :class:`DataFrame` counterpart :meth:`DataFrame.set_index` (:issue:`21684`)
184185
- :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`)
185186
- :class:`Resampler` is now iterable like :class:`GroupBy` (:issue:`15314`).
186187
- :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` method (:issue:`15023`).

pandas/core/frame.py

+8-63
Original file line numberDiff line numberDiff line change
@@ -3843,6 +3843,10 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
38433843
necessary. Setting to False will improve the performance of this
38443844
method
38453845
3846+
Returns
3847+
-------
3848+
reindexed : DataFrame if inplace is False, else None
3849+
38463850
Examples
38473851
--------
38483852
>>> df = pd.DataFrame({'month': [1, 4, 7, 10],
@@ -3883,73 +3887,14 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
38833887
2 2014 4 40
38843888
3 2013 7 84
38853889
4 2014 10 31
3886-
3887-
Returns
3888-
-------
3889-
dataframe : DataFrame
38903890
"""
3891-
inplace = validate_bool_kwarg(inplace, 'inplace')
38923891
if not isinstance(keys, list):
38933892
keys = [keys]
38943893

3895-
if inplace:
3896-
frame = self
3897-
else:
3898-
frame = self.copy()
3899-
3900-
arrays = []
3901-
names = []
3902-
if append:
3903-
names = [x for x in self.index.names]
3904-
if isinstance(self.index, MultiIndex):
3905-
for i in range(self.index.nlevels):
3906-
arrays.append(self.index._get_level_values(i))
3907-
else:
3908-
arrays.append(self.index)
3909-
3910-
to_remove = []
3911-
for col in keys:
3912-
if isinstance(col, MultiIndex):
3913-
# append all but the last column so we don't have to modify
3914-
# the end of this loop
3915-
for n in range(col.nlevels - 1):
3916-
arrays.append(col._get_level_values(n))
3917-
3918-
level = col._get_level_values(col.nlevels - 1)
3919-
names.extend(col.names)
3920-
elif isinstance(col, Series):
3921-
level = col._values
3922-
names.append(col.name)
3923-
elif isinstance(col, Index):
3924-
level = col
3925-
names.append(col.name)
3926-
elif isinstance(col, (list, np.ndarray, Index)):
3927-
level = col
3928-
names.append(None)
3929-
else:
3930-
level = frame[col]._values
3931-
names.append(col)
3932-
if drop:
3933-
to_remove.append(col)
3934-
arrays.append(level)
3935-
3936-
index = ensure_index_from_sequences(arrays, names)
3937-
3938-
if verify_integrity and not index.is_unique:
3939-
duplicates = index[index.duplicated()].unique()
3940-
raise ValueError('Index has duplicate keys: {dup}'.format(
3941-
dup=duplicates))
3942-
3943-
for c in to_remove:
3944-
del frame[c]
3945-
3946-
# clear up memory usage
3947-
index._cleanup()
3948-
3949-
frame.index = index
3950-
3951-
if not inplace:
3952-
return frame
3894+
vi = verify_integrity
3895+
return super(DataFrame, self).set_index(keys=keys, drop=drop,
3896+
append=append, inplace=inplace,
3897+
verify_integrity=vi)
39533898

39543899
def reset_index(self, level=None, drop=False, inplace=False, col_level=0,
39553900
col_fill=''):

pandas/core/generic.py

+130-3
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,13 @@
3232
from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
3333
from pandas.core.dtypes.inference import is_hashable
3434
from pandas.core.dtypes.missing import isna, notna
35-
from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame
35+
from pandas.core.dtypes.generic import (ABCIndexClass, ABCMultiIndex, ABCPanel,
36+
ABCSeries, ABCDataFrame)
3637

3738
from pandas.core.base import PandasObject, SelectionMixin
38-
from pandas.core.index import (Index, MultiIndex, ensure_index,
39-
InvalidIndexError, RangeIndex)
39+
from pandas.core.index import (Index, MultiIndex,
40+
InvalidIndexError, RangeIndex,
41+
ensure_index, ensure_index_from_sequences)
4042
import pandas.core.indexing as indexing
4143
from pandas.core.indexes.datetimes import DatetimeIndex
4244
from pandas.core.indexes.period import PeriodIndex, Period
@@ -663,6 +665,131 @@ def _set_axis(self, axis, labels):
663665
y : same as input
664666
"""
665667

668+
def set_index(self, keys, drop=True, append=False, inplace=False,
669+
verify_integrity=False):
670+
"""
671+
Set the Series/DataFrame index (row labels) using one or more given
672+
arrays (or column labels in case of DataFrame).
673+
By default yields a new object.
674+
675+
Parameters
676+
----------
677+
keys : column label or list of column labels / arrays. For Series case,
678+
only array or list of arrays is allowed.
679+
drop : boolean, default True
680+
Delete columns to be used as the new index (only for DataFrame).
681+
append : boolean, default False
682+
Whether to append columns to existing index
683+
inplace : boolean, default False
684+
Modify the Series/DataFrame in place (do not create a new object)
685+
verify_integrity : boolean, default False
686+
Check the new index for duplicates. Otherwise defer the check until
687+
necessary. Setting to False will improve the performance of this
688+
method
689+
690+
Returns
691+
-------
692+
reindexed : Series/DataFrame if inplace is False, else None
693+
694+
Examples
695+
--------
696+
>>> df = pd.DataFrame({'month': [1, 4, 7, 10],
697+
... 'year': [2012, 2014, 2013, 2014],
698+
... 'sale':[55, 40, 84, 31]})
699+
month sale year
700+
0 1 55 2012
701+
1 4 40 2014
702+
2 7 84 2013
703+
3 10 31 2014
704+
705+
Set the index to become the 'month' column:
706+
707+
>>> df.set_index('month')
708+
sale year
709+
month
710+
1 55 2012
711+
4 40 2014
712+
7 84 2013
713+
10 31 2014
714+
715+
Create a multi-index using columns 'year' and 'month':
716+
717+
>>> df.set_index(['year', 'month'])
718+
sale
719+
year month
720+
2012 1 55
721+
2014 4 40
722+
2013 7 84
723+
2014 10 31
724+
725+
Create a multi-index using a set of values and a column:
726+
727+
>>> df.set_index([[1, 2, 3, 4], 'year'])
728+
month sale
729+
year
730+
1 2012 1 55
731+
2 2014 4 40
732+
3 2013 7 84
733+
4 2014 10 31
734+
"""
735+
inplace = validate_bool_kwarg(inplace, 'inplace')
736+
if inplace:
737+
obj = self
738+
else:
739+
obj = self.copy()
740+
741+
arrays = []
742+
names = []
743+
if append:
744+
names = [x for x in self.index.names]
745+
if isinstance(self.index, ABCMultiIndex):
746+
for i in range(self.index.nlevels):
747+
arrays.append(self.index._get_level_values(i))
748+
else:
749+
arrays.append(self.index)
750+
751+
to_remove = []
752+
for col in keys:
753+
if isinstance(col, ABCMultiIndex):
754+
for n in range(col.nlevels):
755+
arrays.append(col._get_level_values(n))
756+
names.extend(col.names)
757+
elif isinstance(col, ABCIndexClass):
758+
# Index but not MultiIndex (treated above)
759+
arrays.append(col)
760+
names.append(col.name)
761+
elif isinstance(col, ABCSeries):
762+
arrays.append(col._values)
763+
names.append(col.name)
764+
elif isinstance(col, (list, np.ndarray)):
765+
arrays.append(col)
766+
names.append(None)
767+
# from here, col can only be a column label (and obj a DataFrame);
768+
# see checks in Series.set_index and DataFrame.set_index
769+
else:
770+
arrays.append(obj[col]._values)
771+
names.append(col)
772+
if drop:
773+
to_remove.append(col)
774+
775+
index = ensure_index_from_sequences(arrays, names)
776+
777+
if verify_integrity and not index.is_unique:
778+
duplicates = list(index[index.duplicated()])
779+
raise ValueError('Index has duplicate keys: {dup}'.format(
780+
dup=duplicates))
781+
782+
for c in to_remove:
783+
del obj[c]
784+
785+
# clear up memory usage
786+
index._cleanup()
787+
788+
obj.index = index
789+
790+
if not inplace:
791+
return obj
792+
666793
@Appender(_shared_docs['transpose'] % _shared_doc_kwargs)
667794
def transpose(self, *args, **kwargs):
668795

pandas/core/series.py

+82-2
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@
3535
_is_unorderable_exception,
3636
ensure_platform_int,
3737
pandas_dtype)
38-
from pandas.core.dtypes.generic import (
39-
ABCSparseArray, ABCDataFrame, ABCIndexClass)
38+
from pandas.core.dtypes.generic import (ABCDataFrame, ABCIndexClass,
39+
ABCSeries, ABCSparseArray)
4040
from pandas.core.dtypes.cast import (
4141
maybe_upcast, infer_dtype_from_scalar,
4242
maybe_convert_platform,
@@ -1093,6 +1093,86 @@ def _set_value(self, label, value, takeable=False):
10931093
return self
10941094
_set_value.__doc__ = set_value.__doc__
10951095

1096+
def set_index(self, arrays, append=False, inplace=False,
1097+
verify_integrity=False):
1098+
"""
1099+
Set the Series index (row labels) using one or more arrays.
1100+
By default yields a new object.
1101+
1102+
Parameters
1103+
----------
1104+
arrays : array or list of arrays
1105+
Either a Series, Index, MultiIndex, list, np.ndarray or a list
1106+
containing only Series, Index, MultiIndex, list, np.ndarray
1107+
append : boolean, default False
1108+
Whether to append the given arrays to the existing index
1109+
inplace : boolean, default False
1110+
Modify the Series in place (do not create a new object)
1111+
verify_integrity : boolean, default False
1112+
Check the new index for duplicates. Otherwise defer the check until
1113+
necessary. Setting to False will improve the performance of this
1114+
method
1115+
1116+
Returns
1117+
-------
1118+
reindexed : Series if inplace is False, else None
1119+
1120+
Examples
1121+
--------
1122+
>>> s = pd.Series(range(10, 13))
1123+
0 10
1124+
1 11
1125+
2 12
1126+
dtype: int64
1127+
1128+
Set the index to become `['a', 'b', 'c']`:
1129+
1130+
>>> s.set_index(['a', 'b', 'c'])
1131+
a 10
1132+
b 11
1133+
c 12
1134+
dtype: int64
1135+
1136+
Create a multi-index by appending to the existing index:
1137+
1138+
>>> s.set_index(['a', 'b', 'c'], append=True)
1139+
0 a 10
1140+
1 b 11
1141+
2 c 12
1142+
dtype: int64
1143+
1144+
Create a multi-index by passing a list of arrays:
1145+
1146+
>>> t = s.set_index([['a', 'b', 'c'], ['I', 'II', 'III']]) ** 2
1147+
>>> t
1148+
a I 100
1149+
b II 121
1150+
c III 144
1151+
dtype: int64
1152+
1153+
Apply index from another object (of the same length!):
1154+
1155+
>>> s.set_index(t.index)
1156+
a I 10
1157+
b II 11
1158+
c III 12
1159+
dtype: int64
1160+
"""
1161+
if not isinstance(arrays, list):
1162+
arrays = [arrays]
1163+
elif all(is_scalar(x) for x in arrays):
1164+
arrays = [arrays]
1165+
1166+
if any(not isinstance(x, (ABCSeries, ABCIndexClass, list, np.ndarray))
1167+
for x in arrays):
1168+
raise TypeError('arrays must be Series, Index, MultiIndex, list, '
1169+
'np.ndarray or list containing only Series, '
1170+
'Index, MultiIndex, list, np.ndarray')
1171+
1172+
return super(Series, self).set_index(keys=arrays, drop=False,
1173+
append=append, inplace=inplace,
1174+
verify_integrity=verify_integrity)
1175+
10961176
def reset_index(self, level=None, drop=False, name=None, inplace=False):
10971177
"""
10981178
Generate a new DataFrame or Series with the index reset.

0 commit comments

Comments
 (0)