Skip to content

Commit 3edbaea

Browse files
committed
ENH: Add set_index to Series
1 parent 8a1c8ad commit 3edbaea

File tree

5 files changed

+379
-94
lines changed

5 files changed

+379
-94
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ Other Enhancements
181181
The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`).
182182
- :func:`to_timedelta` now supports ISO-formatted timedelta strings (:issue:`21877`)
183183
- :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`)
184+
- :class:`Series` has gained the method :meth:`Series.set_index`, which works like its :class:`DataFrame` counterpart :meth:`DataFrame.set_index` (:issue:`21684`)
184185
- :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`)
185186
- :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`).
186187
- :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` method (:issue:`15023`).

pandas/core/frame.py

+32-80
Original file line numberDiff line numberDiff line change
@@ -3826,43 +3826,54 @@ def shift(self, periods=1, freq=None, axis=0):
38263826
def set_index(self, keys, drop=True, append=False, inplace=False,
38273827
verify_integrity=False):
38283828
"""
3829-
Set the DataFrame index (row labels) using one or more existing
3830-
columns. By default yields a new object.
3829+
Set the DataFrame index (row labels) using one or more columns.
38313830
38323831
Parameters
38333832
----------
38343833
keys : column label or list of column labels / arrays
3834+
Either a column label, Series, Index, MultiIndex, list,
3835+
np.ndarray or a list containing only column labels, Series, Index,
3836+
MultiIndex, list, np.ndarray.
38353837
drop : boolean, default True
3836-
Delete columns to be used as the new index
3838+
Delete columns to be used as the new index.
38373839
append : boolean, default False
3838-
Whether to append columns to existing index
3840+
Whether to append columns to existing index.
38393841
inplace : boolean, default False
3840-
Modify the DataFrame in place (do not create a new object)
3842+
Modify the DataFrame in place (do not create a new object).
38413843
verify_integrity : boolean, default False
38423844
Check the new index for duplicates. Otherwise defer the check until
38433845
necessary. Setting to False will improve the performance of this
3844-
method
3846+
method.
3847+
3848+
Returns
3849+
-------
3850+
reindexed : DataFrame if inplace is False, else None
3851+
3852+
See Also
3853+
--------
3854+
Series.set_index: Corresponding method for Series
38453855
38463856
Examples
38473857
--------
38483858
>>> df = pd.DataFrame({'month': [1, 4, 7, 10],
38493859
... 'year': [2012, 2014, 2013, 2014],
3850-
... 'sale':[55, 40, 84, 31]})
3851-
month sale year
3852-
0 1 55 2012
3853-
1 4 40 2014
3854-
2 7 84 2013
3855-
3 10 31 2014
3860+
... 'sale': [55, 40, 84, 31]})
3861+
>>> df
3862+
month year sale
3863+
0 1 2012 55
3864+
1 4 2014 40
3865+
2 7 2013 84
3866+
3 10 2014 31
38563867
38573868
Set the index to become the 'month' column:
38583869
38593870
>>> df.set_index('month')
3860-
sale year
3871+
year sale
38613872
month
3862-
1 55 2012
3863-
4 40 2014
3864-
7 84 2013
3865-
10 31 2014
3873+
1 2012 55
3874+
4 2014 40
3875+
7 2013 84
3876+
10 2014 31
38663877
38673878
Create a multi-index using columns 'year' and 'month':
38683879
@@ -3883,73 +3894,14 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
38833894
2 2014 4 40
38843895
3 2013 7 84
38853896
4 2014 10 31
3886-
3887-
Returns
3888-
-------
3889-
dataframe : DataFrame
38903897
"""
3891-
inplace = validate_bool_kwarg(inplace, 'inplace')
38923898
if not isinstance(keys, list):
38933899
keys = [keys]
38943900

3895-
if inplace:
3896-
frame = self
3897-
else:
3898-
frame = self.copy()
3899-
3900-
arrays = []
3901-
names = []
3902-
if append:
3903-
names = [x for x in self.index.names]
3904-
if isinstance(self.index, MultiIndex):
3905-
for i in range(self.index.nlevels):
3906-
arrays.append(self.index._get_level_values(i))
3907-
else:
3908-
arrays.append(self.index)
3909-
3910-
to_remove = []
3911-
for col in keys:
3912-
if isinstance(col, MultiIndex):
3913-
# append all but the last column so we don't have to modify
3914-
# the end of this loop
3915-
for n in range(col.nlevels - 1):
3916-
arrays.append(col._get_level_values(n))
3917-
3918-
level = col._get_level_values(col.nlevels - 1)
3919-
names.extend(col.names)
3920-
elif isinstance(col, Series):
3921-
level = col._values
3922-
names.append(col.name)
3923-
elif isinstance(col, Index):
3924-
level = col
3925-
names.append(col.name)
3926-
elif isinstance(col, (list, np.ndarray, Index)):
3927-
level = col
3928-
names.append(None)
3929-
else:
3930-
level = frame[col]._values
3931-
names.append(col)
3932-
if drop:
3933-
to_remove.append(col)
3934-
arrays.append(level)
3935-
3936-
index = ensure_index_from_sequences(arrays, names)
3937-
3938-
if verify_integrity and not index.is_unique:
3939-
duplicates = index[index.duplicated()].unique()
3940-
raise ValueError('Index has duplicate keys: {dup}'.format(
3941-
dup=duplicates))
3942-
3943-
for c in to_remove:
3944-
del frame[c]
3945-
3946-
# clear up memory usage
3947-
index._cleanup()
3948-
3949-
frame.index = index
3950-
3951-
if not inplace:
3952-
return frame
3901+
vi = verify_integrity
3902+
return super(DataFrame, self).set_index(keys=keys, drop=drop,
3903+
append=append, inplace=inplace,
3904+
verify_integrity=vi)
39533905

39543906
def reset_index(self, level=None, drop=False, inplace=False, col_level=0,
39553907
col_fill=''):

pandas/core/generic.py

+137-3
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,13 @@
3232
from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
3333
from pandas.core.dtypes.inference import is_hashable
3434
from pandas.core.dtypes.missing import isna, notna
35-
from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame
35+
from pandas.core.dtypes.generic import (ABCIndexClass, ABCMultiIndex, ABCPanel,
36+
ABCSeries, ABCDataFrame)
3637

3738
from pandas.core.base import PandasObject, SelectionMixin
38-
from pandas.core.index import (Index, MultiIndex, ensure_index,
39-
InvalidIndexError, RangeIndex)
39+
from pandas.core.index import (Index, MultiIndex,
40+
InvalidIndexError, RangeIndex,
41+
ensure_index, ensure_index_from_sequences)
4042
import pandas.core.indexing as indexing
4143
from pandas.core.indexes.datetimes import DatetimeIndex
4244
from pandas.core.indexes.period import PeriodIndex, Period
@@ -663,6 +665,138 @@ def _set_axis(self, axis, labels):
663665
y : same as input
664666
"""
665667

668+
def set_index(self, keys, drop=True, append=False, inplace=False,
              verify_integrity=False):
    """
    Set the index (row labels) using one or more given arrays (or labels).

    Parameters
    ----------
    keys : list of column labels / arrays
        A list whose items are each a Series, Index, MultiIndex, list or
        np.ndarray. ``keys`` is iterated directly, so callers are expected
        to wrap a single key in a list before calling this method.

        For DataFrame, column labels may additionally be used as items.
    drop : boolean, default True
        Delete columns to be used as the new index (only for DataFrame).
    append : boolean, default False
        Whether to append columns to existing index.
    inplace : boolean, default False
        Modify the Series/DataFrame in place (do not create a new object).
    verify_integrity : boolean, default False
        Check the new index for duplicates. Otherwise defer the check until
        necessary. Setting to False will improve the performance of this
        method.

    Returns
    -------
    reindexed : Series/DataFrame if inplace is False, else None

    Raises
    ------
    ValueError
        If ``verify_integrity`` is True and the new index contains
        duplicate entries.

    See Also
    --------
    DataFrame.set_index: method adapted for DataFrame
    Series.set_index: method adapted for Series

    Examples
    --------
    >>> df = pd.DataFrame({'month': [1, 4, 7, 10],
    ...                    'year': [2012, 2014, 2013, 2014],
    ...                    'sale': [55, 40, 84, 31]})
    >>> df
       month  year  sale
    0      1  2012    55
    1      4  2014    40
    2      7  2013    84
    3     10  2014    31

    Set the index to become the 'month' column:

    >>> df.set_index('month')
           year  sale
    month
    1      2012    55
    4      2014    40
    7      2013    84
    10     2014    31

    Create a multi-index using columns 'year' and 'month':

    >>> df.set_index(['year', 'month'])
                sale
    year  month
    2012  1     55
    2014  4     40
    2013  7     84
    2014  10    31

    Create a multi-index using a set of values and a column:

    >>> df.set_index([[1, 2, 3, 4], 'year'])
             month  sale
       year
    1  2012  1      55
    2  2014  4      40
    3  2013  7      84
    4  2014  10     31
    """
    inplace = validate_bool_kwarg(inplace, 'inplace')
    if inplace:
        obj = self
    else:
        obj = self.copy()

    arrays = []
    names = []
    if append:
        # keep every level of the existing index in front of the new ones
        names = list(self.index.names)
        if isinstance(self.index, ABCMultiIndex):
            for i in range(self.index.nlevels):
                arrays.append(self.index._get_level_values(i))
        else:
            arrays.append(self.index)

    to_remove = []
    for col in keys:
        if isinstance(col, ABCMultiIndex):
            # a MultiIndex contributes one array (and name) per level
            for n in range(col.nlevels):
                arrays.append(col._get_level_values(n))
            names.extend(col.names)
        elif isinstance(col, ABCIndexClass):
            # Index but not MultiIndex (treated above)
            arrays.append(col)
            names.append(col.name)
        elif isinstance(col, ABCSeries):
            arrays.append(col._values)
            names.append(col.name)
        elif isinstance(col, (list, np.ndarray)):
            # plain arrays carry no name
            arrays.append(col)
            names.append(None)
        # from here, col can only be a column label (and obj a DataFrame);
        # see checks in Series.set_index and DataFrame.set_index
        else:
            arrays.append(obj[col]._values)
            names.append(col)
            # Record each label only once: a label listed twice in `keys`
            # would otherwise be deleted twice below, and the second
            # `del` would fail on the already-removed column.
            if drop and col not in to_remove:
                to_remove.append(col)

    index = ensure_index_from_sequences(arrays, names)

    if verify_integrity and not index.is_unique:
        duplicates = list(index[index.duplicated()])
        raise ValueError('Index has duplicate keys: {dup}'.format(
            dup=duplicates))

    # columns are dropped only after the integrity check has passed, so a
    # failed check leaves an in-place target unmodified
    for c in to_remove:
        del obj[c]

    # clear up memory usage
    index._cleanup()

    obj.index = index

    if not inplace:
        return obj
799+
666800
@Appender(_shared_docs['transpose'] % _shared_doc_kwargs)
667801
def transpose(self, *args, **kwargs):
668802

0 commit comments

Comments
 (0)