Skip to content

Commit 904c4ae

Browse files
committed
ENH: Add set_index to Series (WIP)
1 parent 2156431 commit 904c4ae

File tree

5 files changed

+183
-80
lines changed

5 files changed

+183
-80
lines changed

pandas/core/frame.py

+8-66
Original file line numberDiff line numberDiff line change
@@ -3928,6 +3928,10 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
39283928
necessary. Setting to False will improve the performance of this
39293929
method
39303930
3931+
Returns
3932+
-------
3933+
reindexed : DataFrame if inplace is False, else None
3934+
39313935
Examples
39323936
--------
39333937
>>> df = pd.DataFrame({'month': [1, 4, 7, 10],
@@ -3968,73 +3972,11 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
39683972
2 2014 4 40
39693973
3 2013 7 84
39703974
4 2014 10 31
3971-
3972-
Returns
3973-
-------
3974-
dataframe : DataFrame
39753975
"""
3976-
inplace = validate_bool_kwarg(inplace, 'inplace')
3977-
if not isinstance(keys, list):
3978-
keys = [keys]
3979-
3980-
if inplace:
3981-
frame = self
3982-
else:
3983-
frame = self.copy()
3984-
3985-
arrays = []
3986-
names = []
3987-
if append:
3988-
names = [x for x in self.index.names]
3989-
if isinstance(self.index, MultiIndex):
3990-
for i in range(self.index.nlevels):
3991-
arrays.append(self.index._get_level_values(i))
3992-
else:
3993-
arrays.append(self.index)
3994-
3995-
to_remove = []
3996-
for col in keys:
3997-
if isinstance(col, MultiIndex):
3998-
# append all but the last column so we don't have to modify
3999-
# the end of this loop
4000-
for n in range(col.nlevels - 1):
4001-
arrays.append(col._get_level_values(n))
4002-
4003-
level = col._get_level_values(col.nlevels - 1)
4004-
names.extend(col.names)
4005-
elif isinstance(col, Series):
4006-
level = col._values
4007-
names.append(col.name)
4008-
elif isinstance(col, Index):
4009-
level = col
4010-
names.append(col.name)
4011-
elif isinstance(col, (list, np.ndarray, Index)):
4012-
level = col
4013-
names.append(None)
4014-
else:
4015-
level = frame[col]._values
4016-
names.append(col)
4017-
if drop:
4018-
to_remove.append(col)
4019-
arrays.append(level)
4020-
4021-
index = ensure_index_from_sequences(arrays, names)
4022-
4023-
if verify_integrity and not index.is_unique:
4024-
duplicates = index[index.duplicated()].unique()
4025-
raise ValueError('Index has duplicate keys: {dup}'.format(
4026-
dup=duplicates))
4027-
4028-
for c in to_remove:
4029-
del frame[c]
4030-
4031-
# clear up memory usage
4032-
index._cleanup()
4033-
4034-
frame.index = index
4035-
4036-
if not inplace:
4037-
return frame
3976+
vi = verify_integrity
3977+
return super(DataFrame, self).set_index(keys=keys, drop=drop,
3978+
append=append, inplace=inplace,
3979+
verify_integrity=vi)
40383980

40393981
def reset_index(self, level=None, drop=False, inplace=False, col_level=0,
40403982
col_fill=''):

pandas/core/generic.py

+138-2
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,9 @@
3535
from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame
3636

3737
from pandas.core.base import PandasObject, SelectionMixin
38-
from pandas.core.index import (Index, MultiIndex, ensure_index,
39-
InvalidIndexError, RangeIndex)
38+
from pandas.core.index import (Index, MultiIndex,
39+
InvalidIndexError, RangeIndex,
40+
ensure_index, ensure_index_from_sequences)
4041
import pandas.core.indexing as indexing
4142
from pandas.core.indexes.datetimes import DatetimeIndex
4243
from pandas.core.indexes.period import PeriodIndex, Period
@@ -666,6 +667,141 @@ def _set_axis(self, axis, labels):
666667
y : same as input
667668
"""
668669

670+
def set_index(self, keys, drop=True, append=False, inplace=False,
671+
verify_integrity=False):
672+
"""
673+
Set the Series/DataFrame index (row labels) using one or more given
674+
arrays (or column labels in case of DataFrame).
675+
By default yields a new object.
676+
677+
Parameters
678+
----------
679+
keys : column label or list of column labels / arrays. For a Series,
680+
only an array or a list of arrays is allowed.
681+
drop : boolean, default True
682+
Delete columns to be used as the new index (only for DataFrame).
683+
append : boolean, default False
684+
Whether to append columns to existing index
685+
inplace : boolean, default False
686+
Modify the Series/DataFrame in place (do not create a new object)
687+
verify_integrity : boolean, default False
688+
Check the new index for duplicates. Otherwise defer the check until
689+
necessary. Setting to False will improve the performance of this
690+
method
691+
692+
Returns
693+
-------
694+
reindexed : Series/DataFrame if inplace is False, else None
695+
696+
Examples
697+
--------
698+
>>> df = pd.DataFrame({'month': [1, 4, 7, 10],
699+
... 'year': [2012, 2014, 2013, 2014],
700+
... 'sale':[55, 40, 84, 31]})
701+
month sale year
702+
0 1 55 2012
703+
1 4 40 2014
704+
2 7 84 2013
705+
3 10 31 2014
706+
707+
Set the index to become the 'month' column:
708+
709+
>>> df.set_index('month')
710+
sale year
711+
month
712+
1 55 2012
713+
4 40 2014
714+
7 84 2013
715+
10 31 2014
716+
717+
Create a multi-index using columns 'year' and 'month':
718+
719+
>>> df.set_index(['year', 'month'])
720+
sale
721+
year month
722+
2012 1 55
723+
2014 4 40
724+
2013 7 84
725+
2014 10 31
726+
727+
Create a multi-index using a set of values and a column:
728+
729+
>>> df.set_index([[1, 2, 3, 4], 'year'])
730+
month sale
731+
year
732+
1 2012 1 55
733+
2 2014 4 40
734+
3 2013 7 84
735+
4 2014 10 31
736+
"""
737+
from pandas import Series
738+
739+
inplace = validate_bool_kwarg(inplace, 'inplace')
740+
if not isinstance(keys, list):
741+
keys = [keys]
742+
743+
if inplace:
744+
obj = self
745+
else:
746+
obj = self.copy()
747+
748+
arrays = []
749+
names = []
750+
if append:
751+
names = [x for x in self.index.names]
752+
if isinstance(self.index, MultiIndex):
753+
for i in range(self.index.nlevels):
754+
arrays.append(self.index._get_level_values(i))
755+
else:
756+
arrays.append(self.index)
757+
758+
to_remove = []
759+
for col in keys:
760+
if isinstance(col, MultiIndex):
761+
# append all but the last column so we don't have to modify
762+
# the end of this loop
763+
for n in range(col.nlevels - 1):
764+
arrays.append(col._get_level_values(n))
765+
766+
level = col._get_level_values(col.nlevels - 1)
767+
names.extend(col.names)
768+
elif isinstance(col, Series):
769+
level = col._values
770+
names.append(col.name)
771+
elif isinstance(col, Index):
772+
level = col
773+
names.append(col.name)
774+
elif isinstance(col, (list, np.ndarray)):
775+
level = col
776+
names.append(None)
777+
elif isinstance(obj, Series):
778+
# col may not be a column label for Series case
779+
raise ValueError('asdf')
780+
else:
781+
level = obj[col]._values
782+
names.append(col)
783+
if drop:
784+
to_remove.append(col)
785+
arrays.append(level)
786+
787+
index = ensure_index_from_sequences(arrays, names)
788+
789+
if verify_integrity and not index.is_unique:
790+
duplicates = index[index.duplicated()].unique()
791+
raise ValueError('Index has duplicate keys: {dup}'.format(
792+
dup=duplicates))
793+
794+
for c in to_remove:
795+
del obj[c]
796+
797+
# clear up memory usage
798+
index._cleanup()
799+
800+
obj.index = index
801+
802+
if not inplace:
803+
return obj
804+
669805
@Appender(_shared_docs['transpose'] % _shared_doc_kwargs)
670806
def transpose(self, *args, **kwargs):
671807

pandas/core/series.py

+30
Original file line numberDiff line numberDiff line change
@@ -1094,6 +1094,36 @@ def _set_value(self, label, value, takeable=False):
10941094
return self
10951095
_set_value.__doc__ = set_value.__doc__
10961096

1097+
def set_index(self, arrays, append=False, inplace=False,
1098+
verify_integrity=False):
1099+
"""
1100+
Set the Series index (row labels) using one or more arrays.
1101+
By default yields a new object.
1102+
1103+
Parameters
1104+
----------
1105+
arrays : array or list of arrays
1106+
append : boolean, default False
1107+
Whether to append the arrays to the existing index
1108+
inplace : boolean, default False
1109+
Modify the Series in place (do not create a new object)
1110+
verify_integrity : boolean, default False
1111+
Check the new index for duplicates. Otherwise defer the check until
1112+
necessary. Setting to False will improve the performance of this
1113+
method
1114+
1115+
Returns
1116+
-------
1117+
reindexed : Series if inplace is False, else None
1118+
1119+
Examples
1120+
--------
1121+
>>> ...
1122+
"""
1123+
return super(Series, self).set_index(keys=arrays, drop=False,
1124+
append=append, inplace=inplace,
1125+
verify_integrity=verify_integrity)
1126+
10971127
def reset_index(self, level=None, drop=False, name=None, inplace=False):
10981128
"""
10991129
Generate a new DataFrame or Series with the index reset.

pandas/tests/frame/test_indexing.py

+3
Original file line numberDiff line numberDiff line change
@@ -2320,6 +2320,9 @@ def test_at_time_between_time_datetimeindex(self):
23202320
result.loc[bkey] = df.iloc[binds]
23212321
assert_frame_equal(result, df)
23222322

2323+
def test_set_index(self):
2324+
pytest.skip('TODO')
2325+
23232326
def test_xs(self):
23242327
idx = self.frame.index[5]
23252328
xs = self.frame.xs(idx)

pandas/tests/series/test_alter_axes.py

+4-12
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,11 @@
2020

2121
class TestSeriesAlterAxes(TestData):
2222

23-
def test_setindex(self):
24-
# wrong type
25-
series = self.series.copy()
26-
pytest.raises(TypeError, setattr, series, 'index', None)
27-
28-
# wrong length
29-
series = self.series.copy()
30-
pytest.raises(Exception, setattr, series, 'index',
31-
np.arange(len(series) - 1))
23+
def test_set_index(self):
24+
# TODO!
3225

33-
# works
3426
series = self.series.copy()
35-
series.index = np.arange(len(series))
27+
series = series.set_index(np.arange(len(series)))
3628
assert isinstance(series.index, Index)
3729

3830
def test_rename(self):
@@ -116,7 +108,7 @@ def test_set_index_makes_timeseries(self):
116108
idx = tm.makeDateIndex(10)
117109

118110
s = Series(lrange(10))
119-
s.index = idx
111+
s = s.set_index(idx)
120112
assert s.index.is_all_dates
121113

122114
def test_reset_index(self):

0 commit comments

Comments
 (0)