83
83
from pandas .core .accessor import CachedAccessor
84
84
from pandas .core .arrays import Categorical , ExtensionArray
85
85
from pandas .core .config import get_option
86
+
86
87
from pandas .core .generic import NDFrame , _shared_docs
87
88
from pandas .core .index import (Index , MultiIndex , ensure_index ,
88
89
ensure_index_from_sequences )
@@ -3923,45 +3924,56 @@ def shift(self, periods=1, freq=None, axis=0):
3923
3924
def set_index (self , keys , drop = True , append = False , inplace = False ,
3924
3925
verify_integrity = False ):
3925
3926
"""
3926
- Set the DataFrame index (row labels) using one or more existing
3927
- columns. By default yields a new object.
3927
+ Set the DataFrame index (row labels) using one or more columns.
3928
3928
3929
3929
Parameters
3930
3930
----------
3931
3931
keys : column label or list of column labels / arrays
3932
+ Either a column label, Series, Index, MultiIndex, list or
3933
+ np.ndarray, or a list containing only column labels, Series, Index,
3934
+ MultiIndex, lists or np.ndarrays.
3932
3935
drop : boolean, default True
3933
- Delete columns to be used as the new index
3936
+ Delete columns to be used as the new index.
3934
3937
append : boolean, default False
3935
- Whether to append columns to existing index
3938
+ Whether to append columns to existing index.
3936
3939
inplace : boolean, default False
3937
- Modify the DataFrame in place (do not create a new object)
3940
+ Modify the DataFrame in place (do not create a new object).
3938
3941
verify_integrity : boolean, default False
3939
3942
Check the new index for duplicates. Otherwise defer the check until
3940
3943
necessary. Setting to False will improve the performance of this
3941
- method
3944
+ method.
3945
+
3946
+ Returns
3947
+ -------
3948
+ reindexed : DataFrame if inplace is False, else None
3949
+
3950
+ See Also
3951
+ --------
3952
+ Series.set_index : Corresponding method for Series.
3942
3953
3943
3954
Examples
3944
3955
--------
3945
3956
>>> df = pd.DataFrame({'month': [1, 4, 7, 10],
3946
3957
... 'year': [2012, 2014, 2013, 2014],
3947
- ... 'sale':[55, 40, 84, 31]})
3948
- month sale year
3949
- 0 1 55 2012
3950
- 1 4 40 2014
3951
- 2 7 84 2013
3952
- 3 10 31 2014
3958
+ ... 'sale': [55, 40, 84, 31]})
3959
+ >>> df
3960
+ month year sale
3961
+ 0 1 2012 55
3962
+ 1 4 2014 40
3963
+ 2 7 2013 84
3964
+ 3 10 2014 31
3953
3965
3954
3966
Set the index to become the 'month' column:
3955
3967
3956
3968
>>> df.set_index('month')
3957
- sale year
3969
+ year sale
3958
3970
month
3959
- 1 55 2012
3960
- 4 40 2014
3961
- 7 84 2013
3962
- 10 31 2014
3971
+ 1 2012 55
3972
+ 4 2014 40
3973
+ 7 2013 84
3974
+ 10 2014 31
3963
3975
3964
- Create a multi-index using columns 'year' and 'month':
3976
+ Create a MultiIndex using columns 'year' and 'month':
3965
3977
3966
3978
>>> df.set_index(['year', 'month'])
3967
3979
sale
@@ -3971,7 +3983,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
3971
3983
2013 7 84
3972
3984
2014 10 31
3973
3985
3974
- Create a multi-index using a set of values and a column:
3986
+ Create a MultiIndex using a set of values and a column:
3975
3987
3976
3988
>>> df.set_index([[1, 2, 3, 4], 'year'])
3977
3989
month sale
@@ -3980,12 +3992,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
3980
3992
2 2014 4 40
3981
3993
3 2013 7 84
3982
3994
4 2014 10 31
3983
-
3984
- Returns
3985
- -------
3986
- dataframe : DataFrame
3987
3995
"""
3988
- inplace = validate_bool_kwarg (inplace , 'inplace' )
3989
3996
if not isinstance (keys , list ):
3990
3997
keys = [keys ]
3991
3998
@@ -4008,65 +4015,10 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
4008
4015
if missing :
4009
4016
raise KeyError ('{}' .format (missing ))
4010
4017
4011
- if inplace :
4012
- frame = self
4013
- else :
4014
- frame = self .copy ()
4015
-
4016
- arrays = []
4017
- names = []
4018
- if append :
4019
- names = [x for x in self .index .names ]
4020
- if isinstance (self .index , ABCMultiIndex ):
4021
- for i in range (self .index .nlevels ):
4022
- arrays .append (self .index ._get_level_values (i ))
4023
- else :
4024
- arrays .append (self .index )
4025
-
4026
- to_remove = []
4027
- for col in keys :
4028
- if isinstance (col , ABCMultiIndex ):
4029
- for n in range (col .nlevels ):
4030
- arrays .append (col ._get_level_values (n ))
4031
- names .extend (col .names )
4032
- elif isinstance (col , (ABCIndexClass , ABCSeries )):
4033
- # if Index then not MultiIndex (treated above)
4034
- arrays .append (col )
4035
- names .append (col .name )
4036
- elif isinstance (col , (list , np .ndarray )):
4037
- arrays .append (col )
4038
- names .append (None )
4039
- elif (is_list_like (col )
4040
- and not (isinstance (col , tuple ) and col in self )):
4041
- # all other list-likes (but avoid valid column keys)
4042
- col = list (col ) # ensure iterator do not get read twice etc.
4043
- arrays .append (col )
4044
- names .append (None )
4045
- # from here, col can only be a column label
4046
- else :
4047
- arrays .append (frame [col ]._values )
4048
- names .append (col )
4049
- if drop :
4050
- to_remove .append (col )
4051
-
4052
- index = ensure_index_from_sequences (arrays , names )
4053
-
4054
- if verify_integrity and not index .is_unique :
4055
- duplicates = index [index .duplicated ()].unique ()
4056
- raise ValueError ('Index has duplicate keys: {dup}' .format (
4057
- dup = duplicates ))
4058
-
4059
- # use set to handle duplicate column names gracefully in case of drop
4060
- for c in set (to_remove ):
4061
- del frame [c ]
4062
-
4063
- # clear up memory usage
4064
- index ._cleanup ()
4065
-
4066
- frame .index = index
4067
-
4068
- if not inplace :
4069
- return frame
4018
+ vi = verify_integrity
4019
+ return super (DataFrame , self ).set_index (keys = keys , drop = drop ,
4020
+ append = append , inplace = inplace ,
4021
+ verify_integrity = vi )
4070
4022
4071
4023
def reset_index (self , level = None , drop = False , inplace = False , col_level = 0 ,
4072
4024
col_fill = '' ):
0 commit comments