15
15
from pandas .compat import range , zip , lrange , lzip , u , map
16
16
from pandas import compat
17
17
from pandas .core import algorithms
18
- from pandas .core .base import PandasObject , FrozenList , FrozenNDArray , IndexOpsMixin , _shared_docs , PandasDelegate
18
+ from pandas .core .base import PandasObject , FrozenList , FrozenNDArray , IndexOpsMixin , PandasDelegate
19
+ import pandas .core .base as base
19
20
from pandas .util .decorators import (Appender , Substitution , cache_readonly ,
20
21
deprecate , deprecate_kwarg )
21
22
import pandas .core .common as com
29
30
from pandas .io .common import PerformanceWarning
30
31
31
32
32
-
33
-
34
33
# simplify
35
34
default_pprint = lambda x , max_seq_items = None : com .pprint_thing (x ,
36
35
escape_chars = ('\t ' , '\r ' , '\n ' ),
45
44
46
45
_index_doc_kwargs = dict (klass = 'Index' , inplace = '' ,
47
46
duplicated = 'np.array' )
47
+ _index_shared_docs = dict ()
48
48
49
49
50
50
def _try_get_item (x ):
@@ -108,6 +108,7 @@ class Index(IndexOpsMixin, PandasObject):
108
108
_allow_datetime_index_ops = False
109
109
_allow_period_index_ops = False
110
110
_is_numeric_dtype = False
111
+ _can_hold_na = True
111
112
112
113
_engine_type = _index .ObjectEngine
113
114
@@ -1236,6 +1237,43 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None):
1236
1237
taken = self .values .take (indices )
1237
1238
return self ._shallow_copy (taken )
1238
1239
1240
@cache_readonly
def _isnan(self):
    """ return a boolean mask marking which values are missing """
    if not self._can_hold_na:
        # callers are expected to check hasnans first, so this branch is
        # only a safety net: an all-False mask of the right length
        return np.zeros(len(self), dtype=np.bool_)
    return isnull(self)
1250
+
1251
@cache_readonly
def _nan_idxs(self):
    """ return the integer positions of the missing values """
    if not self._can_hold_na:
        # nothing can be missing: empty int64 position array
        return np.array([], dtype=np.int64)
    # nonzero() is unpacked as a 1-tuple; its single element holds the
    # positions
    (positions,) = self._isnan.nonzero()
    return positions
1258
+
1259
@cache_readonly
def hasnans(self):
    """ return True if any value is missing; enables various perf speedups """
    # types that cannot hold NA short-circuit without building the mask
    return self._isnan.any() if self._can_hold_na else False
1266
+
1267
+ def _convert_for_op (self , value ):
1268
+ """ Convert value to be insertable to ndarray """
1269
+ return value
1270
+
1271
def _assert_can_do_op(self, value):
    """ Raise TypeError unless value is a scalar (as judged by lib.isscalar) """
    if lib.isscalar(value):
        return
    raise TypeError(
        "'value' must be a scalar, passed: {0}".format(type(value).__name__))
1276
+
1239
1277
def putmask (self , mask , value ):
1240
1278
"""
1241
1279
return a new Index of the values set with the mask
@@ -1245,8 +1283,12 @@ def putmask(self, mask, value):
1245
1283
numpy.ndarray.putmask
1246
1284
"""
1247
1285
values = self .values .copy ()
1248
- np .putmask (values , mask , value )
1249
- return self ._shallow_copy (values )
1286
+ try :
1287
+ np .putmask (values , mask , self ._convert_for_op (value ))
1288
+ return self ._shallow_copy (values )
1289
+ except (ValueError , TypeError ):
1290
+ # coerces to object
1291
+ return self .astype (object ).putmask (mask , value )
1250
1292
1251
1293
def format (self , name = False , formatter = None , ** kwargs ):
1252
1294
"""
@@ -2766,15 +2808,45 @@ def drop(self, labels, errors='raise'):
2766
2808
return self .delete (indexer )
2767
2809
2768
2810
@deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'})
@Appender(base._shared_docs['drop_duplicates'] % _index_doc_kwargs)
def drop_duplicates(self, keep='first'):
    # pure delegation to the parent implementation; the docstring comes
    # from the shared template filled in with the Index kwargs
    parent = super(Index, self)
    return parent.drop_duplicates(keep=keep)
2772
2814
2773
2815
@deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'})
@Appender(base._shared_docs['duplicated'] % _index_doc_kwargs)
def duplicated(self, keep='first'):
    # pure delegation to the parent implementation; the docstring comes
    # from the shared template filled in with the Index kwargs
    parent = super(Index, self)
    return parent.duplicated(keep=keep)
2777
2819
2820
# Shared docstring for the fillna implementations of Index and its
# subclasses; attached to each method with Appender.
_index_shared_docs['fillna'] = """
        Fill NA/NaN values with the specified value

        Parameters
        ----------
        value : scalar
            Scalar value to use to fill holes (e.g. 0).
            This value cannot be a list-like.
        downcast : dict, default is None
            a dict of item->dtype of what to downcast if possible,
            or the string 'infer' which will try to downcast to an appropriate
            equal type (e.g. float64 to int64 if possible)

        Returns
        -------
        filled : Index
        """
2837
+
2838
@Appender(_index_shared_docs['fillna'])
def fillna(self, value=None, downcast=None):
    # only scalar fill values are accepted (TypeError otherwise)
    self._assert_can_do_op(value)
    if self.hasnans:
        if downcast is not None:
            # downcasting is not implemented here; previously this path
            # silently discarded the filled result and returned the
            # index with its missing values intact
            raise NotImplementedError(
                "{0}.fillna does not support 'downcast' argument values "
                "other than 'None'.".format(type(self).__name__))
        result = self.putmask(self._isnan, value)
        # no need to care metadata other than name
        # NOTE(review): the original comment went on "because it can't
        # have freq if" and was cut off; presumably "... it has NaTs"
        return Index(result, name=self.name)

    # nothing to fill: hand back a shallow copy
    return self._shallow_copy()
2849
+
2778
2850
def _evaluate_with_timedelta_like (self , other , op , opstr ):
2779
2851
raise TypeError ("can only perform ops with timedelta like values" )
2780
2852
@@ -3200,6 +3272,16 @@ def __array__(self, dtype=None):
3200
3272
""" the array interface, return my values """
3201
3273
return np .array (self ._data , dtype = dtype )
3202
3274
3275
@cache_readonly
def _isnan(self):
    """ return a boolean mask of the entries whose code is -1 """
    codes = self._data.codes
    return codes == -1
3279
+
3280
@Appender(_index_shared_docs['fillna'])
def fillna(self, value, downcast=None):
    # only scalar fill values are accepted (TypeError otherwise)
    self._assert_can_do_op(value)
    # the fill itself is delegated to the underlying data, which is then
    # re-wrapped under the same name
    filled = self._data.fillna(value)
    return CategoricalIndex(filled, name=self.name)
3284
+
3203
3285
def argsort(self, *args, **kwargs):
    """ forward to the argsort of the underlying values, passing all
    arguments through """
    underlying = self.values
    return underlying.argsort(*args, **kwargs)
3205
3287
@@ -3214,7 +3296,7 @@ def is_unique(self):
3214
3296
return not self .duplicated ().any ()
3215
3297
3216
3298
@deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'})
@Appender(base._shared_docs['duplicated'] % _index_doc_kwargs)
def duplicated(self, keep='first'):
    from pandas.hashtable import duplicated_int64
    # duplicates are detected on the codes, cast to int64 for the
    # int64 routine
    codes_i8 = self.codes.astype('i8')
    return duplicated_int64(codes_i8, keep)
@@ -3612,6 +3694,8 @@ class Int64Index(NumericIndex):
3612
3694
_inner_indexer = _algos .inner_join_indexer_int64
3613
3695
_outer_indexer = _algos .outer_join_indexer_int64
3614
3696
3697
+ _can_hold_na = False
3698
+
3615
3699
_engine_type = _index .Int64Engine
3616
3700
3617
3701
def __new__ (cls , data = None , dtype = None , copy = False , name = None , fastpath = False , ** kwargs ):
@@ -3646,11 +3730,6 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, *
3646
3730
def inferred_type (self ):
3647
3731
return 'integer'
3648
3732
3649
- @cache_readonly
3650
- def hasnans (self ):
3651
- # by definition
3652
- return False
3653
-
3654
3733
@property
3655
3734
def asi8 (self ):
3656
3735
# do not cache or you'll create a memory leak
@@ -3872,19 +3951,6 @@ def is_all_dates(self):
3872
3951
"""
3873
3952
return False
3874
3953
3875
- @cache_readonly
3876
- def _nan_idxs (self ):
3877
- w , = self ._isnan .nonzero ()
3878
- return w
3879
-
3880
- @cache_readonly
3881
- def _isnan (self ):
3882
- return np .isnan (self .values )
3883
-
3884
- @cache_readonly
3885
- def hasnans (self ):
3886
- return self ._isnan .any ()
3887
-
3888
3954
@cache_readonly
def is_unique(self):
    # unique only if the parent check passes AND fewer than two
    # positions are NaN
    parent_unique = super(Float64Index, self).is_unique
    if not parent_unique:
        return parent_unique
    return self._nan_idxs.size < 2
@@ -4409,7 +4475,7 @@ def is_unique(self):
4409
4475
return not self .duplicated ().any ()
4410
4476
4411
4477
@deprecate_kwarg ('take_last' , 'keep' , mapping = {True : 'last' , False : 'first' })
4412
- @Appender (_shared_docs ['duplicated' ] % _index_doc_kwargs )
4478
+ @Appender (base . _shared_docs ['duplicated' ] % _index_doc_kwargs )
4413
4479
def duplicated (self , keep = 'first' ):
4414
4480
from pandas .core .groupby import get_group_index
4415
4481
from pandas .hashtable import duplicated_int64
@@ -4419,6 +4485,11 @@ def duplicated(self, keep='first'):
4419
4485
4420
4486
return duplicated_int64 (ids , keep )
4421
4487
4488
@Appender(_index_shared_docs['fillna'])
def fillna(self, value=None, downcast=None):
    # isnull is not implemented for MultiIndex, so filling is rejected
    # outright regardless of the arguments
    msg = 'isnull is not defined for MultiIndex'
    raise NotImplementedError(msg)
4492
+
4422
4493
def get_value (self , series , key ):
4423
4494
# somewhat broken encapsulation
4424
4495
from pandas .core .indexing import maybe_droplevels
0 commit comments