31
31
is_object_dtype ,
32
32
is_categorical_dtype ,
33
33
is_interval_dtype ,
34
+ is_period_dtype ,
34
35
is_bool ,
35
36
is_bool_dtype ,
36
37
is_signed_integer_dtype ,
37
38
is_unsigned_integer_dtype ,
38
39
is_integer_dtype , is_float_dtype ,
39
40
is_datetime64_any_dtype ,
41
+ is_datetime64tz_dtype ,
40
42
is_timedelta64_dtype ,
41
43
needs_i8_conversion ,
42
44
is_iterator , is_list_like ,
@@ -412,7 +414,7 @@ def _simple_new(cls, values, name=None, dtype=None, **kwargs):
412
414
values = np .array (values , copy = False )
413
415
if is_object_dtype (values ):
414
416
values = cls (values , name = name , dtype = dtype ,
415
- ** kwargs )._values
417
+ ** kwargs )._ndarray_values
416
418
417
419
result = object .__new__ (cls )
418
420
result ._data = values
@@ -594,6 +596,40 @@ def values(self):
594
596
""" return the underlying data as an ndarray """
595
597
return self ._data .view (np .ndarray )
596
598
599
+ @property
600
+ def _values (self ):
601
+ # type: () -> Union[ExtensionArray, Index]
602
+ # TODO(EA): remove index types as they become extension arrays
603
+ """The best array representation.
604
+
605
+ This is an ndarray, ExtensionArray, or Index subclass. This differs
606
+ from ``_ndarray_values``, which always returns an ndarray.
607
+
608
+ Both ``_values`` and ``_ndarray_values`` are consistent between
609
+ ``Series`` and ``Index``.
610
+
611
+ It may differ from the public '.values' method.
612
+
613
+ index | values | _values | _ndarray_values |
614
+ ----------------- | -------------- -| ----------- | --------------- |
615
+ CategoricalIndex | Categorical | Categorical | codes |
616
+ DatetimeIndex[tz] | ndarray[M8ns] | DTI[tz] | ndarray[M8ns] |
617
+
618
+ For the following, the ``._values`` is currently ``ndarray[object]``,
619
+ but will soon be an ``ExtensionArray``
620
+
621
+ index | values | _values | _ndarray_values |
622
+ ----------------- | --------------- | ------------ | --------------- |
623
+ PeriodIndex | ndarray[object] | ndarray[obj] | ndarray[int] |
624
+ IntervalIndex | ndarray[object] | ndarray[obj] | ndarray[object] |
625
+
626
+ See Also
627
+ --------
628
+ values
629
+ _ndarray_values
630
+ """
631
+ return self .values
632
+
597
633
def get_values(self):
    """Return the underlying data as an ndarray."""
    # Thin delegator: the public ``values`` property already produces
    # the ndarray view of the underlying data.
    return self.values
@@ -664,7 +700,7 @@ def ravel(self, order='C'):
664
700
--------
665
701
numpy.ndarray.ravel
666
702
"""
667
- return self ._values .ravel (order = order )
703
+ return self ._ndarray_values .ravel (order = order )
668
704
669
705
# construction helpers
670
706
@classmethod
@@ -1597,7 +1633,7 @@ def _constructor(self):
1597
1633
@cache_readonly
def _engine(self):
    # property, for now, slow to look up

    # Hand the engine a deferred accessor for the raw ndarray values
    # rather than materializing them eagerly.
    def _get_values():
        return self._ndarray_values

    return self._engine_type(_get_values, len(self))
1601
1637
1602
1638
def _validate_index_level (self , level ):
1603
1639
"""
@@ -2228,27 +2264,37 @@ def union(self, other):
2228
2264
other = other .astype ('O' )
2229
2265
return this .union (other )
2230
2266
2267
+ # TODO(EA): setops-refactor, clean all this up
2268
+ if is_period_dtype (self ) or is_datetime64tz_dtype (self ):
2269
+ lvals = self ._ndarray_values
2270
+ else :
2271
+ lvals = self ._values
2272
+ if is_period_dtype (other ) or is_datetime64tz_dtype (other ):
2273
+ rvals = other ._ndarray_values
2274
+ else :
2275
+ rvals = other ._values
2276
+
2231
2277
if self .is_monotonic and other .is_monotonic :
2232
2278
try :
2233
- result = self ._outer_indexer (self . _values , other . _values )[0 ]
2279
+ result = self ._outer_indexer (lvals , rvals )[0 ]
2234
2280
except TypeError :
2235
2281
# incomparable objects
2236
- result = list (self . _values )
2282
+ result = list (lvals )
2237
2283
2238
2284
# worth making this faster? a very unusual case
2239
- value_set = set (self . _values )
2240
- result .extend ([x for x in other . _values if x not in value_set ])
2285
+ value_set = set (lvals )
2286
+ result .extend ([x for x in rvals if x not in value_set ])
2241
2287
else :
2242
2288
indexer = self .get_indexer (other )
2243
2289
indexer , = (indexer == - 1 ).nonzero ()
2244
2290
2245
2291
if len (indexer ) > 0 :
2246
- other_diff = algos .take_nd (other . _values , indexer ,
2292
+ other_diff = algos .take_nd (rvals , indexer ,
2247
2293
allow_fill = False )
2248
- result = _concat ._concat_compat ((self . _values , other_diff ))
2294
+ result = _concat ._concat_compat ((lvals , other_diff ))
2249
2295
2250
2296
try :
2251
- self . _values [0 ] < other_diff [0 ]
2297
+ lvals [0 ] < other_diff [0 ]
2252
2298
except TypeError as e :
2253
2299
warnings .warn ("%s, sort order is undefined for "
2254
2300
"incomparable objects" % e , RuntimeWarning ,
@@ -2260,7 +2306,7 @@ def union(self, other):
2260
2306
result .sort ()
2261
2307
2262
2308
else :
2263
- result = self . _values
2309
+ result = lvals
2264
2310
2265
2311
try :
2266
2312
result = np .sort (result )
@@ -2311,20 +2357,30 @@ def intersection(self, other):
2311
2357
other = other .astype ('O' )
2312
2358
return this .intersection (other )
2313
2359
2360
+ # TODO(EA): setops-refactor, clean all this up
2361
+ if is_period_dtype (self ):
2362
+ lvals = self ._ndarray_values
2363
+ else :
2364
+ lvals = self ._values
2365
+ if is_period_dtype (other ):
2366
+ rvals = other ._ndarray_values
2367
+ else :
2368
+ rvals = other ._values
2369
+
2314
2370
if self .is_monotonic and other .is_monotonic :
2315
2371
try :
2316
- result = self ._inner_indexer (self . _values , other . _values )[0 ]
2372
+ result = self ._inner_indexer (lvals , rvals )[0 ]
2317
2373
return self ._wrap_union_result (other , result )
2318
2374
except TypeError :
2319
2375
pass
2320
2376
2321
2377
try :
2322
- indexer = Index (other . _values ).get_indexer (self . _values )
2378
+ indexer = Index (rvals ).get_indexer (lvals )
2323
2379
indexer = indexer .take ((indexer != - 1 ).nonzero ()[0 ])
2324
2380
except Exception :
2325
2381
# duplicates
2326
2382
indexer = algos .unique1d (
2327
- Index (other . _values ).get_indexer_non_unique (self . _values )[0 ])
2383
+ Index (rvals ).get_indexer_non_unique (lvals )[0 ])
2328
2384
indexer = indexer [indexer != - 1 ]
2329
2385
2330
2386
taken = other .take (indexer )
@@ -2700,7 +2756,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
2700
2756
raise ValueError ('limit argument only valid if doing pad, '
2701
2757
'backfill or nearest reindexing' )
2702
2758
2703
- indexer = self ._engine .get_indexer (target ._values )
2759
+ indexer = self ._engine .get_indexer (target ._ndarray_values )
2704
2760
2705
2761
return _ensure_platform_int (indexer )
2706
2762
@@ -2716,12 +2772,13 @@ def _get_fill_indexer(self, target, method, limit=None, tolerance=None):
2716
2772
if self .is_monotonic_increasing and target .is_monotonic_increasing :
2717
2773
method = (self ._engine .get_pad_indexer if method == 'pad' else
2718
2774
self ._engine .get_backfill_indexer )
2719
- indexer = method (target ._values , limit )
2775
+ indexer = method (target ._ndarray_values , limit )
2720
2776
else :
2721
2777
indexer = self ._get_fill_indexer_searchsorted (target , method ,
2722
2778
limit )
2723
2779
if tolerance is not None :
2724
- indexer = self ._filter_indexer_tolerance (target ._values , indexer ,
2780
+ indexer = self ._filter_indexer_tolerance (target ._ndarray_values ,
2781
+ indexer ,
2725
2782
tolerance )
2726
2783
return indexer
2727
2784
@@ -2812,7 +2869,7 @@ def get_indexer_non_unique(self, target):
2812
2869
self = Index (self .asi8 )
2813
2870
tgt_values = target .asi8
2814
2871
else :
2815
- tgt_values = target ._values
2872
+ tgt_values = target ._ndarray_values
2816
2873
2817
2874
indexer , missing = self ._engine .get_indexer_non_unique (tgt_values )
2818
2875
return _ensure_platform_int (indexer ), missing
@@ -3247,16 +3304,17 @@ def _join_multi(self, other, how, return_indexers=True):
3247
3304
def _join_non_unique (self , other , how = 'left' , return_indexers = False ):
3248
3305
from pandas .core .reshape .merge import _get_join_indexers
3249
3306
3250
- left_idx , right_idx = _get_join_indexers ([self ._values ],
3251
- [other ._values ], how = how ,
3307
+ left_idx , right_idx = _get_join_indexers ([self ._ndarray_values ],
3308
+ [other ._ndarray_values ],
3309
+ how = how ,
3252
3310
sort = True )
3253
3311
3254
3312
left_idx = _ensure_platform_int (left_idx )
3255
3313
right_idx = _ensure_platform_int (right_idx )
3256
3314
3257
- join_index = np .asarray (self ._values .take (left_idx ))
3315
+ join_index = np .asarray (self ._ndarray_values .take (left_idx ))
3258
3316
mask = left_idx == - 1
3259
- np .putmask (join_index , mask , other ._values .take (right_idx ))
3317
+ np .putmask (join_index , mask , other ._ndarray_values .take (right_idx ))
3260
3318
3261
3319
join_index = self ._wrap_joined_index (join_index , other )
3262
3320
@@ -3403,8 +3461,8 @@ def _join_monotonic(self, other, how='left', return_indexers=False):
3403
3461
else :
3404
3462
return ret_index
3405
3463
3406
- sv = self ._values
3407
- ov = other ._values
3464
+ sv = self ._ndarray_values
3465
+ ov = other ._ndarray_values
3408
3466
3409
3467
if self .is_unique and other .is_unique :
3410
3468
# We can perform much better than the general case
@@ -3756,7 +3814,7 @@ def insert(self, loc, item):
3756
3814
item = self ._na_value
3757
3815
3758
3816
_self = np .asarray (self )
3759
- item = self ._coerce_scalar_to_index (item )._values
3817
+ item = self ._coerce_scalar_to_index (item )._ndarray_values
3760
3818
idx = np .concatenate ((_self [:loc ], item , _self [loc :]))
3761
3819
return self ._shallow_copy_with_infer (idx )
3762
3820
0 commit comments