49
49
)
50
50
from pandas .core .dtypes .generic import (
51
51
ABCExtensionArray ,
52
- ABCIndex ,
53
52
ABCIndexClass ,
54
53
ABCMultiIndex ,
55
54
ABCSeries ,
69
68
# dtype access #
70
69
# --------------- #
71
70
def _ensure_data (
72
- values , dtype : Optional [DtypeObj ] = None
71
+ values : ArrayLike , dtype : Optional [DtypeObj ] = None
73
72
) -> Tuple [np .ndarray , DtypeObj ]:
74
73
"""
75
74
routine to ensure that our data is of the correct
@@ -95,6 +94,12 @@ def _ensure_data(
95
94
pandas_dtype : np.dtype or ExtensionDtype
96
95
"""
97
96
97
+ if dtype is not None :
98
+ # We only have non-None dtype when called from `isin`, and
99
+ # both Datetimelike and Categorical dispatch before getting here.
100
+ assert not needs_i8_conversion (dtype )
101
+ assert not is_categorical_dtype (dtype )
102
+
98
103
if not isinstance (values , ABCMultiIndex ):
99
104
# extract_array would raise
100
105
values = extract_array (values , extract_numpy = True )
@@ -131,21 +136,20 @@ def _ensure_data(
131
136
return ensure_object (values ), np .dtype ("object" )
132
137
133
138
# datetimelike
134
- vals_dtype = getattr (values , "dtype" , None )
135
- if needs_i8_conversion (vals_dtype ) or needs_i8_conversion (dtype ):
136
- if is_period_dtype (vals_dtype ) or is_period_dtype (dtype ):
139
+ if needs_i8_conversion (values .dtype ) or needs_i8_conversion (dtype ):
140
+ if is_period_dtype (values .dtype ) or is_period_dtype (dtype ):
137
141
from pandas import PeriodIndex
138
142
139
- values = PeriodIndex (values )
143
+ values = PeriodIndex (values ). _data
140
144
dtype = values .dtype
141
- elif is_timedelta64_dtype (vals_dtype ) or is_timedelta64_dtype (dtype ):
145
+ elif is_timedelta64_dtype (values . dtype ) or is_timedelta64_dtype (dtype ):
142
146
from pandas import TimedeltaIndex
143
147
144
- values = TimedeltaIndex (values )
148
+ values = TimedeltaIndex (values ). _data
145
149
dtype = values .dtype
146
150
else :
147
151
# Datetime
148
- if values .ndim > 1 and is_datetime64_ns_dtype (vals_dtype ):
152
+ if values .ndim > 1 and is_datetime64_ns_dtype (values . dtype ):
149
153
# Avoid calling the DatetimeIndex constructor as it is 1D only
150
154
# Note: this is reached by DataFrame.rank calls GH#27027
151
155
# TODO(EA2D): special case not needed with 2D EAs
@@ -155,12 +159,12 @@ def _ensure_data(
155
159
156
160
from pandas import DatetimeIndex
157
161
158
- values = DatetimeIndex (values )
162
+ values = DatetimeIndex (values ). _data
159
163
dtype = values .dtype
160
164
161
165
return values .asi8 , dtype
162
166
163
- elif is_categorical_dtype (vals_dtype ) and (
167
+ elif is_categorical_dtype (values . dtype ) and (
164
168
is_categorical_dtype (dtype ) or dtype is None
165
169
):
166
170
values = values .codes
@@ -237,11 +241,11 @@ def _ensure_arraylike(values):
237
241
}
238
242
239
243
240
- def _get_hashtable_algo (values ):
244
+ def _get_hashtable_algo (values : np . ndarray ):
241
245
"""
242
246
Parameters
243
247
----------
244
- values : arraylike
248
+ values : np.ndarray
245
249
246
250
Returns
247
251
-------
@@ -255,15 +259,15 @@ def _get_hashtable_algo(values):
255
259
return htable , values
256
260
257
261
258
- def _get_values_for_rank (values ):
262
+ def _get_values_for_rank (values : ArrayLike ):
259
263
if is_categorical_dtype (values ):
260
- values = values ._values_for_rank ()
264
+ values = cast ( "Categorical" , values ) ._values_for_rank ()
261
265
262
266
values , _ = _ensure_data (values )
263
267
return values
264
268
265
269
266
- def get_data_algo (values ):
270
+ def get_data_algo (values : ArrayLike ):
267
271
values = _get_values_for_rank (values )
268
272
269
273
ndtype = _check_object_for_strings (values )
@@ -421,20 +425,28 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
421
425
f"to isin(), you passed a [{ type (values ).__name__ } ]"
422
426
)
423
427
424
- if not isinstance (values , (ABCIndex , ABCSeries , ABCExtensionArray , np .ndarray )):
428
+ if not isinstance (
429
+ values , (ABCIndexClass , ABCSeries , ABCExtensionArray , np .ndarray )
430
+ ):
425
431
values = construct_1d_object_array_from_listlike (list (values ))
426
432
# TODO: could use ensure_arraylike here
433
+ elif isinstance (values , ABCMultiIndex ):
434
+ # Avoid raising in extract_array
435
+ values = np .array (values )
427
436
428
437
comps = _ensure_arraylike (comps )
429
438
comps = extract_array (comps , extract_numpy = True )
430
- if is_categorical_dtype (comps ):
439
+ if is_categorical_dtype (comps . dtype ):
431
440
# TODO(extension)
432
441
# handle categoricals
433
442
return cast ("Categorical" , comps ).isin (values )
434
443
435
- if needs_i8_conversion (comps ):
444
+ if needs_i8_conversion (comps . dtype ):
436
445
# Dispatch to DatetimeLikeArrayMixin.isin
437
446
return array (comps ).isin (values )
447
+ elif needs_i8_conversion (values .dtype ) and not is_object_dtype (comps .dtype ):
448
+ # e.g. comps are integers and values are datetime64s
449
+ return np .zeros (comps .shape , dtype = bool )
438
450
439
451
comps , dtype = _ensure_data (comps )
440
452
values , _ = _ensure_data (values , dtype = dtype )
@@ -474,7 +486,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
474
486
475
487
476
488
def factorize_array (
477
- values , na_sentinel : int = - 1 , size_hint = None , na_value = None , mask = None
489
+ values : np . ndarray , na_sentinel : int = - 1 , size_hint = None , na_value = None , mask = None
478
490
) -> Tuple [np .ndarray , np .ndarray ]:
479
491
"""
480
492
Factorize an array-like to codes and uniques.
@@ -838,7 +850,7 @@ def value_counts_arraylike(values, dropna: bool):
838
850
return keys , counts
839
851
840
852
841
- def duplicated (values , keep = "first" ) -> np .ndarray :
853
+ def duplicated (values : ArrayLike , keep : str = "first" ) -> np .ndarray :
842
854
"""
843
855
Return boolean ndarray denoting duplicate values.
844
856
0 commit comments