55
55
is_scalar ,
56
56
is_timedelta64_dtype ,
57
57
needs_i8_conversion ,
58
- pandas_dtype ,
59
58
)
60
59
from pandas .core .dtypes .dtypes import PandasDtype
61
60
from pandas .core .dtypes .generic import (
99
98
# --------------- #
100
99
# dtype access #
101
100
# --------------- #
102
- def _ensure_data (values : ArrayLike ) -> tuple [ np .ndarray , DtypeObj ] :
101
+ def _ensure_data (values : ArrayLike ) -> np .ndarray :
103
102
"""
104
103
routine to ensure that our data is of the correct
105
104
input dtype for lower-level routines
@@ -114,12 +113,11 @@ def _ensure_data(values: ArrayLike) -> tuple[np.ndarray, DtypeObj]:
114
113
115
114
Parameters
116
115
----------
117
- values : array-like
116
+ values : np.ndarray or ExtensionArray
118
117
119
118
Returns
120
119
-------
121
- values : ndarray
122
- pandas_dtype : np.dtype or ExtensionDtype
120
+ np.ndarray
123
121
"""
124
122
125
123
if not isinstance (values , ABCMultiIndex ):
@@ -128,55 +126,54 @@ def _ensure_data(values: ArrayLike) -> tuple[np.ndarray, DtypeObj]:
128
126
129
127
# we check some simple dtypes first
130
128
if is_object_dtype (values .dtype ):
131
- return ensure_object (np .asarray (values )), np . dtype ( "object" )
129
+ return ensure_object (np .asarray (values ))
132
130
133
131
elif is_bool_dtype (values .dtype ):
134
132
if isinstance (values , np .ndarray ):
135
133
# i.e. actually dtype == np.dtype("bool")
136
- return np .asarray (values ).view ("uint8" ), values . dtype
134
+ return np .asarray (values ).view ("uint8" )
137
135
else :
138
136
# i.e. all-bool Categorical, BooleanArray
139
137
try :
140
- return np .asarray (values ).astype ("uint8" , copy = False ), values . dtype
138
+ return np .asarray (values ).astype ("uint8" , copy = False )
141
139
except TypeError :
142
140
# GH#42107 we have pd.NAs present
143
- return np .asarray (values ), values . dtype
141
+ return np .asarray (values )
144
142
145
143
elif is_integer_dtype (values .dtype ):
146
- return np .asarray (values ), values . dtype
144
+ return np .asarray (values )
147
145
148
146
elif is_float_dtype (values .dtype ):
149
147
# Note: checking `values.dtype == "float128"` raises on Windows and 32bit
150
148
# error: Item "ExtensionDtype" of "Union[Any, ExtensionDtype, dtype[Any]]"
151
149
# has no attribute "itemsize"
152
150
if values .dtype .itemsize in [2 , 12 , 16 ]: # type: ignore[union-attr]
153
151
# we don't (yet) have float128 hashtable support
154
- return ensure_float64 (values ), values . dtype
155
- return np .asarray (values ), values . dtype
152
+ return ensure_float64 (values )
153
+ return np .asarray (values )
156
154
157
155
elif is_complex_dtype (values .dtype ):
158
156
# Incompatible return value type: `values` is annotated as ArrayLike, so
159
157
# to the type checker it may still be an ExtensionArray here, while this
160
158
# function is annotated to return np.ndarray
161
- return values , values . dtype # type: ignore[return-value]
159
+ return values # type: ignore[return-value]
162
160
163
161
# datetimelike
164
162
elif needs_i8_conversion (values .dtype ):
165
163
if isinstance (values , np .ndarray ):
166
164
values = sanitize_to_nanoseconds (values )
167
165
npvalues = values .view ("i8" )
168
166
npvalues = cast (np .ndarray , npvalues )
169
- return npvalues , values . dtype
167
+ return npvalues
170
168
171
169
elif is_categorical_dtype (values .dtype ):
172
170
values = cast ("Categorical" , values )
173
171
values = values .codes
174
- dtype = pandas_dtype ("category" )
175
- return values , dtype
172
+ return values
176
173
177
174
# we have failed, return object
178
175
values = np .asarray (values , dtype = object )
179
- return ensure_object (values ), np . dtype ( "object" )
176
+ return ensure_object (values )
180
177
181
178
182
179
def _reconstruct_data (
@@ -268,7 +265,7 @@ def _get_hashtable_algo(values: np.ndarray):
268
265
htable : HashTable subclass
269
266
values : ndarray
270
267
"""
271
- values , _ = _ensure_data (values )
268
+ values = _ensure_data (values )
272
269
273
270
ndtype = _check_object_for_strings (values )
274
271
htable = _hashtables [ndtype ]
@@ -279,7 +276,7 @@ def _get_values_for_rank(values: ArrayLike) -> np.ndarray:
279
276
if is_categorical_dtype (values ):
280
277
values = cast ("Categorical" , values )._values_for_rank ()
281
278
282
- values , _ = _ensure_data (values )
279
+ values = _ensure_data (values )
283
280
if values .dtype .kind in ["i" , "u" , "f" ]:
284
281
# rank_t includes only object, int64, uint64, float64
285
282
dtype = values .dtype .kind + "8"
@@ -747,7 +744,8 @@ def factorize(
747
744
codes , uniques = values .factorize (na_sentinel = na_sentinel )
748
745
dtype = original .dtype
749
746
else :
750
- values , dtype = _ensure_data (values )
747
+ dtype = values .dtype
748
+ values = _ensure_data (values )
751
749
na_value : Scalar
752
750
753
751
if original .dtype .kind in ["m" , "M" ]:
@@ -886,7 +884,7 @@ def value_counts_arraylike(values, dropna: bool):
886
884
"""
887
885
values = _ensure_arraylike (values )
888
886
original = values
889
- values , _ = _ensure_data (values )
887
+ values = _ensure_data (values )
890
888
891
889
# TODO: handle uint8
892
890
keys , counts = htable .value_count (values , dropna )
@@ -923,7 +921,7 @@ def duplicated(
923
921
-------
924
922
duplicated : ndarray[bool]
925
923
"""
926
- values , _ = _ensure_data (values )
924
+ values = _ensure_data (values )
927
925
return htable .duplicated (values , keep = keep )
928
926
929
927
@@ -959,7 +957,7 @@ def mode(values, dropna: bool = True) -> Series:
959
957
mask = values .isnull ()
960
958
values = values [~ mask ]
961
959
962
- values , _ = _ensure_data (values )
960
+ values = _ensure_data (values )
963
961
964
962
npresult = htable .mode (values , dropna = dropna )
965
963
try :
@@ -1261,7 +1259,8 @@ def compute(self, method: str) -> Series:
1261
1259
return dropped .sort_values (ascending = ascending ).head (n )
1262
1260
1263
1261
# fast method
1264
- arr , new_dtype = _ensure_data (dropped .values )
1262
+ new_dtype = dropped .dtype
1263
+ arr = _ensure_data (dropped .values )
1265
1264
if method == "nlargest" :
1266
1265
arr = - arr
1267
1266
if is_integer_dtype (new_dtype ):
0 commit comments