|
37 | 37 | from pandas.core.dtypes.cast import (
|
38 | 38 | construct_1d_object_array_from_listlike,
|
39 | 39 | infer_dtype_from_array,
|
| 40 | + sanitize_to_nanoseconds, |
40 | 41 | )
|
41 | 42 | from pandas.core.dtypes.common import (
|
42 | 43 | ensure_float64,
|
43 |
| - ensure_int64, |
44 | 44 | ensure_object,
|
45 | 45 | ensure_platform_int,
|
46 |
| - ensure_uint64, |
47 | 46 | is_array_like,
|
48 | 47 | is_bool_dtype,
|
49 | 48 | is_categorical_dtype,
|
50 | 49 | is_complex_dtype,
|
51 | 50 | is_datetime64_dtype,
|
52 |
| - is_datetime64_ns_dtype, |
53 | 51 | is_extension_array_dtype,
|
54 | 52 | is_float_dtype,
|
55 | 53 | is_integer,
|
56 | 54 | is_integer_dtype,
|
57 | 55 | is_list_like,
|
58 | 56 | is_numeric_dtype,
|
59 | 57 | is_object_dtype,
|
60 |
| - is_period_dtype, |
61 | 58 | is_scalar,
|
62 |
| - is_signed_integer_dtype, |
63 | 59 | is_timedelta64_dtype,
|
64 |
| - is_unsigned_integer_dtype, |
65 | 60 | needs_i8_conversion,
|
66 | 61 | pandas_dtype,
|
67 | 62 | )
|
@@ -134,71 +129,49 @@ def _ensure_data(values: ArrayLike) -> tuple[np.ndarray, DtypeObj]:
|
134 | 129 | values = extract_array(values, extract_numpy=True)
|
135 | 130 |
|
136 | 131 | # we check some simple dtypes first
|
137 |
| - if is_object_dtype(values): |
| 132 | + if is_object_dtype(values.dtype): |
138 | 133 | return ensure_object(np.asarray(values)), np.dtype("object")
|
139 | 134 |
|
140 |
| - try: |
141 |
| - if is_bool_dtype(values): |
142 |
| - # we are actually coercing to uint64 |
143 |
| - # until our algos support uint8 directly (see TODO) |
144 |
| - return np.asarray(values).astype("uint64"), np.dtype("bool") |
145 |
| - elif is_signed_integer_dtype(values): |
146 |
| - return ensure_int64(values), values.dtype |
147 |
| - elif is_unsigned_integer_dtype(values): |
148 |
| - return ensure_uint64(values), values.dtype |
149 |
| - elif is_float_dtype(values): |
| 135 | + elif is_bool_dtype(values.dtype): |
| 136 | + if isinstance(values, np.ndarray): |
| 137 | + # i.e. actually dtype == np.dtype("bool") |
| 138 | + return np.asarray(values).view("uint8"), values.dtype |
| 139 | + else: |
| 140 | + # i.e. all-bool Categorical, BooleanArray |
| 141 | + return np.asarray(values).astype("uint8", copy=False), values.dtype |
| 142 | + |
| 143 | + elif is_integer_dtype(values.dtype): |
| 144 | + return np.asarray(values), values.dtype |
| 145 | + |
| 146 | + elif is_float_dtype(values.dtype): |
| 147 | + # Note: checking `values.dtype == "float128"` raises on Windows and 32bit |
| 148 | + # error: Item "ExtensionDtype" of "Union[Any, ExtensionDtype, dtype[Any]]" |
| 149 | + # has no attribute "itemsize" |
| 150 | + if values.dtype.itemsize in [2, 12, 16]: # type: ignore[union-attr] |
| 151 | + # we don't (yet) have float16/float96/float128 hashtable support |
150 | 152 | return ensure_float64(values), values.dtype
|
151 |
| - elif is_complex_dtype(values): |
152 |
| - |
153 |
| - # ignore the fact that we are casting to float |
154 |
| - # which discards complex parts |
155 |
| - with catch_warnings(): |
156 |
| - simplefilter("ignore", np.ComplexWarning) |
157 |
| - values = ensure_float64(values) |
158 |
| - return values, np.dtype("float64") |
| 153 | + return np.asarray(values), values.dtype |
159 | 154 |
|
160 |
| - except (TypeError, ValueError, OverflowError): |
161 |
| - # if we are trying to coerce to a dtype |
162 |
| - # and it is incompatible this will fall through to here |
163 |
| - return ensure_object(values), np.dtype("object") |
| 155 | + elif is_complex_dtype(values.dtype): |
| 156 | + # ignore the fact that we are casting to float |
| 157 | + # which discards complex parts |
| 158 | + with catch_warnings(): |
| 159 | + simplefilter("ignore", np.ComplexWarning) |
| 160 | + values = ensure_float64(values) |
| 161 | + return values, np.dtype("float64") |
164 | 162 |
|
165 | 163 | # datetimelike
|
166 |
| - if needs_i8_conversion(values.dtype): |
167 |
| - if is_period_dtype(values.dtype): |
168 |
| - from pandas import PeriodIndex |
169 |
| - |
170 |
| - values = PeriodIndex(values)._data |
171 |
| - elif is_timedelta64_dtype(values.dtype): |
172 |
| - from pandas import TimedeltaIndex |
173 |
| - |
174 |
| - values = TimedeltaIndex(values)._data |
175 |
| - else: |
176 |
| - # Datetime |
177 |
| - if values.ndim > 1 and is_datetime64_ns_dtype(values.dtype): |
178 |
| - # Avoid calling the DatetimeIndex constructor as it is 1D only |
179 |
| - # Note: this is reached by DataFrame.rank calls; see GH#27027 |
180 |
| - # TODO(EA2D): special case not needed with 2D EAs |
181 |
| - asi8 = values.view("i8") |
182 |
| - dtype = values.dtype |
183 |
| - # error: Incompatible return value type (got "Tuple[Any, |
184 |
| - # Union[dtype, ExtensionDtype, None]]", expected |
185 |
| - # "Tuple[ndarray, Union[dtype, ExtensionDtype]]") |
186 |
| - return asi8, dtype # type: ignore[return-value] |
187 |
| - |
188 |
| - from pandas import DatetimeIndex |
189 |
| - |
190 |
| - values = DatetimeIndex(values)._data |
191 |
| - dtype = values.dtype |
192 |
| - return values.asi8, dtype |
| 164 | + elif needs_i8_conversion(values.dtype): |
| 165 | + if isinstance(values, np.ndarray): |
| 166 | + values = sanitize_to_nanoseconds(values) |
| 167 | + npvalues = values.view("i8") |
| 168 | + npvalues = cast(np.ndarray, npvalues) |
| 169 | + return npvalues, values.dtype |
193 | 170 |
|
194 | 171 | elif is_categorical_dtype(values.dtype):
|
195 | 172 | values = cast("Categorical", values)
|
196 | 173 | values = values.codes
|
197 | 174 | dtype = pandas_dtype("category")
|
198 |
| - |
199 |
| - # we are actually coercing to int64 |
200 |
| - # until our algos support int* directly (not all do) |
201 |
| - values = ensure_int64(values) |
202 | 175 | return values, dtype
|
203 | 176 |
|
204 | 177 | # we have failed, return object
|
@@ -268,8 +241,15 @@ def _ensure_arraylike(values) -> ArrayLike:
|
268 | 241 |
|
269 | 242 | _hashtables = {
|
270 | 243 | "float64": htable.Float64HashTable,
|
| 244 | + "float32": htable.Float32HashTable, |
271 | 245 | "uint64": htable.UInt64HashTable,
|
| 246 | + "uint32": htable.UInt32HashTable, |
| 247 | + "uint16": htable.UInt16HashTable, |
| 248 | + "uint8": htable.UInt8HashTable, |
272 | 249 | "int64": htable.Int64HashTable,
|
| 250 | + "int32": htable.Int32HashTable, |
| 251 | + "int16": htable.Int16HashTable, |
| 252 | + "int8": htable.Int8HashTable, |
273 | 253 | "string": htable.StringHashTable,
|
274 | 254 | "object": htable.PyObjectHashTable,
|
275 | 255 | }
|
@@ -298,6 +278,10 @@ def _get_values_for_rank(values: ArrayLike) -> np.ndarray:
|
298 | 278 | values = cast("Categorical", values)._values_for_rank()
|
299 | 279 |
|
300 | 280 | values, _ = _ensure_data(values)
|
| 281 | + if values.dtype.kind in ["i", "u", "f"]: |
| 282 | + # rank_t includes only object, int64, uint64, float64 |
| 283 | + dtype = values.dtype.kind + "8" |
| 284 | + values = values.astype(dtype, copy=False) |
301 | 285 | return values
|
302 | 286 |
|
303 | 287 |
|
|
0 commit comments