|
10 | 10 | datetime,
|
11 | 11 | timedelta,
|
12 | 12 | )
|
| 13 | +import inspect |
13 | 14 | from typing import (
|
14 | 15 | TYPE_CHECKING,
|
15 | 16 | Any,
|
|
87 | 88 | is_timedelta64_dtype,
|
88 | 89 | is_timedelta64_ns_dtype,
|
89 | 90 | is_unsigned_integer_dtype,
|
| 91 | + pandas_dtype, |
90 | 92 | )
|
91 | 93 | from pandas.core.dtypes.dtypes import (
|
92 | 94 | DatetimeTZDtype,
|
@@ -1227,6 +1229,107 @@ def astype_nansafe(
|
1227 | 1229 | return arr.astype(dtype, copy=copy)
|
1228 | 1230 |
|
1229 | 1231 |
|
def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False) -> ArrayLike:
    """
    Convert an ndarray or ExtensionArray to the requested dtype.

    Parameters
    ----------
    values : ndarray or ExtensionArray
        Array to convert.
    dtype : dtype object
        Target dtype.
    copy : bool, default False
        Forwarded to the underlying cast. When the dtypes already match it
        decides whether a copy or the input itself is returned.

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    TypeError
        If an array of kind "m" or "M" is cast to a numpy integer dtype
        whose itemsize is not 8.
    """
    source_is_datetimelike = values.dtype.kind in ("m", "M")
    if source_is_datetimelike and dtype.kind in ("i", "u"):
        # ``itemsize`` is only consulted once we know this is a numpy dtype.
        if isinstance(dtype, np.dtype) and dtype.itemsize != 8:
            # TODO(2.0) remove special case once deprecation on DTA/TDA is enforced
            raise TypeError(
                rf"cannot astype a datetimelike from [{values.dtype}] to [{dtype}]"
            )

    # datetime64 -> tz-aware datetime64 has its own dedicated conversion path.
    if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
        return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True)

    # Nothing to convert: hand back the input (or a copy of it).
    if is_dtype_equal(values.dtype, dtype):
        return values.copy() if copy else values

    converted = (
        values.astype(dtype, copy=copy)
        if isinstance(values, ABCExtensionArray)
        else astype_nansafe(values, dtype, copy=copy)
    )

    # pandas does not store numpy str dtypes; hold such results as object.
    if isinstance(dtype, np.dtype) and issubclass(converted.dtype.type, str):
        converted = np.array(converted, dtype=object)

    return converted
| 1276 | + |
| 1277 | + |
def astype_array_safe(
    values: ArrayLike, dtype, copy: bool = False, errors: str = "raise"
) -> ArrayLike:
    """
    Validate the arguments, then cast an ndarray or ExtensionArray to ``dtype``.

    This is basically the implementation behind DataFrame/Series.astype: it
    checks ``errors``, rejects an ExtensionDtype *class* passed in place of an
    instance, normalizes ``dtype`` with ``pandas_dtype`` and delegates the
    actual conversion to ``astype_array``.

    Parameters
    ----------
    values : ndarray or ExtensionArray
        Array to convert.
    dtype : str, dtype convertible
        Target dtype; anything ``pandas_dtype`` accepts.
    copy : bool, default False
        Forwarded to ``astype_array``.
    errors : str, {'raise', 'ignore'}, default 'raise'
        - ``raise`` : let a failed conversion propagate
        - ``ignore`` : on ValueError/TypeError from the conversion, return
          the original ``values`` unchanged

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If ``errors`` is not one of the accepted values.
    TypeError
        If ``dtype`` is an ExtensionDtype class rather than an instance.
    """
    accepted_modes = ("raise", "ignore")
    if errors not in accepted_modes:
        raise ValueError(
            "Expected value of kwarg 'errors' to be one of "
            f"{list(accepted_modes)}. Supplied value is '{errors}'"
        )

    if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype):
        raise TypeError(
            f"Expected an instance of {dtype.__name__}, "
            "but got the class instead. Try instantiating 'dtype'."
        )

    dtype = pandas_dtype(dtype)

    try:
        return astype_array(values, dtype, copy=copy)
    except (ValueError, TypeError):
        # e.g. astype_nansafe can fail on object-dtype of strings
        # trying to convert to float
        if errors != "ignore":
            raise
        return values
| 1331 | + |
| 1332 | + |
1230 | 1333 | def soft_convert_objects(
|
1231 | 1334 | values: np.ndarray,
|
1232 | 1335 | datetime: bool = True,
|
|
0 commit comments