-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
REF: move Block.astype implementation to dtypes/cast.py #40141
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
648e005
2f52197
aefc462
01941b7
76d1e1c
3133809
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,7 @@ | |
datetime, | ||
timedelta, | ||
) | ||
import inspect | ||
from typing import ( | ||
TYPE_CHECKING, | ||
Any, | ||
|
@@ -86,6 +87,7 @@ | |
is_timedelta64_dtype, | ||
is_timedelta64_ns_dtype, | ||
is_unsigned_integer_dtype, | ||
pandas_dtype, | ||
) | ||
from pandas.core.dtypes.dtypes import ( | ||
DatetimeTZDtype, | ||
|
@@ -1225,6 +1227,107 @@ def astype_nansafe( | |
return arr.astype(dtype, copy=copy) | ||
|
||
|
||
def astype_array(
    values: ArrayLike, dtype: DtypeObj, copy: bool = False
) -> ArrayLike:
    """
    Cast array (ndarray or ExtensionArray) to the new dtype.

    Parameters
    ----------
    values : ndarray or ExtensionArray
    dtype : dtype object
    copy : bool, default False
        copy if indicated

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    TypeError
        If a datetime64/timedelta64 array is cast to a numpy integer dtype
        whose itemsize is not 8 bytes.
    """
    if (
        values.dtype.kind in ("m", "M")
        and dtype.kind in ("i", "u")
        and isinstance(dtype, np.dtype)
        and dtype.itemsize != 8
    ):
        # TODO(2.0) remove special case once deprecation on DTA/TDA is enforced
        msg = f"cannot astype a datetimelike from [{values.dtype}] to [{dtype}]"
        raise TypeError(msg)

    # tz-naive datetime64 -> tz-aware goes through its dedicated helper
    if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
        return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True)

    # nothing to cast; only honour the copy request
    if is_dtype_equal(values.dtype, dtype):
        if copy:
            return values.copy()
        return values

    if isinstance(values, ABCExtensionArray):
        # ExtensionArrays implement their own casting
        values = values.astype(dtype, copy=copy)

    else:
        values = astype_nansafe(values, dtype, copy=copy)

    # in pandas we don't store numpy str dtypes, so convert to object
    if isinstance(dtype, np.dtype) and issubclass(values.dtype.type, str):
        values = np.array(values, dtype=object)

    return values
|
||
|
||
def astype_array_safe(
    values: ArrayLike, dtype, copy: bool = False, errors: str = "raise"
) -> ArrayLike:
    """
    Cast array (ndarray or ExtensionArray) to the new dtype.

    This basically is the implementation for DataFrame/Series.astype and
    includes all custom logic for pandas (NaN-safety, converting str to object,
    not allowing an ExtensionDtype class to be passed instead of an instance).

    Parameters
    ----------
    values : ndarray or ExtensionArray
    dtype : str, dtype convertible
    copy : bool, default False
        copy if indicated
    errors : str, {'raise', 'ignore'}, default 'raise'
        - ``raise`` : allow exceptions to be raised
        - ``ignore`` : suppress exceptions. On error return original object

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If ``errors`` is not one of the legal values.
    TypeError
        If ``dtype`` is an ExtensionDtype class rather than an instance.
    """
    errors_legal_values = ("raise", "ignore")

    if errors not in errors_legal_values:
        invalid_arg = (
            "Expected value of kwarg 'errors' to be one of "
            f"{list(errors_legal_values)}. Supplied value is '{errors}'"
        )
        raise ValueError(invalid_arg)

    if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype):
        msg = (
            f"Expected an instance of {dtype.__name__}, "
            "but got the class instead. Try instantiating 'dtype'."
        )
        raise TypeError(msg)

    dtype = pandas_dtype(dtype)

    # The argument validation above sits outside the try block, so those
    # errors are always raised, even when errors="ignore".
    try:
        new_values = astype_array(values, dtype, copy=copy)
    except (ValueError, TypeError):
        # e.g. astype_nansafe can fail on object-dtype of strings
        #  trying to convert to float
        if errors == "ignore":
            # hand back the input object itself; no copy is made here
            new_values = values
        else:
            raise

    return new_values
|
||
|
||
def soft_convert_objects( | ||
values: np.ndarray, | ||
datetime: bool = True, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,5 @@ | ||
from __future__ import annotations | ||
|
||
import inspect | ||
import re | ||
from typing import ( | ||
TYPE_CHECKING, | ||
|
@@ -36,8 +35,7 @@ | |
from pandas.util._validators import validate_bool_kwarg | ||
|
||
from pandas.core.dtypes.cast import ( | ||
astype_dt64_to_dt64tz, | ||
astype_nansafe, | ||
astype_array_safe, | ||
can_hold_element, | ||
find_common_type, | ||
infer_dtype_from, | ||
|
@@ -49,7 +47,6 @@ | |
) | ||
from pandas.core.dtypes.common import ( | ||
is_categorical_dtype, | ||
is_datetime64_dtype, | ||
is_datetime64tz_dtype, | ||
is_dtype_equal, | ||
is_extension_array_dtype, | ||
|
@@ -652,33 +649,11 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): | |
------- | ||
Block | ||
""" | ||
errors_legal_values = ("raise", "ignore") | ||
|
||
if errors not in errors_legal_values: | ||
invalid_arg = ( | ||
"Expected value of kwarg 'errors' to be one of " | ||
f"{list(errors_legal_values)}. Supplied value is '{errors}'" | ||
) | ||
raise ValueError(invalid_arg) | ||
|
||
if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype): | ||
msg = ( | ||
f"Expected an instance of {dtype.__name__}, " | ||
"but got the class instead. Try instantiating 'dtype'." | ||
) | ||
raise TypeError(msg) | ||
|
||
dtype = pandas_dtype(dtype) | ||
values = self.values | ||
if values.dtype.kind in ["m", "M"]: | ||
values = self.array_values() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
could move this into astype_array_safe and use ensure_wrapped_if_datetimelike; would make it robust to AM/BM (though I think both AM and BM now have PRs to make the arrays EAs to begin with)
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Since ArrayManager already stores it as EAs (after this array), I would prefer to leave it here (then your PR changing to store EAs in BlockManager as well can remove those two lines) |
||
|
||
try: | ||
new_values = self._astype(dtype, copy=copy) | ||
except (ValueError, TypeError): | ||
# e.g. astype_nansafe can fail on object-dtype of strings | ||
# trying to convert to float | ||
if errors == "ignore": | ||
new_values = self.values | ||
else: | ||
raise | ||
new_values = astype_array_safe(values, dtype, copy=copy, errors=errors) | ||
|
||
newb = self.make_block(new_values) | ||
if newb.shape != self.shape: | ||
|
@@ -689,37 +664,6 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): | |
) | ||
return newb | ||
|
||
def _astype(self, dtype: DtypeObj, copy: bool) -> ArrayLike:
    """
    Cast this block's values to ``dtype`` and return the resulting array.

    Values whose dtype kind is "m" or "M" are first replaced by
    ``self.array_values()`` before any casting is attempted.
    """
    arr = self.values
    if arr.dtype.kind in ("m", "M"):
        arr = self.array_values()

    if (
        arr.dtype.kind in ("m", "M")
        and dtype.kind in ("i", "u")
        and isinstance(dtype, np.dtype)
        and dtype.itemsize != 8
    ):
        # TODO(2.0) remove special case once deprecation on DTA/TDA is enforced
        raise TypeError(
            rf"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]"
        )

    if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(arr.dtype):
        return astype_dt64_to_dt64tz(arr, dtype, copy, via_utc=True)

    if is_dtype_equal(arr.dtype, dtype):
        # same dtype: only the copy flag matters
        return arr.copy() if copy else arr

    if isinstance(arr, ExtensionArray):
        return arr.astype(dtype, copy=copy)

    return astype_nansafe(arr, dtype, copy=copy)
|
||
def convert( | ||
self, | ||
copy: bool = True, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you add a return annotation