-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: Fix Series.astype and Categorical.astype to update existing Categorical data #18710
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,7 +54,7 @@ | |
import pandas.core.dtypes.concat as _concat | ||
|
||
from pandas.core.dtypes.generic import ABCSeries, ABCDatetimeIndex | ||
from pandas.core.common import is_null_slice | ||
from pandas.core.common import is_null_slice, _any_not_none | ||
import pandas.core.algorithms as algos | ||
|
||
from pandas.core.index import Index, MultiIndex, _ensure_index | ||
|
@@ -573,7 +573,6 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, | |
raise TypeError(msg) | ||
|
||
# may need to convert to categorical | ||
# this is only called for non-categoricals | ||
if self.is_categorical_astype(dtype): | ||
|
||
# deprecated 17636 | ||
|
@@ -589,13 +588,16 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, | |
"CategoricalDtype instead", | ||
FutureWarning, stacklevel=7) | ||
|
||
kwargs = kwargs.copy() | ||
categories = getattr(dtype, 'categories', None) | ||
ordered = getattr(dtype, 'ordered', False) | ||
categories = kwargs.get('categories', None) | ||
ordered = kwargs.get('ordered', None) | ||
if _any_not_none(categories, ordered): | ||
dtype = CategoricalDtype(categories, ordered) | ||
|
||
kwargs.setdefault('categories', categories) | ||
kwargs.setdefault('ordered', ordered) | ||
return self.make_block(Categorical(self.values, **kwargs)) | ||
if is_categorical_dtype(self.values): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think you need all of this logic, wouldn't
be enough (if values is a Categorical already or dtype is a CDT, it will infer correctly, and if its not it will as well). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think that quite works, since In the numpy case, Likewise, Seems like the two paths are necessary? Or am I overlooking something? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok seems reasonable then |
||
# GH 10696/18593: update an existing categorical efficiently | ||
return self.make_block(self.values.astype(dtype, copy=copy)) | ||
|
||
return self.make_block(Categorical(self.values, dtype=dtype)) | ||
|
||
# astype processing | ||
dtype = np.dtype(dtype) | ||
|
@@ -2427,23 +2429,6 @@ def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None): | |
|
||
return self.make_block_same_class(new_values, new_mgr_locs) | ||
|
||
def _astype(self, dtype, copy=False, errors='raise', values=None, | ||
klass=None, mgr=None): | ||
""" | ||
Coerce to the new type (if copy=True, return a new copy) | ||
raise on an except if raise == True | ||
""" | ||
|
||
if self.is_categorical_astype(dtype): | ||
values = self.values | ||
else: | ||
values = np.asarray(self.values).astype(dtype, copy=False) | ||
|
||
if copy: | ||
values = values.copy() | ||
|
||
return self.make_block(values) | ||
|
||
def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs): | ||
""" convert to our native types format, slicing if desired """ | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we might want to add some explicit tests for
_update_dtype
at some point (separate PR)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added them in the other PR during the initial implementation actually:
pandas/pandas/tests/dtypes/test_dtypes.py
Lines 127 to 152 in 265e327