-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
PERF: avoid unneeded recoding of categoricals and reuse CategoricalDtypes for greater slicing speed #21659
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
PERF: avoid unneeded recoding of categoricals and reuse CategoricalDtypes for greater slicing speed #21659
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -169,7 +169,7 @@ def _create_categorical(cls, data, categories=None, ordered=None, | |
data = data.set_categories(categories, ordered=ordered) | ||
elif ordered is not None and ordered != data.ordered: | ||
data = data.set_ordered(ordered) | ||
if isinstance(dtype, CategoricalDtype): | ||
if isinstance(dtype, CategoricalDtype) and dtype != data.dtype: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It should be noted that in master this dtype comparison is quite slow: >>> d = pd.api.types.CategoricalDtype(categories=['a', 'b', 'c'])
>>> %timeit d == d
149 µs # master
524 ns # this PR
This is why this PR also focuses on improving dtype comparisons, so that this check doesn't cause slowdowns in other parts of pandas. Otherwise the performance benefits of this PR would be ambivalent (it would also cause some slowdowns). |
||
# we want to silently ignore dtype='category' | ||
data = data._set_dtype(dtype) | ||
return data | ||
|
@@ -236,7 +236,7 @@ def _is_dtype_compat(self, other): | |
if not is_list_like(values): | ||
values = [values] | ||
other = CategoricalIndex(self._create_categorical( | ||
other, categories=self.categories, ordered=self.ordered)) | ||
other, dtype=self.dtype)) | ||
if not other.isin(values).all(): | ||
raise TypeError("cannot append a non-category item to a " | ||
"CategoricalIndex") | ||
|
@@ -798,8 +798,7 @@ def _evaluate_compare(self, other): | |
other = other._values | ||
elif isinstance(other, Index): | ||
other = self._create_categorical( | ||
other._values, categories=self.categories, | ||
ordered=self.ordered) | ||
other._values, dtype=self.dtype) | ||
|
||
if isinstance(other, (ABCCategorical, np.ndarray, | ||
ABCSeries)): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This probably doesn't make any difference, but this could be an `elif` (instead of the existing `if`).