|
12 | 12 | from pandas._libs import NaT, algos as libalgos, hashtable as htable, lib
|
13 | 13 | from pandas._typing import ArrayLike, Dtype, Ordered, Scalar
|
14 | 14 | from pandas.compat.numpy import function as nv
|
15 |
| -from pandas.util._decorators import cache_readonly, deprecate_kwarg, doc |
| 15 | +from pandas.util._decorators import cache_readonly, deprecate_kwarg |
16 | 16 | from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs
|
17 | 17 |
|
18 | 18 | from pandas.core.dtypes.cast import (
|
|
43 | 43 | from pandas.core import ops
|
44 | 44 | from pandas.core.accessor import PandasDelegate, delegate_names
|
45 | 45 | import pandas.core.algorithms as algorithms
|
46 |
| -from pandas.core.algorithms import _get_data_algo, factorize, take_1d, unique1d |
| 46 | +from pandas.core.algorithms import factorize, get_data_algo, take_1d, unique1d |
47 | 47 | from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
|
48 |
| -from pandas.core.base import ( |
49 |
| - ExtensionArray, |
50 |
| - NoNewAttributesMixin, |
51 |
| - PandasObject, |
52 |
| - _shared_docs, |
53 |
| -) |
| 48 | +from pandas.core.base import ExtensionArray, NoNewAttributesMixin, PandasObject |
54 | 49 | import pandas.core.common as com
|
55 | 50 | from pandas.core.construction import array, extract_array, sanitize_array
|
56 | 51 | from pandas.core.indexers import check_array_indexer, deprecate_ndim_indexing
|
|
63 | 58 |
|
64 | 59 | def _cat_compare_op(op):
|
65 | 60 | opname = f"__{op.__name__}__"
|
| 61 | + fill_value = True if op is operator.ne else False |
66 | 62 |
|
67 | 63 | @unpack_zerodim_and_defer(opname)
|
68 | 64 | def func(self, other):
|
@@ -97,26 +93,23 @@ def func(self, other):
|
97 | 93 | else:
|
98 | 94 | other_codes = other._codes
|
99 | 95 |
|
100 |
| - f = getattr(self._codes, opname) |
101 |
| - ret = f(other_codes) |
| 96 | + ret = op(self._codes, other_codes) |
102 | 97 | mask = (self._codes == -1) | (other_codes == -1)
|
103 | 98 | if mask.any():
|
104 |
| - # In other series, the leads to False, so do that here too |
105 |
| - if opname == "__ne__": |
106 |
| - ret[(self._codes == -1) & (other_codes == -1)] = True |
107 |
| - else: |
108 |
| - ret[mask] = False |
| 99 | + ret[mask] = fill_value |
109 | 100 | return ret
|
110 | 101 |
|
111 | 102 | if is_scalar(other):
|
112 | 103 | if other in self.categories:
|
113 | 104 | i = self.categories.get_loc(other)
|
114 |
| - ret = getattr(self._codes, opname)(i) |
| 105 | + ret = op(self._codes, i) |
115 | 106 |
|
116 | 107 | if opname not in {"__eq__", "__ge__", "__gt__"}:
|
117 |
| - # check for NaN needed if we are not equal or larger |
| 108 | + # GH#29820 performance trick; get_loc will always give i>=0, |
| 109 | + # so in the cases (__eq__, __ge__, __gt__) the setting |
| 110 | + # here is a no-op, so can be skipped. |
118 | 111 | mask = self._codes == -1
|
119 |
| - ret[mask] = False |
| 112 | + ret[mask] = fill_value |
120 | 113 | return ret
|
121 | 114 | else:
|
122 | 115 | return ops.invalid_comparison(self, other, op)
|
@@ -1315,11 +1308,6 @@ def memory_usage(self, deep=False):
|
1315 | 1308 | """
|
1316 | 1309 | return self._codes.nbytes + self.dtype.categories.memory_usage(deep=deep)
|
1317 | 1310 |
|
1318 |
| - @doc(_shared_docs["searchsorted"], klass="Categorical") |
1319 |
| - def searchsorted(self, value, side="left", sorter=None): |
1320 |
| - value = self._validate_searchsorted_value(value) |
1321 |
| - return self.codes.searchsorted(value, side=side, sorter=sorter) |
1322 |
| - |
1323 | 1311 | def isna(self):
|
1324 | 1312 | """
|
1325 | 1313 | Detect missing values
|
@@ -1428,9 +1416,6 @@ def check_for_ordered(self, op):
|
1428 | 1416 | "Categorical to an ordered one\n"
|
1429 | 1417 | )
|
1430 | 1418 |
|
1431 |
| - def _values_for_argsort(self): |
1432 |
| - return self._codes |
1433 |
| - |
1434 | 1419 | def argsort(self, ascending=True, kind="quicksort", **kwargs):
|
1435 | 1420 | """
|
1436 | 1421 | Return the indices that would sort the Categorical.
|
@@ -1879,7 +1864,7 @@ def __getitem__(self, key):
|
1879 | 1864 | if result.ndim > 1:
|
1880 | 1865 | deprecate_ndim_indexing(result)
|
1881 | 1866 | return result
|
1882 |
| - return self._constructor(result, dtype=self.dtype, fastpath=True) |
| 1867 | + return self._from_backing_data(result) |
1883 | 1868 |
|
1884 | 1869 | def __setitem__(self, key, value):
|
1885 | 1870 | """
|
@@ -2546,8 +2531,8 @@ def _get_codes_for_values(values, categories):
|
2546 | 2531 |
|
2547 | 2532 | # Only hit here when we've already coerced to object dtype.
|
2548 | 2533 |
|
2549 |
| - hash_klass, vals = _get_data_algo(values) |
2550 |
| - _, cats = _get_data_algo(categories) |
| 2534 | + hash_klass, vals = get_data_algo(values) |
| 2535 | + _, cats = get_data_algo(categories) |
2551 | 2536 | t = hash_klass(len(cats))
|
2552 | 2537 | t.map_locations(cats)
|
2553 | 2538 | return coerce_indexer_dtype(t.lookup(vals), cats)
|
|
0 commit comments