Skip to content

Commit b6f150d

Browse files
committed
PERF: Categorical._from_backing_data (pandas-dev#40339)
1 parent 7617b71 commit b6f150d

File tree

4 files changed

+19 -4 lines changed

pandas/_libs/algos_take_helper.pxi.in

+2 -2
Original file line number | Diff line number | Diff line change
@@ -230,10 +230,10 @@ ctypedef fused take_t:
230230
object
231231

232232

233-
cdef _take_2d(ndarray[take_t, ndim=2] values, object idx):
233+
cdef _take_2d(ndarray[take_t, ndim=2] values, ndarray[intp_t, ndim=2] idx):
234234
cdef:
235235
Py_ssize_t i, j, N, K
236-
ndarray[Py_ssize_t, ndim=2, cast=True] indexer = idx
236+
ndarray[intp_t, ndim=2, cast=True] indexer = idx
237237
ndarray[take_t, ndim=2] result
238238

239239
N, K = (<object>values).shape

pandas/_libs/internals.pyx

+1
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ from pandas._libs.util cimport is_integer_object
2525

2626

2727
@cython.final
28+
@cython.freelist(32)
2829
cdef class BlockPlacement:
2930
# __slots__ = '_as_slice', '_as_array', '_len'
3031
cdef:

pandas/core/arrays/categorical.py

+9 -1
Original file line number | Diff line number | Diff line change
@@ -1503,6 +1503,7 @@ def value_counts(self, dropna: bool = True):
15031503
count = np.bincount(np.where(mask, code, ncat))
15041504
ix = np.append(ix, -1)
15051505

1506+
ix = coerce_indexer_dtype(ix, self.dtype.categories)
15061507
ix = self._from_backing_data(ix)
15071508

15081509
return Series(count, index=CategoricalIndex(ix), dtype="int64")
@@ -1798,7 +1799,13 @@ def _codes(self) -> np.ndarray:
17981799
return self._ndarray
17991800

18001801
def _from_backing_data(self, arr: np.ndarray) -> Categorical:
1801-
return type(self)(arr, dtype=self.dtype, fastpath=True)
1802+
assert isinstance(arr, np.ndarray)
1803+
assert arr.dtype == self._ndarray.dtype
1804+
1805+
res = object.__new__(type(self))
1806+
res._ndarray = arr
1807+
res._dtype = self.dtype
1808+
return res
18021809

18031810
def _box_func(self, i: int):
18041811
if i == -1:
@@ -2120,6 +2127,7 @@ def mode(self, dropna=True):
21202127
good = self._codes != -1
21212128
codes = self._codes[good]
21222129
codes = sorted(htable.mode_int64(ensure_int64(codes), dropna))
2130+
codes = coerce_indexer_dtype(codes, self.dtype.categories)
21232131
return self._from_backing_data(codes)
21242132

21252133
# ------------------------------------------------------------------

pandas/core/indexes/extension.py

+7 -1
Original file line number | Diff line number | Diff line change
@@ -418,7 +418,13 @@ def insert(self: _T, loc: int, item) -> _T:
418418
dtype = find_common_type([self.dtype, dtype])
419419
return self.astype(dtype).insert(loc, item)
420420
else:
421-
new_vals = np.concatenate((arr._ndarray[:loc], [code], arr._ndarray[loc:]))
421+
new_vals = np.concatenate(
422+
(
423+
arr._ndarray[:loc],
424+
np.asarray([code], dtype=arr._ndarray.dtype),
425+
arr._ndarray[loc:],
426+
)
427+
)
422428
new_arr = arr._from_backing_data(new_vals)
423429
return type(self)._simple_new(new_arr, name=self.name)
424430

0 commit comments

Comments
 (0)