From 4e123714a1ec13c2e22acbddc93a597c0ae96a4e Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Tue, 15 Jun 2021 14:43:28 -0700 Subject: [PATCH 1/3] Groupsort indexer contiguity --- pandas/_libs/algos.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index c2b9c723b7c72..7afde2fa01c4d 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -217,8 +217,8 @@ def groupsort_indexer(const intp_t[:] index, Py_ssize_t ngroups): This is a reverse of the label factorization process. """ cdef: - Py_ssize_t i, loc, label, n - ndarray[intp_t] indexer, where, counts + Py_ssize_t i, label, n + intp_t[::1] indexer, where, counts counts = np.zeros(ngroups + 1, dtype=np.intp) n = len(index) From 962d938ed0c160dfd19d302b867689f384847f49 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Tue, 15 Jun 2021 15:06:41 -0700 Subject: [PATCH 2/3] fixup --- pandas/_libs/algos.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 7afde2fa01c4d..9585786ffad9a 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -241,7 +241,7 @@ def groupsort_indexer(const intp_t[:] index, Py_ssize_t ngroups): indexer[where[label]] = i where[label] += 1 - return indexer, counts + return np.asarray(indexer), np.asarray(counts) cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil: From 2c7d752a9546f21dda84987876fc885c27ce3a72 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Mon, 12 Jul 2021 10:24:14 -0400 Subject: [PATCH 3/3] Merge master --- pandas/_libs/algos.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 6ca9f10f30e9f..adfe4de1263ed 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -241,7 +241,7 @@ def groupsort_indexer(const intp_t[:] index, Py_ssize_t ngroups): indexer[where[label]] = i where[label] += 1 - return np.asarray(indexer), np.asarray(counts) + return indexer.base, counts.base cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil: