From fe1cfc82d6ef6e8d9aa23264c2e02fd2182150bc Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Mar 2024 14:43:07 -0800 Subject: [PATCH 01/15] Hashtable cleanups --- pandas/_libs/hashtable.pxd | 2 +- pandas/_libs/hashtable_class_helper.pxi.in | 46 +++++++--------------- pandas/_libs/hashtable_func_helper.pxi.in | 6 +-- 3 files changed, 19 insertions(+), 35 deletions(-) diff --git a/pandas/_libs/hashtable.pxd b/pandas/_libs/hashtable.pxd index eaec9e8462450..22b923580c491 100644 --- a/pandas/_libs/hashtable.pxd +++ b/pandas/_libs/hashtable.pxd @@ -180,7 +180,7 @@ cdef class Vector: cdef bint external_view_exists cdef class Int64Vector(Vector): - cdef Int64VectorData *data + cdef Int64VectorData data cdef ndarray ao cdef resize(self) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 26dcf0b6c4ce3..f1a0e61a57056 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -163,8 +163,8 @@ ctypedef fused vector_data: Complex64VectorData StringVectorData -cdef bint needs_resize(vector_data *data) noexcept nogil: - return data.n == data.m +cdef bint needs_resize(size_t size, size_t capacity) noexcept nogil: + return size == capacity # ---------------------------------------------------------------------- # Vector @@ -204,15 +204,11 @@ cdef class {{name}}Vector(Vector): # Int64Vector is the only one we need exposed for other cython files. {{if dtype != 'int64'}} cdef: - {{name}}VectorData *data + {{name}}VectorData data ndarray ao {{endif}} def __cinit__(self): - self.data = <{{name}}VectorData *>PyMem_Malloc( - sizeof({{name}}VectorData)) - if not self.data: - raise MemoryError() self.data.n = 0 self.data.m = _INIT_VEC_CAP self.ao = np.empty(self.data.m, dtype=np.{{dtype}}) @@ -223,11 +219,6 @@ cdef class {{name}}Vector(Vector): self.ao.resize(self.data.m, refcheck=False) self.data.data = <{{c_type}}*>self.ao.data - def __dealloc__(self): - if self.data is not NULL: - PyMem_Free(self.data) - self.data = NULL - def __len__(self) -> int: return self.data.n @@ -243,13 +234,13 @@ cdef class {{name}}Vector(Vector): cdef void append(self, {{c_type}} x) noexcept: - if needs_resize(self.data): + if needs_resize(self.data.n, self.data.m): if self.external_view_exists: raise ValueError("external reference but " "Vector.resize() needed") self.resize() - append_data_{{dtype}}(self.data, x) + append_data_{{dtype}}(&self.data, x) cdef extend(self, const {{c_type}}[:] x): for i in range(len(x)): @@ -260,12 +251,9 @@ cdef class {{name}}Vector(Vector): cdef class StringVector(Vector): cdef: - StringVectorData *data + StringVectorData data def __cinit__(self): - self.data = PyMem_Malloc(sizeof(StringVectorData)) - if not self.data: - raise MemoryError() self.data.n = 0 self.data.m = _INIT_VEC_CAP self.data.data = malloc(self.data.m * sizeof(char *)) @@ -288,11 +276,7 @@ cdef class StringVector(Vector): self.data.data[i] = orig_data[i] def __dealloc__(self): - if self.data is not NULL: - if self.data.data is not NULL: - free(self.data.data) - PyMem_Free(self.data) - self.data = NULL + free(self.data.data) def __len__(self) -> int: return self.data.n @@ -313,10 +297,10 @@ cdef class StringVector(Vector): cdef void append(self, char *x) noexcept: - if needs_resize(self.data): + if needs_resize(self.data.n, self.data.m): self.resize() - append_data_string(self.data, x) + append_data_string(&self.data, x) cdef extend(self, ndarray[object] x): for i in range(len(x)): @@ -652,7 +636,7 @@ cdef class 
{{name}}HashTable(HashTable): if return_inverse: labels = np.empty(n, dtype=np.intp) - ud = uniques.data + ud = &uniques.data use_na_value = na_value is not None use_mask = mask is not None if not use_mask and use_result_mask: @@ -662,7 +646,7 @@ cdef class {{name}}HashTable(HashTable): raise NotImplementedError # pragma: no cover result_mask = UInt8Vector() - rmd = result_mask.data + rmd = &result_mask.data if use_mask: mask_values = mask.view("uint8") @@ -700,7 +684,7 @@ cdef class {{name}}HashTable(HashTable): continue seen_na = True - if needs_resize(ud): + if needs_resize(ud.n, ud.m): with gil: if uniques.external_view_exists: raise ValueError("external reference to " @@ -722,7 +706,7 @@ cdef class {{name}}HashTable(HashTable): # k hasn't been seen yet k = kh_put_{{dtype}}(self.table, val, &ret) - if needs_resize(ud): + if needs_resize(ud.n, ud.m): with gil: if uniques.external_view_exists: raise ValueError("external reference to " @@ -846,7 +830,7 @@ cdef class {{name}}HashTable(HashTable): {{name}}VectorData *ud labels = np.empty(n, dtype=np.intp) - ud = uniques.data + ud = &uniques.data with nogil: for i in range(n): @@ -865,7 +849,7 @@ cdef class {{name}}HashTable(HashTable): k = kh_put_{{dtype}}(self.table, val, &ret) self.table.vals[k] = count - if needs_resize(ud): + if needs_resize(ud.n, ud.m): with gil: uniques.resize() append_data_{{dtype}}(ud, val) diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index 336af306d410f..7bd252a0f43cd 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -472,7 +472,7 @@ def _unique_label_indices_{{dtype}}(const {{c_type}}[:] labels) -> ndarray: kh_{{ttype}}_t *table = kh_init_{{ttype}}() {{name}}Vector idx = {{name}}Vector() ndarray[{{c_type}}, ndim=1] arr - {{name}}VectorData *ud = idx.data + {{name}}VectorData ud = idx.data kh_resize_{{ttype}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT)) @@ -480,10 +480,10 @@ def _unique_label_indices_{{dtype}}(const {{c_type}}[:] labels) -> ndarray: for i in range(n): kh_put_{{ttype}}(table, labels[i], &ret) if ret != 0: - if needs_resize(ud): + if needs_resize(ud.n, ud.m): with gil: idx.resize() - append_data_{{ttype}}(ud, i) + append_data_{{ttype}}(&ud, i) kh_destroy_{{ttype}}(table) From 2d8ad8e8928a78888c52261b7b8536212b692deb Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Mar 2024 14:49:08 -0800 Subject: [PATCH 02/15] Remove unused imports --- pandas/_libs/hashtable.pyx | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index ccac3d0b50d45..8250d0242c31f 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -1,8 +1,4 @@ cimport cython -from cpython.mem cimport ( - PyMem_Free, - PyMem_Malloc, -) from cpython.ref cimport ( Py_INCREF, PyObject, From d2ff43763b435f3c7663709e4118efc1e344851d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Mar 2024 15:14:28 -0800 Subject: [PATCH 03/15] renamed .n -> size, .m -> capacity --- pandas/_libs/hashtable.pxd | 2 +- pandas/_libs/hashtable_class_helper.pxi.in | 82 +++++++++++----------- pandas/_libs/hashtable_func_helper.pxi.in | 2 +- pandas/_libs/intervaltree.pxi.in | 10 +-- 4 files changed, 48 insertions(+), 48 deletions(-) diff --git a/pandas/_libs/hashtable.pxd b/pandas/_libs/hashtable.pxd index 22b923580c491..29ace4a339ced 100644 --- a/pandas/_libs/hashtable.pxd +++ b/pandas/_libs/hashtable.pxd @@ -174,7 +174,7 @@ cdef class StringHashTable(HashTable): cdef struct 
Int64VectorData: int64_t *data - Py_ssize_t n, m + Py_ssize_t size, capacity cdef class Vector: cdef bint external_view_exists diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index f1a0e61a57056..ea41c6ee11c8a 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -133,7 +133,7 @@ dtypes = [('Complex128', 'complex128', 'khcomplex128_t'), ctypedef struct {{name}}VectorData: {{c_type}} *data - Py_ssize_t n, m + Py_ssize_t size, capacity {{endif}} @@ -143,8 +143,8 @@ ctypedef struct {{name}}VectorData: cdef void append_data_{{dtype}}({{name}}VectorData *data, {{c_type}} x) noexcept nogil: - data.data[data.n] = x - data.n += 1 + data.data[data.size] = x + data.size += 1 {{endfor}} @@ -209,32 +209,32 @@ cdef class {{name}}Vector(Vector): {{endif}} def __cinit__(self): - self.data.n = 0 - self.data.m = _INIT_VEC_CAP - self.ao = np.empty(self.data.m, dtype=np.{{dtype}}) + self.data.size = 0 + self.data.capacity = _INIT_VEC_CAP + self.ao = np.empty(self.data.capacity, dtype=np.{{dtype}}) self.data.data = <{{c_type}}*>self.ao.data cdef resize(self): - self.data.m = max(self.data.m * 4, _INIT_VEC_CAP) - self.ao.resize(self.data.m, refcheck=False) + self.data.capacity = max(self.data.capacity * 4, _INIT_VEC_CAP) + self.ao.resize(self.data.capacity, refcheck=False) self.data.data = <{{c_type}}*>self.ao.data def __len__(self) -> int: - return self.data.n + return self.data.size cpdef ndarray to_array(self): - if self.data.m != self.data.n: + if self.data.capacity != self.data.size: if self.external_view_exists: # should never happen raise ValueError("should have raised on append()") - self.ao.resize(self.data.n, refcheck=False) - self.data.m = self.data.n + self.ao.resize(self.data.size, refcheck=False) + self.data.capacity = self.data.size self.external_view_exists = True return self.ao cdef void append(self, {{c_type}} x) noexcept: - if needs_resize(self.data.n, self.data.m): + if needs_resize(self.data.size, self.data.capacity): if self.external_view_exists: raise ValueError("external reference but " "Vector.resize() needed") @@ -254,32 +254,32 @@ cdef class StringVector(Vector): StringVectorData data def __cinit__(self): - self.data.n = 0 - self.data.m = _INIT_VEC_CAP - self.data.data = malloc(self.data.m * sizeof(char *)) + self.data.size = 0 + self.data.capacity = _INIT_VEC_CAP + self.data.data = malloc(self.data.capacity * sizeof(char *)) if not self.data.data: raise MemoryError() cdef resize(self): cdef: char **orig_data - Py_ssize_t i, m + Py_ssize_t i, capacity - m = self.data.m - self.data.m = max(self.data.m * 4, _INIT_VEC_CAP) + capacity = self.data.capacity + self.data.capacity = max(self.data.capacity * 4, _INIT_VEC_CAP) orig_data = self.data.data - self.data.data = malloc(self.data.m * sizeof(char *)) + self.data.data = malloc(self.data.capacity * sizeof(char *)) if not self.data.data: raise MemoryError() - for i in range(m): + for i in range(capacity): self.data.data[i] = orig_data[i] def __dealloc__(self): free(self.data.data) def __len__(self) -> int: - return self.data.n + return self.data.size cpdef ndarray[object, ndim=1] to_array(self): cdef: @@ -287,17 +287,17 @@ cdef class StringVector(Vector): Py_ssize_t n object val - ao = np.empty(self.data.n, dtype=object) - for i in range(self.data.n): + ao = np.empty(self.data.size, dtype=object) + for i in range(self.data.size): val = self.data.data[i] ao[i] = val self.external_view_exists = True - self.data.m = self.data.n + 
self.data.capacity = self.data.size return ao cdef void append(self, char *x) noexcept: - if needs_resize(self.data.n, self.data.m): + if needs_resize(self.data.size, self.data.capacity): self.resize() append_data_string(&self.data, x) @@ -311,37 +311,37 @@ cdef class ObjectVector(Vector): cdef: PyObject **data - Py_ssize_t n, m + Py_ssize_t size, capacity ndarray ao def __cinit__(self): - self.n = 0 - self.m = _INIT_VEC_CAP + self.size = 0 + self.capacity = _INIT_VEC_CAP self.ao = np.empty(_INIT_VEC_CAP, dtype=object) self.data = self.ao.data def __len__(self) -> int: - return self.n + return self.size cdef append(self, object obj): - if self.n == self.m: + if self.size == self.capacity: if self.external_view_exists: raise ValueError("external reference but " "Vector.resize() needed") - self.m = max(self.m * 2, _INIT_VEC_CAP) - self.ao.resize(self.m, refcheck=False) + self.size = max(self.capacity * 2, _INIT_VEC_CAP) + self.ao.resize(self.capacity, refcheck=False) self.data = self.ao.data Py_INCREF(obj) - self.data[self.n] = obj - self.n += 1 + self.data[self.size] = obj + self.size += 1 cpdef ndarray[object, ndim=1] to_array(self): - if self.m != self.n: + if self.capacity != self.size: if self.external_view_exists: raise ValueError("should have raised on append()") - self.ao.resize(self.n, refcheck=False) - self.m = self.n + self.ao.resize(self.size, refcheck=False) + self.capacity = self.size self.external_view_exists = True return self.ao @@ -684,7 +684,7 @@ cdef class {{name}}HashTable(HashTable): continue seen_na = True - if needs_resize(ud.n, ud.m): + if needs_resize(ud.size, ud.capacity): with gil: if uniques.external_view_exists: raise ValueError("external reference to " @@ -706,7 +706,7 @@ cdef class {{name}}HashTable(HashTable): # k hasn't been seen yet k = kh_put_{{dtype}}(self.table, val, &ret) - if needs_resize(ud.n, ud.m): + if needs_resize(ud.size, ud.capacity): with gil: if uniques.external_view_exists: raise ValueError("external reference to " @@ -849,7 +849,7 @@ cdef class {{name}}HashTable(HashTable): k = kh_put_{{dtype}}(self.table, val, &ret) self.table.vals[k] = count - if needs_resize(ud.n, ud.m): + if needs_resize(ud.size, ud.capacity): with gil: uniques.resize() append_data_{{dtype}}(ud, val) diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index 7bd252a0f43cd..9a9cc5a08473c 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -480,7 +480,7 @@ def _unique_label_indices_{{dtype}}(const {{c_type}}[:] labels) -> ndarray: for i in range(n): kh_put_{{ttype}}(table, labels[i], &ret) if ret != 0: - if needs_resize(ud.n, ud.m): + if needs_resize(ud.size, ud.capacity): with gil: idx.resize() append_data_{{ttype}}(&ud, i) diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index a6cec0fb30ecc..b94f60c272e5d 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -145,12 +145,12 @@ cdef class IntervalTree(IntervalMixin): # overflow -> no match, which is already handled below pass - if result.data.n == old_len: + if result.data.size == old_len: result.append(-1) - elif result.data.n > old_len + 1: + elif result.data.size > old_len + 1: raise KeyError( 'indexer does not intersect a unique set of intervals') - old_len = result.data.n + old_len = result.data.size return result.to_array().astype('intp') def get_indexer_non_unique(self, ndarray[scalar_t, ndim=1] target): @@ -172,10 +172,10 @@ cdef class 
IntervalTree(IntervalMixin): # overflow -> no match, which is already handled below pass - if result.data.n == old_len: + if result.data.size == old_len: result.append(-1) missing.append(i) - old_len = result.data.n + old_len = result.data.size return (result.to_array().astype('intp'), missing.to_array().astype('intp')) From 9703000446df7a60b6b62a7e753e0fdc42cc22b7 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Mar 2024 15:29:30 -0800 Subject: [PATCH 04/15] size_t -> Py_ssize_t --- pandas/_libs/hashtable_class_helper.pxi.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index ea41c6ee11c8a..4891582cf096b 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -163,7 +163,7 @@ ctypedef fused vector_data: Complex64VectorData StringVectorData -cdef bint needs_resize(size_t size, size_t capacity) noexcept nogil: +cdef bint needs_resize(Py_ssize_t size, Py_ssize_t capacity) noexcept nogil: return size == capacity # ---------------------------------------------------------------------- From 0ed0c61c7da12351a83ad09bb9f0a42d627f1a4b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Mar 2024 15:49:19 -0800 Subject: [PATCH 05/15] revert needs_resize --- pandas/_libs/hashtable_class_helper.pxi.in | 14 +++++++------- pandas/_libs/hashtable_func_helper.pxi.in | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 4891582cf096b..80087e3b761d6 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -163,8 +163,8 @@ ctypedef fused vector_data: Complex64VectorData StringVectorData -cdef bint needs_resize(Py_ssize_t size, Py_ssize_t capacity) noexcept nogil: - return size == capacity +cdef bint needs_resize(const vector_data *data) noexcept nogil: + return data.size == data.capacity # ---------------------------------------------------------------------- # Vector @@ -234,7 +234,7 @@ cdef class {{name}}Vector(Vector): cdef void append(self, {{c_type}} x) noexcept: - if needs_resize(self.data.size, self.data.capacity): + if needs_resize(&self.data): if self.external_view_exists: raise ValueError("external reference but " "Vector.resize() needed") @@ -297,7 +297,7 @@ cdef class StringVector(Vector): cdef void append(self, char *x) noexcept: - if needs_resize(self.data.size, self.data.capacity): + if needs_resize(&self.data): self.resize() append_data_string(&self.data, x) @@ -684,7 +684,7 @@ cdef class {{name}}HashTable(HashTable): continue seen_na = True - if needs_resize(ud.size, ud.capacity): + if needs_resize(ud): with gil: if uniques.external_view_exists: raise ValueError("external reference to " @@ -706,7 +706,7 @@ cdef class {{name}}HashTable(HashTable): # k hasn't been seen yet k = kh_put_{{dtype}}(self.table, val, &ret) - if needs_resize(ud.size, ud.capacity): + if needs_resize(ud): with gil: if uniques.external_view_exists: raise ValueError("external reference to " @@ -849,7 +849,7 @@ cdef class {{name}}HashTable(HashTable): k = kh_put_{{dtype}}(self.table, val, &ret) self.table.vals[k] = count - if needs_resize(ud.size, ud.capacity): + if needs_resize(ud): with gil: uniques.resize() append_data_{{dtype}}(ud, val) diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index 9a9cc5a08473c..1cd740c1dd1f8 100644 --- 
a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -480,7 +480,7 @@ def _unique_label_indices_{{dtype}}(const {{c_type}}[:] labels) -> ndarray: for i in range(n): kh_put_{{ttype}}(table, labels[i], &ret) if ret != 0: - if needs_resize(ud.size, ud.capacity): + if needs_resize(&ud): with gil: idx.resize() append_data_{{ttype}}(&ud, i) From 0704699bc9f4b4f6b408c2b3e02fd8cde5af8bb4 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Mar 2024 15:53:11 -0800 Subject: [PATCH 06/15] remove unnecessary pointers --- pandas/_libs/hashtable_class_helper.pxi.in | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 80087e3b761d6..406833a7f66a0 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -628,15 +628,15 @@ cdef class {{name}}HashTable(HashTable): int ret = 0 {{c_type}} val, na_value2 khiter_t k - {{name}}VectorData *ud + {{name}}VectorData ud UInt8Vector result_mask - UInt8VectorData *rmd + UInt8VectorData rmd bint use_na_value, use_mask, seen_na = False const uint8_t[:] mask_values if return_inverse: labels = np.empty(n, dtype=np.intp) - ud = &uniques.data + ud = uniques.data use_na_value = na_value is not None use_mask = mask is not None if not use_mask and use_result_mask: @@ -646,7 +646,7 @@ cdef class {{name}}HashTable(HashTable): raise NotImplementedError # pragma: no cover result_mask = UInt8Vector() - rmd = &result_mask.data + rmd = result_mask.data if use_mask: mask_values = mask.view("uint8") @@ -684,7 +684,7 @@ cdef class {{name}}HashTable(HashTable): continue seen_na = True - if needs_resize(ud): + if needs_resize(&ud): with gil: if uniques.external_view_exists: raise ValueError("external reference to " @@ -696,8 +696,8 @@ cdef class {{name}}HashTable(HashTable): "result_mask held, but " "Vector.resize() needed") result_mask.resize() - append_data_{{dtype}}(ud, val) - append_data_uint8(rmd, 1) + append_data_{{dtype}}(&ud, val) + append_data_uint8(&rmd, 1) continue k = kh_get_{{dtype}}(self.table, val) @@ -706,7 +706,7 @@ cdef class {{name}}HashTable(HashTable): # k hasn't been seen yet k = kh_put_{{dtype}}(self.table, val, &ret) - if needs_resize(ud): + if needs_resize(&ud): with gil: if uniques.external_view_exists: raise ValueError("external reference to " @@ -721,7 +721,7 @@ cdef class {{name}}HashTable(HashTable): result_mask.resize() append_data_{{dtype}}(ud, val) if use_result_mask: - append_data_uint8(rmd, 0) + append_data_uint8(&rmd, 0) if return_inverse: self.table.vals[k] = count From bf1864d9405950f7190188b4d18cb8730be69b61 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Mar 2024 16:01:19 -0800 Subject: [PATCH 07/15] fix build issues --- pandas/_libs/hashtable_class_helper.pxi.in | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 406833a7f66a0..de2e9bdc55203 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -719,7 +719,7 @@ cdef class {{name}}HashTable(HashTable): "result_mask held, but " "Vector.resize() needed") result_mask.resize() - append_data_{{dtype}}(ud, val) + append_data_{{dtype}}(&ud, val) if use_result_mask: append_data_uint8(&rmd, 0) @@ -827,10 +827,10 @@ cdef class {{name}}HashTable(HashTable): {{c_type}} val khiter_t k {{name}}Vector uniques = 
{{name}}Vector() - {{name}}VectorData *ud + {{name}}VectorData ud labels = np.empty(n, dtype=np.intp) - ud = &uniques.data + ud = uniques.data with nogil: for i in range(n): @@ -849,10 +849,10 @@ cdef class {{name}}HashTable(HashTable): k = kh_put_{{dtype}}(self.table, val, &ret) self.table.vals[k] = count - if needs_resize(ud): + if needs_resize(&ud): with gil: uniques.resize() - append_data_{{dtype}}(ud, val) + append_data_{{dtype}}(&ud, val) labels[i] = count count += 1 From b114654c983fa94cb28933d72db3ffc73346635d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Mar 2024 20:20:12 -0800 Subject: [PATCH 08/15] Removed ud variable --- pandas/_libs/hashtable_class_helper.pxi.in | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index de2e9bdc55203..1f0dc02ef130b 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -628,7 +628,6 @@ cdef class {{name}}HashTable(HashTable): int ret = 0 {{c_type}} val, na_value2 khiter_t k - {{name}}VectorData ud UInt8Vector result_mask UInt8VectorData rmd bint use_na_value, use_mask, seen_na = False @@ -636,7 +635,6 @@ cdef class {{name}}HashTable(HashTable): if return_inverse: labels = np.empty(n, dtype=np.intp) - ud = uniques.data use_na_value = na_value is not None use_mask = mask is not None if not use_mask and use_result_mask: @@ -684,7 +682,7 @@ cdef class {{name}}HashTable(HashTable): continue seen_na = True - if needs_resize(&ud): + if needs_resize(&uniques.data): with gil: if uniques.external_view_exists: raise ValueError("external reference to " @@ -696,7 +694,7 @@ cdef class {{name}}HashTable(HashTable): "result_mask held, but " "Vector.resize() needed") result_mask.resize() - append_data_{{dtype}}(&ud, val) + append_data_{{dtype}}(&uniques.data, val) append_data_uint8(&rmd, 1) continue @@ -706,7 +704,7 @@ cdef class {{name}}HashTable(HashTable): # k hasn't been seen yet k = kh_put_{{dtype}}(self.table, val, &ret) - if needs_resize(&ud): + if needs_resize(&uniques.data): with gil: if uniques.external_view_exists: raise ValueError("external reference to " @@ -719,7 +717,7 @@ cdef class {{name}}HashTable(HashTable): "result_mask held, but " "Vector.resize() needed") result_mask.resize() - append_data_{{dtype}}(&ud, val) + append_data_{{dtype}}(&uniques.data, val) if use_result_mask: append_data_uint8(&rmd, 0) From 3590a6b3d7beee8c2a3d7c35476f05267382e75a Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 2 Mar 2024 08:40:26 -0800 Subject: [PATCH 09/15] Fix ObjectVector issue --- pandas/_libs/hashtable_class_helper.pxi.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 1f0dc02ef130b..6fa4cb5c4a3ea 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -328,7 +328,7 @@ cdef class ObjectVector(Vector): if self.external_view_exists: raise ValueError("external reference but " "Vector.resize() needed") - self.size = max(self.capacity * 2, _INIT_VEC_CAP) + self.capacity = max(self.capacity * 2, _INIT_VEC_CAP) self.ao.resize(self.capacity, refcheck=False) self.data = self.ao.data From d4f24c2d3605c8a8e307bdc86aed5250e069e355 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 2 Mar 2024 09:18:22 -0800 Subject: [PATCH 10/15] try setting NULL in dealloc --- pandas/_libs/hashtable_class_helper.pxi.in | 1 + 1 file changed, 1 
insertion(+) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 6fa4cb5c4a3ea..cb7b240e23c3a 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -277,6 +277,7 @@ cdef class StringVector(Vector): def __dealloc__(self): free(self.data.data) + self.data.data = NULL def __len__(self) -> int: return self.data.size From 54ebc26eca6447edd5d01661a67ee354fd3b30c6 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 2 Mar 2024 09:53:37 -0800 Subject: [PATCH 11/15] reset things --- pandas/_libs/hashtable.pxd | 4 +- pandas/_libs/hashtable.pyx | 4 + pandas/_libs/hashtable_class_helper.pxi.in | 129 ++++++++++++--------- pandas/_libs/hashtable_func_helper.pxi.in | 6 +- pandas/_libs/intervaltree.pxi.in | 10 +- 5 files changed, 87 insertions(+), 66 deletions(-) diff --git a/pandas/_libs/hashtable.pxd b/pandas/_libs/hashtable.pxd index 29ace4a339ced..eaec9e8462450 100644 --- a/pandas/_libs/hashtable.pxd +++ b/pandas/_libs/hashtable.pxd @@ -174,13 +174,13 @@ cdef class StringHashTable(HashTable): cdef struct Int64VectorData: int64_t *data - Py_ssize_t size, capacity + Py_ssize_t n, m cdef class Vector: cdef bint external_view_exists cdef class Int64Vector(Vector): - cdef Int64VectorData data + cdef Int64VectorData *data cdef ndarray ao cdef resize(self) diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index 8250d0242c31f..ccac3d0b50d45 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -1,4 +1,8 @@ cimport cython +from cpython.mem cimport ( + PyMem_Free, + PyMem_Malloc, +) from cpython.ref cimport ( Py_INCREF, PyObject, diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index cb7b240e23c3a..26dcf0b6c4ce3 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -133,7 +133,7 @@ dtypes = [('Complex128', 'complex128', 'khcomplex128_t'), ctypedef struct {{name}}VectorData: {{c_type}} *data - Py_ssize_t size, capacity + Py_ssize_t n, m {{endif}} @@ -143,8 +143,8 @@ ctypedef struct {{name}}VectorData: cdef void append_data_{{dtype}}({{name}}VectorData *data, {{c_type}} x) noexcept nogil: - data.data[data.size] = x - data.size += 1 + data.data[data.n] = x + data.n += 1 {{endfor}} @@ -163,8 +163,8 @@ ctypedef fused vector_data: Complex64VectorData StringVectorData -cdef bint needs_resize(const vector_data *data) noexcept nogil: - return data.size == data.capacity +cdef bint needs_resize(vector_data *data) noexcept nogil: + return data.n == data.m # ---------------------------------------------------------------------- # Vector @@ -204,43 +204,52 @@ cdef class {{name}}Vector(Vector): # Int64Vector is the only one we need exposed for other cython files. 
{{if dtype != 'int64'}} cdef: - {{name}}VectorData data + {{name}}VectorData *data ndarray ao {{endif}} def __cinit__(self): - self.data.size = 0 - self.data.capacity = _INIT_VEC_CAP - self.ao = np.empty(self.data.capacity, dtype=np.{{dtype}}) + self.data = <{{name}}VectorData *>PyMem_Malloc( + sizeof({{name}}VectorData)) + if not self.data: + raise MemoryError() + self.data.n = 0 + self.data.m = _INIT_VEC_CAP + self.ao = np.empty(self.data.m, dtype=np.{{dtype}}) self.data.data = <{{c_type}}*>self.ao.data cdef resize(self): - self.data.capacity = max(self.data.capacity * 4, _INIT_VEC_CAP) - self.ao.resize(self.data.capacity, refcheck=False) + self.data.m = max(self.data.m * 4, _INIT_VEC_CAP) + self.ao.resize(self.data.m, refcheck=False) self.data.data = <{{c_type}}*>self.ao.data + def __dealloc__(self): + if self.data is not NULL: + PyMem_Free(self.data) + self.data = NULL + def __len__(self) -> int: - return self.data.size + return self.data.n cpdef ndarray to_array(self): - if self.data.capacity != self.data.size: + if self.data.m != self.data.n: if self.external_view_exists: # should never happen raise ValueError("should have raised on append()") - self.ao.resize(self.data.size, refcheck=False) - self.data.capacity = self.data.size + self.ao.resize(self.data.n, refcheck=False) + self.data.m = self.data.n self.external_view_exists = True return self.ao cdef void append(self, {{c_type}} x) noexcept: - if needs_resize(&self.data): + if needs_resize(self.data): if self.external_view_exists: raise ValueError("external reference but " "Vector.resize() needed") self.resize() - append_data_{{dtype}}(&self.data, x) + append_data_{{dtype}}(self.data, x) cdef extend(self, const {{c_type}}[:] x): for i in range(len(x)): @@ -251,36 +260,42 @@ cdef class {{name}}Vector(Vector): cdef class StringVector(Vector): cdef: - StringVectorData data + StringVectorData *data def __cinit__(self): - self.data.size = 0 - self.data.capacity = _INIT_VEC_CAP - self.data.data = malloc(self.data.capacity * sizeof(char *)) + self.data = PyMem_Malloc(sizeof(StringVectorData)) + if not self.data: + raise MemoryError() + self.data.n = 0 + self.data.m = _INIT_VEC_CAP + self.data.data = malloc(self.data.m * sizeof(char *)) if not self.data.data: raise MemoryError() cdef resize(self): cdef: char **orig_data - Py_ssize_t i, capacity + Py_ssize_t i, m - capacity = self.data.capacity - self.data.capacity = max(self.data.capacity * 4, _INIT_VEC_CAP) + m = self.data.m + self.data.m = max(self.data.m * 4, _INIT_VEC_CAP) orig_data = self.data.data - self.data.data = malloc(self.data.capacity * sizeof(char *)) + self.data.data = malloc(self.data.m * sizeof(char *)) if not self.data.data: raise MemoryError() - for i in range(capacity): + for i in range(m): self.data.data[i] = orig_data[i] def __dealloc__(self): - free(self.data.data) - self.data.data = NULL + if self.data is not NULL: + if self.data.data is not NULL: + free(self.data.data) + PyMem_Free(self.data) + self.data = NULL def __len__(self) -> int: - return self.data.size + return self.data.n cpdef ndarray[object, ndim=1] to_array(self): cdef: @@ -288,20 +303,20 @@ cdef class StringVector(Vector): Py_ssize_t n object val - ao = np.empty(self.data.size, dtype=object) - for i in range(self.data.size): + ao = np.empty(self.data.n, dtype=object) + for i in range(self.data.n): val = self.data.data[i] ao[i] = val self.external_view_exists = True - self.data.capacity = self.data.size + self.data.m = self.data.n return ao cdef void append(self, char *x) noexcept: - if 
needs_resize(&self.data): + if needs_resize(self.data): self.resize() - append_data_string(&self.data, x) + append_data_string(self.data, x) cdef extend(self, ndarray[object] x): for i in range(len(x)): @@ -312,37 +327,37 @@ cdef class ObjectVector(Vector): cdef: PyObject **data - Py_ssize_t size, capacity + Py_ssize_t n, m ndarray ao def __cinit__(self): - self.size = 0 - self.capacity = _INIT_VEC_CAP + self.n = 0 + self.m = _INIT_VEC_CAP self.ao = np.empty(_INIT_VEC_CAP, dtype=object) self.data = self.ao.data def __len__(self) -> int: - return self.size + return self.n cdef append(self, object obj): - if self.size == self.capacity: + if self.n == self.m: if self.external_view_exists: raise ValueError("external reference but " "Vector.resize() needed") - self.capacity = max(self.capacity * 2, _INIT_VEC_CAP) - self.ao.resize(self.capacity, refcheck=False) + self.m = max(self.m * 2, _INIT_VEC_CAP) + self.ao.resize(self.m, refcheck=False) self.data = self.ao.data Py_INCREF(obj) - self.data[self.size] = obj - self.size += 1 + self.data[self.n] = obj + self.n += 1 cpdef ndarray[object, ndim=1] to_array(self): - if self.capacity != self.size: + if self.m != self.n: if self.external_view_exists: raise ValueError("should have raised on append()") - self.ao.resize(self.size, refcheck=False) - self.capacity = self.size + self.ao.resize(self.n, refcheck=False) + self.m = self.n self.external_view_exists = True return self.ao @@ -629,13 +644,15 @@ cdef class {{name}}HashTable(HashTable): int ret = 0 {{c_type}} val, na_value2 khiter_t k + {{name}}VectorData *ud UInt8Vector result_mask - UInt8VectorData rmd + UInt8VectorData *rmd bint use_na_value, use_mask, seen_na = False const uint8_t[:] mask_values if return_inverse: labels = np.empty(n, dtype=np.intp) + ud = uniques.data use_na_value = na_value is not None use_mask = mask is not None if not use_mask and use_result_mask: @@ -683,7 +700,7 @@ cdef class {{name}}HashTable(HashTable): continue seen_na = True - if needs_resize(&uniques.data): + if needs_resize(ud): with gil: if uniques.external_view_exists: raise ValueError("external reference to " @@ -695,8 +712,8 @@ cdef class {{name}}HashTable(HashTable): "result_mask held, but " "Vector.resize() needed") result_mask.resize() - append_data_{{dtype}}(&uniques.data, val) - append_data_uint8(&rmd, 1) + append_data_{{dtype}}(ud, val) + append_data_uint8(rmd, 1) continue k = kh_get_{{dtype}}(self.table, val) @@ -705,7 +722,7 @@ cdef class {{name}}HashTable(HashTable): # k hasn't been seen yet k = kh_put_{{dtype}}(self.table, val, &ret) - if needs_resize(&uniques.data): + if needs_resize(ud): with gil: if uniques.external_view_exists: raise ValueError("external reference to " @@ -718,9 +735,9 @@ cdef class {{name}}HashTable(HashTable): "result_mask held, but " "Vector.resize() needed") result_mask.resize() - append_data_{{dtype}}(&uniques.data, val) + append_data_{{dtype}}(ud, val) if use_result_mask: - append_data_uint8(&rmd, 0) + append_data_uint8(rmd, 0) if return_inverse: self.table.vals[k] = count @@ -826,7 +843,7 @@ cdef class {{name}}HashTable(HashTable): {{c_type}} val khiter_t k {{name}}Vector uniques = {{name}}Vector() - {{name}}VectorData ud + {{name}}VectorData *ud labels = np.empty(n, dtype=np.intp) ud = uniques.data @@ -848,10 +865,10 @@ cdef class {{name}}HashTable(HashTable): k = kh_put_{{dtype}}(self.table, val, &ret) self.table.vals[k] = count - if needs_resize(&ud): + if needs_resize(ud): with gil: uniques.resize() - append_data_{{dtype}}(&ud, val) + append_data_{{dtype}}(ud, val) 
labels[i] = count count += 1 diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index 1cd740c1dd1f8..336af306d410f 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -472,7 +472,7 @@ def _unique_label_indices_{{dtype}}(const {{c_type}}[:] labels) -> ndarray: kh_{{ttype}}_t *table = kh_init_{{ttype}}() {{name}}Vector idx = {{name}}Vector() ndarray[{{c_type}}, ndim=1] arr - {{name}}VectorData ud = idx.data + {{name}}VectorData *ud = idx.data kh_resize_{{ttype}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT)) @@ -480,10 +480,10 @@ def _unique_label_indices_{{dtype}}(const {{c_type}}[:] labels) -> ndarray: for i in range(n): kh_put_{{ttype}}(table, labels[i], &ret) if ret != 0: - if needs_resize(&ud): + if needs_resize(ud): with gil: idx.resize() - append_data_{{ttype}}(&ud, i) + append_data_{{ttype}}(ud, i) kh_destroy_{{ttype}}(table) diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index b94f60c272e5d..a6cec0fb30ecc 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -145,12 +145,12 @@ cdef class IntervalTree(IntervalMixin): # overflow -> no match, which is already handled below pass - if result.data.size == old_len: + if result.data.n == old_len: result.append(-1) - elif result.data.size > old_len + 1: + elif result.data.n > old_len + 1: raise KeyError( 'indexer does not intersect a unique set of intervals') - old_len = result.data.size + old_len = result.data.n return result.to_array().astype('intp') def get_indexer_non_unique(self, ndarray[scalar_t, ndim=1] target): @@ -172,10 +172,10 @@ cdef class IntervalTree(IntervalMixin): # overflow -> no match, which is already handled below pass - if result.data.size == old_len: + if result.data.n == old_len: result.append(-1) missing.append(i) - old_len = result.data.size + old_len = result.data.n return (result.to_array().astype('intp'), missing.to_array().astype('intp')) From 04c1748f5ea2830202e065840d534c6b33fc9927 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 2 Mar 2024 10:00:54 -0800 Subject: [PATCH 12/15] try smaller scope --- pandas/_libs/hashtable.pxd | 2 +- pandas/_libs/hashtable_class_helper.pxi.in | 52 +++++++--------------- pandas/_libs/hashtable_func_helper.pxi.in | 6 +-- 3 files changed, 21 insertions(+), 39 deletions(-) diff --git a/pandas/_libs/hashtable.pxd b/pandas/_libs/hashtable.pxd index eaec9e8462450..22b923580c491 100644 --- a/pandas/_libs/hashtable.pxd +++ b/pandas/_libs/hashtable.pxd @@ -180,7 +180,7 @@ cdef class Vector: cdef bint external_view_exists cdef class Int64Vector(Vector): - cdef Int64VectorData *data + cdef Int64VectorData data cdef ndarray ao cdef resize(self) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 26dcf0b6c4ce3..c445849db18a7 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -204,15 +204,11 @@ cdef class {{name}}Vector(Vector): # Int64Vector is the only one we need exposed for other cython files. 
{{if dtype != 'int64'}} cdef: - {{name}}VectorData *data + {{name}}VectorData data ndarray ao {{endif}} def __cinit__(self): - self.data = <{{name}}VectorData *>PyMem_Malloc( - sizeof({{name}}VectorData)) - if not self.data: - raise MemoryError() self.data.n = 0 self.data.m = _INIT_VEC_CAP self.ao = np.empty(self.data.m, dtype=np.{{dtype}}) @@ -223,11 +219,6 @@ cdef class {{name}}Vector(Vector): self.ao.resize(self.data.m, refcheck=False) self.data.data = <{{c_type}}*>self.ao.data - def __dealloc__(self): - if self.data is not NULL: - PyMem_Free(self.data) - self.data = NULL - def __len__(self) -> int: return self.data.n @@ -243,13 +234,13 @@ cdef class {{name}}Vector(Vector): cdef void append(self, {{c_type}} x) noexcept: - if needs_resize(self.data): + if needs_resize(&self.data): if self.external_view_exists: raise ValueError("external reference but " "Vector.resize() needed") self.resize() - append_data_{{dtype}}(self.data, x) + append_data_{{dtype}}(&self.data, x) cdef extend(self, const {{c_type}}[:] x): for i in range(len(x)): @@ -260,12 +251,9 @@ cdef class {{name}}Vector(Vector): cdef class StringVector(Vector): cdef: - StringVectorData *data + StringVectorData data def __cinit__(self): - self.data = PyMem_Malloc(sizeof(StringVectorData)) - if not self.data: - raise MemoryError() self.data.n = 0 self.data.m = _INIT_VEC_CAP self.data.data = malloc(self.data.m * sizeof(char *)) @@ -288,11 +276,7 @@ cdef class StringVector(Vector): self.data.data[i] = orig_data[i] def __dealloc__(self): - if self.data is not NULL: - if self.data.data is not NULL: - free(self.data.data) - PyMem_Free(self.data) - self.data = NULL + free(self.data.data) def __len__(self) -> int: return self.data.n @@ -313,10 +297,10 @@ cdef class StringVector(Vector): cdef void append(self, char *x) noexcept: - if needs_resize(self.data): + if needs_resize(&self.data): self.resize() - append_data_string(self.data, x) + append_data_string(&self.data, x) cdef extend(self, ndarray[object] x): for i in range(len(x)): @@ -644,15 +628,13 @@ cdef class {{name}}HashTable(HashTable): int ret = 0 {{c_type}} val, na_value2 khiter_t k - {{name}}VectorData *ud UInt8Vector result_mask - UInt8VectorData *rmd + UInt8VectorData rmd bint use_na_value, use_mask, seen_na = False const uint8_t[:] mask_values if return_inverse: labels = np.empty(n, dtype=np.intp) - ud = uniques.data use_na_value = na_value is not None use_mask = mask is not None if not use_mask and use_result_mask: @@ -700,7 +682,7 @@ cdef class {{name}}HashTable(HashTable): continue seen_na = True - if needs_resize(ud): + if needs_resize(&uniques.data): with gil: if uniques.external_view_exists: raise ValueError("external reference to " @@ -712,8 +694,8 @@ cdef class {{name}}HashTable(HashTable): "result_mask held, but " "Vector.resize() needed") result_mask.resize() - append_data_{{dtype}}(ud, val) - append_data_uint8(rmd, 1) + append_data_{{dtype}}(&uniques.data, val) + append_data_uint8(&rmd, 1) continue k = kh_get_{{dtype}}(self.table, val) @@ -722,7 +704,7 @@ cdef class {{name}}HashTable(HashTable): # k hasn't been seen yet k = kh_put_{{dtype}}(self.table, val, &ret) - if needs_resize(ud): + if needs_resize(&uniques.data): with gil: if uniques.external_view_exists: raise ValueError("external reference to " @@ -735,9 +717,9 @@ cdef class {{name}}HashTable(HashTable): "result_mask held, but " "Vector.resize() needed") result_mask.resize() - append_data_{{dtype}}(ud, val) + append_data_{{dtype}}(&uniques.data, val) if use_result_mask: - append_data_uint8(rmd, 0) + 
append_data_uint8(&rmd, 0) if return_inverse: self.table.vals[k] = count @@ -843,7 +825,7 @@ cdef class {{name}}HashTable(HashTable): {{c_type}} val khiter_t k {{name}}Vector uniques = {{name}}Vector() - {{name}}VectorData *ud + {{name}}VectorData ud labels = np.empty(n, dtype=np.intp) ud = uniques.data @@ -865,10 +847,10 @@ cdef class {{name}}HashTable(HashTable): k = kh_put_{{dtype}}(self.table, val, &ret) self.table.vals[k] = count - if needs_resize(ud): + if needs_resize(&ud): with gil: uniques.resize() - append_data_{{dtype}}(ud, val) + append_data_{{dtype}}(&ud, val) labels[i] = count count += 1 diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index 336af306d410f..1cd740c1dd1f8 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -472,7 +472,7 @@ def _unique_label_indices_{{dtype}}(const {{c_type}}[:] labels) -> ndarray: kh_{{ttype}}_t *table = kh_init_{{ttype}}() {{name}}Vector idx = {{name}}Vector() ndarray[{{c_type}}, ndim=1] arr - {{name}}VectorData *ud = idx.data + {{name}}VectorData ud = idx.data kh_resize_{{ttype}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT)) @@ -480,10 +480,10 @@ def _unique_label_indices_{{dtype}}(const {{c_type}}[:] labels) -> ndarray: for i in range(n): kh_put_{{ttype}}(table, labels[i], &ret) if ret != 0: - if needs_resize(ud): + if needs_resize(&ud): with gil: idx.resize() - append_data_{{ttype}}(ud, i) + append_data_{{ttype}}(&ud, i) kh_destroy_{{ttype}}(table) From fa05ef2d43c8a17e01168699e62c9bd92192f6a4 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 2 Mar 2024 14:51:56 -0800 Subject: [PATCH 13/15] Smaller scope --- pandas/_libs/hashtable_class_helper.pxi.in | 26 ++++++++++++---------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index c445849db18a7..629b6b42db852 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -628,13 +628,15 @@ cdef class {{name}}HashTable(HashTable): int ret = 0 {{c_type}} val, na_value2 khiter_t k + {{name}}VectorData *ud UInt8Vector result_mask - UInt8VectorData rmd + UInt8VectorData *rmd bint use_na_value, use_mask, seen_na = False const uint8_t[:] mask_values if return_inverse: labels = np.empty(n, dtype=np.intp) + ud = &uniques.data use_na_value = na_value is not None use_mask = mask is not None if not use_mask and use_result_mask: @@ -644,7 +646,7 @@ cdef class {{name}}HashTable(HashTable): raise NotImplementedError # pragma: no cover result_mask = UInt8Vector() - rmd = result_mask.data + rmd = &result_mask.data if use_mask: mask_values = mask.view("uint8") @@ -682,7 +684,7 @@ cdef class {{name}}HashTable(HashTable): continue seen_na = True - if needs_resize(&uniques.data): + if needs_resize(ud): with gil: if uniques.external_view_exists: raise ValueError("external reference to " @@ -694,8 +696,8 @@ cdef class {{name}}HashTable(HashTable): "result_mask held, but " "Vector.resize() needed") result_mask.resize() - append_data_{{dtype}}(&uniques.data, val) - append_data_uint8(&rmd, 1) + append_data_{{dtype}}(ud, val) + append_data_uint8(rmd, 1) continue k = kh_get_{{dtype}}(self.table, val) @@ -704,7 +706,7 @@ cdef class {{name}}HashTable(HashTable): # k hasn't been seen yet k = kh_put_{{dtype}}(self.table, val, &ret) - if needs_resize(&uniques.data): + if needs_resize(ud): with gil: if uniques.external_view_exists: raise ValueError("external reference 
to " @@ -717,9 +719,9 @@ cdef class {{name}}HashTable(HashTable): "result_mask held, but " "Vector.resize() needed") result_mask.resize() - append_data_{{dtype}}(&uniques.data, val) + append_data_{{dtype}}(ud, val) if use_result_mask: - append_data_uint8(&rmd, 0) + append_data_uint8(rmd, 0) if return_inverse: self.table.vals[k] = count @@ -825,10 +827,10 @@ cdef class {{name}}HashTable(HashTable): {{c_type}} val khiter_t k {{name}}Vector uniques = {{name}}Vector() - {{name}}VectorData ud + {{name}}VectorData *ud labels = np.empty(n, dtype=np.intp) - ud = uniques.data + ud = &uniques.data with nogil: for i in range(n): @@ -847,10 +849,10 @@ cdef class {{name}}HashTable(HashTable): k = kh_put_{{dtype}}(self.table, val, &ret) self.table.vals[k] = count - if needs_resize(&ud): + if needs_resize(ud): with gil: uniques.resize() - append_data_{{dtype}}(&ud, val) + append_data_{{dtype}}(ud, val) labels[i] = count count += 1 From 2764636f399409fba5dcc649ef33503abf4b5745 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 2 Mar 2024 14:56:53 -0800 Subject: [PATCH 14/15] less change --- pandas/_libs/hashtable_func_helper.pxi.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index 1cd740c1dd1f8..ca1b28b9442ca 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -472,7 +472,7 @@ def _unique_label_indices_{{dtype}}(const {{c_type}}[:] labels) -> ndarray: kh_{{ttype}}_t *table = kh_init_{{ttype}}() {{name}}Vector idx = {{name}}Vector() ndarray[{{c_type}}, ndim=1] arr - {{name}}VectorData ud = idx.data + {{name}}VectorData *ud = &idx.data kh_resize_{{ttype}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT)) @@ -480,10 +480,10 @@ def _unique_label_indices_{{dtype}}(const {{c_type}}[:] labels) -> ndarray: for i in range(n): kh_put_{{ttype}}(table, labels[i], &ret) if ret != 0: - if needs_resize(&ud): + if needs_resize(ud): with gil: idx.resize() - append_data_{{ttype}}(&ud, i) + append_data_{{ttype}}(ud, i) kh_destroy_{{ttype}}(table) From f6c4cd2824af82deded96ee343a88c26bd0c315f Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 3 Mar 2024 15:32:18 -0800 Subject: [PATCH 15/15] remove unused --- pandas/_libs/hashtable.pyx | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index ccac3d0b50d45..8250d0242c31f 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -1,8 +1,4 @@ cimport cython -from cpython.mem cimport ( - PyMem_Free, - PyMem_Malloc, -) from cpython.ref cimport ( Py_INCREF, PyObject,
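
The patches above iterate toward one idea: hold the `VectorData` struct inside the vector object by value (renaming `.n`/`.m` to `.size`/`.capacity` along the way was reverted), instead of heap-allocating it with `PyMem_Malloc` and freeing it in `__dealloc__`, and hand helpers like `needs_resize` and `append_data_*` the address of that inline struct. As a reading aid only — not part of the patch series — here is a simplified, standalone sketch of that end state for the int64 case. It assumes Cython 3 (`noexcept` syntax) and NumPy's Cython API; the module layout, the `_INIT_VEC_CAP` constant, and the use of `cnp.PyArray_DATA` are illustrative stand-ins for the real `hashtable_class_helper.pxi.in` template machinery.

```cython
# Minimal sketch of the inline-VectorData pattern (illustration, not pandas source).
from libc.stdint cimport int64_t

cimport numpy as cnp
import numpy as np

cnp.import_array()

cdef Py_ssize_t _INIT_VEC_CAP = 128   # same starting capacity the pandas templates use

cdef struct Int64VectorData:
    int64_t *data
    Py_ssize_t n, m                    # element count and capacity

cdef bint needs_resize(const Int64VectorData *data) noexcept nogil:
    # Grow only once the buffer is completely full.
    return data.n == data.m

cdef void append_data_int64(Int64VectorData *data, int64_t x) noexcept nogil:
    data.data[data.n] = x
    data.n += 1

cdef class Int64Vector:
    cdef:
        Int64VectorData data   # held by value: no PyMem_Malloc in __cinit__,
                               # no PyMem_Free in __dealloc__
        cnp.ndarray ao         # NumPy array that owns the underlying buffer

    def __cinit__(self):
        self.data.n = 0
        self.data.m = _INIT_VEC_CAP
        self.ao = np.empty(self.data.m, dtype=np.int64)
        self.data.data = <int64_t*>cnp.PyArray_DATA(self.ao)

    cdef resize(self):
        # Grow the backing array and refresh the raw pointer into it.
        self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
        self.ao.resize(self.data.m, refcheck=False)
        self.data.data = <int64_t*>cnp.PyArray_DATA(self.ao)

    cdef append(self, int64_t x):
        # Helpers that want a pointer take the address of the inline struct.
        if needs_resize(&self.data):
            self.resize()
        append_data_int64(&self.data, x)

    def __len__(self) -> int:
        return self.data.n
```

Hash-table callers in the series follow the same convention: where they previously stored `uniques.data` (a pointer member) in a local `ud`, they now take `&uniques.data` or pass `&self.data` directly at the call site.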