From df9426ebc9e319c63c2d4a9740dd1a7f371ac388 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 2 Mar 2024 09:33:38 -0800 Subject: [PATCH] Hashtable rename struct members --- pandas/_libs/hashtable.pxd | 2 +- pandas/_libs/hashtable_class_helper.pxi.in | 74 +++++++++++----------- pandas/_libs/intervaltree.pxi.in | 10 +-- 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/pandas/_libs/hashtable.pxd b/pandas/_libs/hashtable.pxd index eaec9e8462450..79c6efceadbbc 100644 --- a/pandas/_libs/hashtable.pxd +++ b/pandas/_libs/hashtable.pxd @@ -174,7 +174,7 @@ cdef class StringHashTable(HashTable): cdef struct Int64VectorData: int64_t *data - Py_ssize_t n, m + Py_ssize_t size, capacity cdef class Vector: cdef bint external_view_exists diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 26dcf0b6c4ce3..9a811a1f904c7 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -133,7 +133,7 @@ dtypes = [('Complex128', 'complex128', 'khcomplex128_t'), ctypedef struct {{name}}VectorData: {{c_type}} *data - Py_ssize_t n, m + Py_ssize_t size, capacity {{endif}} @@ -143,8 +143,8 @@ ctypedef struct {{name}}VectorData: cdef void append_data_{{dtype}}({{name}}VectorData *data, {{c_type}} x) noexcept nogil: - data.data[data.n] = x - data.n += 1 + data.data[data.size] = x + data.size += 1 {{endfor}} @@ -164,7 +164,7 @@ ctypedef fused vector_data: StringVectorData cdef bint needs_resize(vector_data *data) noexcept nogil: - return data.n == data.m + return data.size == data.capacity # ---------------------------------------------------------------------- # Vector @@ -213,14 +213,14 @@ cdef class {{name}}Vector(Vector): sizeof({{name}}VectorData)) if not self.data: raise MemoryError() - self.data.n = 0 - self.data.m = _INIT_VEC_CAP - self.ao = np.empty(self.data.m, dtype=np.{{dtype}}) + self.data.size = 0 + self.data.capacity = _INIT_VEC_CAP + self.ao = np.empty(self.data.capacity, dtype=np.{{dtype}}) self.data.data = <{{c_type}}*>self.ao.data cdef resize(self): - self.data.m = max(self.data.m * 4, _INIT_VEC_CAP) - self.ao.resize(self.data.m, refcheck=False) + self.data.capacity = max(self.data.capacity * 4, _INIT_VEC_CAP) + self.ao.resize(self.data.capacity, refcheck=False) self.data.data = <{{c_type}}*>self.ao.data def __dealloc__(self): @@ -229,15 +229,15 @@ cdef class {{name}}Vector(Vector): self.data = NULL def __len__(self) -> int: - return self.data.n + return self.data.size cpdef ndarray to_array(self): - if self.data.m != self.data.n: + if self.data.capacity != self.data.size: if self.external_view_exists: # should never happen raise ValueError("should have raised on append()") - self.ao.resize(self.data.n, refcheck=False) - self.data.m = self.data.n + self.ao.resize(self.data.size, refcheck=False) + self.data.capacity = self.data.size self.external_view_exists = True return self.ao @@ -266,25 +266,25 @@ cdef class StringVector(Vector): self.data = PyMem_Malloc(sizeof(StringVectorData)) if not self.data: raise MemoryError() - self.data.n = 0 - self.data.m = _INIT_VEC_CAP - self.data.data = malloc(self.data.m * sizeof(char *)) + self.data.size = 0 + self.data.capacity = _INIT_VEC_CAP + self.data.data = malloc(self.data.capacity * sizeof(char *)) if not self.data.data: raise MemoryError() cdef resize(self): cdef: char **orig_data - Py_ssize_t i, m + Py_ssize_t i, orig_capacity - m = self.data.m - self.data.m = max(self.data.m * 4, _INIT_VEC_CAP) + orig_capacity = self.data.capacity + self.data.capacity = max(self.data.capacity * 4, _INIT_VEC_CAP) orig_data = self.data.data - self.data.data = malloc(self.data.m * sizeof(char *)) + self.data.data = malloc(self.data.capacity * sizeof(char *)) if not self.data.data: raise MemoryError() - for i in range(m): + for i in range(orig_capacity): self.data.data[i] = orig_data[i] def __dealloc__(self): @@ -295,7 +295,7 @@ cdef class StringVector(Vector): self.data = NULL def __len__(self) -> int: - return self.data.n + return self.data.size cpdef ndarray[object, ndim=1] to_array(self): cdef: @@ -303,12 +303,12 @@ cdef class StringVector(Vector): Py_ssize_t n object val - ao = np.empty(self.data.n, dtype=object) - for i in range(self.data.n): + ao = np.empty(self.data.size, dtype=object) + for i in range(self.data.size): val = self.data.data[i] ao[i] = val self.external_view_exists = True - self.data.m = self.data.n + self.data.capacity = self.data.size return ao cdef void append(self, char *x) noexcept: @@ -327,37 +327,37 @@ cdef class ObjectVector(Vector): cdef: PyObject **data - Py_ssize_t n, m + Py_ssize_t size, capacity ndarray ao def __cinit__(self): - self.n = 0 - self.m = _INIT_VEC_CAP + self.size = 0 + self.capacity = _INIT_VEC_CAP self.ao = np.empty(_INIT_VEC_CAP, dtype=object) self.data = self.ao.data def __len__(self) -> int: - return self.n + return self.size cdef append(self, object obj): - if self.n == self.m: + if self.size == self.capacity: if self.external_view_exists: raise ValueError("external reference but " "Vector.resize() needed") - self.m = max(self.m * 2, _INIT_VEC_CAP) - self.ao.resize(self.m, refcheck=False) + self.capacity = max(self.capacity * 2, _INIT_VEC_CAP) + self.ao.resize(self.capacity, refcheck=False) self.data = self.ao.data Py_INCREF(obj) - self.data[self.n] = obj - self.n += 1 + self.data[self.size] = obj + self.size += 1 cpdef ndarray[object, ndim=1] to_array(self): - if self.m != self.n: + if self.capacity != self.size: if self.external_view_exists: raise ValueError("should have raised on append()") - self.ao.resize(self.n, refcheck=False) - self.m = self.n + self.ao.resize(self.size, refcheck=False) + self.capacity = self.size self.external_view_exists = True return self.ao diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index a6cec0fb30ecc..b94f60c272e5d 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -145,12 +145,12 @@ cdef class IntervalTree(IntervalMixin): # overflow -> no match, which is already handled below pass - if result.data.n == old_len: + if result.data.size == old_len: result.append(-1) - elif result.data.n > old_len + 1: + elif result.data.size > old_len + 1: raise KeyError( 'indexer does not intersect a unique set of intervals') - old_len = result.data.n + old_len = result.data.size return result.to_array().astype('intp') def get_indexer_non_unique(self, ndarray[scalar_t, ndim=1] target): @@ -172,10 +172,10 @@ cdef class IntervalTree(IntervalMixin): # overflow -> no match, which is already handled below pass - if result.data.n == old_len: + if result.data.size == old_len: result.append(-1) missing.append(i) - old_len = result.data.n + old_len = result.data.size return (result.to_array().astype('intp'), missing.to_array().astype('intp'))