Skip to content

CLN: Hashtable rename struct members #57704

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/_libs/hashtable.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ cdef class StringHashTable(HashTable):

cdef struct Int64VectorData:
int64_t *data
Py_ssize_t n, m
Py_ssize_t size, capacity

cdef class Vector:
cdef bint external_view_exists
Expand Down
74 changes: 37 additions & 37 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ dtypes = [('Complex128', 'complex128', 'khcomplex128_t'),

ctypedef struct {{name}}VectorData:
{{c_type}} *data
Py_ssize_t n, m
Py_ssize_t size, capacity

{{endif}}

Expand All @@ -143,8 +143,8 @@ ctypedef struct {{name}}VectorData:
cdef void append_data_{{dtype}}({{name}}VectorData *data,
{{c_type}} x) noexcept nogil:

data.data[data.n] = x
data.n += 1
data.data[data.size] = x
data.size += 1

{{endfor}}

Expand All @@ -164,7 +164,7 @@ ctypedef fused vector_data:
StringVectorData

cdef bint needs_resize(vector_data *data) noexcept nogil:
return data.n == data.m
return data.size == data.capacity
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know if `size` is ever incremented by more than 1 between checks, but using `>=` instead of `==` would probably be safer here.


# ----------------------------------------------------------------------
# Vector
Expand Down Expand Up @@ -213,14 +213,14 @@ cdef class {{name}}Vector(Vector):
sizeof({{name}}VectorData))
if not self.data:
raise MemoryError()
self.data.n = 0
self.data.m = _INIT_VEC_CAP
self.ao = np.empty(self.data.m, dtype=np.{{dtype}})
self.data.size = 0
self.data.capacity = _INIT_VEC_CAP
self.ao = np.empty(self.data.capacity, dtype=np.{{dtype}})
self.data.data = <{{c_type}}*>self.ao.data

cdef resize(self):
self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
self.ao.resize(self.data.m, refcheck=False)
self.data.capacity = max(self.data.capacity * 4, _INIT_VEC_CAP)
self.ao.resize(self.data.capacity, refcheck=False)
self.data.data = <{{c_type}}*>self.ao.data

def __dealloc__(self):
Expand All @@ -229,15 +229,15 @@ cdef class {{name}}Vector(Vector):
self.data = NULL

def __len__(self) -> int:
return self.data.n
return self.data.size

cpdef ndarray to_array(self):
if self.data.m != self.data.n:
if self.data.capacity != self.data.size:
if self.external_view_exists:
# should never happen
raise ValueError("should have raised on append()")
self.ao.resize(self.data.n, refcheck=False)
self.data.m = self.data.n
self.ao.resize(self.data.size, refcheck=False)
self.data.capacity = self.data.size
self.external_view_exists = True
return self.ao

Expand Down Expand Up @@ -266,25 +266,25 @@ cdef class StringVector(Vector):
self.data = <StringVectorData *>PyMem_Malloc(sizeof(StringVectorData))
if not self.data:
raise MemoryError()
self.data.n = 0
self.data.m = _INIT_VEC_CAP
self.data.data = <char **>malloc(self.data.m * sizeof(char *))
self.data.size = 0
self.data.capacity = _INIT_VEC_CAP
self.data.data = <char **>malloc(self.data.capacity * sizeof(char *))
if not self.data.data:
raise MemoryError()

cdef resize(self):
cdef:
char **orig_data
Py_ssize_t i, m
Py_ssize_t i, orig_capacity

m = self.data.m
self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
orig_capacity = self.data.capacity
self.data.capacity = max(self.data.capacity * 4, _INIT_VEC_CAP)

orig_data = self.data.data
self.data.data = <char **>malloc(self.data.m * sizeof(char *))
self.data.data = <char **>malloc(self.data.capacity * sizeof(char *))
if not self.data.data:
raise MemoryError()
for i in range(m):
for i in range(orig_capacity):
self.data.data[i] = orig_data[i]

def __dealloc__(self):
Expand All @@ -295,20 +295,20 @@ cdef class StringVector(Vector):
self.data = NULL

def __len__(self) -> int:
return self.data.n
return self.data.size

cpdef ndarray[object, ndim=1] to_array(self):
cdef:
ndarray ao
Py_ssize_t n
object val

ao = np.empty(self.data.n, dtype=object)
for i in range(self.data.n):
ao = np.empty(self.data.size, dtype=object)
for i in range(self.data.size):
val = self.data.data[i]
ao[i] = val
self.external_view_exists = True
self.data.m = self.data.n
self.data.capacity = self.data.size
return ao

cdef void append(self, char *x) noexcept:
Expand All @@ -327,37 +327,37 @@ cdef class ObjectVector(Vector):

cdef:
PyObject **data
Py_ssize_t n, m
Py_ssize_t size, capacity
ndarray ao

def __cinit__(self):
self.n = 0
self.m = _INIT_VEC_CAP
self.size = 0
self.capacity = _INIT_VEC_CAP
self.ao = np.empty(_INIT_VEC_CAP, dtype=object)
self.data = <PyObject**>self.ao.data

def __len__(self) -> int:
return self.n
return self.size

cdef append(self, object obj):
if self.n == self.m:
if self.size == self.capacity:
if self.external_view_exists:
raise ValueError("external reference but "
"Vector.resize() needed")
self.m = max(self.m * 2, _INIT_VEC_CAP)
self.ao.resize(self.m, refcheck=False)
self.capacity = max(self.capacity * 2, _INIT_VEC_CAP)
self.ao.resize(self.capacity, refcheck=False)
self.data = <PyObject**>self.ao.data

Py_INCREF(obj)
self.data[self.n] = <PyObject*>obj
self.n += 1
self.data[self.size] = <PyObject*>obj
self.size += 1

cpdef ndarray[object, ndim=1] to_array(self):
if self.m != self.n:
if self.capacity != self.size:
if self.external_view_exists:
raise ValueError("should have raised on append()")
self.ao.resize(self.n, refcheck=False)
self.m = self.n
self.ao.resize(self.size, refcheck=False)
self.capacity = self.size
self.external_view_exists = True
return self.ao

Expand Down
10 changes: 5 additions & 5 deletions pandas/_libs/intervaltree.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -145,12 +145,12 @@ cdef class IntervalTree(IntervalMixin):
# overflow -> no match, which is already handled below
pass

if result.data.n == old_len:
if result.data.size == old_len:
result.append(-1)
elif result.data.n > old_len + 1:
elif result.data.size > old_len + 1:
raise KeyError(
'indexer does not intersect a unique set of intervals')
old_len = result.data.n
old_len = result.data.size
return result.to_array().astype('intp')

def get_indexer_non_unique(self, ndarray[scalar_t, ndim=1] target):
Expand All @@ -172,10 +172,10 @@ cdef class IntervalTree(IntervalMixin):
# overflow -> no match, which is already handled below
pass

if result.data.n == old_len:
if result.data.size == old_len:
result.append(-1)
missing.append(i)
old_len = result.data.n
old_len = result.data.size
return (result.to_array().astype('intp'),
missing.to_array().astype('intp'))

Expand Down