Skip to content

Commit 36626d8

Browse files
committed
Apply modern fix for #18825
1 parent d1756fd commit 36626d8

File tree

1 file changed

+39
-59
lines changed

1 file changed

+39
-59
lines changed

pandas/_libs/hashtable_class_helper.pxi.in

+39 -59
Original file line number | Diff line number | Diff line change
@@ -253,56 +253,10 @@ dtypes = [('Float64', 'float64', True, 'nan'),
253253
('UInt64', 'uint64', False, 0),
254254
('Int64', 'int64', False, 'iNaT')]
255255

256-
def get_dispatch(dtypes):
257-
for (name, dtype, float_group, default_na_value) in dtypes:
258-
unique_template = """\
259-
cdef:
260-
Py_ssize_t i, n = len(values)
261-
int ret = 0
262-
{dtype}_t val
263-
khiter_t k
264-
bint seen_na = 0
265-
{name}Vector uniques = {name}Vector()
266-
{name}VectorData *ud
267-
268-
ud = uniques.data
269-
270-
with nogil:
271-
for i in range(n):
272-
val = values[i]
273-
IF {float_group}:
274-
if val == val:
275-
k = kh_get_{dtype}(self.table, val)
276-
if k == self.table.n_buckets:
277-
kh_put_{dtype}(self.table, val, &ret)
278-
if needs_resize(ud):
279-
with gil:
280-
uniques.resize()
281-
append_data_{dtype}(ud, val)
282-
elif not seen_na:
283-
seen_na = 1
284-
if needs_resize(ud):
285-
with gil:
286-
uniques.resize()
287-
append_data_{dtype}(ud, NAN)
288-
ELSE:
289-
k = kh_get_{dtype}(self.table, val)
290-
if k == self.table.n_buckets:
291-
kh_put_{dtype}(self.table, val, &ret)
292-
if needs_resize(ud):
293-
with gil:
294-
uniques.resize()
295-
append_data_{dtype}(ud, val)
296-
return uniques.to_array()
297-
"""
298-
299-
unique_template = unique_template.format(name=name, dtype=dtype, float_group=float_group)
300-
301-
yield (name, dtype, float_group, default_na_value, unique_template)
302256
}}
303257

304258

305-
{{for name, dtype, float_group, default_na_value, unique_template in get_dispatch(dtypes)}}
259+
{{for name, dtype, float_group, default_na_value in dtypes}}
306260

307261
cdef class {{name}}HashTable(HashTable):
308262

@@ -506,20 +460,46 @@ cdef class {{name}}HashTable(HashTable):
506460
return np.asarray(labels), arr_uniques
507461

508462
@cython.boundscheck(False)
509-
def unique(self, ndarray[{{dtype}}_t, ndim=1] values):
510-
if values.flags.writeable:
511-
# If the value is writeable (mutable) then use memview
512-
return self.unique_memview(values)
463+
def unique(self, const {{dtype}}_t[:] values):
464+
cdef:
465+
Py_ssize_t i, n = len(values)
466+
int ret = 0
467+
{{dtype}}_t val
468+
khiter_t k
469+
bint seen_na = 0
470+
{{name}}Vector uniques = {{name}}Vector()
471+
{{name}}VectorData *ud
513472

514-
# We cannot use the memoryview version on readonly-buffers due to
515-
# a limitation of Cython's typed memoryviews. Instead we can use
516-
# the slightly slower Cython ndarray type directly.
517-
# see https://github.com/cython/cython/issues/1605
518-
{{unique_template}}
473+
ud = uniques.data
519474

520-
@cython.boundscheck(False)
521-
def unique_memview(self, const {{dtype}}_t[:] values):
522-
{{unique_template}}
475+
with nogil:
476+
for i in range(n):
477+
val = values[i]
478+
{{if float_group}}
479+
if val == val:
480+
k = kh_get_{{dtype}}(self.table, val)
481+
if k == self.table.n_buckets:
482+
kh_put_{{dtype}}(self.table, val, &ret)
483+
if needs_resize(ud):
484+
with gil:
485+
uniques.resize()
486+
append_data_{{dtype}}(ud, val)
487+
elif not seen_na:
488+
seen_na = 1
489+
if needs_resize(ud):
490+
with gil:
491+
uniques.resize()
492+
append_data_{{dtype}}(ud, NAN)
493+
{{else}}
494+
k = kh_get_{{dtype}}(self.table, val)
495+
if k == self.table.n_buckets:
496+
kh_put_{{dtype}}(self.table, val, &ret)
497+
if needs_resize(ud):
498+
with gil:
499+
uniques.resize()
500+
append_data_{{dtype}}(ud, val)
501+
{{endif}}
502+
return uniques.to_array()
523503

524504
{{endfor}}
525505

0 commit comments

Comments
 (0)