Skip to content

Commit 3f79857

Browse files
committed
BUG: don't mangle NaN-float-values and pd.NaT (GH 22295)
This is more or less the clean-up after PR pandas-dev#21904 and PR pandas-dev#22207: the underlying hash map handles all cases correctly out of the box, so no special handling is needed.
1 parent 0370740 commit 3f79857

File tree

1 file changed

+4
-30
lines changed

1 file changed

+4
-30
lines changed

pandas/_libs/hashtable_class_helper.pxi.in

+4-30
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,6 @@ cdef class {{name}}HashTable(HashTable):
466466
int ret = 0
467467
{{dtype}}_t val
468468
khiter_t k
469-
bint seen_na = 0
470469
{{name}}Vector uniques = {{name}}Vector()
471470
{{name}}VectorData *ud
472471

@@ -475,30 +474,13 @@ cdef class {{name}}HashTable(HashTable):
475474
with nogil:
476475
for i in range(n):
477476
val = values[i]
478-
{{if float_group}}
479-
if val == val:
480-
k = kh_get_{{dtype}}(self.table, val)
481-
if k == self.table.n_buckets:
482-
kh_put_{{dtype}}(self.table, val, &ret)
483-
if needs_resize(ud):
484-
with gil:
485-
uniques.resize()
486-
append_data_{{dtype}}(ud, val)
487-
elif not seen_na:
488-
seen_na = 1
489-
if needs_resize(ud):
490-
with gil:
491-
uniques.resize()
492-
append_data_{{dtype}}(ud, NAN)
493-
{{else}}
494477
k = kh_get_{{dtype}}(self.table, val)
495478
if k == self.table.n_buckets:
496479
kh_put_{{dtype}}(self.table, val, &ret)
497480
if needs_resize(ud):
498481
with gil:
499482
uniques.resize()
500483
append_data_{{dtype}}(ud, val)
501-
{{endif}}
502484
return uniques.to_array()
503485

504486
{{endfor}}
@@ -848,19 +830,11 @@ cdef class PyObjectHashTable(HashTable):
848830
for i in range(n):
849831
val = values[i]
850832
hash(val)
833+
k = kh_get_pymap(self.table, <PyObject*>val)
834+
if k == self.table.n_buckets:
835+
kh_put_pymap(self.table, <PyObject*>val, &ret)
836+
uniques.append(val)
851837

852-
# `val is None` below is exception to prevent mangling of None and
853-
# other NA values; note however that other NA values (ex: pd.NaT
854-
# and np.nan) will still get mangled, so may not be a permanent
855-
# solution; see GH 20866
856-
if not checknull(val) or val is None:
857-
k = kh_get_pymap(self.table, <PyObject*>val)
858-
if k == self.table.n_buckets:
859-
kh_put_pymap(self.table, <PyObject*>val, &ret)
860-
uniques.append(val)
861-
elif not seen_na:
862-
seen_na = 1
863-
uniques.append(nan)
864838

865839
return uniques.to_array()
866840

0 commit comments

Comments
 (0)