@@ -1121,11 +1121,13 @@ cdef class StringHashTable(HashTable):
1121
1121
const char **vecs
1122
1122
khiter_t k
1123
1123
bint use_na_value
1124
+ bint non_null_na_value
1124
1125
1125
1126
if return_inverse:
1126
1127
labels = np.zeros(n, dtype=np.intp)
1127
1128
uindexer = np.empty(n, dtype=np.int64)
1128
1129
use_na_value = na_value is not None
1130
+ non_null_na_value = not checknull(na_value)
1129
1131
1130
1132
# assign pointers and pre-filter out missing (if ignore_na)
1131
1133
vecs = <const char **>malloc(n * sizeof(char *))
@@ -1134,7 +1136,12 @@ cdef class StringHashTable(HashTable):
1134
1136
1135
1137
if (ignore_na
1136
1138
and (not isinstance(val, str)
1137
- or (use_na_value and val == na_value))):
1139
+ or (use_na_value and (
1140
+ (non_null_na_value and val == na_value) or
1141
+ (not non_null_na_value and is_matching_na(val, na_value)))
1142
+ )
1143
+ )
1144
+ ):
1138
1145
# if missing values do not count as unique values (i.e. if
1139
1146
# ignore_na is True), we can skip the actual value, and
1140
1147
# replace the label with na_sentinel directly
@@ -1400,18 +1407,23 @@ cdef class PyObjectHashTable(HashTable):
1400
1407
object val
1401
1408
khiter_t k
1402
1409
bint use_na_value
1403
-
1410
+ bint non_null_na_value
1404
1411
if return_inverse:
1405
1412
labels = np.empty(n, dtype=np.intp)
1406
1413
use_na_value = na_value is not None
1414
+ non_null_na_value = not checknull(na_value)
1407
1415
1408
1416
for i in range(n):
1409
1417
val = values[i]
1410
1418
hash(val)
1411
1419
1412
1420
if ignore_na and (
1413
1421
checknull(val)
1414
- or (use_na_value and val == na_value)
1422
+ or (use_na_value and (
1423
+ (non_null_na_value and val == na_value) or
1424
+ (not non_null_na_value and is_matching_na(val, na_value))
1425
+ )
1426
+ )
1415
1427
):
1416
1428
# if missing values do not count as unique values (i.e. if
1417
1429
# ignore_na is True), skip the hashtable entry for them, and
0 commit comments