@@ -8,7 +8,73 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
8
8
{{py:
9
9
10
10
# name
11
- cimported_types = ['float32',
11
+ complex_types = ['complex64',
12
+ 'complex128']
13
+ }}
14
+
15
+ {{for name in complex_types}}
16
+ cdef kh{{name}}_t to_kh{{name}}_t({{name}}_t val) nogil:  # copy val into the kh-prefixed complex type, field by field; declared nogil
17
+ cdef kh{{name}}_t res  # result value, populated below and returned
18
+ res.real = val.real  # real part carried over unchanged
19
+ res.imag = val.imag  # imaginary part carried over unchanged
20
+ return res
21
+
22
+
23
+ cdef {{name}}_t to_{{name}}(kh{{name}}_t val) nogil:  # inverse direction: copy a kh-prefixed complex value back into the plain complex type
24
+ cdef {{name}}_t res  # result value, populated below and returned
25
+ res.real = val.real  # real part carried over unchanged
26
+ res.imag = val.imag  # imaginary part carried over unchanged
27
+ return res
28
+
29
+ {{endfor}}
30
+
31
+
32
+ {{py:
33
+
34
+
35
+ # name
36
+ c_types = ['khcomplex128_t',
37
+ 'khcomplex64_t',
38
+ 'float64_t',
39
+ 'float32_t',
40
+ 'int64_t',
41
+ 'int32_t',
42
+ 'int16_t',
43
+ 'int8_t',
44
+ 'uint64_t',
45
+ 'uint32_t',
46
+ 'uint16_t',
47
+ 'uint8_t']
48
+ }}
49
+
50
+ {{for c_type in c_types}}
51
+
52
+ cdef bint is_nan_{{c_type}}({{c_type}} val) nogil:  # per-type NaN test; one variant is generated for every entry of c_types
53
+ {{if c_type in {'khcomplex128_t', 'khcomplex64_t'} }}
54
+ return val.real != val.real or val.imag != val.imag  # complex: true if either component compares unequal to itself
55
+ {{elif c_type in {'float64_t', 'float32_t'} }}
56
+ return val != val  # float: true only when val compares unequal to itself (self-inequality NaN test)
57
+ {{else}}
58
+ return False  # remaining (integer) types: always reports not-NaN
59
+ {{endif}}
60
+
61
+
62
+ {{if c_type in {'khcomplex128_t', 'khcomplex64_t', 'float64_t', 'float32_t'} }}
63
+ # are_equivalent_{{c_type}} is cimported via khash.pxd
64
+ {{else}}
65
+ cdef bint are_equivalent_{{c_type}}({{c_type}} val1, {{c_type}} val2) nogil:  # defined locally only for the non-float, non-complex entries of c_types
66
+ return val1 == val2  # plain equality comparison of the two values
67
+ {{endif}}
68
+
69
+ {{endfor}}
70
+
71
+
72
+ {{py:
73
+
74
+ # name
75
+ cimported_types = ['complex64',
76
+ 'complex128',
77
+ 'float32',
12
78
'float64',
13
79
'int8',
14
80
'int16',
@@ -32,6 +98,7 @@ from pandas._libs.khash cimport (
32
98
kh_put_{{name}},
33
99
kh_resize_{{name}},
34
100
)
101
+
35
102
{{endfor}}
36
103
37
104
# ----------------------------------------------------------------------
@@ -48,7 +115,9 @@ from pandas._libs.missing cimport C_NA
48
115
# but is included for completeness (rather ObjectVector is used
49
116
# for uniques in hashtables)
50
117
51
- dtypes = [('Float64', 'float64', 'float64_t'),
118
+ dtypes = [('Complex128', 'complex128', 'khcomplex128_t'),
119
+ ('Complex64', 'complex64', 'khcomplex64_t'),
120
+ ('Float64', 'float64', 'float64_t'),
52
121
('Float32', 'float32', 'float32_t'),
53
122
('Int64', 'int64', 'int64_t'),
54
123
('Int32', 'int32', 'int32_t'),
@@ -94,6 +163,8 @@ ctypedef fused vector_data:
94
163
UInt8VectorData
95
164
Float64VectorData
96
165
Float32VectorData
166
+ Complex128VectorData
167
+ Complex64VectorData
97
168
StringVectorData
98
169
99
170
cdef inline bint needs_resize(vector_data *data) nogil:
@@ -106,7 +177,9 @@ cdef inline bint needs_resize(vector_data *data) nogil:
106
177
{{py:
107
178
108
179
# name, dtype, c_type
109
- dtypes = [('Float64', 'float64', 'float64_t'),
180
+ dtypes = [('Complex128', 'complex128', 'khcomplex128_t'),
181
+ ('Complex64', 'complex64', 'khcomplex64_t'),
182
+ ('Float64', 'float64', 'float64_t'),
110
183
('UInt64', 'uint64', 'uint64_t'),
111
184
('Int64', 'int64', 'int64_t'),
112
185
('Float32', 'float32', 'float32_t'),
@@ -303,22 +376,24 @@ cdef class HashTable:
303
376
304
377
{{py:
305
378
306
- # name, dtype, float_group
307
- dtypes = [('Float64', 'float64', True),
308
- ('UInt64', 'uint64', False),
309
- ('Int64', 'int64', False),
310
- ('Float32', 'float32', True),
311
- ('UInt32', 'uint32', False),
312
- ('Int32', 'int32', False),
313
- ('UInt16', 'uint16', False),
314
- ('Int16', 'int16', False),
315
- ('UInt8', 'uint8', False),
316
- ('Int8', 'int8', False)]
379
+ # name, dtype, c_type, to_c_type
380
+ dtypes = [('Complex128', 'complex128', 'khcomplex128_t', 'to_khcomplex128_t'),
381
+ ('Float64', 'float64', 'float64_t', ''),
382
+ ('UInt64', 'uint64', 'uint64_t', ''),
383
+ ('Int64', 'int64', 'int64_t', ''),
384
+ ('Complex64', 'complex64', 'khcomplex64_t', 'to_khcomplex64_t'),
385
+ ('Float32', 'float32', 'float32_t', ''),
386
+ ('UInt32', 'uint32', 'uint32_t', ''),
387
+ ('Int32', 'int32', 'int32_t', ''),
388
+ ('UInt16', 'uint16', 'uint16_t', ''),
389
+ ('Int16', 'int16', 'int16_t', ''),
390
+ ('UInt8', 'uint8', 'uint8_t', ''),
391
+ ('Int8', 'int8', 'int8_t', '')]
317
392
318
393
}}
319
394
320
395
321
- {{for name, dtype, float_group in dtypes}}
396
+ {{for name, dtype, c_type, to_c_type in dtypes}}
322
397
323
398
cdef class {{name}}HashTable(HashTable):
324
399
@@ -339,7 +414,9 @@ cdef class {{name}}HashTable(HashTable):
339
414
def __contains__(self, object key):
340
415
cdef:
341
416
khiter_t k
342
- k = kh_get_{{dtype}}(self.table, key)
417
+ {{c_type}} ckey
418
+ ckey = {{to_c_type}}(key)
419
+ k = kh_get_{{dtype}}(self.table, ckey)
343
420
return k != self.table.n_buckets
344
421
345
422
def sizeof(self, deep=False):
@@ -353,7 +430,9 @@ cdef class {{name}}HashTable(HashTable):
353
430
cpdef get_item(self, {{dtype}}_t val):
354
431
cdef:
355
432
khiter_t k
356
- k = kh_get_{{dtype}}(self.table, val)
433
+ {{c_type}} cval
434
+ cval = {{to_c_type}}(val)
435
+ k = kh_get_{{dtype}}(self.table, cval)
357
436
if k != self.table.n_buckets:
358
437
return self.table.vals[k]
359
438
else:
@@ -363,9 +442,9 @@ cdef class {{name}}HashTable(HashTable):
363
442
cdef:
364
443
khiter_t k
365
444
int ret = 0
366
-
367
- k = kh_put_{{dtype }}(self.table, key, &ret )
368
- self.table.keys[k] = key
445
+ {{c_type}} ckey
446
+ ckey = {{to_c_type }}(key)
447
+ k = kh_put_{{dtype}}( self.table, ckey, &ret)
369
448
if kh_exist_{{dtype}}(self.table, k):
370
449
self.table.vals[k] = val
371
450
else:
@@ -376,12 +455,12 @@ cdef class {{name}}HashTable(HashTable):
376
455
cdef:
377
456
Py_ssize_t i, n = len(values)
378
457
int ret = 0
379
- {{dtype}}_t key
458
+ {{c_type}} key
380
459
khiter_t k
381
460
382
461
with nogil:
383
462
for i in range(n):
384
- key = keys[i]
463
+ key = {{to_c_type}}( keys[i])
385
464
k = kh_put_{{dtype}}(self.table, key, &ret)
386
465
self.table.vals[k] = <Py_ssize_t>values[i]
387
466
@@ -390,12 +469,12 @@ cdef class {{name}}HashTable(HashTable):
390
469
cdef:
391
470
Py_ssize_t i, n = len(values)
392
471
int ret = 0
393
- {{dtype}}_t val
472
+ {{c_type}} val
394
473
khiter_t k
395
474
396
475
with nogil:
397
476
for i in range(n):
398
- val = values[i]
477
+ val= {{to_c_type}}( values[i])
399
478
k = kh_put_{{dtype}}(self.table, val, &ret)
400
479
self.table.vals[k] = i
401
480
@@ -404,13 +483,13 @@ cdef class {{name}}HashTable(HashTable):
404
483
cdef:
405
484
Py_ssize_t i, n = len(values)
406
485
int ret = 0
407
- {{dtype}}_t val
486
+ {{c_type}} val
408
487
khiter_t k
409
488
intp_t[:] locs = np.empty(n, dtype=np.intp)
410
489
411
490
with nogil:
412
491
for i in range(n):
413
- val = values[i]
492
+ val = {{to_c_type}}( values[i])
414
493
k = kh_get_{{dtype}}(self.table, val)
415
494
if k != self.table.n_buckets:
416
495
locs[i] = self.table.vals[k]
@@ -466,7 +545,7 @@ cdef class {{name}}HashTable(HashTable):
466
545
Py_ssize_t i, idx, count = count_prior, n = len(values)
467
546
int64_t[:] labels
468
547
int ret = 0
469
- {{dtype}}_t val, na_value2
548
+ {{c_type}} val, na_value2
470
549
khiter_t k
471
550
{{name}}VectorData *ud
472
551
bint use_na_value, use_mask
@@ -487,23 +566,21 @@ cdef class {{name}}HashTable(HashTable):
487
566
# We use None, to make it optional, which requires `object` type
488
567
# for the parameter. To please the compiler, we use na_value2,
489
568
# which is only used if it's *specified*.
490
- na_value2 = <{{dtype}}_t> na_value
569
+ na_value2 = {{to_c_type}}( na_value)
491
570
else:
492
- na_value2 = 0
571
+ na_value2 = {{to_c_type}}(0)
493
572
494
573
with nogil:
495
574
for i in range(n):
496
- val = values[i]
575
+ val = {{to_c_type}}( values[i])
497
576
498
577
if ignore_na and use_mask:
499
578
if mask_values[i]:
500
579
labels[i] = na_sentinel
501
580
continue
502
581
elif ignore_na and (
503
- {{if not name.lower().startswith(("uint", "int"))}}
504
- val != val or
505
- {{endif}}
506
- (use_na_value and val == na_value2)
582
+ is_nan_{{c_type}}(val) or
583
+ (use_na_value and are_equivalent_{{c_type}}(val, na_value2))
507
584
):
508
585
# if missing values do not count as unique values (i.e. if
509
586
# ignore_na is True), skip the hashtable entry for them,
@@ -606,14 +683,15 @@ cdef class {{name}}HashTable(HashTable):
606
683
ignore_na=True, return_inverse=True)
607
684
return labels
608
685
686
+ {{if dtype == 'int64'}}
609
687
@cython.boundscheck(False)
610
688
def get_labels_groupby(self, const {{dtype}}_t[:] values):
611
689
cdef:
612
690
Py_ssize_t i, n = len(values)
613
691
intp_t[:] labels
614
692
Py_ssize_t idx, count = 0
615
693
int ret = 0
616
- {{dtype}}_t val
694
+ {{c_type}} val
617
695
khiter_t k
618
696
{{name}}Vector uniques = {{name}}Vector()
619
697
{{name}}VectorData *ud
@@ -623,14 +701,12 @@ cdef class {{name}}HashTable(HashTable):
623
701
624
702
with nogil:
625
703
for i in range(n):
626
- val = values[i]
704
+ val = {{to_c_type}}( values[i])
627
705
628
706
# specific for groupby
629
- {{if dtype != 'uint64'}}
630
707
if val < 0:
631
708
labels[i] = -1
632
709
continue
633
- {{endif}}
634
710
635
711
k = kh_get_{{dtype}}(self.table, val)
636
712
if k != self.table.n_buckets:
@@ -650,6 +726,7 @@ cdef class {{name}}HashTable(HashTable):
650
726
arr_uniques = uniques.to_array()
651
727
652
728
return np.asarray(labels), arr_uniques
729
+ {{endif}}
653
730
654
731
{{endfor}}
655
732
@@ -698,7 +775,6 @@ cdef class StringHashTable(HashTable):
698
775
v = get_c_string(key)
699
776
700
777
k = kh_put_str(self.table, v, &ret)
701
- self.table.keys[k] = v
702
778
if kh_exist_str(self.table, k):
703
779
self.table.vals[k] = val
704
780
else:
@@ -1022,7 +1098,6 @@ cdef class PyObjectHashTable(HashTable):
1022
1098
hash(key)
1023
1099
1024
1100
k = kh_put_pymap(self.table, <PyObject*>key, &ret)
1025
- # self.table.keys[k] = key
1026
1101
if kh_exist_pymap(self.table, k):
1027
1102
self.table.vals[k] = val
1028
1103
else:
0 commit comments