@@ -356,10 +356,10 @@ cdef class {{name}}HashTable(HashTable):
356
356
return np.asarray(locs)
357
357
358
358
@cython.boundscheck(False)
359
- def _unique (self, const {{dtype}}_t[:] values, {{name}}Vector uniques ,
360
- bint ignore_na=False , bint return_inverse =False,
361
- Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1 ,
362
- object na_value=None):
359
+ def _unique_with_inverse (self, const {{dtype}}_t[:] values,
360
+ {{name}}Vector uniques , bint ignore_na =False,
361
+ Py_ssize_t count_prior=0 ,
362
+ Py_ssize_t na_sentinel=-1, object na_value=None):
363
363
cdef:
364
364
Py_ssize_t i, idx, count = count_prior, n = len(values)
365
365
int64_t[:] labels
@@ -369,8 +369,7 @@ cdef class {{name}}HashTable(HashTable):
369
369
{{name}}VectorData *ud
370
370
bint use_na_value
371
371
372
- if return_inverse:
373
- labels = np.empty(n, dtype=np.int64)
372
+ labels = np.empty(n, dtype=np.int64)
374
373
ud = uniques.data
375
374
use_na_value = na_value is not None
376
375
@@ -394,11 +393,11 @@ cdef class {{name}}HashTable(HashTable):
394
393
continue
395
394
396
395
k = kh_get_{{dtype}}(self.table, val)
397
- if return_inverse and k != self.table.n_buckets:
396
+ if k != self.table.n_buckets:
398
397
# k falls into a previous bucket
399
398
idx = self.table.vals[k]
400
399
labels[i] = idx
401
- elif k == self.table.n_buckets :
400
+ else :
402
401
# k hasn't been seen yet
403
402
k = kh_put_{{dtype}}(self.table, val, &ret)
404
403
if needs_resize(ud):
@@ -409,14 +408,11 @@ cdef class {{name}}HashTable(HashTable):
409
408
"Vector.resize() needed")
410
409
uniques.resize()
411
410
append_data_{{dtype}}(ud, val)
412
- if return_inverse:
413
- self.table.vals[k] = count
414
- labels[i] = count
411
+ self.table.vals[k] = count
412
+ labels[i] = count
415
413
count += 1
416
414
417
- if return_inverse:
418
- return uniques.to_array(), np.asarray(labels)
419
- return uniques.to_array()
415
+ return uniques.to_array(), np.asarray(labels)
420
416
421
417
@cython.boundscheck(False)
422
418
def _unique_no_inverse(self, const {{dtype}}_t[:] values):
@@ -443,20 +439,21 @@ cdef class {{name}}HashTable(HashTable):
443
439
444
440
def unique(self, const {{dtype}}_t[:] values, bint return_inverse=False):
445
441
if return_inverse:
446
- return self._unique (values, uniques={{name}}Vector(), ignore_na=False ,
447
- return_inverse=True )
442
+ return self._unique_with_inverse (values, uniques={{name}}Vector(),
443
+ ignore_na=False )
448
444
return self._unique_no_inverse(values)
449
445
450
446
def factorize(self, {{dtype}}_t[:] values):
451
- return self._unique (values, uniques={{name}}Vector(), ignore_na=True ,
452
- return_inverse =True)
447
+ return self._unique_with_inverse (values, uniques={{name}}Vector(),
448
+ ignore_na =True)
453
449
454
450
def get_labels(self, const {{dtype}}_t[:] values, {{name}}Vector uniques,
455
451
Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
456
452
object na_value=None):
457
- _, labels = self._unique(values, uniques, ignore_na=True,
458
- return_inverse=True, count_prior=count_prior,
459
- na_sentinel=na_sentinel, na_value=na_value)
453
+ _, labels = self._unique_with_inverse(values, uniques, ignore_na=True,
454
+ count_prior=count_prior,
455
+ na_sentinel=na_sentinel,
456
+ na_value=na_value)
460
457
return labels
461
458
462
459
@cython.boundscheck(False)
@@ -645,10 +642,10 @@ cdef class StringHashTable(HashTable):
645
642
free(vecs)
646
643
647
644
@cython.boundscheck(False)
648
- def _unique (self, ndarray[object] values, ObjectVector uniques ,
649
- bint ignore_na=False , bint return_inverse =False,
650
- Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1 ,
651
- object na_value=None):
645
+ def _unique_with_inverse (self, ndarray[object] values,
646
+ ObjectVector uniques , bint ignore_na =False,
647
+ Py_ssize_t count_prior=0 ,
648
+ Py_ssize_t na_sentinel=-1, object na_value=None):
652
649
cdef:
653
650
Py_ssize_t i, idx, count = count_prior, n = len(values)
654
651
int64_t[:] labels
@@ -660,8 +657,7 @@ cdef class StringHashTable(HashTable):
660
657
khiter_t k
661
658
bint use_na_value
662
659
663
- if return_inverse:
664
- labels = np.zeros(n, dtype=np.int64)
660
+ labels = np.zeros(n, dtype=np.int64)
665
661
uindexer = np.empty(n, dtype=np.int64)
666
662
use_na_value = na_value is not None
667
663
@@ -686,17 +682,16 @@ cdef class StringHashTable(HashTable):
686
682
687
683
v = vecs[i]
688
684
k = kh_get_str(self.table, v)
689
- if return_inverse and k != self.table.n_buckets:
685
+ if k != self.table.n_buckets:
690
686
# k falls into a previous bucket
691
687
idx = self.table.vals[k]
692
688
labels[i] = <int64_t>idx
693
- elif k == self.table.n_buckets :
689
+ else :
694
690
# k hasn't been seen yet
695
691
k = kh_put_str(self.table, v, &ret)
696
692
uindexer[count] = i
697
- if return_inverse:
698
- self.table.vals[k] = count
699
- labels[i] = <int64_t>count
693
+ self.table.vals[k] = count
694
+ labels[i] = <int64_t>count
700
695
count += 1
701
696
702
697
free(vecs)
@@ -705,9 +700,7 @@ cdef class StringHashTable(HashTable):
705
700
for i in range(count):
706
701
uniques.append(values[uindexer[i]])
707
702
708
- if return_inverse:
709
- return uniques.to_array(), np.asarray(labels)
710
- return uniques.to_array()
703
+ return uniques.to_array(), np.asarray(labels)
711
704
712
705
@cython.boundscheck(False)
713
706
def _unique_no_inverse(self, ndarray[object] values):
@@ -745,20 +738,21 @@ cdef class StringHashTable(HashTable):
745
738
746
739
def unique(self, ndarray[object] values, bint return_inverse=False):
747
740
if return_inverse:
748
- return self._unique (values, uniques=ObjectVector(), ignore_na=False ,
749
- return_inverse=True )
741
+ return self._unique_with_inverse (values, uniques=ObjectVector(),
742
+ ignore_na=False )
750
743
return self._unique_no_inverse(values)
751
744
752
745
def factorize(self, ndarray[object] values):
753
- return self._unique (values, uniques=ObjectVector(), ignore_na=True ,
754
- return_inverse =True)
746
+ return self._unique_with_inverse (values, uniques=ObjectVector(),
747
+ ignore_na =True)
755
748
756
749
def get_labels(self, ndarray[object] values, ObjectVector uniques,
757
750
Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
758
751
object na_value=None):
759
- _, labels = self._unique(values, uniques, ignore_na=True,
760
- return_inverse=True, count_prior=count_prior,
761
- na_sentinel=na_sentinel, na_value=na_value)
752
+ _, labels = self._unique_with_inverse(values, uniques, ignore_na=True,
753
+ count_prior=count_prior,
754
+ na_sentinel=na_sentinel,
755
+ na_value=na_value)
762
756
return labels
763
757
764
758
@@ -848,10 +842,10 @@ cdef class PyObjectHashTable(HashTable):
848
842
return np.asarray(locs)
849
843
850
844
@cython.boundscheck(False)
851
- def _unique (self, ndarray[object] values, ObjectVector uniques ,
852
- bint ignore_na=False , bint return_inverse =False,
853
- Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1 ,
854
- object na_value=None):
845
+ def _unique_with_inverse (self, ndarray[object] values,
846
+ ObjectVector uniques , bint ignore_na =False,
847
+ Py_ssize_t count_prior=0 ,
848
+ Py_ssize_t na_sentinel=-1, object na_value=None):
855
849
cdef:
856
850
Py_ssize_t i, idx, count = count_prior, n = len(values)
857
851
int64_t[:] labels
@@ -860,8 +854,7 @@ cdef class PyObjectHashTable(HashTable):
860
854
khiter_t k
861
855
bint use_na_value
862
856
863
- if return_inverse:
864
- labels = np.empty(n, dtype=np.int64)
857
+ labels = np.empty(n, dtype=np.int64)
865
858
use_na_value = na_value is not None
866
859
867
860
for i in range(n):
@@ -874,22 +867,19 @@ cdef class PyObjectHashTable(HashTable):
874
867
continue
875
868
876
869
k = kh_get_pymap(self.table, <PyObject*>val)
877
- if return_inverse and k != self.table.n_buckets:
870
+ if k != self.table.n_buckets:
878
871
# k falls into a previous bucket
879
872
idx = self.table.vals[k]
880
873
labels[i] = <int64_t>idx
881
- elif k == self.table.n_buckets :
874
+ else :
882
875
# k hasn't been seen yet
883
876
k = kh_put_pymap(self.table, <PyObject*>val, &ret)
884
877
uniques.append(val)
885
- if return_inverse:
886
- self.table.vals[k] = count
887
- labels[i] = <int64_t>count
878
+ self.table.vals[k] = count
879
+ labels[i] = <int64_t>count
888
880
count += 1
889
881
890
- if return_inverse:
891
- return uniques.to_array(), np.asarray(labels)
892
- return uniques.to_array()
882
+ return uniques.to_array(), np.asarray(labels)
893
883
894
884
def _unique_no_inverse(self, ndarray[object] values):
895
885
# define separate functions without inverse for performance
@@ -910,18 +900,18 @@ cdef class PyObjectHashTable(HashTable):
910
900
911
901
def unique(self, ndarray[object] values, bint return_inverse=False):
912
902
if return_inverse:
913
- return self._unique (values, uniques=ObjectVector(), ignore_na=False ,
914
- return_inverse=True )
903
+ return self._unique_with_inverse (values, uniques=ObjectVector(),
904
+ ignore_na=False )
915
905
return self._unique_no_inverse(values)
916
906
917
907
def factorize(self, ndarray[object] values):
918
- return self._unique(values, uniques=ObjectVector(), ignore_na=True,
919
- return_inverse=True)
908
+ return self._unique_with_inverse(values, uniques=ObjectVector(), ignore_na=True)
920
909
921
910
def get_labels(self, ndarray[object] values, ObjectVector uniques,
922
911
Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
923
912
object na_value=None):
924
- _, labels = self._unique(values, uniques, ignore_na=True,
925
- return_inverse=True, count_prior=count_prior,
926
- na_sentinel=na_sentinel, na_value=na_value)
913
+ _, labels = self._unique_with_inverse(values, uniques, ignore_na=True,
914
+ count_prior=count_prior,
915
+ na_sentinel=na_sentinel,
916
+ na_value=na_value)
927
917
return labels
0 commit comments