@@ -358,8 +358,7 @@ cdef class {{name}}HashTable(HashTable):
358
358
@cython.wraparound(False)
359
359
def _unique(self, const {{dtype}}_t[:] values, {{name}}Vector uniques,
360
360
Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
361
- object na_value=None, bint ignore_na=False,
362
- bint return_inverse=False):
361
+ object na_value=None, bint ignore_na=False):
363
362
"""
364
363
Calculate unique values and labels (no sorting!)
365
364
@@ -382,15 +381,12 @@ cdef class {{name}}HashTable(HashTable):
382
381
Whether NA-values should be ignored for calculating the uniques. If
383
382
True, the labels corresponding to missing values will be set to
384
383
na_sentinel.
385
- return_inverse : boolean, default False
386
- Whether the mapping of the original array values to their location
387
- in the vector of uniques should be returned.
388
384
389
385
Returns
390
386
-------
391
387
uniques : ndarray[{{dtype}}]
392
388
Unique values of input, not sorted
393
- labels : ndarray[int64] (if return_inverse=True)
389
+ labels : ndarray[int64]
394
390
The labels from values to uniques
395
391
"""
396
392
cdef:
@@ -402,8 +398,7 @@ cdef class {{name}}HashTable(HashTable):
402
398
{{name}}VectorData *ud
403
399
bint use_na_value
404
400
405
- if return_inverse:
406
- labels = np.empty(n, dtype=np.int64)
401
+ labels = np.empty(n, dtype=np.int64)
407
402
ud = uniques.data
408
403
use_na_value = na_value is not None
409
404
@@ -440,19 +435,15 @@ cdef class {{name}}HashTable(HashTable):
440
435
"Vector.resize() needed")
441
436
uniques.resize()
442
437
append_data_{{dtype}}(ud, val)
443
- if return_inverse:
444
- self.table.vals[k] = count
445
- labels[i] = count
446
- count += 1
447
- elif return_inverse:
438
+ self.table.vals[k] = count
439
+ labels[i] = count
440
+ count += 1
441
+ else:
448
442
# k falls into a previous bucket
449
- # only relevant in case we need to construct the inverse
450
443
idx = self.table.vals[k]
451
444
labels[i] = idx
452
445
453
- if return_inverse:
454
- return uniques.to_array(), np.asarray(labels)
455
- return uniques.to_array()
446
+ return uniques.to_array(), np.asarray(labels)
456
447
457
448
def unique(self, const {{dtype}}_t[:] values, bint return_inverse=False):
458
449
"""
@@ -474,8 +465,10 @@ cdef class {{name}}HashTable(HashTable):
474
465
The labels from values to uniques
475
466
"""
476
467
uniques = {{name}}Vector()
477
- return self._unique(values, uniques, ignore_na=False,
478
- return_inverse=return_inverse)
468
+ uniques, inverse = self._unique(values, uniques, ignore_na=False)
469
+ if return_inverse:
470
+ return uniques, inverse
471
+ return uniques
479
472
480
473
def factorize(self, const {{dtype}}_t[:] values, Py_ssize_t na_sentinel=-1,
481
474
object na_value=None):
@@ -507,8 +500,7 @@ cdef class {{name}}HashTable(HashTable):
507
500
uniques_vector = {{name}}Vector()
508
501
uniques, labels = self._unique(values, uniques_vector,
509
502
na_sentinel=na_sentinel,
510
- na_value=na_value, ignore_na=True,
511
- return_inverse=True)
503
+ na_value=na_value, ignore_na=True)
512
504
# factorize has reversed outputs compared to _unique
513
505
return labels, uniques
514
506
@@ -517,7 +509,7 @@ cdef class {{name}}HashTable(HashTable):
517
509
object na_value=None):
518
510
_, labels = self._unique(values, uniques, count_prior=count_prior,
519
511
na_sentinel=na_sentinel, na_value=na_value,
520
- ignore_na=True, return_inverse=True )
512
+ ignore_na=True)
521
513
return labels
522
514
523
515
@cython.boundscheck(False)
@@ -709,8 +701,7 @@ cdef class StringHashTable(HashTable):
709
701
@cython.wraparound(False)
710
702
def _unique(self, ndarray[object] values, ObjectVector uniques,
711
703
Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
712
- object na_value=None, bint ignore_na=False,
713
- bint return_inverse=False):
704
+ object na_value=None, bint ignore_na=False):
714
705
"""
715
706
Calculate unique values and labels (no sorting!)
716
707
@@ -733,15 +724,12 @@ cdef class StringHashTable(HashTable):
733
724
Whether NA-values should be ignored for calculating the uniques. If
734
725
True, the labels corresponding to missing values will be set to
735
726
na_sentinel.
736
- return_inverse : boolean, default False
737
- Whether the mapping of the original array values to their location
738
- in the vector of uniques should be returned.
739
727
740
728
Returns
741
729
-------
742
730
uniques : ndarray[object]
743
731
Unique values of input, not sorted
744
- labels : ndarray[int64] (if return_inverse=True)
732
+ labels : ndarray[int64]
745
733
The labels from values to uniques
746
734
"""
747
735
cdef:
@@ -755,8 +743,7 @@ cdef class StringHashTable(HashTable):
755
743
khiter_t k
756
744
bint use_na_value
757
745
758
- if return_inverse:
759
- labels = np.zeros(n, dtype=np.int64)
746
+ labels = np.zeros(n, dtype=np.int64)
760
747
uindexer = np.empty(n, dtype=np.int64)
761
748
use_na_value = na_value is not None
762
749
@@ -787,13 +774,11 @@ cdef class StringHashTable(HashTable):
787
774
# k hasn't been seen yet
788
775
k = kh_put_str(self.table, v, &ret)
789
776
uindexer[count] = i
790
- if return_inverse:
791
- self.table.vals[k] = count
792
- labels[i] = <int64_t>count
777
+ self.table.vals[k] = count
778
+ labels[i] = <int64_t>count
793
779
count += 1
794
- elif return_inverse :
780
+ else :
795
781
# k falls into a previous bucket
796
- # only relevant in case we need to construct the inverse
797
782
idx = self.table.vals[k]
798
783
labels[i] = <int64_t>idx
799
784
@@ -803,9 +788,7 @@ cdef class StringHashTable(HashTable):
803
788
for i in range(count):
804
789
uniques.append(values[uindexer[i]])
805
790
806
- if return_inverse:
807
- return uniques.to_array(), np.asarray(labels)
808
- return uniques.to_array()
791
+ return uniques.to_array(), np.asarray(labels)
809
792
810
793
def unique(self, ndarray[object] values, bint return_inverse=False):
811
794
"""
@@ -827,8 +810,10 @@ cdef class StringHashTable(HashTable):
827
810
The labels from values to uniques
828
811
"""
829
812
uniques = ObjectVector()
830
- return self._unique(values, uniques, ignore_na=False,
831
- return_inverse=return_inverse)
813
+ uniques, inverse = self._unique(values, uniques, ignore_na=False)
814
+ if return_inverse:
815
+ return uniques, inverse
816
+ return uniques
832
817
833
818
def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel=-1,
834
819
object na_value=None):
@@ -860,8 +845,7 @@ cdef class StringHashTable(HashTable):
860
845
uniques_vector = ObjectVector()
861
846
uniques, labels = self._unique(values, uniques_vector,
862
847
na_sentinel=na_sentinel,
863
- na_value=na_value, ignore_na=True,
864
- return_inverse=True)
848
+ na_value=na_value, ignore_na=True)
865
849
# factorize has reversed outputs compared to _unique
866
850
return labels, uniques
867
851
@@ -870,7 +854,7 @@ cdef class StringHashTable(HashTable):
870
854
object na_value=None):
871
855
_, labels = self._unique(values, uniques, count_prior=count_prior,
872
856
na_sentinel=na_sentinel, na_value=na_value,
873
- ignore_na=True, return_inverse=True )
857
+ ignore_na=True)
874
858
return labels
875
859
876
860
@@ -963,8 +947,7 @@ cdef class PyObjectHashTable(HashTable):
963
947
@cython.wraparound(False)
964
948
def _unique(self, ndarray[object] values, ObjectVector uniques,
965
949
Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
966
- object na_value=None, bint ignore_na=False,
967
- bint return_inverse=False):
950
+ object na_value=None, bint ignore_na=False):
968
951
"""
969
952
Calculate unique values and labels (no sorting!)
970
953
@@ -987,15 +970,12 @@ cdef class PyObjectHashTable(HashTable):
987
970
Whether NA-values should be ignored for calculating the uniques. If
988
971
True, the labels corresponding to missing values will be set to
989
972
na_sentinel.
990
- return_inverse : boolean, default False
991
- Whether the mapping of the original array values to their location
992
- in the vector of uniques should be returned.
993
973
994
974
Returns
995
975
-------
996
976
uniques : ndarray[object]
997
977
Unique values of input, not sorted
998
- labels : ndarray[int64] (if return_inverse=True)
978
+ labels : ndarray[int64]
999
979
The labels from values to uniques
1000
980
"""
1001
981
cdef:
@@ -1006,8 +986,7 @@ cdef class PyObjectHashTable(HashTable):
1006
986
khiter_t k
1007
987
bint use_na_value
1008
988
1009
- if return_inverse:
1010
- labels = np.empty(n, dtype=np.int64)
989
+ labels = np.empty(n, dtype=np.int64)
1011
990
use_na_value = na_value is not None
1012
991
1013
992
for i in range(n):
@@ -1024,19 +1003,15 @@ cdef class PyObjectHashTable(HashTable):
1024
1003
# k hasn't been seen yet
1025
1004
k = kh_put_pymap(self.table, <PyObject*>val, &ret)
1026
1005
uniques.append(val)
1027
- if return_inverse:
1028
- self.table.vals[k] = count
1029
- labels[i] = count
1030
- count += 1
1031
- elif return_inverse:
1006
+ self.table.vals[k] = count
1007
+ labels[i] = count
1008
+ count += 1
1009
+ else:
1032
1010
# k falls into a previous bucket
1033
- # only relevant in case we need to construct the inverse
1034
1011
idx = self.table.vals[k]
1035
1012
labels[i] = idx
1036
1013
1037
- if return_inverse:
1038
- return uniques.to_array(), np.asarray(labels)
1039
- return uniques.to_array()
1014
+ return uniques.to_array(), np.asarray(labels)
1040
1015
1041
1016
def unique(self, ndarray[object] values, bint return_inverse=False):
1042
1017
"""
@@ -1058,8 +1033,10 @@ cdef class PyObjectHashTable(HashTable):
1058
1033
The labels from values to uniques
1059
1034
"""
1060
1035
uniques = ObjectVector()
1061
- return self._unique(values, uniques, ignore_na=False,
1062
- return_inverse=return_inverse)
1036
+ uniques, inverse = self._unique(values, uniques, ignore_na=False)
1037
+ if return_inverse:
1038
+ return uniques, inverse
1039
+ return uniques
1063
1040
1064
1041
def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel=-1,
1065
1042
object na_value=None):
@@ -1091,8 +1068,7 @@ cdef class PyObjectHashTable(HashTable):
1091
1068
uniques_vector = ObjectVector()
1092
1069
uniques, labels = self._unique(values, uniques_vector,
1093
1070
na_sentinel=na_sentinel,
1094
- na_value=na_value, ignore_na=True,
1095
- return_inverse=True)
1071
+ na_value=na_value, ignore_na=True)
1096
1072
# factorize has reversed outputs compared to _unique
1097
1073
return labels, uniques
1098
1074
@@ -1101,5 +1077,5 @@ cdef class PyObjectHashTable(HashTable):
1101
1077
object na_value=None):
1102
1078
_, labels = self._unique(values, uniques, count_prior=count_prior,
1103
1079
na_sentinel=na_sentinel, na_value=na_value,
1104
- ignore_na=True, return_inverse=True )
1080
+ ignore_na=True)
1105
1081
return labels
0 commit comments