File tree 3 files changed +22
-47
lines changed
3 files changed +22
-47
lines changed Original file line number Diff line number Diff line change @@ -465,6 +465,7 @@ Performance Improvements
465
465
- :func:`Series` / :func:`DataFrame` tab completion limits to 100 values, for better performance. (:issue:`18587`)
466
466
- Improved performance of :func:`DataFrame.median` with ``axis=1`` when bottleneck is not installed (:issue:`16468`)
467
467
- Improved performance of :func:`MultiIndex.get_loc` for large indexes, at the cost of a reduction in performance for small ones (:issue:`18519`)
468
+ - Improved performance of :func:`Index.get_loc` for unsorted, non-unique indexes (:issue:`19478`)
468
469
- Improved performance of pairwise ``.rolling()`` and ``.expanding()`` with ``.cov()`` and ``.corr()`` operations (:issue:`17917`)
469
470
470
471
.. _whatsnew_0230.docs:
Original file line number Diff line number Diff line change @@ -183,32 +183,20 @@ cdef class IndexEngine:
183
183
184
184
cdef _maybe_get_bool_indexer(self , object val):
185
185
cdef:
186
- ndarray[uint8_t] indexer
187
- ndarray[object ] values
188
- int count = 0
189
- Py_ssize_t i, n
190
- int last_true
186
+ ndarray[cnp.uint8_t, ndim= 1 , cast= True ] indexer
187
+ ndarray[int64_t, ndim= 1 ] found
188
+ int count
191
189
192
- values = np.array(self ._get_index_values(), copy = False )
193
- n = len (values)
194
-
195
- result = np.empty(n, dtype = bool )
196
- indexer = result.view(np.uint8)
190
+ indexer = self ._get_index_values() == val
191
+ found = np.where(indexer)[0 ]
192
+ count = len (found)
197
193
198
- for i in range (n):
199
- if values[i] == val:
200
- count += 1
201
- indexer[i] = 1
202
- last_true = i
203
- else :
204
- indexer[i] = 0
205
-
206
- if count == 0 :
207
- raise KeyError (val)
194
+ if count > 1 :
195
+ return indexer
208
196
if count == 1 :
209
- return last_true
197
+ return found[ 0 ]
210
198
211
- return result
199
+ raise KeyError (val)
212
200
213
201
def sizeof (self , deep = False ):
214
202
""" return the size of our mapping """
@@ -542,9 +530,6 @@ cdef class PeriodEngine(Int64Engine):
542
530
543
531
return super (PeriodEngine, self ).get_indexer_non_unique(ordinal_array)
544
532
545
- cdef _get_index_values_for_bool_indexer(self ):
546
- return self ._get_index_values().view(' i8' )
547
-
548
533
549
534
cpdef convert_scalar(ndarray arr, object value):
550
535
# we don't turn integers
Original file line number Diff line number Diff line change @@ -55,40 +55,29 @@ cdef class {{name}}Engine(IndexEngine):
55
55
56
56
cdef _maybe_get_bool_indexer(self, object val):
57
57
cdef:
58
- ndarray[uint8_t, cast=True] indexer
58
+ ndarray[cnp.uint8_t, ndim=1, cast=True] indexer
59
+ ndarray[int64_t, ndim=1] found
59
60
ndarray[{{ctype}}] values
60
61
int count = 0
61
- Py_ssize_t i, n
62
- int last_true
63
62
64
63
{{if name != 'Float64'}}
65
64
if not util.is_integer_object(val):
66
65
raise KeyError(val)
67
66
{{endif}}
68
67
69
- values = self._get_index_values_for_bool_indexer()
70
- n = len(values)
68
+ # Some subclasses, such as PeriodEngine, need their index values viewed as '{{dtype}}' before they are compared with val:
69
+ values = self._get_index_values().view('{{dtype}}')
70
+ indexer = values == val
71
+ found = np.where(indexer)[0]
72
+ count = len(found)
71
73
72
- result = np.empty(n, dtype=bool)
73
- indexer = result.view(np.uint8)
74
-
75
- for i in range(n):
76
- if values[i] == val:
77
- count += 1
78
- indexer[i] = 1
79
- last_true = i
80
- else:
81
- indexer[i] = 0
82
-
83
- if count == 0:
84
- raise KeyError(val)
74
+ if count > 1:
75
+ return indexer
85
76
if count == 1:
86
- return last_true
77
+ return found[0]
87
78
88
- return result
79
+ raise KeyError(val)
89
80
90
- cdef _get_index_values_for_bool_indexer(self):
91
- return self._get_index_values()
92
81
{{endif}}
93
82
94
83
{{endfor}}
You can’t perform that action at this time.
0 commit comments