Skip to content

Commit 5110eaf

Browse files
jreback authored and jorisvandenbossche committed
PERF: fix getitem unique_check / initialization issue
closes #14930
Author: Jeff Reback <[email protected]>
Closes #14933 from jreback/perf and squashes the following commits:
dc32b39 [Jeff Reback] PERF: fix getitem unique_check / initialization issue
(cherry picked from commit 07c83ee)
1 parent a8d8fae commit 5110eaf

File tree

2 files changed

+33
-34
lines changed

2 files changed

+33
-34
lines changed

doc/source/whatsnew/v0.19.2.txt

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Performance Improvements
2323

2424
- Improved performance of ``.replace()`` (:issue:`12745`)
2525
- Improved performance of ``PeriodIndex`` (:issue:`14822`)
26+
- Performance regression in indexing with getitem (:issue:`14930`)
2627
- Improved performance ``Series`` creation with a datetime index and dictionary data (:issue:`14894`)
2728

2829

pandas/index.pyx

+32-34
Original file line numberDiff line numberDiff line change
@@ -82,20 +82,13 @@ cdef class IndexEngine:
8282

8383
cdef:
8484
bint unique, monotonic_inc, monotonic_dec
85-
bint initialized, monotonic_check, unique_check
85+
bint need_monotonic_check, need_unique_check
8686

8787
def __init__(self, vgetter, n):
8888
self.vgetter = vgetter
8989

9090
self.over_size_threshold = n >= _SIZE_CUTOFF
91-
92-
self.initialized = 0
93-
self.monotonic_check = 0
94-
self.unique_check = 0
95-
96-
self.unique = 0
97-
self.monotonic_inc = 0
98-
self.monotonic_dec = 0
91+
self.clear_mapping()
9992

10093
def __contains__(self, object val):
10194
self._ensure_mapping_populated()
@@ -213,24 +206,28 @@ cdef class IndexEngine:
213206
property is_unique:
214207

215208
def __get__(self):
216-
if not self.initialized:
217-
self.initialize()
209+
if self.need_unique_check:
210+
self._do_unique_check()
218211

219-
self.unique_check = 1
220212
return self.unique == 1
221213

214+
cdef inline _do_unique_check(self):
215+
216+
# this is de-facto the same
217+
self._ensure_mapping_populated()
218+
222219
property is_monotonic_increasing:
223220

224221
def __get__(self):
225-
if not self.monotonic_check:
222+
if self.need_monotonic_check:
226223
self._do_monotonic_check()
227224

228225
return self.monotonic_inc == 1
229226

230227
property is_monotonic_decreasing:
231228

232229
def __get__(self):
233-
if not self.monotonic_check:
230+
if self.need_monotonic_check:
234231
self._do_monotonic_check()
235232

236233
return self.monotonic_dec == 1
@@ -246,13 +243,12 @@ cdef class IndexEngine:
246243
self.monotonic_dec = 0
247244
is_unique = 0
248245

249-
self.monotonic_check = 1
246+
self.need_monotonic_check = 0
250247

251248
# we can only be sure of uniqueness if is_unique=1
252249
if is_unique:
253-
self.initialized = 1
254250
self.unique = 1
255-
self.unique_check = 1
251+
self.need_unique_check = 0
256252

257253
cdef _get_index_values(self):
258254
return self.vgetter()
@@ -266,30 +262,32 @@ cdef class IndexEngine:
266262
cdef _check_type(self, object val):
267263
hash(val)
268264

265+
property is_mapping_populated:
266+
267+
def __get__(self):
268+
return self.mapping is not None
269+
269270
cdef inline _ensure_mapping_populated(self):
270-
# need to reset if we have previously
271-
# set the initialized from monotonic checks
272-
if self.unique_check:
273-
self.initialized = 0
274-
if not self.initialized:
275-
self.initialize()
276-
277-
cdef initialize(self):
278-
values = self._get_index_values()
271+
# this populates the mapping
272+
# if it's not already populated
273+
# also satisfies the need_unique_check
279274

280-
self.mapping = self._make_hash_table(len(values))
281-
self.mapping.map_locations(values)
275+
if not self.is_mapping_populated:
282276

283-
if len(self.mapping) == len(values):
284-
self.unique = 1
277+
values = self._get_index_values()
278+
279+
self.mapping = self._make_hash_table(len(values))
280+
self.mapping.map_locations(values)
281+
282+
if len(self.mapping) == len(values):
283+
self.unique = 1
285284

286-
self.initialized = 1
285+
self.need_unique_check = 0
287286

288287
def clear_mapping(self):
289288
self.mapping = None
290-
self.initialized = 0
291-
self.monotonic_check = 0
292-
self.unique_check = 0
289+
self.need_monotonic_check = 1
290+
self.need_unique_check = 1
293291

294292
self.unique = 0
295293
self.monotonic_inc = 0

0 commit comments

Comments
 (0)