diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index 8cbf5b8d97b70..adbe73aa5c5ef 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -68,6 +68,8 @@ class Iteration(object): def setup(self): self.df = DataFrame(randn(10000, 1000)) self.df2 = DataFrame(np.random.randn(50000, 10)) + self.df3 = pd.DataFrame(np.random.randn(1000,5000), + columns=['C'+str(c) for c in range(5000)]) def f(self): if hasattr(self.df, '_item_cache'): @@ -85,6 +87,11 @@ def time_iteritems(self): def time_iteritems_cached(self): self.g() + def time_iteritems_indexing(self): + df = self.df3 + for col in df: + df[col] + def time_itertuples(self): for row in self.df2.itertuples(): pass diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt index 72dbca223ef71..ef40faf5457e7 100644 --- a/doc/source/whatsnew/v0.19.2.txt +++ b/doc/source/whatsnew/v0.19.2.txt @@ -23,6 +23,7 @@ Performance Improvements - Improved performance of ``.replace()`` (:issue:`12745`) - Improved performance of ``PeriodIndex`` (:issue:`14822`) +- Performance regression in indexing with getitem (:issue:`14930`) - Improved performance ``Series`` creation with a datetime index and dictionary data (:issue:`14894`) .. _whatsnew_0192.enhancements.other: diff --git a/pandas/index.pyx b/pandas/index.pyx index a6eb74727a999..a245e85d80f96 100644 --- a/pandas/index.pyx +++ b/pandas/index.pyx @@ -82,20 +82,13 @@ cdef class IndexEngine: cdef: bint unique, monotonic_inc, monotonic_dec - bint initialized, monotonic_check, unique_check + bint need_monotonic_check, need_unique_check def __init__(self, vgetter, n): self.vgetter = vgetter self.over_size_threshold = n >= _SIZE_CUTOFF - - self.initialized = 0 - self.monotonic_check = 0 - self.unique_check = 0 - - self.unique = 0 - self.monotonic_inc = 0 - self.monotonic_dec = 0 + self.clear_mapping() def __contains__(self, object val): self._ensure_mapping_populated() @@ -213,16 +206,20 @@ cdef class IndexEngine: property is_unique: def __get__(self): - if not self.initialized: - self.initialize() + if self.need_unique_check: + self._do_unique_check() - self.unique_check = 1 return self.unique == 1 + cdef inline _do_unique_check(self): + + # this de-facto the same + self._ensure_mapping_populated() + property is_monotonic_increasing: def __get__(self): - if not self.monotonic_check: + if self.need_monotonic_check: self._do_monotonic_check() return self.monotonic_inc == 1 @@ -230,7 +227,7 @@ cdef class IndexEngine: property is_monotonic_decreasing: def __get__(self): - if not self.monotonic_check: + if self.need_monotonic_check: self._do_monotonic_check() return self.monotonic_dec == 1 @@ -246,13 +243,12 @@ cdef class IndexEngine: self.monotonic_dec = 0 is_unique = 0 - self.monotonic_check = 1 + self.need_monotonic_check = 0 # we can only be sure of uniqueness if is_unique=1 if is_unique: - self.initialized = 1 self.unique = 1 - self.unique_check = 1 + self.need_unique_check = 0 cdef _get_index_values(self): return self.vgetter() @@ -266,30 +262,32 @@ cdef class IndexEngine: cdef _check_type(self, object val): hash(val) + property is_mapping_populated: + + def __get__(self): + return self.mapping is not None + cdef inline _ensure_mapping_populated(self): - # need to reset if we have previously - # set the initialized from monotonic checks - if self.unique_check: - self.initialized = 0 - if not self.initialized: - self.initialize() - - cdef initialize(self): - values = self._get_index_values() + # this populates the mapping + # if its not already populated + # also satisfies the need_unique_check - self.mapping = self._make_hash_table(len(values)) - self.mapping.map_locations(values) + if not self.is_mapping_populated: - if len(self.mapping) == len(values): - self.unique = 1 + values = self._get_index_values() + + self.mapping = self._make_hash_table(len(values)) + self.mapping.map_locations(values) + + if len(self.mapping) == len(values): + self.unique = 1 - self.initialized = 1 + self.need_unique_check = 0 def clear_mapping(self): self.mapping = None - self.initialized = 0 - self.monotonic_check = 0 - self.unique_check = 0 + self.need_monotonic_check = 1 + self.need_unique_check = 1 self.unique = 0 self.monotonic_inc = 0